root/tools/perf/bench/futex-wake-parallel.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. bench_futex_wake_parallel
  2. waking_workerfn
  3. wakeup_threads
  4. blocked_workerfn
  5. block_threads
  6. print_run
  7. print_summary
  8. do_run_stats
  9. toggle_done
  10. bench_futex_wake_parallel

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2015 Davidlohr Bueso.
   4  *
   5  * Block a bunch of threads and let parallel waker threads wakeup an
   6  * equal amount of them. The program output reflects the avg latency
   7  * for each individual thread to service its share of work. Ultimately
   8  * it can be used to measure futex_wake() changes.
   9  */
  10 #include "bench.h"
  11 #include <linux/compiler.h>
  12 #include "../util/debug.h"
  13 
  14 #ifndef HAVE_PTHREAD_BARRIER
  15 int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
  16 {
  17         pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
  18         return 0;
  19 }
  20 #else /* HAVE_PTHREAD_BARRIER */
  21 /* For the CLR_() macros */
  22 #include <string.h>
  23 #include <pthread.h>
  24 
  25 #include <signal.h>
  26 #include "../util/stat.h"
  27 #include <subcmd/parse-options.h>
  28 #include <linux/kernel.h>
  29 #include <linux/time64.h>
  30 #include <errno.h>
  31 #include "futex.h"
  32 #include <internal/cpumap.h>
  33 #include <perf/cpumap.h>
  34 
  35 #include <err.h>
  36 #include <stdlib.h>
  37 #include <sys/time.h>
  38 
  39 struct thread_data {
  40         pthread_t worker;
  41         unsigned int nwoken;
  42         struct timeval runtime;
  43 };
  44 
  45 static unsigned int nwakes = 1;
  46 
  47 /* all threads will block on the same futex -- hash bucket chaos ;) */
  48 static u_int32_t futex = 0;
  49 
  50 static pthread_t *blocked_worker;
  51 static bool done = false, silent = false, fshared = false;
  52 static unsigned int nblocked_threads = 0, nwaking_threads = 0;
  53 static pthread_mutex_t thread_lock;
  54 static pthread_cond_t thread_parent, thread_worker;
  55 static pthread_barrier_t barrier;
  56 static struct stats waketime_stats, wakeup_stats;
  57 static unsigned int threads_starting;
  58 static int futex_flag = 0;
  59 
  60 static const struct option options[] = {
  61         OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
  62         OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
  63         OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
  64         OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
  65         OPT_END()
  66 };
  67 
  68 static const char * const bench_futex_wake_parallel_usage[] = {
  69         "perf bench futex wake-parallel <options>",
  70         NULL
  71 };
  72 
  73 static void *waking_workerfn(void *arg)
  74 {
  75         struct thread_data *waker = (struct thread_data *) arg;
  76         struct timeval start, end;
  77 
  78         pthread_barrier_wait(&barrier);
  79 
  80         gettimeofday(&start, NULL);
  81 
  82         waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
  83         if (waker->nwoken != nwakes)
  84                 warnx("couldn't wakeup all tasks (%d/%d)",
  85                       waker->nwoken, nwakes);
  86 
  87         gettimeofday(&end, NULL);
  88         timersub(&end, &start, &waker->runtime);
  89 
  90         pthread_exit(NULL);
  91         return NULL;
  92 }
  93 
  94 static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
  95 {
  96         unsigned int i;
  97 
  98         pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  99 
 100         pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
 101 
 102         /* create and block all threads */
 103         for (i = 0; i < nwaking_threads; i++) {
 104                 /*
 105                  * Thread creation order will impact per-thread latency
 106                  * as it will affect the order to acquire the hb spinlock.
 107                  * For now let the scheduler decide.
 108                  */
 109                 if (pthread_create(&td[i].worker, &thread_attr,
 110                                    waking_workerfn, (void *)&td[i]))
 111                         err(EXIT_FAILURE, "pthread_create");
 112         }
 113 
 114         pthread_barrier_wait(&barrier);
 115 
 116         for (i = 0; i < nwaking_threads; i++)
 117                 if (pthread_join(td[i].worker, NULL))
 118                         err(EXIT_FAILURE, "pthread_join");
 119 
 120         pthread_barrier_destroy(&barrier);
 121 }
 122 
 123 static void *blocked_workerfn(void *arg __maybe_unused)
 124 {
 125         pthread_mutex_lock(&thread_lock);
 126         threads_starting--;
 127         if (!threads_starting)
 128                 pthread_cond_signal(&thread_parent);
 129         pthread_cond_wait(&thread_worker, &thread_lock);
 130         pthread_mutex_unlock(&thread_lock);
 131 
 132         while (1) { /* handle spurious wakeups */
 133                 if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
 134                         break;
 135         }
 136 
 137         pthread_exit(NULL);
 138         return NULL;
 139 }
 140 
 141 static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
 142                           struct perf_cpu_map *cpu)
 143 {
 144         cpu_set_t cpuset;
 145         unsigned int i;
 146 
 147         threads_starting = nblocked_threads;
 148 
 149         /* create and block all threads */
 150         for (i = 0; i < nblocked_threads; i++) {
 151                 CPU_ZERO(&cpuset);
 152                 CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 153 
 154                 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
 155                         err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 156 
 157                 if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
 158                         err(EXIT_FAILURE, "pthread_create");
 159         }
 160 }
 161 
 162 static void print_run(struct thread_data *waking_worker, unsigned int run_num)
 163 {
 164         unsigned int i, wakeup_avg;
 165         double waketime_avg, waketime_stddev;
 166         struct stats __waketime_stats, __wakeup_stats;
 167 
 168         init_stats(&__wakeup_stats);
 169         init_stats(&__waketime_stats);
 170 
 171         for (i = 0; i < nwaking_threads; i++) {
 172                 update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
 173                 update_stats(&__wakeup_stats, waking_worker[i].nwoken);
 174         }
 175 
 176         waketime_avg = avg_stats(&__waketime_stats);
 177         waketime_stddev = stddev_stats(&__waketime_stats);
 178         wakeup_avg = avg_stats(&__wakeup_stats);
 179 
 180         printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
 181                "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
 182                nblocked_threads, waketime_avg / USEC_PER_MSEC,
 183                rel_stddev_stats(waketime_stddev, waketime_avg));
 184 }
 185 
 186 static void print_summary(void)
 187 {
 188         unsigned int wakeup_avg;
 189         double waketime_avg, waketime_stddev;
 190 
 191         waketime_avg = avg_stats(&waketime_stats);
 192         waketime_stddev = stddev_stats(&waketime_stats);
 193         wakeup_avg = avg_stats(&wakeup_stats);
 194 
 195         printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
 196                wakeup_avg,
 197                nblocked_threads,
 198                waketime_avg / USEC_PER_MSEC,
 199                rel_stddev_stats(waketime_stddev, waketime_avg));
 200 }
 201 
 202 
 203 static void do_run_stats(struct thread_data *waking_worker)
 204 {
 205         unsigned int i;
 206 
 207         for (i = 0; i < nwaking_threads; i++) {
 208                 update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
 209                 update_stats(&wakeup_stats, waking_worker[i].nwoken);
 210         }
 211 
 212 }
 213 
 214 static void toggle_done(int sig __maybe_unused,
 215                         siginfo_t *info __maybe_unused,
 216                         void *uc __maybe_unused)
 217 {
 218         done = true;
 219 }
 220 
 221 int bench_futex_wake_parallel(int argc, const char **argv)
 222 {
 223         int ret = 0;
 224         unsigned int i, j;
 225         struct sigaction act;
 226         pthread_attr_t thread_attr;
 227         struct thread_data *waking_worker;
 228         struct perf_cpu_map *cpu;
 229 
 230         argc = parse_options(argc, argv, options,
 231                              bench_futex_wake_parallel_usage, 0);
 232         if (argc) {
 233                 usage_with_options(bench_futex_wake_parallel_usage, options);
 234                 exit(EXIT_FAILURE);
 235         }
 236 
 237         sigfillset(&act.sa_mask);
 238         act.sa_sigaction = toggle_done;
 239         sigaction(SIGINT, &act, NULL);
 240 
 241         cpu = perf_cpu_map__new(NULL);
 242         if (!cpu)
 243                 err(EXIT_FAILURE, "calloc");
 244 
 245         if (!nblocked_threads)
 246                 nblocked_threads = cpu->nr;
 247 
 248         /* some sanity checks */
 249         if (nwaking_threads > nblocked_threads || !nwaking_threads)
 250                 nwaking_threads = nblocked_threads;
 251 
 252         if (nblocked_threads % nwaking_threads)
 253                 errx(EXIT_FAILURE, "Must be perfectly divisible");
 254         /*
 255          * Each thread will wakeup nwakes tasks in
 256          * a single futex_wait call.
 257          */
 258         nwakes = nblocked_threads/nwaking_threads;
 259 
 260         blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
 261         if (!blocked_worker)
 262                 err(EXIT_FAILURE, "calloc");
 263 
 264         if (!fshared)
 265                 futex_flag = FUTEX_PRIVATE_FLAG;
 266 
 267         printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
 268                "futex %p), %d threads waking up %d at a time.\n\n",
 269                getpid(), nblocked_threads, fshared ? "shared":"private",
 270                &futex, nwaking_threads, nwakes);
 271 
 272         init_stats(&wakeup_stats);
 273         init_stats(&waketime_stats);
 274 
 275         pthread_attr_init(&thread_attr);
 276         pthread_mutex_init(&thread_lock, NULL);
 277         pthread_cond_init(&thread_parent, NULL);
 278         pthread_cond_init(&thread_worker, NULL);
 279 
 280         for (j = 0; j < bench_repeat && !done; j++) {
 281                 waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
 282                 if (!waking_worker)
 283                         err(EXIT_FAILURE, "calloc");
 284 
 285                 /* create, launch & block all threads */
 286                 block_threads(blocked_worker, thread_attr, cpu);
 287 
 288                 /* make sure all threads are already blocked */
 289                 pthread_mutex_lock(&thread_lock);
 290                 while (threads_starting)
 291                         pthread_cond_wait(&thread_parent, &thread_lock);
 292                 pthread_cond_broadcast(&thread_worker);
 293                 pthread_mutex_unlock(&thread_lock);
 294 
 295                 usleep(100000);
 296 
 297                 /* Ok, all threads are patiently blocked, start waking folks up */
 298                 wakeup_threads(waking_worker, thread_attr);
 299 
 300                 for (i = 0; i < nblocked_threads; i++) {
 301                         ret = pthread_join(blocked_worker[i], NULL);
 302                         if (ret)
 303                                 err(EXIT_FAILURE, "pthread_join");
 304                 }
 305 
 306                 do_run_stats(waking_worker);
 307                 if (!silent)
 308                         print_run(waking_worker, j);
 309 
 310                 free(waking_worker);
 311         }
 312 
 313         /* cleanup & report results */
 314         pthread_cond_destroy(&thread_parent);
 315         pthread_cond_destroy(&thread_worker);
 316         pthread_mutex_destroy(&thread_lock);
 317         pthread_attr_destroy(&thread_attr);
 318 
 319         print_summary();
 320 
 321         free(blocked_worker);
 322         return ret;
 323 }
 324 #endif /* HAVE_PTHREAD_BARRIER */

/* [<][>][^][v][top][bottom][index][help] */