| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 | 
|  | 2 | /* | 
|  | 3 | * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com> | 
|  | 4 | * | 
|  | 5 | * futex-requeue: Block a bunch of threads on futex1 and requeue them | 
|  | 6 | *                on futex2, N at a time. | 
|  | 7 | * | 
|  | 8 | * This program is particularly useful to measure the latency of nthread | 
|  | 9 | * requeues without waking up any tasks -- thus mimicking a regular futex_wait. | 
|  | 10 | */ | 
|  | 11 |  | 
|  | 12 | /* For the CPU_*() macros (CPU_ZERO, CPU_SET) */ | 
|  | 13 | #include <string.h> | 
|  | 14 | #include <pthread.h> | 
|  | 15 |  | 
|  | 16 | #include <signal.h> | 
|  | 17 | #include "../util/stat.h" | 
|  | 18 | #include <subcmd/parse-options.h> | 
|  | 19 | #include <linux/compiler.h> | 
|  | 20 | #include <linux/kernel.h> | 
|  | 21 | #include <linux/time64.h> | 
|  | 22 | #include <errno.h> | 
|  | 23 | #include "bench.h" | 
|  | 24 | #include "futex.h" | 
|  | 25 | #include "cpumap.h" | 
|  | 26 |  | 
|  | 27 | #include <err.h> | 
|  | 28 | #include <stdlib.h> | 
|  | 29 | #include <sys/time.h> | 
|  | 30 |  | 
|  | 31 | static u_int32_t futex1 = 0, futex2 = 0; | 
|  | 32 |  | 
|  | 33 | /* | 
|  | 34 | * How many tasks to requeue at a time. | 
|  | 35 | * Default to 1 in order to make the kernel work more. | 
|  | 36 | */ | 
|  | 37 | static unsigned int nrequeue = 1; | 
|  | 38 |  | 
|  | 39 | static pthread_t *worker; | 
|  | 40 | static bool done = false, silent = false, fshared = false; | 
|  | 41 | static pthread_mutex_t thread_lock; | 
|  | 42 | static pthread_cond_t thread_parent, thread_worker; | 
|  | 43 | static struct stats requeuetime_stats, requeued_stats; | 
|  | 44 | static unsigned int threads_starting, nthreads = 0; | 
|  | 45 | static int futex_flag = 0; | 
|  | 46 |  | 
|  | 47 | static const struct option options[] = { | 
|  | 48 | OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"), | 
|  | 49 | OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), | 
|  | 50 | OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"), | 
|  | 51 | OPT_BOOLEAN( 'S', "shared",   &fshared,  "Use shared futexes instead of private ones"), | 
|  | 52 | OPT_END() | 
|  | 53 | }; | 
|  | 54 |  | 
/* NULL-terminated usage lines, passed to parse_options() and usage_with_options(). */
static const char * const bench_futex_requeue_usage[] = {
	"perf bench futex requeue <options>",
	NULL,
};
|  | 59 |  | 
|  | 60 | static void print_summary(void) | 
|  | 61 | { | 
|  | 62 | double requeuetime_avg = avg_stats(&requeuetime_stats); | 
|  | 63 | double requeuetime_stddev = stddev_stats(&requeuetime_stats); | 
|  | 64 | unsigned int requeued_avg = avg_stats(&requeued_stats); | 
|  | 65 |  | 
|  | 66 | printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", | 
|  | 67 | requeued_avg, | 
|  | 68 | nthreads, | 
|  | 69 | requeuetime_avg / USEC_PER_MSEC, | 
|  | 70 | rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); | 
|  | 71 | } | 
|  | 72 |  | 
|  | 73 | static void *workerfn(void *arg __maybe_unused) | 
|  | 74 | { | 
|  | 75 | pthread_mutex_lock(&thread_lock); | 
|  | 76 | threads_starting--; | 
|  | 77 | if (!threads_starting) | 
|  | 78 | pthread_cond_signal(&thread_parent); | 
|  | 79 | pthread_cond_wait(&thread_worker, &thread_lock); | 
|  | 80 | pthread_mutex_unlock(&thread_lock); | 
|  | 81 |  | 
|  | 82 | futex_wait(&futex1, 0, NULL, futex_flag); | 
|  | 83 | return NULL; | 
|  | 84 | } | 
|  | 85 |  | 
|  | 86 | static void block_threads(pthread_t *w, | 
|  | 87 | pthread_attr_t thread_attr, struct cpu_map *cpu) | 
|  | 88 | { | 
|  | 89 | cpu_set_t cpuset; | 
|  | 90 | unsigned int i; | 
|  | 91 |  | 
|  | 92 | threads_starting = nthreads; | 
|  | 93 |  | 
|  | 94 | /* create and block all threads */ | 
|  | 95 | for (i = 0; i < nthreads; i++) { | 
|  | 96 | CPU_ZERO(&cpuset); | 
|  | 97 | CPU_SET(cpu->map[i % cpu->nr], &cpuset); | 
|  | 98 |  | 
|  | 99 | if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) | 
|  | 100 | err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); | 
|  | 101 |  | 
|  | 102 | if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) | 
|  | 103 | err(EXIT_FAILURE, "pthread_create"); | 
|  | 104 | } | 
|  | 105 | } | 
|  | 106 |  | 
|  | 107 | static void toggle_done(int sig __maybe_unused, | 
|  | 108 | siginfo_t *info __maybe_unused, | 
|  | 109 | void *uc __maybe_unused) | 
|  | 110 | { | 
|  | 111 | done = true; | 
|  | 112 | } | 
|  | 113 |  | 
|  | 114 | int bench_futex_requeue(int argc, const char **argv) | 
|  | 115 | { | 
|  | 116 | int ret = 0; | 
|  | 117 | unsigned int i, j; | 
|  | 118 | struct sigaction act; | 
|  | 119 | pthread_attr_t thread_attr; | 
|  | 120 | struct cpu_map *cpu; | 
|  | 121 |  | 
|  | 122 | argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); | 
|  | 123 | if (argc) | 
|  | 124 | goto err; | 
|  | 125 |  | 
|  | 126 | cpu = cpu_map__new(NULL); | 
|  | 127 | if (!cpu) | 
|  | 128 | err(EXIT_FAILURE, "cpu_map__new"); | 
|  | 129 |  | 
|  | 130 | sigfillset(&act.sa_mask); | 
|  | 131 | act.sa_sigaction = toggle_done; | 
|  | 132 | sigaction(SIGINT, &act, NULL); | 
|  | 133 |  | 
|  | 134 | if (!nthreads) | 
|  | 135 | nthreads = cpu->nr; | 
|  | 136 |  | 
|  | 137 | worker = calloc(nthreads, sizeof(*worker)); | 
|  | 138 | if (!worker) | 
|  | 139 | err(EXIT_FAILURE, "calloc"); | 
|  | 140 |  | 
|  | 141 | if (!fshared) | 
|  | 142 | futex_flag = FUTEX_PRIVATE_FLAG; | 
|  | 143 |  | 
|  | 144 | if (nrequeue > nthreads) | 
|  | 145 | nrequeue = nthreads; | 
|  | 146 |  | 
|  | 147 | printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " | 
|  | 148 | "%d at a time.\n\n",  getpid(), nthreads, | 
|  | 149 | fshared ? "shared":"private", &futex1, &futex2, nrequeue); | 
|  | 150 |  | 
|  | 151 | init_stats(&requeued_stats); | 
|  | 152 | init_stats(&requeuetime_stats); | 
|  | 153 | pthread_attr_init(&thread_attr); | 
|  | 154 | pthread_mutex_init(&thread_lock, NULL); | 
|  | 155 | pthread_cond_init(&thread_parent, NULL); | 
|  | 156 | pthread_cond_init(&thread_worker, NULL); | 
|  | 157 |  | 
|  | 158 | for (j = 0; j < bench_repeat && !done; j++) { | 
|  | 159 | unsigned int nrequeued = 0; | 
|  | 160 | struct timeval start, end, runtime; | 
|  | 161 |  | 
|  | 162 | /* create, launch & block all threads */ | 
|  | 163 | block_threads(worker, thread_attr, cpu); | 
|  | 164 |  | 
|  | 165 | /* make sure all threads are already blocked */ | 
|  | 166 | pthread_mutex_lock(&thread_lock); | 
|  | 167 | while (threads_starting) | 
|  | 168 | pthread_cond_wait(&thread_parent, &thread_lock); | 
|  | 169 | pthread_cond_broadcast(&thread_worker); | 
|  | 170 | pthread_mutex_unlock(&thread_lock); | 
|  | 171 |  | 
|  | 172 | usleep(100000); | 
|  | 173 |  | 
|  | 174 | /* Ok, all threads are patiently blocked, start requeueing */ | 
|  | 175 | gettimeofday(&start, NULL); | 
|  | 176 | while (nrequeued < nthreads) { | 
|  | 177 | /* | 
|  | 178 | * Do not wakeup any tasks blocked on futex1, allowing | 
|  | 179 | * us to really measure futex_wait functionality. | 
|  | 180 | */ | 
|  | 181 | nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, | 
|  | 182 | nrequeue, futex_flag); | 
|  | 183 | } | 
|  | 184 |  | 
|  | 185 | gettimeofday(&end, NULL); | 
|  | 186 | timersub(&end, &start, &runtime); | 
|  | 187 |  | 
|  | 188 | update_stats(&requeued_stats, nrequeued); | 
|  | 189 | update_stats(&requeuetime_stats, runtime.tv_usec); | 
|  | 190 |  | 
|  | 191 | if (!silent) { | 
|  | 192 | printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", | 
|  | 193 | j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC); | 
|  | 194 | } | 
|  | 195 |  | 
|  | 196 | /* everybody should be blocked on futex2, wake'em up */ | 
|  | 197 | nrequeued = futex_wake(&futex2, nrequeued, futex_flag); | 
|  | 198 | if (nthreads != nrequeued) | 
|  | 199 | warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); | 
|  | 200 |  | 
|  | 201 | for (i = 0; i < nthreads; i++) { | 
|  | 202 | ret = pthread_join(worker[i], NULL); | 
|  | 203 | if (ret) | 
|  | 204 | err(EXIT_FAILURE, "pthread_join"); | 
|  | 205 | } | 
|  | 206 | } | 
|  | 207 |  | 
|  | 208 | /* cleanup & report results */ | 
|  | 209 | pthread_cond_destroy(&thread_parent); | 
|  | 210 | pthread_cond_destroy(&thread_worker); | 
|  | 211 | pthread_mutex_destroy(&thread_lock); | 
|  | 212 | pthread_attr_destroy(&thread_attr); | 
|  | 213 |  | 
|  | 214 | print_summary(); | 
|  | 215 |  | 
|  | 216 | free(worker); | 
|  | 217 | return ret; | 
|  | 218 | err: | 
|  | 219 | usage_with_options(bench_futex_requeue_usage, options); | 
|  | 220 | exit(EXIT_FAILURE); | 
|  | 221 | } |