xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame^] | 1 | /* Benchmark malloc and free functions. |
| 2 | Copyright (C) 2013-2016 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <http://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #include <errno.h> |
| 20 | #include <math.h> |
| 21 | #include <pthread.h> |
| 22 | #include <signal.h> |
| 23 | #include <stdio.h> |
| 24 | #include <stdlib.h> |
| 25 | #include <string.h> |
| 26 | #include <sys/time.h> |
| 27 | #include <sys/resource.h> |
| 28 | #include <unistd.h> |
| 29 | |
| 30 | #include "bench-timing.h" |
| 31 | #include "json-lib.h" |
| 32 | |
| 33 | /* Benchmark duration in seconds. */ |
| 34 | #define BENCHMARK_DURATION 60 |
| 35 | #define RAND_SEED 88 |
| 36 | |
| 37 | #ifndef NUM_THREADS |
| 38 | # define NUM_THREADS 1 |
| 39 | #endif |
| 40 | |
| 41 | /* Maximum memory that can be allocated at any one time is: |
| 42 | |
| 43 | NUM_THREADS * WORKING_SET_SIZE * MAX_ALLOCATION_SIZE |
| 44 | |
| 45 | However due to the distribution of the random block sizes |
| 46 | the typical amount allocated will be much smaller. */ |
| 47 | #define WORKING_SET_SIZE 1024 |
| 48 | |
| 49 | #define MIN_ALLOCATION_SIZE 4 |
| 50 | #define MAX_ALLOCATION_SIZE 32768 |
| 51 | |
| 52 | /* Get a random block size with an inverse square distribution. */ |
| 53 | static unsigned int |
| 54 | get_block_size (unsigned int rand_data) |
| 55 | { |
| 56 | /* Inverse square. */ |
| 57 | const float exponent = -2; |
| 58 | /* Minimum value of distribution. */ |
| 59 | const float dist_min = MIN_ALLOCATION_SIZE; |
| 60 | /* Maximum value of distribution. */ |
| 61 | const float dist_max = MAX_ALLOCATION_SIZE; |
| 62 | |
| 63 | float min_pow = powf (dist_min, exponent + 1); |
| 64 | float max_pow = powf (dist_max, exponent + 1); |
| 65 | |
| 66 | float r = (float) rand_data / RAND_MAX; |
| 67 | |
| 68 | return (unsigned int) powf ((max_pow - min_pow) * r + min_pow, |
| 69 | 1 / (exponent + 1)); |
| 70 | } |
| 71 | |
| 72 | #define NUM_BLOCK_SIZES 8000 |
| 73 | #define NUM_OFFSETS ((WORKING_SET_SIZE) * 4) |
| 74 | |
| 75 | static unsigned int random_block_sizes[NUM_BLOCK_SIZES]; |
| 76 | static unsigned int random_offsets[NUM_OFFSETS]; |
| 77 | |
| 78 | static void |
| 79 | init_random_values (void) |
| 80 | { |
| 81 | for (size_t i = 0; i < NUM_BLOCK_SIZES; i++) |
| 82 | random_block_sizes[i] = get_block_size (rand ()); |
| 83 | |
| 84 | for (size_t i = 0; i < NUM_OFFSETS; i++) |
| 85 | random_offsets[i] = rand () % WORKING_SET_SIZE; |
| 86 | } |
| 87 | |
| 88 | static unsigned int |
| 89 | get_random_block_size (unsigned int *state) |
| 90 | { |
| 91 | unsigned int idx = *state; |
| 92 | |
| 93 | if (idx >= NUM_BLOCK_SIZES - 1) |
| 94 | idx = 0; |
| 95 | else |
| 96 | idx++; |
| 97 | |
| 98 | *state = idx; |
| 99 | |
| 100 | return random_block_sizes[idx]; |
| 101 | } |
| 102 | |
| 103 | static unsigned int |
| 104 | get_random_offset (unsigned int *state) |
| 105 | { |
| 106 | unsigned int idx = *state; |
| 107 | |
| 108 | if (idx >= NUM_OFFSETS - 1) |
| 109 | idx = 0; |
| 110 | else |
| 111 | idx++; |
| 112 | |
| 113 | *state = idx; |
| 114 | |
| 115 | return random_offsets[idx]; |
| 116 | } |
| 117 | |
| 118 | static volatile bool timeout; |
| 119 | |
| 120 | static void |
| 121 | alarm_handler (int signum) |
| 122 | { |
| 123 | timeout = true; |
| 124 | } |
| 125 | |
| 126 | /* Allocate and free blocks in a random order. */ |
| 127 | static size_t |
| 128 | malloc_benchmark_loop (void **ptr_arr) |
| 129 | { |
| 130 | unsigned int offset_state = 0, block_state = 0; |
| 131 | size_t iters = 0; |
| 132 | |
| 133 | while (!timeout) |
| 134 | { |
| 135 | unsigned int next_idx = get_random_offset (&offset_state); |
| 136 | unsigned int next_block = get_random_block_size (&block_state); |
| 137 | |
| 138 | free (ptr_arr[next_idx]); |
| 139 | |
| 140 | ptr_arr[next_idx] = malloc (next_block); |
| 141 | |
| 142 | iters++; |
| 143 | } |
| 144 | |
| 145 | return iters; |
| 146 | } |
| 147 | |
| 148 | struct thread_args |
| 149 | { |
| 150 | size_t iters; |
| 151 | void **working_set; |
| 152 | timing_t elapsed; |
| 153 | }; |
| 154 | |
| 155 | static void * |
| 156 | benchmark_thread (void *arg) |
| 157 | { |
| 158 | struct thread_args *args = (struct thread_args *) arg; |
| 159 | size_t iters; |
| 160 | void *thread_set = args->working_set; |
| 161 | timing_t start, stop; |
| 162 | |
| 163 | TIMING_NOW (start); |
| 164 | iters = malloc_benchmark_loop (thread_set); |
| 165 | TIMING_NOW (stop); |
| 166 | |
| 167 | TIMING_DIFF (args->elapsed, start, stop); |
| 168 | args->iters = iters; |
| 169 | |
| 170 | return NULL; |
| 171 | } |
| 172 | |
| 173 | static timing_t |
| 174 | do_benchmark (size_t num_threads, size_t *iters) |
| 175 | { |
| 176 | timing_t elapsed = 0; |
| 177 | |
| 178 | if (num_threads == 1) |
| 179 | { |
| 180 | timing_t start, stop; |
| 181 | void *working_set[WORKING_SET_SIZE]; |
| 182 | |
| 183 | memset (working_set, 0, sizeof (working_set)); |
| 184 | |
| 185 | TIMING_NOW (start); |
| 186 | *iters = malloc_benchmark_loop (working_set); |
| 187 | TIMING_NOW (stop); |
| 188 | |
| 189 | TIMING_DIFF (elapsed, start, stop); |
| 190 | } |
| 191 | else |
| 192 | { |
| 193 | struct thread_args args[num_threads]; |
| 194 | void *working_set[num_threads][WORKING_SET_SIZE]; |
| 195 | pthread_t threads[num_threads]; |
| 196 | |
| 197 | memset (working_set, 0, sizeof (working_set)); |
| 198 | |
| 199 | *iters = 0; |
| 200 | |
| 201 | for (size_t i = 0; i < num_threads; i++) |
| 202 | { |
| 203 | args[i].working_set = working_set[i]; |
| 204 | pthread_create(&threads[i], NULL, benchmark_thread, &args[i]); |
| 205 | } |
| 206 | |
| 207 | for (size_t i = 0; i < num_threads; i++) |
| 208 | { |
| 209 | pthread_join(threads[i], NULL); |
| 210 | TIMING_ACCUM (elapsed, args[i].elapsed); |
| 211 | *iters += args[i].iters; |
| 212 | } |
| 213 | } |
| 214 | return elapsed; |
| 215 | } |
| 216 | |
| 217 | static void usage(const char *name) |
| 218 | { |
| 219 | fprintf (stderr, "%s: <num_threads>\n", name); |
| 220 | exit (1); |
| 221 | } |
| 222 | |
| 223 | int |
| 224 | main (int argc, char **argv) |
| 225 | { |
| 226 | timing_t cur; |
| 227 | size_t iters = 0, num_threads = 1; |
| 228 | unsigned long res; |
| 229 | json_ctx_t json_ctx; |
| 230 | double d_total_s, d_total_i; |
| 231 | struct sigaction act; |
| 232 | |
| 233 | if (argc == 1) |
| 234 | num_threads = 1; |
| 235 | else if (argc == 2) |
| 236 | { |
| 237 | long ret; |
| 238 | |
| 239 | errno = 0; |
| 240 | ret = strtol(argv[1], NULL, 10); |
| 241 | |
| 242 | if (errno || ret == 0) |
| 243 | usage(argv[0]); |
| 244 | |
| 245 | num_threads = ret; |
| 246 | } |
| 247 | else |
| 248 | usage(argv[0]); |
| 249 | |
| 250 | init_random_values (); |
| 251 | |
| 252 | json_init (&json_ctx, 0, stdout); |
| 253 | |
| 254 | json_document_begin (&json_ctx); |
| 255 | |
| 256 | json_attr_string (&json_ctx, "timing_type", TIMING_TYPE); |
| 257 | |
| 258 | json_attr_object_begin (&json_ctx, "functions"); |
| 259 | |
| 260 | json_attr_object_begin (&json_ctx, "malloc"); |
| 261 | |
| 262 | json_attr_object_begin (&json_ctx, ""); |
| 263 | |
| 264 | TIMING_INIT (res); |
| 265 | |
| 266 | (void) res; |
| 267 | |
| 268 | memset (&act, 0, sizeof (act)); |
| 269 | act.sa_handler = &alarm_handler; |
| 270 | |
| 271 | sigaction (SIGALRM, &act, NULL); |
| 272 | |
| 273 | alarm (BENCHMARK_DURATION); |
| 274 | |
| 275 | cur = do_benchmark (num_threads, &iters); |
| 276 | |
| 277 | struct rusage usage; |
| 278 | getrusage(RUSAGE_SELF, &usage); |
| 279 | |
| 280 | d_total_s = cur; |
| 281 | d_total_i = iters; |
| 282 | |
| 283 | json_attr_double (&json_ctx, "duration", d_total_s); |
| 284 | json_attr_double (&json_ctx, "iterations", d_total_i); |
| 285 | json_attr_double (&json_ctx, "time_per_iteration", d_total_s / d_total_i); |
| 286 | json_attr_double (&json_ctx, "max_rss", usage.ru_maxrss); |
| 287 | |
| 288 | json_attr_double (&json_ctx, "threads", num_threads); |
| 289 | json_attr_double (&json_ctx, "min_size", MIN_ALLOCATION_SIZE); |
| 290 | json_attr_double (&json_ctx, "max_size", MAX_ALLOCATION_SIZE); |
| 291 | json_attr_double (&json_ctx, "random_seed", RAND_SEED); |
| 292 | |
| 293 | json_attr_object_end (&json_ctx); |
| 294 | |
| 295 | json_attr_object_end (&json_ctx); |
| 296 | |
| 297 | json_attr_object_end (&json_ctx); |
| 298 | |
| 299 | json_document_end (&json_ctx); |
| 300 | |
| 301 | return 0; |
| 302 | } |