xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Profiling of shared libraries. |
| 2 | Copyright (C) 1997-2016 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. |
| 5 | Based on the BSD mcount implementation. |
| 6 | |
| 7 | The GNU C Library is free software; you can redistribute it and/or |
| 8 | modify it under the terms of the GNU Lesser General Public |
| 9 | License as published by the Free Software Foundation; either |
| 10 | version 2.1 of the License, or (at your option) any later version. |
| 11 | |
| 12 | The GNU C Library is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | Lesser General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU Lesser General Public |
| 18 | License along with the GNU C Library; if not, see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | #include <assert.h> |
| 22 | #include <errno.h> |
| 23 | #include <fcntl.h> |
| 24 | #include <inttypes.h> |
| 25 | #include <limits.h> |
| 26 | #include <stdio.h> |
| 27 | #include <stdlib.h> |
| 28 | #include <string.h> |
| 29 | #include <unistd.h> |
| 30 | #include <stdint.h> |
| 31 | #include <ldsodefs.h> |
| 32 | #include <sys/gmon.h> |
| 33 | #include <sys/gmon_out.h> |
| 34 | #include <sys/mman.h> |
| 35 | #include <sys/param.h> |
| 36 | #include <sys/stat.h> |
| 37 | #include <atomic.h> |
| 38 | |
| 39 | /* The LD_PROFILE feature has to be implemented different to the |
| 40 | normal profiling using the gmon/ functions. The problem is that an |
| 41 | arbitrary number of processes simultaneously can be run using |
| 42 | profiling and all write the results in the same file. To provide |
| 43 | this mechanism one could implement a complicated mechanism to merge |
| 44 | the content of two profiling runs or one could extend the file |
| 45 | format to allow more than one data set. For the second solution we |
| 46 | would have the problem that the file can grow in size beyond any |
| 47 | limit and both solutions have the problem that the concurrency of |
| 48 | writing the results is a big problem. |
| 49 | |
| 50 | Another much simpler method is to use mmap to map the same file in |
| 51 | all using programs and modify the data in the mmap'ed area and so |
| 52 | also automatically on the disk. Using the MAP_SHARED option of |
| 53 | mmap(2) this can be done without big problems in more than one |
| 54 | file. |
| 55 | |
| 56 | This approach is very different from the normal profiling. We have |
| 57 | to use the profiling data in exactly the way they are expected to |
| 58 | be written to disk. But the normal format used by gprof is not usable |
| 59 | to do this. It is optimized for size. It writes the tags as single |
| 60 | bytes but this means that the following 32/64 bit values are |
| 61 | unaligned. |
| 62 | |
| 63 | Therefore we use a new format. This will look like this |
| 64 | |
| 65 | 0 1 2 3 <- byte is 32 bit word |
| 66 | 0000 g m o n |
| 67 | 0004 *version* <- GMON_SHOBJ_VERSION |
| 68 | 0008 00 00 00 00 |
| 69 | 000c 00 00 00 00 |
| 70 | 0010 00 00 00 00 |
| 71 | |
| 72 | 0014 *tag* <- GMON_TAG_TIME_HIST |
| 73 | 0018 ?? ?? ?? ?? |
| 74 | ?? ?? ?? ?? <- 32/64 bit LowPC |
| 75 | 0018+A ?? ?? ?? ?? |
| 76 | ?? ?? ?? ?? <- 32/64 bit HighPC |
| 77 | 0018+2*A *histsize* |
| 78 | 001c+2*A *profrate* |
| 79 | 0020+2*A s e c o |
| 80 | 0024+2*A n d s \0 |
| 81 | 0028+2*A \0 \0 \0 \0 |
| 82 | 002c+2*A \0 \0 \0 |
| 83 | 002f+2*A s |
| 84 | |
| 85 | 0030+2*A ?? ?? ?? ?? <- Count data |
| 86 | ... ... |
| 87 | 0030+2*A+K ?? ?? ?? ?? |
| 88 | |
| 89 | 0030+2*A+K *tag* <- GMON_TAG_CG_ARC |
| 90 | 0034+2*A+K *lastused* |
| 91 | 0038+2*A+K ?? ?? ?? ?? |
| 92 | ?? ?? ?? ?? <- FromPC#1 |
| 93 | 0038+3*A+K ?? ?? ?? ?? |
| 94 | ?? ?? ?? ?? <- ToPC#1 |
| 95 | 0038+4*A+K ?? ?? ?? ?? <- Count#1 |
| 96 | ... ... ... |
| 97 | 0038+(2*(CN-1)+2)*A+(CN-1)*4+K ?? ?? ?? ?? |
| 98 | ?? ?? ?? ?? <- FromPC#CGN |
| 99 | 0038+(2*(CN-1)+3)*A+(CN-1)*4+K ?? ?? ?? ?? |
| 100 | ?? ?? ?? ?? <- ToPC#CGN |
| 101 | 0038+(2*CN+2)*A+(CN-1)*4+K ?? ?? ?? ?? <- Count#CGN |
| 102 | |
| 103 | We put (for now?) no basic block information in the file since this would |
| 104 | introduce race conditions among all the processes that want to write them. |
| 105 | |
| 106 | `K' is the number of count entries which is computed as |
| 107 | |
| 108 | textsize / HISTFRACTION |
| 109 | |
| 110 | `CN' in the above table is the number of call graph arcs. Normally, |
| 111 | the table is sparse and the profiling code writes out only those |
| 112 | entries which are really used in the program run. But since we must |
| 113 | not extend this table (the profiling file) we'll keep them all here. |
| 114 | So CN can be computed in advance as |
| 115 | |
| 116 | MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS |
| 117 | |
| 118 | Now the remaining question is: how to build the data structures we can |
| 119 | work with from this data. We need the from set and must associate the |
| 120 | froms with all the associated tos. We will do this by constructing this |
| 121 | data structures at the program start. To do this we'll simply visit all |
| 122 | entries in the call graph table and add it to the appropriate list. */ |
| 123 | |
| 124 | extern int __profile_frequency (void); |
| 125 | libc_hidden_proto (__profile_frequency) |
| 126 | |
| 127 | /* We define a special type to address the elements of the arc table. |
| 128 | This is basically the `gmon_cg_arc_record' format but it includes |
| 129 | the room for the tag and it uses real types. */ |
| 130 | struct here_cg_arc_record |
| 131 | { |
| 132 | uintptr_t from_pc; |
| 133 | uintptr_t self_pc; |
| 134 | /* The count field is atomically incremented in _dl_mcount, which |
| 135 | requires it to be properly aligned for its type, and for this |
| 136 | alignment to be visible to the compiler. The amount of data |
| 137 | before an array of this structure is calculated as |
| 138 | expected_size in _dl_start_profile. Everything in that |
| 139 | calculation is a multiple of 4 bytes (in the case of |
| 140 | kcountsize, because it is derived from a subtraction of |
| 141 | page-aligned values, and the corresponding calculation in |
| 142 | __monstartup also ensures it is at least a multiple of the size |
| 143 | of u_long), so all copies of this field do in fact have the |
| 144 | appropriate alignment. */ |
| 145 | uint32_t count __attribute__ ((aligned (__alignof__ (uint32_t)))); |
| 146 | } __attribute__ ((packed)); |
| 147 | |
| 148 | static struct here_cg_arc_record *data; |
| 149 | |
| 150 | /* Nonzero if profiling is under way. */ |
| 151 | static int running; |
| 152 | |
| 153 | /* This is the number of entry which have been incorporated in the toset. */ |
| 154 | static uint32_t narcs; |
| 155 | /* This is a pointer to the object representing the number of entries |
| 156 | currently in the mmaped file. At no point of time this has to be the |
| 157 | same as NARCS. If it is equal all entries from the file are in our |
| 158 | lists. */ |
| 159 | static volatile uint32_t *narcsp; |
| 160 | |
| 161 | |
| 162 | struct here_fromstruct |
| 163 | { |
| 164 | struct here_cg_arc_record volatile *here; |
| 165 | uint16_t link; |
| 166 | }; |
| 167 | |
| 168 | static volatile uint16_t *tos; |
| 169 | |
| 170 | static struct here_fromstruct *froms; |
| 171 | static uint32_t fromlimit; |
| 172 | static volatile uint32_t fromidx; |
| 173 | |
| 174 | static uintptr_t lowpc; |
| 175 | static size_t textsize; |
| 176 | static unsigned int log_hashfraction; |
| 177 | |
| 178 | |
| 179 | |
| 180 | /* Set up profiling data to profile object desribed by MAP. The output |
| 181 | file is found (or created) in OUTPUT_DIR. */ |
| 182 | void |
| 183 | internal_function |
| 184 | _dl_start_profile (void) |
| 185 | { |
| 186 | char *filename; |
| 187 | int fd; |
| 188 | struct stat64 st; |
| 189 | const ElfW(Phdr) *ph; |
| 190 | ElfW(Addr) mapstart = ~((ElfW(Addr)) 0); |
| 191 | ElfW(Addr) mapend = 0; |
| 192 | char *hist, *cp; |
| 193 | size_t idx; |
| 194 | size_t tossize; |
| 195 | size_t fromssize; |
| 196 | uintptr_t highpc; |
| 197 | uint16_t *kcount; |
| 198 | size_t kcountsize; |
| 199 | struct gmon_hdr *addr = NULL; |
| 200 | off_t expected_size; |
| 201 | /* See profil(2) where this is described. */ |
| 202 | int s_scale; |
| 203 | #define SCALE_1_TO_1 0x10000L |
| 204 | const char *errstr = NULL; |
| 205 | |
| 206 | /* Compute the size of the sections which contain program code. */ |
| 207 | for (ph = GL(dl_profile_map)->l_phdr; |
| 208 | ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph) |
| 209 | if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X)) |
| 210 | { |
| 211 | ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1)); |
| 212 | ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1) |
| 213 | & ~(GLRO(dl_pagesize) - 1)); |
| 214 | |
| 215 | if (start < mapstart) |
| 216 | mapstart = start; |
| 217 | if (end > mapend) |
| 218 | mapend = end; |
| 219 | } |
| 220 | |
| 221 | /* Now we can compute the size of the profiling data. This is done |
| 222 | with the same formulars as in `monstartup' (see gmon.c). */ |
| 223 | running = 0; |
| 224 | lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr, |
| 225 | HISTFRACTION * sizeof (HISTCOUNTER)); |
| 226 | highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr, |
| 227 | HISTFRACTION * sizeof (HISTCOUNTER)); |
| 228 | textsize = highpc - lowpc; |
| 229 | kcountsize = textsize / HISTFRACTION; |
| 230 | if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) |
| 231 | { |
| 232 | /* If HASHFRACTION is a power of two, mcount can use shifting |
| 233 | instead of integer division. Precompute shift amount. |
| 234 | |
| 235 | This is a constant but the compiler cannot compile the |
| 236 | expression away since the __ffs implementation is not known |
| 237 | to the compiler. Help the compiler by precomputing the |
| 238 | usual cases. */ |
| 239 | assert (HASHFRACTION == 2); |
| 240 | |
| 241 | if (sizeof (*froms) == 8) |
| 242 | log_hashfraction = 4; |
| 243 | else if (sizeof (*froms) == 16) |
| 244 | log_hashfraction = 5; |
| 245 | else |
| 246 | log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1; |
| 247 | } |
| 248 | else |
| 249 | log_hashfraction = -1; |
| 250 | tossize = textsize / HASHFRACTION; |
| 251 | fromlimit = textsize * ARCDENSITY / 100; |
| 252 | if (fromlimit < MINARCS) |
| 253 | fromlimit = MINARCS; |
| 254 | if (fromlimit > MAXARCS) |
| 255 | fromlimit = MAXARCS; |
| 256 | fromssize = fromlimit * sizeof (struct here_fromstruct); |
| 257 | |
| 258 | expected_size = (sizeof (struct gmon_hdr) |
| 259 | + 4 + sizeof (struct gmon_hist_hdr) + kcountsize |
| 260 | + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record)); |
| 261 | |
| 262 | /* Create the gmon_hdr we expect or write. */ |
| 263 | struct real_gmon_hdr |
| 264 | { |
| 265 | char cookie[4]; |
| 266 | int32_t version; |
| 267 | char spare[3 * 4]; |
| 268 | } gmon_hdr; |
| 269 | if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr) |
| 270 | || (offsetof (struct real_gmon_hdr, cookie) |
| 271 | != offsetof (struct gmon_hdr, cookie)) |
| 272 | || (offsetof (struct real_gmon_hdr, version) |
| 273 | != offsetof (struct gmon_hdr, version))) |
| 274 | abort (); |
| 275 | |
| 276 | memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie)); |
| 277 | gmon_hdr.version = GMON_SHOBJ_VERSION; |
| 278 | memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare)); |
| 279 | |
| 280 | /* Create the hist_hdr we expect or write. */ |
| 281 | struct real_gmon_hist_hdr |
| 282 | { |
| 283 | char *low_pc; |
| 284 | char *high_pc; |
| 285 | int32_t hist_size; |
| 286 | int32_t prof_rate; |
| 287 | char dimen[15]; |
| 288 | char dimen_abbrev; |
| 289 | } hist_hdr; |
| 290 | if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr) |
| 291 | || (offsetof (struct real_gmon_hist_hdr, low_pc) |
| 292 | != offsetof (struct gmon_hist_hdr, low_pc)) |
| 293 | || (offsetof (struct real_gmon_hist_hdr, high_pc) |
| 294 | != offsetof (struct gmon_hist_hdr, high_pc)) |
| 295 | || (offsetof (struct real_gmon_hist_hdr, hist_size) |
| 296 | != offsetof (struct gmon_hist_hdr, hist_size)) |
| 297 | || (offsetof (struct real_gmon_hist_hdr, prof_rate) |
| 298 | != offsetof (struct gmon_hist_hdr, prof_rate)) |
| 299 | || (offsetof (struct real_gmon_hist_hdr, dimen) |
| 300 | != offsetof (struct gmon_hist_hdr, dimen)) |
| 301 | || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev) |
| 302 | != offsetof (struct gmon_hist_hdr, dimen_abbrev))) |
| 303 | abort (); |
| 304 | |
| 305 | hist_hdr.low_pc = (char *) mapstart; |
| 306 | hist_hdr.high_pc = (char *) mapend; |
| 307 | hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER); |
| 308 | hist_hdr.prof_rate = __profile_frequency (); |
| 309 | if (sizeof (hist_hdr.dimen) >= sizeof ("seconds")) |
| 310 | { |
| 311 | memcpy (hist_hdr.dimen, "seconds", sizeof ("seconds")); |
| 312 | memset (hist_hdr.dimen + sizeof ("seconds"), '\0', |
| 313 | sizeof (hist_hdr.dimen) - sizeof ("seconds")); |
| 314 | } |
| 315 | else |
| 316 | strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); |
| 317 | hist_hdr.dimen_abbrev = 's'; |
| 318 | |
| 319 | /* First determine the output name. We write in the directory |
| 320 | OUTPUT_DIR and the name is composed from the shared objects |
| 321 | soname (or the file name) and the ending ".profile". */ |
| 322 | filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1 |
| 323 | + strlen (GLRO(dl_profile)) + sizeof ".profile"); |
| 324 | cp = __stpcpy (filename, GLRO(dl_profile_output)); |
| 325 | *cp++ = '/'; |
| 326 | __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile"); |
| 327 | |
| 328 | #ifdef O_NOFOLLOW |
| 329 | # define EXTRA_FLAGS | O_NOFOLLOW |
| 330 | #else |
| 331 | # define EXTRA_FLAGS |
| 332 | #endif |
| 333 | fd = __open (filename, O_RDWR | O_CREAT EXTRA_FLAGS, DEFFILEMODE); |
| 334 | if (fd == -1) |
| 335 | { |
| 336 | char buf[400]; |
| 337 | int errnum; |
| 338 | |
| 339 | /* We cannot write the profiling data so don't do anything. */ |
| 340 | errstr = "%s: cannot open file: %s\n"; |
| 341 | print_error: |
| 342 | errnum = errno; |
| 343 | if (fd != -1) |
| 344 | __close (fd); |
| 345 | _dl_error_printf (errstr, filename, |
| 346 | __strerror_r (errnum, buf, sizeof buf)); |
| 347 | return; |
| 348 | } |
| 349 | |
| 350 | if (__fxstat64 (_STAT_VER, fd, &st) < 0 || !S_ISREG (st.st_mode)) |
| 351 | { |
| 352 | /* Not stat'able or not a regular file => don't use it. */ |
| 353 | errstr = "%s: cannot stat file: %s\n"; |
| 354 | goto print_error; |
| 355 | } |
| 356 | |
| 357 | /* Test the size. If it does not match what we expect from the size |
| 358 | values in the map MAP we don't use it and warn the user. */ |
| 359 | if (st.st_size == 0) |
| 360 | { |
| 361 | /* We have to create the file. */ |
| 362 | char buf[GLRO(dl_pagesize)]; |
| 363 | |
| 364 | memset (buf, '\0', GLRO(dl_pagesize)); |
| 365 | |
| 366 | if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1) |
| 367 | { |
| 368 | cannot_create: |
| 369 | errstr = "%s: cannot create file: %s\n"; |
| 370 | goto print_error; |
| 371 | } |
| 372 | |
| 373 | if (TEMP_FAILURE_RETRY (__libc_write (fd, buf, (expected_size |
| 374 | & (GLRO(dl_pagesize) |
| 375 | - 1)))) |
| 376 | < 0) |
| 377 | goto cannot_create; |
| 378 | } |
| 379 | else if (st.st_size != expected_size) |
| 380 | { |
| 381 | __close (fd); |
| 382 | wrong_format: |
| 383 | |
| 384 | if (addr != NULL) |
| 385 | __munmap ((void *) addr, expected_size); |
| 386 | |
| 387 | _dl_error_printf ("%s: file is no correct profile data file for `%s'\n", |
| 388 | filename, GLRO(dl_profile)); |
| 389 | return; |
| 390 | } |
| 391 | |
| 392 | addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE, |
| 393 | MAP_SHARED|MAP_FILE, fd, 0); |
| 394 | if (addr == (struct gmon_hdr *) MAP_FAILED) |
| 395 | { |
| 396 | errstr = "%s: cannot map file: %s\n"; |
| 397 | goto print_error; |
| 398 | } |
| 399 | |
| 400 | /* We don't need the file descriptor anymore. */ |
| 401 | __close (fd); |
| 402 | |
| 403 | /* Pointer to data after the header. */ |
| 404 | hist = (char *) (addr + 1); |
| 405 | kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t) |
| 406 | + sizeof (struct gmon_hist_hdr)); |
| 407 | |
| 408 | /* Compute pointer to array of the arc information. */ |
| 409 | narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t)); |
| 410 | data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t)); |
| 411 | |
| 412 | if (st.st_size == 0) |
| 413 | { |
| 414 | /* Create the signature. */ |
| 415 | memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr)); |
| 416 | |
| 417 | *(uint32_t *) hist = GMON_TAG_TIME_HIST; |
| 418 | memcpy (hist + sizeof (uint32_t), &hist_hdr, |
| 419 | sizeof (struct gmon_hist_hdr)); |
| 420 | |
| 421 | narcsp[-1] = GMON_TAG_CG_ARC; |
| 422 | } |
| 423 | else |
| 424 | { |
| 425 | /* Test the signature in the file. */ |
| 426 | if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 |
| 427 | || *(uint32_t *) hist != GMON_TAG_TIME_HIST |
| 428 | || memcmp (hist + sizeof (uint32_t), &hist_hdr, |
| 429 | sizeof (struct gmon_hist_hdr)) != 0 |
| 430 | || narcsp[-1] != GMON_TAG_CG_ARC) |
| 431 | goto wrong_format; |
| 432 | } |
| 433 | |
| 434 | /* Allocate memory for the froms data and the pointer to the tos records. */ |
| 435 | tos = (uint16_t *) calloc (tossize + fromssize, 1); |
| 436 | if (tos == NULL) |
| 437 | { |
| 438 | __munmap ((void *) addr, expected_size); |
| 439 | _dl_fatal_printf ("Out of memory while initializing profiler\n"); |
| 440 | /* NOTREACHED */ |
| 441 | } |
| 442 | |
| 443 | froms = (struct here_fromstruct *) ((char *) tos + tossize); |
| 444 | fromidx = 0; |
| 445 | |
| 446 | /* Now we have to process all the arc count entries. BTW: it is |
| 447 | not critical whether the *NARCSP value changes meanwhile. Before |
| 448 | we enter a new entry in to toset we will check that everything is |
| 449 | available in TOS. This happens in _dl_mcount. |
| 450 | |
| 451 | Loading the entries in reverse order should help to get the most |
| 452 | frequently used entries at the front of the list. */ |
| 453 | for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; ) |
| 454 | { |
| 455 | size_t to_index; |
| 456 | size_t newfromidx; |
| 457 | --idx; |
| 458 | to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos))); |
| 459 | newfromidx = fromidx++; |
| 460 | froms[newfromidx].here = &data[idx]; |
| 461 | froms[newfromidx].link = tos[to_index]; |
| 462 | tos[to_index] = newfromidx; |
| 463 | } |
| 464 | |
| 465 | /* Setup counting data. */ |
| 466 | if (kcountsize < highpc - lowpc) |
| 467 | { |
| 468 | #if 0 |
| 469 | s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1; |
| 470 | #else |
| 471 | size_t range = highpc - lowpc; |
| 472 | size_t quot = range / kcountsize; |
| 473 | |
| 474 | if (quot >= SCALE_1_TO_1) |
| 475 | s_scale = 1; |
| 476 | else if (quot >= SCALE_1_TO_1 / 256) |
| 477 | s_scale = SCALE_1_TO_1 / quot; |
| 478 | else if (range > ULONG_MAX / 256) |
| 479 | s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256)); |
| 480 | else |
| 481 | s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize); |
| 482 | #endif |
| 483 | } |
| 484 | else |
| 485 | s_scale = SCALE_1_TO_1; |
| 486 | |
| 487 | /* Start the profiler. */ |
| 488 | __profil ((void *) kcount, kcountsize, lowpc, s_scale); |
| 489 | |
| 490 | /* Turn on profiling. */ |
| 491 | running = 1; |
| 492 | } |
| 493 | |
| 494 | |
| 495 | void |
| 496 | _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) |
| 497 | { |
| 498 | volatile uint16_t *topcindex; |
| 499 | size_t i, fromindex; |
| 500 | struct here_fromstruct *fromp; |
| 501 | |
| 502 | if (! running) |
| 503 | return; |
| 504 | |
| 505 | /* Compute relative addresses. The shared object can be loaded at |
| 506 | any address. The value of frompc could be anything. We cannot |
| 507 | restrict it in any way, just set to a fixed value (0) in case it |
| 508 | is outside the allowed range. These calls show up as calls from |
| 509 | <external> in the gprof output. */ |
| 510 | frompc -= lowpc; |
| 511 | if (frompc >= textsize) |
| 512 | frompc = 0; |
| 513 | selfpc -= lowpc; |
| 514 | if (selfpc >= textsize) |
| 515 | goto done; |
| 516 | |
| 517 | /* Getting here we now have to find out whether the location was |
| 518 | already used. If yes we are lucky and only have to increment a |
| 519 | counter (this also has to be atomic). If the entry is new things |
| 520 | are getting complicated... */ |
| 521 | |
| 522 | /* Avoid integer divide if possible. */ |
| 523 | if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) |
| 524 | i = selfpc >> log_hashfraction; |
| 525 | else |
| 526 | i = selfpc / (HASHFRACTION * sizeof (*tos)); |
| 527 | |
| 528 | topcindex = &tos[i]; |
| 529 | fromindex = *topcindex; |
| 530 | |
| 531 | if (fromindex == 0) |
| 532 | goto check_new_or_add; |
| 533 | |
| 534 | fromp = &froms[fromindex]; |
| 535 | |
| 536 | /* We have to look through the chain of arcs whether there is already |
| 537 | an entry for our arc. */ |
| 538 | while (fromp->here->from_pc != frompc) |
| 539 | { |
| 540 | if (fromp->link != 0) |
| 541 | do |
| 542 | fromp = &froms[fromp->link]; |
| 543 | while (fromp->link != 0 && fromp->here->from_pc != frompc); |
| 544 | |
| 545 | if (fromp->here->from_pc != frompc) |
| 546 | { |
| 547 | topcindex = &fromp->link; |
| 548 | |
| 549 | check_new_or_add: |
| 550 | /* Our entry is not among the entries we read so far from the |
| 551 | data file. Now see whether we have to update the list. */ |
| 552 | while (narcs != *narcsp && narcs < fromlimit) |
| 553 | { |
| 554 | size_t to_index; |
| 555 | size_t newfromidx; |
| 556 | to_index = (data[narcs].self_pc |
| 557 | / (HASHFRACTION * sizeof (*tos))); |
| 558 | newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1; |
| 559 | froms[newfromidx].here = &data[narcs]; |
| 560 | froms[newfromidx].link = tos[to_index]; |
| 561 | tos[to_index] = newfromidx; |
| 562 | catomic_increment (&narcs); |
| 563 | } |
| 564 | |
| 565 | /* If we still have no entry stop searching and insert. */ |
| 566 | if (*topcindex == 0) |
| 567 | { |
| 568 | uint_fast32_t newarc = catomic_exchange_and_add (narcsp, 1); |
| 569 | |
| 570 | /* In rare cases it could happen that all entries in FROMS are |
| 571 | occupied. So we cannot count this anymore. */ |
| 572 | if (newarc >= fromlimit) |
| 573 | goto done; |
| 574 | |
| 575 | *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1; |
| 576 | fromp = &froms[*topcindex]; |
| 577 | |
| 578 | fromp->here = &data[newarc]; |
| 579 | data[newarc].from_pc = frompc; |
| 580 | data[newarc].self_pc = selfpc; |
| 581 | data[newarc].count = 0; |
| 582 | fromp->link = 0; |
| 583 | catomic_increment (&narcs); |
| 584 | |
| 585 | break; |
| 586 | } |
| 587 | |
| 588 | fromp = &froms[*topcindex]; |
| 589 | } |
| 590 | else |
| 591 | /* Found in. */ |
| 592 | break; |
| 593 | } |
| 594 | |
| 595 | /* Increment the counter. */ |
| 596 | catomic_increment (&fromp->here->count); |
| 597 | |
| 598 | done: |
| 599 | ; |
| 600 | } |
| 601 | rtld_hidden_def (_dl_mcount) |