| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Profiling of shared libraries. | 
|  | 2 | Copyright (C) 1997-2016 Free Software Foundation, Inc. | 
|  | 3 | This file is part of the GNU C Library. | 
|  | 4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | 
|  | 5 | Based on the BSD mcount implementation. | 
|  | 6 |  | 
|  | 7 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 8 | modify it under the terms of the GNU Lesser General Public | 
|  | 9 | License as published by the Free Software Foundation; either | 
|  | 10 | version 2.1 of the License, or (at your option) any later version. | 
|  | 11 |  | 
|  | 12 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 15 | Lesser General Public License for more details. | 
|  | 16 |  | 
|  | 17 | You should have received a copy of the GNU Lesser General Public | 
|  | 18 | License along with the GNU C Library; if not, see | 
|  | 19 | <http://www.gnu.org/licenses/>.  */ | 
|  | 20 |  | 
|  | 21 | #include <assert.h> | 
|  | 22 | #include <errno.h> | 
|  | 23 | #include <fcntl.h> | 
|  | 24 | #include <inttypes.h> | 
|  | 25 | #include <limits.h> | 
|  | 26 | #include <stdio.h> | 
|  | 27 | #include <stdlib.h> | 
|  | 28 | #include <string.h> | 
|  | 29 | #include <unistd.h> | 
|  | 30 | #include <stdint.h> | 
|  | 31 | #include <ldsodefs.h> | 
|  | 32 | #include <sys/gmon.h> | 
|  | 33 | #include <sys/gmon_out.h> | 
|  | 34 | #include <sys/mman.h> | 
|  | 35 | #include <sys/param.h> | 
|  | 36 | #include <sys/stat.h> | 
|  | 37 | #include <atomic.h> | 
|  | 38 |  | 
|  | 39 | /* The LD_PROFILE feature has to be implemented different to the | 
|  | 40 | normal profiling using the gmon/ functions.  The problem is that an | 
|  | 41 | arbitrary number of processes can simultaneously be run using | 
|  | 42 | profiling and all write the results in the same file.  To provide | 
|  | 43 | this mechanism one could implement a complicated mechanism to merge | 
|  | 44 | the content of two profiling runs or one could extend the file | 
|  | 45 | format to allow more than one data set.  For the second solution we | 
|  | 46 | would have the problem that the file can grow in size beyond any | 
|  | 47 | limit and both solutions have the problem that the concurrency of | 
|  | 48 | writing the results is a big problem. | 
|  | 49 |  | 
|  | 50 | Another much simpler method is to use mmap to map the same file in | 
|  | 51 | all using programs and modify the data in the mmap'ed area and so | 
|  | 52 | also automatically on the disk.  Using the MAP_SHARED option of | 
|  | 53 | mmap(2) this can be done without big problems in more than one | 
|  | 54 | file. | 
|  | 55 |  | 
|  | 56 | This approach is very different from the normal profiling.  We have | 
|  | 57 | to use the profiling data in exactly the way they are expected to | 
|  | 58 | be written to disk.  But the normal format used by gprof is not usable | 
|  | 59 | to do this.  It is optimized for size.  It writes the tags as single | 
|  | 60 | bytes but this means that the following 32/64 bit values are | 
|  | 61 | unaligned. | 
|  | 62 |  | 
|  | 63 | Therefore we use a new format.  This will look like this | 
|  | 64 |  | 
|  | 65 | 0  1  2  3	<- byte is 32 bit word | 
|  | 66 | 0000				g  m  o  n | 
|  | 67 | 0004				*version*	<- GMON_SHOBJ_VERSION | 
|  | 68 | 0008				00 00 00 00 | 
|  | 69 | 000c				00 00 00 00 | 
|  | 70 | 0010				00 00 00 00 | 
|  | 71 |  | 
|  | 72 | 0014				*tag*		<- GMON_TAG_TIME_HIST | 
|  | 73 | 0018				?? ?? ?? ?? | 
|  | 74 | ?? ?? ?? ??	<- 32/64 bit LowPC | 
|  | 75 | 0018+A				?? ?? ?? ?? | 
|  | 76 | ?? ?? ?? ??	<- 32/64 bit HighPC | 
|  | 77 | 0018+2*A			*histsize* | 
|  | 78 | 001c+2*A			*profrate* | 
|  | 79 | 0020+2*A			s  e  c  o | 
|  | 80 | 0024+2*A			n  d  s  \0 | 
|  | 81 | 0028+2*A			\0 \0 \0 \0 | 
|  | 82 | 002c+2*A			\0 \0 \0 | 
|  | 83 | 002f+2*A			s | 
|  | 84 |  | 
|  | 85 | 0030+2*A			?? ?? ?? ??	<- Count data | 
|  | 86 | ...				... | 
|  | 87 | 0030+2*A+K			?? ?? ?? ?? | 
|  | 88 |  | 
|  | 89 | 0030+2*A+K			*tag*		<- GMON_TAG_CG_ARC | 
|  | 90 | 0034+2*A+K			*lastused* | 
|  | 91 | 0038+2*A+K			?? ?? ?? ?? | 
|  | 92 | ?? ?? ?? ??	<- FromPC#1 | 
|  | 93 | 0038+3*A+K			?? ?? ?? ?? | 
|  | 94 | ?? ?? ?? ??	<- ToPC#1 | 
|  | 95 | 0038+4*A+K			?? ?? ?? ??	<- Count#1 | 
|  | 96 | ...				...		   ... | 
|  | 97 | 0038+(2*(CN-1)+2)*A+(CN-1)*4+K	?? ?? ?? ?? | 
|  | 98 | ?? ?? ?? ??	<- FromPC#CGN | 
|  | 99 | 0038+(2*(CN-1)+3)*A+(CN-1)*4+K	?? ?? ?? ?? | 
|  | 100 | ?? ?? ?? ??	<- ToPC#CGN | 
|  | 101 | 0038+(2*CN+2)*A+(CN-1)*4+K	?? ?? ?? ??	<- Count#CGN | 
|  | 102 |  | 
|  | 103 | We put (for now?) no basic block information in the file since this would | 
|  | 104 | introduce race conditions among all the processes that want to write them. | 
|  | 105 |  | 
|  | 106 | `K' is the number of count entries which is computed as | 
|  | 107 |  | 
|  | 108 | textsize / HISTFRACTION | 
|  | 109 |  | 
|  | 110 | `CN' in the above table is the number of call graph arcs.  Normally, | 
|  | 111 | the table is sparse and the profiling code writes out only those | 
|  | 112 | entries which are really used in the program run.  But since we must | 
|  | 113 | not extend this table (the profiling file) we'll keep them all here. | 
|  | 114 | So CN can be computed in advance as | 
|  | 115 |  | 
|  | 116 | MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS | 
|  | 117 |  | 
|  | 118 | Now the remaining question is: how to build the data structures we can | 
|  | 119 | work with from this data.  We need the from set and must associate the | 
|  | 120 | froms with all the associated tos.  We will do this by constructing this | 
|  | 121 | data structures at the program start.  To do this we'll simply visit all | 
|  | 122 | entries in the call graph table and add it to the appropriate list.  */ | 
|  | 123 |  | 
/* Profiling clock tick frequency, provided by the libc profiling
   support; written into the histogram header as `prof_rate'.  */
extern int __profile_frequency (void);
libc_hidden_proto (__profile_frequency)
|  | 126 |  | 
/* We define a special type to address the elements of the arc table.
   This is basically the `gmon_cg_arc_record' format but it includes
   the room for the tag and it uses real types.  The layout must match
   the on-disk profile file exactly, hence the `packed' attribute.  */
struct here_cg_arc_record
  {
    uintptr_t from_pc;		/* Caller address (relative to LOWPC).  */
    uintptr_t self_pc;		/* Callee address (relative to LOWPC).  */
    /* The count field is atomically incremented in _dl_mcount, which
       requires it to be properly aligned for its type, and for this
       alignment to be visible to the compiler.  The amount of data
       before an array of this structure is calculated as
       expected_size in _dl_start_profile.  Everything in that
       calculation is a multiple of 4 bytes (in the case of
       kcountsize, because it is derived from a subtraction of
       page-aligned values, and the corresponding calculation in
       __monstartup also ensures it is at least a multiple of the size
       of u_long), so all copies of this field do in fact have the
       appropriate alignment.  */
    uint32_t count __attribute__ ((aligned (__alignof__ (uint32_t))));
  } __attribute__ ((packed));
|  | 147 |  | 
/* The arc table inside the mmap'ed profile file; updates go straight
   to disk via the shared mapping.  */
static struct here_cg_arc_record *data;

/* Nonzero if profiling is under way.  */
static int running;

/* Number of entries which have been incorporated into the in-memory
   TOS/FROMS lists so far.  */
static uint32_t narcs;
/* This is a pointer to the object representing the number of entries
   currently in the mmaped file.  At no point of time this has to be the
   same as NARCS.  If it is equal all entries from the file are in our
   lists.  */
static volatile uint32_t *narcsp;
|  | 160 |  | 
|  | 161 |  | 
/* Node of the per-process chains that group the shared arc records by
   callee hash bucket (see TOS below).  */
struct here_fromstruct
  {
    struct here_cg_arc_record volatile *here;	/* Arc record in the file.  */
    uint16_t link;	/* Index of next node in FROMS; 0 terminates.  */
  };
|  | 167 |  | 
/* Hash table mapping a callee-PC bucket to the head index (into FROMS)
   of its chain; 0 means empty bucket.  */
static volatile uint16_t *tos;

/* Pool of chain nodes and its allocation state.  */
static struct here_fromstruct *froms;
static uint32_t fromlimit;		/* Capacity of FROMS / arc table.  */
static volatile uint32_t fromidx;	/* Index of last allocated FROMS node.  */

/* Profiled text range and derived constants.  */
static uintptr_t lowpc;			/* Rounded-down start of text.  */
static size_t textsize;			/* highpc - lowpc.  */
static unsigned int log_hashfraction;	/* Shift replacing the hash divide.  */
|  | 178 |  | 
|  | 179 |  | 
/* Set up profiling data to profile object described by MAP.  The output
   file is found (or created) in OUTPUT_DIR.  On any error a message is
   printed and the function returns with profiling left disabled.  */
void
internal_function
_dl_start_profile (void)
{
  char *filename;
  int fd;
  struct stat64 st;
  const ElfW(Phdr) *ph;
  ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
  ElfW(Addr) mapend = 0;
  char *hist, *cp;
  size_t idx;
  size_t tossize;
  size_t fromssize;
  uintptr_t highpc;
  uint16_t *kcount;
  size_t kcountsize;
  struct gmon_hdr *addr = NULL;
  off_t expected_size;
  /* See profil(2) where this is described.  */
  int s_scale;
#define SCALE_1_TO_1	0x10000L
  const char *errstr = NULL;

  /* Compute the size of the sections which contain program code.
     Only executable PT_LOAD segments count; bounds are widened to
     page boundaries.  */
  for (ph = GL(dl_profile_map)->l_phdr;
       ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph)
    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
      {
        ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1));
        ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1)
                          & ~(GLRO(dl_pagesize) - 1));

        if (start < mapstart)
          mapstart = start;
        if (end > mapend)
          mapend = end;
      }

  /* Now we can compute the size of the profiling data.  This is done
     with the same formulas as in `monstartup' (see gmon.c).  */
  running = 0;
  lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr,
                     HISTFRACTION * sizeof (HISTCOUNTER));
  highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr,
                    HISTFRACTION * sizeof (HISTCOUNTER));
  textsize = highpc - lowpc;
  kcountsize = textsize / HISTFRACTION;
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
    {
      /* If HASHFRACTION is a power of two, mcount can use shifting
         instead of integer division.  Precompute shift amount.

         This is a constant but the compiler cannot compile the
         expression away since the __ffs implementation is not known
         to the compiler.  Help the compiler by precomputing the
         usual cases.  */
      assert (HASHFRACTION == 2);

      if (sizeof (*froms) == 8)
        log_hashfraction = 4;
      else if (sizeof (*froms) == 16)
        log_hashfraction = 5;
      else
        log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1;
    }
  else
    log_hashfraction = -1;
  tossize = textsize / HASHFRACTION;
  /* Size the arc table with the usual ARCDENSITY heuristic, clamped
     to [MINARCS, MAXARCS]; see the file-format comment above.  */
  fromlimit = textsize * ARCDENSITY / 100;
  if (fromlimit < MINARCS)
    fromlimit = MINARCS;
  if (fromlimit > MAXARCS)
    fromlimit = MAXARCS;
  fromssize = fromlimit * sizeof (struct here_fromstruct);

  /* Total file size we expect: gmon header, histogram tag + header +
     counters, arc tag + shared arc counter + arc records.
     NOTE(review): FROMSSIZE is already a byte count, so multiplying it
     again by sizeof (struct here_cg_arc_record) looks like a large
     over-estimate of the arc-table bytes; it is self-consistent here
     because the same expression sizes both creation and validation of
     the file, but confirm against the upstream format description.  */
  expected_size = (sizeof (struct gmon_hdr)
                   + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
                   + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));

  /* Create the gmon_hdr we expect or write.  This mirrors struct
     gmon_hdr but with properly typed fields; the offset/size checks
     below verify the two layouts really coincide.  */
  struct real_gmon_hdr
  {
    char cookie[4];
    int32_t version;
    char spare[3 * 4];
  } gmon_hdr;
  if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr)
      || (offsetof (struct real_gmon_hdr, cookie)
          != offsetof (struct gmon_hdr, cookie))
      || (offsetof (struct real_gmon_hdr, version)
          != offsetof (struct gmon_hdr, version)))
    abort ();

  memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
  gmon_hdr.version = GMON_SHOBJ_VERSION;
  memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare));

  /* Create the hist_hdr we expect or write; again layout-checked
     against the canonical struct gmon_hist_hdr.  */
  struct real_gmon_hist_hdr
  {
    char *low_pc;
    char *high_pc;
    int32_t hist_size;
    int32_t prof_rate;
    char dimen[15];
    char dimen_abbrev;
  } hist_hdr;
  if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr)
      || (offsetof (struct real_gmon_hist_hdr, low_pc)
          != offsetof (struct gmon_hist_hdr, low_pc))
      || (offsetof (struct real_gmon_hist_hdr, high_pc)
          != offsetof (struct gmon_hist_hdr, high_pc))
      || (offsetof (struct real_gmon_hist_hdr, hist_size)
          != offsetof (struct gmon_hist_hdr, hist_size))
      || (offsetof (struct real_gmon_hist_hdr, prof_rate)
          != offsetof (struct gmon_hist_hdr, prof_rate))
      || (offsetof (struct real_gmon_hist_hdr, dimen)
          != offsetof (struct gmon_hist_hdr, dimen))
      || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev)
          != offsetof (struct gmon_hist_hdr, dimen_abbrev)))
    abort ();

  hist_hdr.low_pc = (char *) mapstart;
  hist_hdr.high_pc = (char *) mapend;
  hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
  hist_hdr.prof_rate = __profile_frequency ();
  if (sizeof (hist_hdr.dimen) >= sizeof ("seconds"))
    {
      /* "seconds" fits: copy it and zero-pad the remainder so the
         field compares equal byte-for-byte against an existing file.  */
      memcpy (hist_hdr.dimen, "seconds", sizeof ("seconds"));
      memset (hist_hdr.dimen + sizeof ("seconds"), '\0',
              sizeof (hist_hdr.dimen) - sizeof ("seconds"));
    }
  else
    strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
  hist_hdr.dimen_abbrev = 's';

  /* First determine the output name.  We write in the directory
     OUTPUT_DIR and the name is composed from the shared objects
     soname (or the file name) and the ending ".profile".  */
  filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1
                              + strlen (GLRO(dl_profile)) + sizeof ".profile");
  cp = __stpcpy (filename, GLRO(dl_profile_output));
  *cp++ = '/';
  __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile");

#ifdef O_NOFOLLOW
# define EXTRA_FLAGS | O_NOFOLLOW
#else
# define EXTRA_FLAGS
#endif
  fd = __open (filename, O_RDWR | O_CREAT EXTRA_FLAGS, DEFFILEMODE);
  if (fd == -1)
    {
      char buf[400];
      int errnum;

      /* We cannot write the profiling data so don't do anything.  */
      errstr = "%s: cannot open file: %s\n";
    print_error:
      /* Shared error path: save errno before __close can clobber it.  */
      errnum = errno;
      if (fd != -1)
        __close (fd);
      _dl_error_printf (errstr, filename,
                        __strerror_r (errnum, buf, sizeof buf));
      return;
    }

  if (__fxstat64 (_STAT_VER, fd, &st) < 0 || !S_ISREG (st.st_mode))
    {
      /* Not stat'able or not a regular file => don't use it.  */
      errstr = "%s: cannot stat file: %s\n";
      goto print_error;
    }

  /* Test the size.  If it does not match what we expect from the size
     values in the map MAP we don't use it and warn the user.  */
  if (st.st_size == 0)
    {
      /* We have to create the file.  Seek to the last full page and
         write the remaining partial page of zeros; the skipped pages
         become a sparse hole, extending the file to EXPECTED_SIZE.  */
      char buf[GLRO(dl_pagesize)];

      memset (buf, '\0', GLRO(dl_pagesize));

      if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1)
        {
        cannot_create:
          errstr = "%s: cannot create file: %s\n";
          goto print_error;
        }

      if (TEMP_FAILURE_RETRY (__libc_write (fd, buf, (expected_size
                                                      & (GLRO(dl_pagesize)
                                                         - 1))))
          < 0)
        goto cannot_create;
    }
  else if (st.st_size != expected_size)
    {
      __close (fd);
    wrong_format:

      if (addr != NULL)
        __munmap ((void *) addr, expected_size);

      _dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
                        filename, GLRO(dl_profile));
      return;
    }

  /* Map the whole file shared so every profiled process updates the
     same on-disk data in place.  */
  addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE,
                                     MAP_SHARED|MAP_FILE, fd, 0);
  if (addr == (struct gmon_hdr *) MAP_FAILED)
    {
      errstr = "%s: cannot map file: %s\n";
      goto print_error;
    }

  /* We don't need the file descriptor anymore.  */
  __close (fd);

  /* Pointer to data after the header.  */
  hist = (char *) (addr + 1);
  kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
                         + sizeof (struct gmon_hist_hdr));

  /* Compute pointer to array of the arc information.  NARCSP points at
     the shared arc counter; the arc tag word sits just before it.  */
  narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
  data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));

  if (st.st_size == 0)
    {
      /* Freshly created file: write the signature and section tags.  */
      memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr));

      *(uint32_t *) hist = GMON_TAG_TIME_HIST;
      memcpy (hist + sizeof (uint32_t), &hist_hdr,
              sizeof (struct gmon_hist_hdr));

      narcsp[-1] = GMON_TAG_CG_ARC;
    }
  else
    {
      /* Existing file: verify the signature and tags match what we
         would have written; otherwise refuse to use it.  */
      if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
          || *(uint32_t *) hist != GMON_TAG_TIME_HIST
          || memcmp (hist + sizeof (uint32_t), &hist_hdr,
                     sizeof (struct gmon_hist_hdr)) != 0
          || narcsp[-1] != GMON_TAG_CG_ARC)
        goto wrong_format;
    }

  /* Allocate memory for the froms data and the pointer to the tos records.
     One calloc covers both arrays; calloc zeroes them, which makes all
     TOS buckets empty and all FROMS links 0.  */
  tos = (uint16_t *) calloc (tossize + fromssize, 1);
  if (tos == NULL)
    {
      __munmap ((void *) addr, expected_size);
      _dl_fatal_printf ("Out of memory while initializing profiler\n");
      /* NOTREACHED */
    }

  froms = (struct here_fromstruct *) ((char *) tos + tossize);
  fromidx = 0;

  /* Now we have to process all the arc count entries.  BTW: it is
     not critical whether the *NARCSP value changes meanwhile.  Before
     we enter a new entry in to toset we will check that everything is
     available in TOS.  This happens in _dl_mcount.

     Loading the entries in reverse order should help to get the most
     frequently used entries at the front of the list.  */
  for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
    {
      size_t to_index;
      size_t newfromidx;
      --idx;
      to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos)));
      newfromidx = fromidx++;
      froms[newfromidx].here = &data[idx];
      froms[newfromidx].link = tos[to_index];
      tos[to_index] = newfromidx;
    }

  /* Setup counting data.  Compute the profil(2) scale factor mapping
     the text range onto the histogram buffer.  */
  if (kcountsize < highpc - lowpc)
    {
#if 0
      s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
#else
      /* Integer-only approximation of the disabled floating-point
         expression above, chosen per range to avoid overflow.  */
      size_t range = highpc - lowpc;
      size_t quot = range / kcountsize;

      if (quot >= SCALE_1_TO_1)
        s_scale = 1;
      else if (quot >= SCALE_1_TO_1 / 256)
        s_scale = SCALE_1_TO_1 / quot;
      else if (range > ULONG_MAX / 256)
        s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
      else
        s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
#endif
    }
  else
    s_scale = SCALE_1_TO_1;

  /* Start the profiler.  */
  __profil ((void *) kcount, kcountsize, lowpc, s_scale);

  /* Turn on profiling.  */
  running = 1;
}
|  | 493 |  | 
|  | 494 |  | 
/* Record one call arc FROMPC -> SELFPC.  Called from the profiling PLT
   trampoline on every intercepted call; updates the shared mmap'ed arc
   table using catomic_* operations because multiple processes may be
   writing the same file concurrently.  */
void
_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
{
  volatile uint16_t *topcindex;
  size_t i, fromindex;
  struct here_fromstruct *fromp;

  if (! running)
    return;

  /* Compute relative addresses.  The shared object can be loaded at
     any address.  The value of frompc could be anything.  We cannot
     restrict it in any way, just set to a fixed value (0) in case it
     is outside the allowed range.  These calls show up as calls from
     <external> in the gprof output.  */
  frompc -= lowpc;
  if (frompc >= textsize)
    frompc = 0;
  selfpc -= lowpc;
  if (selfpc >= textsize)
    goto done;

  /* Getting here we now have to find out whether the location was
     already used.  If yes we are lucky and only have to increment a
     counter (this also has to be atomic).  If the entry is new things
     are getting complicated...  */

  /* Avoid integer divide if possible.  The power-of-two test is a
     compile-time constant, so only one branch survives; the shift
     amount was precomputed in _dl_start_profile.  */
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
    i = selfpc >> log_hashfraction;
  else
    i = selfpc / (HASHFRACTION * sizeof (*tos));

  topcindex = &tos[i];
  fromindex = *topcindex;

  /* Empty bucket: no arc with this callee is known locally yet.  */
  if (fromindex == 0)
    goto check_new_or_add;

  fromp = &froms[fromindex];

  /* We have to look through the chain of arcs whether there is already
     an entry for our arc.  */
  while (fromp->here->from_pc != frompc)
    {
      /* Walk the chain until it ends or a matching caller is found.  */
      if (fromp->link != 0)
        do
          fromp = &froms[fromp->link];
        while (fromp->link != 0 && fromp->here->from_pc != frompc);

      if (fromp->here->from_pc != frompc)
        {
          /* Chain exhausted without a match; remember the link slot a
             new node would have to be hung off.  */
          topcindex = &fromp->link;

        check_new_or_add:
          /* Our entry is not among the entries we read so far from the
             data file.  Now see whether we have to update the list:
             pull in any arcs other processes appended to the shared
             table since we last looked.  */
          while (narcs != *narcsp && narcs < fromlimit)
            {
              size_t to_index;
              size_t newfromidx;
              to_index = (data[narcs].self_pc
                          / (HASHFRACTION * sizeof (*tos)));
              newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1;
              froms[newfromidx].here = &data[narcs];
              froms[newfromidx].link = tos[to_index];
              tos[to_index] = newfromidx;
              catomic_increment (&narcs);
            }

          /* If we still have no entry stop searching and insert.  */
          if (*topcindex == 0)
            {
              /* Atomically claim a fresh slot in the shared arc table.  */
              uint_fast32_t newarc = catomic_exchange_and_add (narcsp, 1);

              /* In rare cases it could happen that all entries in FROMS are
                 occupied.  So we cannot count this anymore.  */
              if (newarc >= fromlimit)
                goto done;

              *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1;
              fromp = &froms[*topcindex];

              fromp->here = &data[newarc];
              data[newarc].from_pc = frompc;
              data[newarc].self_pc = selfpc;
              data[newarc].count = 0;
              fromp->link = 0;
              catomic_increment (&narcs);

              break;
            }

          /* Another process/thread filled the slot meanwhile; retry the
             search from there.  */
          fromp = &froms[*topcindex];
        }
      else
        /* Found an existing entry for this arc.  */
        break;
    }

  /* Increment the counter (atomically -- the record is shared).  */
  catomic_increment (&fromp->here->count);

 done:
  ;
}
rtld_hidden_def (_dl_mcount)