/*
  This is a version (aka dlmalloc) of malloc/free/realloc written by
  Doug Lea and released to the public domain.  Use, modify, and
  redistribute this code without permission or acknowledgement in any
  way you wish.  Send questions, comments, complaints, performance
  data, etc to dl@cs.oswego.edu

  VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)

  Note: There may be an updated version of this malloc obtainable at
        ftp://gee.cs.oswego.edu/pub/misc/malloc.c
  Check before installing!

  Hacked up for uClibc by Erik Andersen <andersen@codepoet.org>
*/

#include <features.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <malloc.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <bits/uClibc_mutex.h>


__UCLIBC_MUTEX_EXTERN(__malloc_lock);
#define __MALLOC_LOCK   __UCLIBC_MUTEX_LOCK(__malloc_lock)
#define __MALLOC_UNLOCK __UCLIBC_MUTEX_UNLOCK(__malloc_lock)


/*
  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
  It must be a power of two at least 2 * (sizeof(size_t)), even on machines
  for which smaller alignments would suffice.  It may be defined as
  larger than this though.  Note however that code and data structures
  are optimized for the case of 8-byte alignment.
*/
#ifndef MALLOC_ALIGNMENT
#define MALLOC_ALIGNMENT       (2 * (sizeof(size_t)))
#endif

/* The corresponding bit mask value */
#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
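
/*
  Worked example (illustrative; assumes a 32-bit target where
  sizeof(size_t) == 4): MALLOC_ALIGNMENT is 8 and MALLOC_ALIGN_MASK is 7,
  so an address is acceptably aligned exactly when (addr & 7) == 0, and
  any size can be rounded up to alignment with (sz + 7) & ~7.  On a
  64-bit target (sizeof(size_t) == 8) the same macros give 16-byte
  alignment with mask 15.
*/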

/*
  TRIM_FASTBINS controls whether free() of a very small chunk can
  immediately lead to trimming.  Setting to true (1) can reduce memory
  footprint, but will almost always slow down programs that use a lot
  of small chunks.

  Define this only if you are willing to give up some speed to more
  aggressively reduce system-level memory footprint when releasing
  memory in programs that use many small chunks.  You can get
  essentially the same effect by setting MXFAST to 0, but this can
  lead to even greater slowdowns in programs using many small chunks.
  TRIM_FASTBINS is an in-between compile-time option that disables
  only those chunks bordering topmost memory from being placed in
  fastbins.
*/
#ifndef TRIM_FASTBINS
#define TRIM_FASTBINS  0
#endif


/*
  MORECORE-related declarations.  By default, rely on sbrk.
*/


/*
  MORECORE is the name of the routine to call to obtain more memory
  from the system.
*/
#ifndef MORECORE
#define MORECORE sbrk
#endif

/*
  MORECORE_FAILURE is the value returned upon failure of MORECORE
  as well as mmap.  Since it cannot be an otherwise valid memory address,
  and must reflect values of standard sys calls, you probably ought not
  try to redefine it.
*/
#ifndef MORECORE_FAILURE
#define MORECORE_FAILURE (-1)
#endif

/*
  If MORECORE_CONTIGUOUS is true, take advantage of the fact that
  consecutive calls to MORECORE with positive arguments always return
  contiguous increasing addresses.  This is true of unix sbrk.  Even
  if not defined, when regions happen to be contiguous, malloc will
  permit allocations spanning regions obtained from different
  calls.  But defining this when applicable enables some stronger
  consistency checks and space efficiencies.
*/
#ifndef MORECORE_CONTIGUOUS
#define MORECORE_CONTIGUOUS 1
#endif

/*
  MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
  sbrk fails, and mmap is used as a backup (which is done only if
  HAVE_MMAP).  The value must be a multiple of the page size.  This
  backup strategy generally applies only when systems have "holes" in
  address space, so sbrk cannot perform contiguous expansion, but
  there is still space available on the system.  On systems for which
  this is known to be useful (i.e. most linux kernels), this occurs
  only when programs allocate huge amounts of memory.  Between this,
  and the fact that mmap regions tend to be limited, the size should
  be large, to avoid too many mmap calls and thus avoid running out
  of kernel resources.
*/
#ifndef MMAP_AS_MORECORE_SIZE
#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
#endif

/*
  The system page size.  To the extent possible, this malloc manages
  memory from the system in page-size units.  Note that this value is
  cached during initialization into a field of malloc_state.  So even
  if malloc_getpagesize is a function, it is only called once.

  The following mechanics for getpagesize were adapted from bsd/gnu
  getpagesize.h.  If sysconf is not available, a guess of 4096 is used,
  which should be OK: when the guess is wrong, using the actual value
  probably doesn't impact performance much anyway.
*/
#ifndef malloc_getpagesize
# include <unistd.h>
# ifdef _SC_PAGESIZE
#  define malloc_getpagesize sysconf(_SC_PAGESIZE)
# else  /* just guess */
#  define malloc_getpagesize (4096)
# endif
#endif


/* mallopt tuning options */

/*
  M_MXFAST is the maximum request size used for "fastbins", special bins
  that hold returned chunks without consolidating their spaces.  This
  enables future requests for chunks of the same size to be handled
  very quickly, but can increase fragmentation, and thus increase the
  overall memory footprint of a program.

  This malloc manages fastbins very conservatively yet still
  efficiently, so fragmentation is rarely a problem for values less
  than or equal to the default.  The maximum supported value of MXFAST
  is 80.  You wouldn't want it any higher than this anyway.  Fastbins
  are designed especially for use with many small structs, objects or
  strings -- the default handles structs/objects/arrays with sizes up
  to 16 4-byte fields, or small strings representing words, tokens,
  etc.  Using fastbins for larger objects normally worsens
  fragmentation without improving speed.

  M_MXFAST is set in REQUEST size units.  It is internally used in
  chunksize units, which adds padding and alignment.  You can reduce
  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
  algorithm to be a closer approximation of fifo-best-fit in all cases,
  not just for larger requests, but will generally cause it to be
  slower.
*/


/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
#ifndef M_MXFAST
#define M_MXFAST            1
#endif

#ifndef DEFAULT_MXFAST
#define DEFAULT_MXFAST     64
#endif


/*
  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
  to keep before releasing via malloc_trim in free().

  Automatic trimming is mainly useful in long-lived programs.
  Because trimming via sbrk can be slow on some systems, and can
  sometimes be wasteful (in cases where programs immediately
  afterward allocate more large chunks) the value should be high
  enough so that your overall system performance would improve by
  releasing this much memory.

  The trim threshold and the mmap control parameters (see below)
  can be traded off with one another.  Trimming and mmapping are
  two different ways of releasing unused memory back to the
  system.  Between these two, it is often possible to keep
  system-level demands of a long-lived program down to a bare
  minimum.  For example, in one test suite of sessions measuring
  the XF86 X server on Linux, using a trim threshold of 128K and a
  mmap threshold of 192K led to near-minimal long term resource
  consumption.

  If you are using this malloc in a long-lived program, it should
  pay to experiment with these values.  As a rough guide, you
  might set to a value close to the average size of a process
  (program) running on your system.  Releasing this much memory
  would allow such a process to run in memory.  Generally, it's
  worth it to tune for trimming rather than memory mapping when a
  program undergoes phases where several large chunks are
  allocated and released in ways that can reuse each other's
  storage, perhaps mixed with phases where there are no such
  chunks at all.  And in well-behaved long-lived programs,
  controlling release of large blocks via trimming versus mapping
  is usually faster.

  However, in most programs, these parameters serve mainly as
  protection against the system-level effects of carrying around
  massive amounts of unneeded memory.  Since frequent calls to
  sbrk, mmap, and munmap otherwise degrade performance, the default
  parameters are set to relatively high values that serve only as
  safeguards.

  The trim value must be greater than the page size to have any useful
  effect.  To disable trimming completely, you can set it to
  (unsigned long)(-1).

  Trim settings interact with fastbin (MXFAST) settings: Unless
  TRIM_FASTBINS is defined, automatic trimming never takes place upon
  freeing a chunk with size less than or equal to MXFAST.  Trimming is
  instead delayed until subsequent freeing of larger chunks.  However,
  you can still force an attempted trim by calling malloc_trim.

  Also, trimming is not generally possible in cases where
  the main arena is obtained via mmap.

  Note that the trick some people use of mallocing a huge space and
  then freeing it at program startup, in an attempt to reserve system
  memory, doesn't have the intended effect under automatic trimming,
  since that memory will immediately be returned to the system.
*/
#define M_TRIM_THRESHOLD       -1

#ifndef DEFAULT_TRIM_THRESHOLD
#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
#endif

/*
  M_TOP_PAD is the amount of extra `padding' space to allocate or
  retain whenever sbrk is called.  It is used in two ways internally:

  * When sbrk is called to extend the top of the arena to satisfy
    a new malloc request, this much padding is added to the sbrk
    request.

  * When malloc_trim is called automatically from free(),
    it is used as the `pad' argument.

  In both cases, the actual amount of padding is rounded
  so that the end of the arena is always a system page boundary.

  The main reason for using padding is to avoid calling sbrk so
  often.  Having even a small pad greatly reduces the likelihood
  that nearly every malloc request during program start-up (or
  after trimming) will invoke sbrk, which needlessly wastes
  time.

  Automatic rounding-up to page-size units is normally sufficient
  to avoid measurable overhead, so the default is 0.  However, in
  systems where sbrk is relatively slow, it can pay to increase
  this value, at the expense of carrying around more memory than
  the program needs.
*/
#define M_TOP_PAD              -2

#ifndef DEFAULT_TOP_PAD
#define DEFAULT_TOP_PAD        (0)
#endif

/*
  M_MMAP_THRESHOLD is the request size threshold for using mmap()
  to service a request.  Requests of at least this size that cannot
  be allocated using already-existing space will be serviced via mmap.
  (If enough normal freed space already exists it is used instead.)

  Using mmap segregates relatively large chunks of memory so that
  they can be individually obtained and released from the host
  system.  A request serviced through mmap is never reused by any
  other request (at least not directly; the system may just so
  happen to remap successive requests to the same locations).

  Segregating space in this way has the benefits that:

  1. Mmapped space can ALWAYS be individually released back
     to the system, which helps keep the system level memory
     demands of a long-lived program low.
  2. Mapped memory can never become `locked' between
     other chunks, as can happen with normally allocated chunks, which
     means that even trimming via malloc_trim would not release them.
  3. On some systems with "holes" in address spaces, mmap can obtain
     memory that sbrk cannot.

  However, it has the disadvantages that:

  1. The space cannot be reclaimed, consolidated, and then
     used to service later requests, as happens with normal chunks.
  2. It can lead to more wastage because of mmap page alignment
     requirements.
  3. It causes malloc performance to be more dependent on host
     system memory management support routines which may vary in
     implementation quality and may impose arbitrary
     limitations.  Generally, servicing a request via normal
     malloc steps is faster than going through a system's mmap.

  The advantages of mmap nearly always outweigh the disadvantages for
  "large" chunks, but the value of "large" varies across systems.  The
  default is an empirically derived value that works well in most
  systems.
*/
#define M_MMAP_THRESHOLD      -3

#ifndef DEFAULT_MMAP_THRESHOLD
#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
#endif

/*
  M_MMAP_MAX is the maximum number of requests to simultaneously
  service using mmap.  This parameter exists because some systems
  have a limited number of internal tables for use by mmap, and
  using more than a few of them may degrade performance.

  The default is set to a value that serves only as a safeguard.
  Setting to 0 disables use of mmap for servicing large requests.  If
  HAVE_MMAP is not set, the default value is 0, and attempts to set it
  to non-zero values in mallopt will fail.
*/
#define M_MMAP_MAX             -4

#ifndef DEFAULT_MMAP_MAX
#define DEFAULT_MMAP_MAX       (65536)
#endif


/* ------------------ MMAP support ------------------ */
#include <fcntl.h>
#include <sys/mman.h>

#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif

#ifdef __ARCH_USE_MMU__
# define _MAP_UNINITIALIZE 0
#else
# define _MAP_UNINITIALIZE MAP_UNINITIALIZE
#endif

#define MMAP(addr, size, prot) \
 (mmap((addr), (size), (prot), MAP_PRIVATE|MAP_ANONYMOUS|_MAP_UNINITIALIZE, 0, 0))


/* ----------------------- Chunk representations ----------------------- */


/*
  This struct declaration is misleading (but accurate and necessary).
  It declares a "view" into memory allowing access to necessary
  fields at known offsets from a given base.  See explanation below.
*/

struct malloc_chunk {

  size_t      prev_size;  /* Size of previous chunk (if free).  */
  size_t      size;       /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;        /* double links -- used only if free. */
  struct malloc_chunk* bk;
};


typedef struct malloc_chunk* mchunkptr;

/*
   malloc_chunk details:

    (The following includes lightly edited explanations by Colin Plumb.)

    Chunks of memory are maintained using a `boundary tag' method as
    described in e.g., Knuth or Standish.  (See the paper by Paul
    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
    survey of such techniques.)  Sizes of free chunks are stored both
    in the front of each chunk and at the end.  This makes
    consolidating fragmented chunks into bigger chunks very fast.  The
    size fields also hold bits representing whether chunks are free or
    in use.

    An allocated chunk looks like this:


    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if allocated            | |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk, in bytes                         |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             User data starts here...                          .
            .                                                               .
            .             (malloc_usable_space() bytes)                     .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk                                     |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+


    Where "chunk" is the front of the chunk for the purpose of most of
    the malloc code, but "mem" is the pointer that is returned to the
    user.  "Nextchunk" is the beginning of the next contiguous chunk.

    Chunks always begin on even word boundaries, so the mem portion
    (which is returned to the user) is also on an even word boundary, and
    thus at least double-word aligned.

    Free chunks are stored in circular doubly-linked lists, and look like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk                            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `head:' |             Size of chunk, in bytes                         |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Forward pointer to next chunk in list             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Back pointer to previous chunk in list            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Unused space (may be 0 bytes long)                .
            .                                                               .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `foot:' |             Size of chunk, in bytes                           |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
    chunk size (which is always a multiple of two words), is an in-use
    bit for the *previous* chunk.  If that bit is *clear*, then the
    word before the current chunk size contains the previous chunk
    size, and can be used to find the front of the previous chunk.
    The very first chunk allocated always has this bit set,
    preventing access to non-existent (or non-owned) memory.  If
    prev_inuse is set for any given chunk, then you CANNOT determine
    the size of the previous chunk, and might even get a memory
    addressing fault when trying to do so.

    Note that the `foot' of the current chunk is actually represented
    as the prev_size of the NEXT chunk.  This makes it easier to
    deal with alignments etc but can be very confusing when trying
    to extend or adapt this code.

    The two exceptions to all this are

     1. The special chunk `top' doesn't bother using the
        trailing size field since there is no next contiguous chunk
        that would have to index off it.  After initialization, `top'
        is forced to always exist.  If it would become less than
        MINSIZE bytes long, it is replenished.

     2. Chunks allocated via mmap, which have the second-lowest-order
        bit (IS_MMAPPED) set in their size fields.  Because they are
        allocated one-by-one, each must contain its own trailing size
        field.

*/

/*
  ---------- Size and alignment checks and conversions ----------
*/

/* conversion from malloc headers to user pointers, and back */

#define chunk2mem(p)   ((void*)((char*)(p) + 2*(sizeof(size_t))))
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*(sizeof(size_t))))

/* The smallest possible chunk */
#define MIN_CHUNK_SIZE        (sizeof(struct malloc_chunk))

/* The smallest size we can malloc is an aligned minimal chunk */

#define MINSIZE  \
  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

/* Check if m has acceptable alignment */

#define aligned_OK(m)  (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0)
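
/*
  Worked example (illustrative; assumes a 32-bit target with
  sizeof(size_t) == 4, so the user pointer sits 8 bytes past the chunk
  base): if a chunk starts at 0x1000, chunk2mem gives the user pointer
  0x1008 (just past prev_size and size), and mem2chunk(0x1008) recovers
  0x1000.  MIN_CHUNK_SIZE is sizeof(struct malloc_chunk) == 16, which is
  already 8-byte aligned, so MINSIZE == 16: even malloc(1) consumes a
  16-byte chunk.
*/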

/* Check if a request is so large that it would wrap around zero when
   padded and aligned.  To simplify some other code, the bound is made
   low enough so that adding MINSIZE will also not wrap around zero.
*/

#define REQUEST_OUT_OF_RANGE(req)                                 \
  ((unsigned long)(req) >=                                        \
   (unsigned long)(size_t)(-2 * MINSIZE))

/* pad request bytes into a usable size -- internal version */

#define request2size(req)                                         \
  (((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK < MINSIZE)  ?    \
   MINSIZE :                                                      \
   ((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
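
/*
  Worked example (illustrative; 32-bit target, sizeof(size_t) == 4,
  MALLOC_ALIGN_MASK == 7, MINSIZE == 16):

      request2size(13)  ->  (13 + 4 + 7) & ~7  ==  24
      request2size(24)  ->  (24 + 4 + 7) & ~7  ==  32
      request2size(1)   ->  1 + 4 + 7 < 16, so MINSIZE  ==  16

  Only one word of overhead (the chunk's own size field) is added,
  because an in-use chunk may also use the prev_size field of the
  chunk that follows it as user space.
*/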

/* Same, except also perform argument check */

#define checked_request2size(req, sz)                             \
  if (REQUEST_OUT_OF_RANGE(req)) {                                \
    errno = ENOMEM;                                               \
    return 0;                                                     \
  }                                                               \
  (sz) = request2size(req);

/*
  --------------- Physical chunk operations ---------------
*/


/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
#define PREV_INUSE 0x1

/* extract inuse bit of previous chunk */
#define prev_inuse(p)       ((p)->size & PREV_INUSE)


/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
#define IS_MMAPPED 0x2

/* check for mmap()'ed chunk */
#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)

/* Bits to mask off when extracting size

  Note: IS_MMAPPED is intentionally not masked off from size field in
  macros for which mmapped chunks should never be seen.  This should
  cause helpful core dumps to occur if it is tried by accident by
  people extending or adapting this malloc.
*/
#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)

/* Get size, ignoring use bits */
#define chunksize(p)        ((p)->size & ~(SIZE_BITS))


/* Ptr to next physical malloc_chunk. */
#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) ))

/* Ptr to previous physical malloc_chunk */
#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))

/* Treat space at ptr + offset as a chunk */
#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))

/* extract p's inuse bit */
#define inuse(p)\
((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)

/* set/clear chunk as being inuse without otherwise disturbing */
#define set_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE

#define clear_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)


/* check/set/clear inuse bits in known places */
#define inuse_bit_at_offset(p, s)\
 (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)

#define set_inuse_bit_at_offset(p, s)\
 (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)

#define clear_inuse_bit_at_offset(p, s)\
 (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))


/* Set size at head, without disturbing its use bit */
#define set_head_size(p, s)  ((p)->size = (((p)->size & PREV_INUSE) | (s)))

/* Set size/use field */
#define set_head(p, s)       ((p)->size = (s))

/* Set size at footer (only when chunk is not in use) */
#define set_foot(p, s)       (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
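
#if 0
/*
  Illustrative sketch (excluded from the build): how the physical-chunk
  macros above combine when a freed chunk is coalesced forward.  This is
  a simplification of the real free() path, which must also unlink the
  next chunk from its bin before merging the sizes.
*/
static void coalesce_forward_sketch(mchunkptr p)
{
    size_t size = chunksize(p);
    mchunkptr nextchunk = chunk_at_offset(p, size);
    size_t nextsize = chunksize(nextchunk);

    if (!inuse_bit_at_offset(nextchunk, nextsize)) {
        /* next chunk is free: absorb it, preserving our own P bit */
        size += nextsize;
        /* ... unlink(nextchunk, bck, fwd) would go here ... */
        set_head(p, size | prev_inuse(p));
        set_foot(p, size);  /* foot lives in the following chunk's prev_size */
    } else {
        /* next chunk stays in use; just mark p free in its neighbour */
        clear_inuse_bit_at_offset(nextchunk, 0);
        set_head(p, size | prev_inuse(p));
        set_foot(p, size);
    }
}
#endif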


/* -------------------- Internal data structures -------------------- */

/*
  Bins

    An array of bin headers for free chunks.  Each bin is doubly
    linked.  The bins are approximately proportionally (log) spaced.
    There are a lot of these bins (96 in this uClibc build).  This may
    look excessive, but works very well in practice.  Most bins hold
    sizes that are unusual as malloc request sizes, but are more usual
    for fragments and consolidated sets of chunks, which is what these
    bins hold, so they can be found quickly.  All procedures maintain
    the invariant that no consolidated chunk physically borders another
    one, so each chunk in a list is known to be preceded and followed
    by either inuse chunks or the ends of memory.

    Chunks in bins are kept in size order, with ties going to the
    approximately least recently used chunk.  Ordering isn't needed
    for the small bins, which all contain the same-sized chunks, but
    facilitates best-fit allocation for larger chunks.  These lists
    are just sequential.  Keeping them in order almost never requires
    enough traversal to warrant using fancier ordered data
    structures.

    Chunks of the same size are linked with the most
    recently freed at the front, and allocations are taken from the
    back.  This results in LRU (FIFO) allocation order, which tends
    to give each chunk an equal opportunity to be consolidated with
    adjacent freed chunks, resulting in larger free chunks and less
    fragmentation.

    To simplify use in double-linked lists, each bin header acts
    as a malloc_chunk.  This avoids special-casing for headers.
    But to conserve space and improve locality, we allocate
    only the fd/bk pointers of bins, and then use repositioning tricks
    to treat these as the fields of a malloc_chunk*.
*/

typedef struct malloc_chunk* mbinptr;

/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - ((sizeof(size_t))<<1)))

/* analog of ++bin */
#define next_bin(b)  ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))

/* Reminders about list directionality within bins */
#define first(b)     ((b)->fd)
#define last(b)      ((b)->bk)

/* Take a chunk off a bin list */
#define unlink(P, BK, FD) {                                       \
  FD = P->fd;                                                     \
  BK = P->bk;                                                     \
  if (FD->bk != P || BK->fd != P)                                 \
      abort();                                                    \
  FD->bk = BK;                                                    \
  BK->fd = FD;                                                    \
}
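
#if 0
/*
  Illustrative sketch (excluded from the build): taking the oldest
  chunk off a bin with unlink().  The integrity test inside unlink()
  (FD->bk == P && BK->fd == P) makes corrupted list pointers abort()
  rather than be written through -- the classic "safe unlinking"
  hardening check.
*/
static mchunkptr pop_oldest_sketch(mstate av, int idx)
{
    mbinptr bin = bin_at(av, idx);
    mchunkptr victim = last(bin);    /* allocations come from the back */
    if (victim == bin)
        return NULL;                 /* bin is empty: header links to itself */
    {
        mchunkptr bck, fwd;
        unlink(victim, bck, fwd);
    }
    return victim;
}
#endif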

/*
  Indexing

    Bins for sizes below MIN_LARGE_SIZE contain chunks of all the same
    size, spaced 8 bytes apart.  Larger bins are approximately
    logarithmically spaced, along the lines of the original dlmalloc
    layout:

    64 bins of size       8
    32 bins of size      64
    16 bins of size     512
     8 bins of size    4096
     4 bins of size   32768
     2 bins of size  262144
     1 bin  of size what's left

    The bins top out around 1MB because we expect to service large
    requests via mmap.
*/

#define NBINS              96
#define NSMALLBINS         32
#define SMALLBIN_WIDTH      8
#define MIN_LARGE_SIZE    256

#define in_smallbin_range(sz)  \
  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)

#define smallbin_index(sz)     (((unsigned)(sz)) >> 3)

#define bin_index(sz) \
 ((in_smallbin_range(sz)) ? smallbin_index(sz) : __malloc_largebin_index(sz))
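
/*
  Worked example (illustrative): chunk sizes are always multiples of 8,
  so each small bin holds exactly one size.  A free chunk of 40 bytes is
  in smallbin range (40 < 256) and lands in bin smallbin_index(40) ==
  40 >> 3 == 5; a 48-byte chunk lands in bin 6.  Chunks of 256 bytes and
  up are mapped by __malloc_largebin_index(), where one bin covers a
  whole range of sizes and sorting within the bin preserves best fit.
*/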

/*
  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
  first bin that is maintained in sorted order.  This must
  be the smallest size corresponding to a given bin.

  Normally, this should be MIN_LARGE_SIZE.  But you can weaken
  best fit guarantees to sometimes speed up malloc by increasing
  this value.  Doing so means that malloc may choose a chunk that is
  non-best-fitting by up to the width of the bin.

  Some useful cutoff values:
      512 - all bins sorted
     2560 - leaves bins <=     64 bytes wide unsorted
    12288 - leaves bins <=    512 bytes wide unsorted
    65536 - leaves bins <=   4096 bytes wide unsorted
   262144 - leaves bins <=  32768 bytes wide unsorted
       -1 - no bins sorted (not recommended!)
*/

#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
/* #define FIRST_SORTED_BIN_SIZE 65536 */

/*
  Unsorted chunks

    All remainders from chunk splits, as well as all returned chunks,
    are first placed in the "unsorted" bin.  They are then placed
    in regular bins after malloc gives them ONE chance to be used before
    binning.  So, basically, the unsorted_chunks list acts as a queue,
    with chunks being placed on it in free (and __malloc_consolidate),
    and taken off (to be either used or placed in bins) in malloc.
*/

/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
#define unsorted_chunks(M)       (bin_at(M, 1))

/*
  Top

    The top-most available chunk (i.e., the one bordering the end of
    available memory) is treated specially.  It is never included in
    any bin, is used only if no other chunk is available, and is
    released back to the system if it is very large (see
    M_TRIM_THRESHOLD).  Because top initially
    points to its own bin with initial zero size, thus forcing
    extension on the first malloc request, we avoid having any special
    code in malloc to check whether it even exists yet.  But we still
    need to do so when getting memory from the system, so we make
    initial_top treat the bin as a legal but unusable chunk during the
    interval between initialization and the first call to
    __malloc_alloc.  (This is somewhat delicate, since it relies on
    the 2 preceding words to be zero during this interval as well.)
*/

/* Conveniently, the unsorted bin can be used as dummy top on first call */
#define initial_top(M)           (unsorted_chunks(M))

/*
  Binmap

    To help compensate for the large number of bins, a one-level index
    structure is used for bin-by-bin searching.  `binmap' is a
    bitvector recording whether bins are definitely empty so they can
    be skipped over during traversals.  The bits are NOT always
    cleared as soon as bins are empty, but instead only
    when they are noticed to be empty during traversal in malloc.
*/

/* Conservatively use 32 bits per map word, even if on 64bit system */
#define BINMAPSHIFT      5
#define BITSPERMAP       (1U << BINMAPSHIFT)
#define BINMAPSIZE       (NBINS / BITSPERMAP)

#define idx2block(i)     ((i) >> BINMAPSHIFT)
#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))

#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i))
#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i))
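
/*
  Worked example (illustrative): with BINMAPSHIFT == 5, each map word
  covers 32 bins, so BINMAPSIZE == 96/32 == 3 words.  Bin 70 lives in
  block idx2block(70) == 70 >> 5 == 2, under bit idx2bit(70) ==
  1 << (70 & 31) == 1 << 6 == 0x40.  A malloc scan can therefore skip
  32 definitely-empty bins with a single word test.
*/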

/*
  Fastbins

    An array of lists holding recently freed small chunks.  Fastbins
    are not doubly linked.  It is faster to single-link them, and
    since chunks are never removed from the middles of these lists,
    double linking is not necessary.  Also, unlike regular bins, they
    are not even processed in FIFO order (they use faster LIFO) since
    ordering doesn't much matter in the transient contexts in which
    fastbins are normally used.

    Chunks in fastbins keep their inuse bit set, so they cannot
    be consolidated with other free chunks.  __malloc_consolidate
    releases all chunks in fastbins and consolidates them with
    other free chunks.
*/

typedef struct malloc_chunk* mfastbinptr;

/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz)        ((((unsigned int)(sz)) >> 3) - 2)

/* The maximum fastbin request size we support */
#define MAX_FAST_SIZE     80

#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1)
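
/*
  Worked example (illustrative; 32-bit target): the smallest chunk size
  is MINSIZE == 16, and fastbin_index(16) == (16 >> 3) - 2 == 0, so the
  "- 2" offset makes the smallest chunk land in fastbin 0.  A 32-byte
  chunk maps to index 2.  request2size(80) == 88 and fastbin_index(88)
  == 9, so NFASTBINS == 10 on such a target.
*/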

/*
  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
  that triggers automatic consolidation of possibly-surrounding
  fastbin chunks.  This is a heuristic, so the exact value should not
  matter too much.  It is defined at half the default trim threshold as a
  compromise heuristic to only attempt consolidation if it is likely
  to lead to trimming.  However, it is not dynamically tunable, since
  consolidation reduces fragmentation surrounding large chunks even
  if trimming is not used.
*/

#define FASTBIN_CONSOLIDATION_THRESHOLD  \
  ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)

/*
  Since the lowest 2 bits in max_fast don't matter in size comparisons,
  they are used as flags.
*/

/*
  ANYCHUNKS_BIT held in max_fast indicates that there may be any
  freed chunks at all.  It is set true when entering a chunk into any
  bin.
*/

#define ANYCHUNKS_BIT        (1U)

#define have_anychunks(M)    (((M)->max_fast &  ANYCHUNKS_BIT))
#define set_anychunks(M)     ((M)->max_fast |=  ANYCHUNKS_BIT)
#define clear_anychunks(M)   ((M)->max_fast &= ~ANYCHUNKS_BIT)

/*
  FASTCHUNKS_BIT held in max_fast indicates that there are probably
  some fastbin chunks.  It is set true on entering a chunk into any
  fastbin, and cleared only in __malloc_consolidate.
*/

#define FASTCHUNKS_BIT       (2U)

#define have_fastchunks(M)   (((M)->max_fast &  FASTCHUNKS_BIT))
#define set_fastchunks(M)    ((M)->max_fast |=  (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
#define clear_fastchunks(M)  ((M)->max_fast &= ~(FASTCHUNKS_BIT))

/* Set value of max_fast.  Use impossibly small value if 0.  */
#define set_max_fast(M, s) \
  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
  ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT))

#define get_max_fast(M) \
  ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
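
/*
  Worked example (illustrative; 32-bit target): set_max_fast(M, 64)
  stores request2size(64) == 72.  Because chunk sizes are multiples of
  8, the low two bits of 72 are free to carry the flags: if fastbin
  chunks then exist, max_fast becomes 72 | 3 == 75, while
  get_max_fast(M) still reads back 72 for size comparisons.
*/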


/*
  morecore_properties is a status word holding dynamically discovered
  or controlled properties of the morecore function
*/

#define MORECORE_CONTIGUOUS_BIT  (1U)

#define contiguous(M) \
        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT))
#define noncontiguous(M) \
        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT) == 0)
#define set_contiguous(M) \
        ((M)->morecore_properties |=  MORECORE_CONTIGUOUS_BIT)
#define set_noncontiguous(M) \
        ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)


/*
   ----------- Internal state representation and initialization -----------
*/

struct malloc_state {

  /* The maximum chunk size to be eligible for fastbin */
  size_t          max_fast;   /* low 2 bits used as flags */

  /* Fastbins */
  mfastbinptr     fastbins[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr       top;

  /* The remainder from the most recent split of a small request */
  mchunkptr       last_remainder;

  /* Normal bins packed as described above */
  mchunkptr       bins[NBINS * 2];

  /* Bitmap of bins.  Trailing zero map handles cases of largest binned size */
  unsigned int    binmap[BINMAPSIZE+1];

  /* Tunable parameters */
  unsigned long   trim_threshold;
  size_t          top_pad;
  size_t          mmap_threshold;

  /* Memory map support */
  int             n_mmaps;
  int             n_mmaps_max;
  int             max_n_mmaps;

  /* Cache malloc_getpagesize */
  unsigned int    pagesize;

  /* Track properties of MORECORE */
  unsigned int    morecore_properties;

  /* Statistics */
  size_t          mmapped_mem;
  size_t          sbrked_mem;
  size_t          max_sbrked_mem;
  size_t          max_mmapped_mem;
  size_t          max_total_mem;
};

typedef struct malloc_state *mstate;

/*
   There is exactly one instance of this struct in this malloc.
   If you are adapting this malloc in a way that does NOT use a static
   malloc_state, you MUST explicitly zero-fill it before using.  This
   malloc relies on the property that malloc_state is initialized to
   all zeroes (as is true of C statics).
*/
extern struct malloc_state __malloc_state;  /* never directly referenced */

/*
   All uses of av_ are via get_malloc_state().
   At most one "call" to get_malloc_state is made per invocation of
   the public versions of malloc and free, but other routines
   that in turn invoke malloc and/or free may call more than once.
   Also, it is called in check* routines if __UCLIBC_MALLOC_DEBUGGING__
   is set.
*/

#define get_malloc_state() (&(__malloc_state))

/* Internal utilities (with external linkage) operating on mstates */
void __malloc_consolidate(mstate) attribute_hidden;


/* Debugging support */
#ifndef __UCLIBC_MALLOC_DEBUGGING__

#define check_chunk(P)
#define check_free_chunk(P)
#define check_inuse_chunk(P)
#define check_remalloced_chunk(P,N)
#define check_malloced_chunk(P,N)
#define check_malloc_state()
#define assert(x) ((void)0)


#else

#define check_chunk(P)              __do_check_chunk(P)
#define check_free_chunk(P)         __do_check_free_chunk(P)
#define check_inuse_chunk(P)        __do_check_inuse_chunk(P)
#define check_remalloced_chunk(P,N) __do_check_remalloced_chunk(P,N)
#define check_malloced_chunk(P,N)   __do_check_malloced_chunk(P,N)
#define check_malloc_state()        __do_check_malloc_state()

extern void __do_check_chunk(mchunkptr p);
extern void __do_check_free_chunk(mchunkptr p);
extern void __do_check_inuse_chunk(mchunkptr p);
extern void __do_check_remalloced_chunk(mchunkptr p, size_t s);
extern void __do_check_malloced_chunk(mchunkptr p, size_t s);
extern void __do_check_malloc_state(void);

#include <assert.h>

#endif