| /* |
| This is a version (aka dlmalloc) of malloc/free/realloc written by |
| Doug Lea and released to the public domain. Use, modify, and |
| redistribute this code without permission or acknowledgement in any |
| way you wish. Send questions, comments, complaints, performance |
| data, etc to dl@cs.oswego.edu |
| |
| VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) |
| |
| Note: There may be an updated version of this malloc obtainable at |
| ftp://gee.cs.oswego.edu/pub/misc/malloc.c |
| Check before installing! |
| |
| Hacked up for uClibc by Erik Andersen <andersen@codepoet.org> |
| */ |
| |
| #include <features.h> |
| #include <stddef.h> |
| #include <unistd.h> |
| #include <errno.h> |
| #include <string.h> |
| #include <malloc.h> |
| #include <stdlib.h> |
| #include <sys/mman.h> |
| #include <bits/uClibc_mutex.h> |
| |
| |
| |
| __UCLIBC_MUTEX_EXTERN(__malloc_lock); |
| #define __MALLOC_LOCK __UCLIBC_MUTEX_LOCK(__malloc_lock) |
| #define __MALLOC_UNLOCK __UCLIBC_MUTEX_UNLOCK(__malloc_lock) |
| |
| |
| |
| /* |
| MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. |
| It must be a power of two at least 2 * (sizeof(size_t)), even on machines |
| for which smaller alignments would suffice. It may be defined as |
| larger than this though. Note however that code and data structures |
| are optimized for the case of 8-byte alignment. |
| */ |
| #ifndef MALLOC_ALIGNMENT |
| #define MALLOC_ALIGNMENT (2 * (sizeof(size_t))) |
| #endif |
| |
| /* The corresponding bit mask value */ |
| #define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) |
| |
| /* |
| TRIM_FASTBINS controls whether free() of a very small chunk can |
| immediately lead to trimming. Setting to true (1) can reduce memory |
| footprint, but will almost always slow down programs that use a lot |
| of small chunks. |
| |
| Define this only if you are willing to give up some speed to more |
| aggressively reduce system-level memory footprint when releasing |
| memory in programs that use many small chunks. You can get |
| essentially the same effect by setting MXFAST to 0, but this can |
| lead to even greater slowdowns in programs using many small chunks. |
| TRIM_FASTBINS is an in-between compile-time option, that disables |
| only those chunks bordering topmost memory from being placed in |
| fastbins. |
| */ |
| #ifndef TRIM_FASTBINS |
| #define TRIM_FASTBINS 0 |
| #endif |
| |
| |
| /* |
| MORECORE-related declarations. By default, rely on sbrk |
| */ |
| |
| |
| /* |
| MORECORE is the name of the routine to call to obtain more memory |
| from the system. See below for general guidance on writing |
| alternative MORECORE functions, as well as a version for WIN32 and a |
| sample version for pre-OSX macos. |
| */ |
| #ifndef MORECORE |
| #define MORECORE sbrk |
| #endif |
| |
| /* |
| MORECORE_FAILURE is the value returned upon failure of MORECORE |
| as well as mmap. Since it cannot be an otherwise valid memory address, |
| and must reflect values of standard sys calls, you probably ought not |
| try to redefine it. |
| */ |
| #ifndef MORECORE_FAILURE |
| #define MORECORE_FAILURE (-1) |
| #endif |
| |
| /* |
| If MORECORE_CONTIGUOUS is true, take advantage of fact that |
| consecutive calls to MORECORE with positive arguments always return |
| contiguous increasing addresses. This is true of unix sbrk. Even |
| if not defined, when regions happen to be contiguous, malloc will |
| permit allocations spanning regions obtained from different |
| calls. But defining this when applicable enables some stronger |
| consistency checks and space efficiencies. |
| */ |
| #ifndef MORECORE_CONTIGUOUS |
| #define MORECORE_CONTIGUOUS 1 |
| #endif |
| |
| /* |
| MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if |
| sbrk fails, and mmap is used as a backup (which is done only if |
| HAVE_MMAP). The value must be a multiple of page size. This |
| backup strategy generally applies only when systems have "holes" in |
| address space, so sbrk cannot perform contiguous expansion, but |
| there is still space available on system. On systems for which |
| this is known to be useful (i.e. most linux kernels), this occurs |
| only when programs allocate huge amounts of memory. Between this, |
| and the fact that mmap regions tend to be limited, the size should |
| be large, to avoid too many mmap calls and thus avoid running out |
| of kernel resources. |
| */ |
| #ifndef MMAP_AS_MORECORE_SIZE |
| #define MMAP_AS_MORECORE_SIZE (1024 * 1024) |
| #endif |
| |
| /* |
| The system page size. To the extent possible, this malloc manages |
| memory from the system in page-size units. Note that this value is |
| cached during initialization into a field of malloc_state. So even |
| if malloc_getpagesize is a function, it is only called once. |
| |
| The following mechanics for getpagesize were adapted from bsd/gnu |
| getpagesize.h. If none of the system-probes here apply, a value of |
| 4096 is used, which should be OK: If they don't apply, then using |
| the actual value probably doesn't impact performance. |
| */ |
| #ifndef malloc_getpagesize |
| # include <unistd.h> |
| # define malloc_getpagesize sysconf(_SC_PAGESIZE) |
| #else /* just guess */ |
| # define malloc_getpagesize (4096) |
| #endif |
| |
| |
| /* mallopt tuning options */ |
| |
| /* |
| M_MXFAST is the maximum request size used for "fastbins", special bins |
| that hold returned chunks without consolidating their spaces. This |
| enables future requests for chunks of the same size to be handled |
| very quickly, but can increase fragmentation, and thus increase the |
| overall memory footprint of a program. |
| |
| This malloc manages fastbins very conservatively yet still |
| efficiently, so fragmentation is rarely a problem for values less |
| than or equal to the default. The maximum supported value of MXFAST |
| is 80. You wouldn't want it any higher than this anyway. Fastbins |
| are designed especially for use with many small structs, objects or |
| strings -- the default handles structs/objects/arrays with sizes up |
| to 16 4byte fields, or small strings representing words, tokens, |
| etc. Using fastbins for larger objects normally worsens |
| fragmentation without improving speed. |
| |
| M_MXFAST is set in REQUEST size units. It is internally used in |
| chunksize units, which adds padding and alignment. You can reduce |
| M_MXFAST to 0 to disable all use of fastbins. This causes the malloc |
| algorithm to be a closer approximation of fifo-best-fit in all cases, |
| not just for larger requests, but will generally cause it to be |
| slower. |
| */ |
| |
| |
| /* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ |
| #ifndef M_MXFAST |
| #define M_MXFAST 1 |
| #endif |
| |
| #ifndef DEFAULT_MXFAST |
| #define DEFAULT_MXFAST 64 |
| #endif |
| |
| |
| /* |
| M_TRIM_THRESHOLD is the maximum amount of unused top-most memory |
| to keep before releasing via malloc_trim in free(). |
| |
| Automatic trimming is mainly useful in long-lived programs. |
| Because trimming via sbrk can be slow on some systems, and can |
| sometimes be wasteful (in cases where programs immediately |
| afterward allocate more large chunks) the value should be high |
| enough so that your overall system performance would improve by |
| releasing this much memory. |
| |
| The trim threshold and the mmap control parameters (see below) |
| can be traded off with one another. Trimming and mmapping are |
| two different ways of releasing unused memory back to the |
| system. Between these two, it is often possible to keep |
| system-level demands of a long-lived program down to a bare |
| minimum. For example, in one test suite of sessions measuring |
| the XF86 X server on Linux, using a trim threshold of 128K and a |
| mmap threshold of 192K led to near-minimal long term resource |
| consumption. |
| |
| If you are using this malloc in a long-lived program, it should |
| pay to experiment with these values. As a rough guide, you |
| might set to a value close to the average size of a process |
| (program) running on your system. Releasing this much memory |
| would allow such a process to run in memory. Generally, it's |
| worth it to tune for trimming rather tham memory mapping when a |
| program undergoes phases where several large chunks are |
| allocated and released in ways that can reuse each other's |
| storage, perhaps mixed with phases where there are no such |
| chunks at all. And in well-behaved long-lived programs, |
| controlling release of large blocks via trimming versus mapping |
| is usually faster. |
| |
| However, in most programs, these parameters serve mainly as |
| protection against the system-level effects of carrying around |
| massive amounts of unneeded memory. Since frequent calls to |
| sbrk, mmap, and munmap otherwise degrade performance, the default |
| parameters are set to relatively high values that serve only as |
| safeguards. |
| |
| The trim value must be greater than page size to have any useful |
| effect. To disable trimming completely, you can set to |
| (unsigned long)(-1) |
| |
| Trim settings interact with fastbin (MXFAST) settings: Unless |
| TRIM_FASTBINS is defined, automatic trimming never takes place upon |
| freeing a chunk with size less than or equal to MXFAST. Trimming is |
| instead delayed until subsequent freeing of larger chunks. However, |
| you can still force an attempted trim by calling malloc_trim. |
| |
| Also, trimming is not generally possible in cases where |
| the main arena is obtained via mmap. |
| |
| Note that the trick some people use of mallocing a huge space and |
| then freeing it at program startup, in an attempt to reserve system |
| memory, doesn't have the intended effect under automatic trimming, |
| since that memory will immediately be returned to the system. |
| */ |
| #define M_TRIM_THRESHOLD -1 |
| |
| #ifndef DEFAULT_TRIM_THRESHOLD |
| #define DEFAULT_TRIM_THRESHOLD (256 * 1024) |
| #endif |
| |
| /* |
| M_TOP_PAD is the amount of extra `padding' space to allocate or |
| retain whenever sbrk is called. It is used in two ways internally: |
| |
| * When sbrk is called to extend the top of the arena to satisfy |
| a new malloc request, this much padding is added to the sbrk |
| request. |
| |
| * When malloc_trim is called automatically from free(), |
| it is used as the `pad' argument. |
| |
| In both cases, the actual amount of padding is rounded |
| so that the end of the arena is always a system page boundary. |
| |
| The main reason for using padding is to avoid calling sbrk so |
| often. Having even a small pad greatly reduces the likelihood |
| that nearly every malloc request during program start-up (or |
| after trimming) will invoke sbrk, which needlessly wastes |
| time. |
| |
| Automatic rounding-up to page-size units is normally sufficient |
| to avoid measurable overhead, so the default is 0. However, in |
| systems where sbrk is relatively slow, it can pay to increase |
| this value, at the expense of carrying around more memory than |
| the program needs. |
| */ |
| #define M_TOP_PAD -2 |
| |
| #ifndef DEFAULT_TOP_PAD |
| #define DEFAULT_TOP_PAD (0) |
| #endif |
| |
| /* |
| M_MMAP_THRESHOLD is the request size threshold for using mmap() |
| to service a request. Requests of at least this size that cannot |
| be allocated using already-existing space will be serviced via mmap. |
| (If enough normal freed space already exists it is used instead.) |
| |
| Using mmap segregates relatively large chunks of memory so that |
| they can be individually obtained and released from the host |
| system. A request serviced through mmap is never reused by any |
| other request (at least not directly; the system may just so |
| happen to remap successive requests to the same locations). |
| |
| Segregating space in this way has the benefits that: |
| |
| 1. Mmapped space can ALWAYS be individually released back |
| to the system, which helps keep the system level memory |
| demands of a long-lived program low. |
| 2. Mapped memory can never become `locked' between |
| other chunks, as can happen with normally allocated chunks, which |
| means that even trimming via malloc_trim would not release them. |
| 3. On some systems with "holes" in address spaces, mmap can obtain |
| memory that sbrk cannot. |
| |
| However, it has the disadvantages that: |
| |
| 1. The space cannot be reclaimed, consolidated, and then |
| used to service later requests, as happens with normal chunks. |
| 2. It can lead to more wastage because of mmap page alignment |
| requirements |
| 3. It causes malloc performance to be more dependent on host |
| system memory management support routines which may vary in |
| implementation quality and may impose arbitrary |
| limitations. Generally, servicing a request via normal |
| malloc steps is faster than going through a system's mmap. |
| |
| The advantages of mmap nearly always outweigh disadvantages for |
| "large" chunks, but the value of "large" varies across systems. The |
| default is an empirically derived value that works well in most |
| systems. |
| */ |
| #define M_MMAP_THRESHOLD -3 |
| |
| #ifndef DEFAULT_MMAP_THRESHOLD |
| #define DEFAULT_MMAP_THRESHOLD (256 * 1024) |
| #endif |
| |
| /* |
| M_MMAP_MAX is the maximum number of requests to simultaneously |
| service using mmap. This parameter exists because |
| . Some systems have a limited number of internal tables for |
| use by mmap, and using more than a few of them may degrade |
| performance. |
| |
| The default is set to a value that serves only as a safeguard. |
| Setting to 0 disables use of mmap for servicing large requests. If |
| HAVE_MMAP is not set, the default value is 0, and attempts to set it |
| to non-zero values in mallopt will fail. |
| */ |
| #define M_MMAP_MAX -4 |
| |
| #ifndef DEFAULT_MMAP_MAX |
| #define DEFAULT_MMAP_MAX (65536) |
| #endif |
| |
| |
| /* ------------------ MMAP support ------------------ */ |
| #include <fcntl.h> |
| #include <sys/mman.h> |
| |
| #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) |
| #define MAP_ANONYMOUS MAP_ANON |
| #endif |
| |
| #ifdef __ARCH_USE_MMU__ |
| # define _MAP_UNINITIALIZE 0 |
| #else |
| # define _MAP_UNINITIALIZE MAP_UNINITIALIZE |
| #endif |
| |
| #define MMAP(addr, size, prot) \ |
| (mmap((addr), (size), (prot), MAP_PRIVATE|MAP_ANONYMOUS|_MAP_UNINITIALIZE, 0, 0)) |
| |
| |
| /* ----------------------- Chunk representations ----------------------- */ |
| |
| |
| /* |
| This struct declaration is misleading (but accurate and necessary). |
| It declares a "view" into memory allowing access to necessary |
| fields at known offsets from a given base. See explanation below. |
| */ |
| |
| struct malloc_chunk { |
| |
| size_t prev_size; /* Size of previous chunk (if free). */ |
| size_t size; /* Size in bytes, including overhead. */ |
| |
| struct malloc_chunk* fd; /* double links -- used only if free. */ |
| struct malloc_chunk* bk; |
| }; |
| |
| |
| typedef struct malloc_chunk* mchunkptr; |
| |
| /* |
| malloc_chunk details: |
| |
| (The following includes lightly edited explanations by Colin Plumb.) |
| |
| Chunks of memory are maintained using a `boundary tag' method as |
| described in e.g., Knuth or Standish. (See the paper by Paul |
| Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a |
| survey of such techniques.) Sizes of free chunks are stored both |
| in the front of each chunk and at the end. This makes |
| consolidating fragmented chunks into bigger chunks very fast. The |
| size fields also hold bits representing whether chunks are free or |
| in use. |
| |
| An allocated chunk looks like this: |
| |
| |
| chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Size of previous chunk, if allocated | | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Size of chunk, in bytes |P| |
| mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | User data starts here... . |
| . . |
| . (malloc_usable_space() bytes) . |
| . | |
| nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Size of chunk | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| |
| |
| Where "chunk" is the front of the chunk for the purpose of most of |
| the malloc code, but "mem" is the pointer that is returned to the |
| user. "Nextchunk" is the beginning of the next contiguous chunk. |
| |
| Chunks always begin on even word boundries, so the mem portion |
| (which is returned to the user) is also on an even word boundary, and |
| thus at least double-word aligned. |
| |
| Free chunks are stored in circular doubly-linked lists, and look like this: |
| |
| chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Size of previous chunk | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| `head:' | Size of chunk, in bytes |P| |
| mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Forward pointer to next chunk in list | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Back pointer to previous chunk in list | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| | Unused space (may be 0 bytes long) . |
| . . |
| . | |
| nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| `foot:' | Size of chunk, in bytes | |
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| |
| The P (PREV_INUSE) bit, stored in the unused low-order bit of the |
| chunk size (which is always a multiple of two words), is an in-use |
| bit for the *previous* chunk. If that bit is *clear*, then the |
| word before the current chunk size contains the previous chunk |
| size, and can be used to find the front of the previous chunk. |
| The very first chunk allocated always has this bit set, |
| preventing access to non-existent (or non-owned) memory. If |
| prev_inuse is set for any given chunk, then you CANNOT determine |
| the size of the previous chunk, and might even get a memory |
| addressing fault when trying to do so. |
| |
| Note that the `foot' of the current chunk is actually represented |
| as the prev_size of the NEXT chunk. This makes it easier to |
| deal with alignments etc but can be very confusing when trying |
| to extend or adapt this code. |
| |
| The two exceptions to all this are |
| |
| 1. The special chunk `top' doesn't bother using the |
| trailing size field since there is no next contiguous chunk |
| that would have to index off it. After initialization, `top' |
| is forced to always exist. If it would become less than |
| MINSIZE bytes long, it is replenished. |
| |
| 2. Chunks allocated via mmap, which have the second-lowest-order |
| bit (IS_MMAPPED) set in their size fields. Because they are |
| allocated one-by-one, each must contain its own trailing size field. |
| |
| */ |
| |
| /* |
| ---------- Size and alignment checks and conversions ---------- |
| */ |
| |
| /* conversion from malloc headers to user pointers, and back */ |
| |
| #define chunk2mem(p) ((void*)((char*)(p) + 2*(sizeof(size_t)))) |
| #define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*(sizeof(size_t)))) |
| |
| /* The smallest possible chunk */ |
| #define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) |
| |
| /* The smallest size we can malloc is an aligned minimal chunk */ |
| |
| #define MINSIZE \ |
| (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) |
| |
| /* Check if m has acceptable alignment */ |
| |
| #define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) |
| |
| |
| /* Check if a request is so large that it would wrap around zero when |
| padded and aligned. To simplify some other code, the bound is made |
| low enough so that adding MINSIZE will also not wrap around sero. |
| */ |
| |
| #define REQUEST_OUT_OF_RANGE(req) \ |
| ((unsigned long)(req) >= \ |
| (unsigned long)(size_t)(-2 * MINSIZE)) |
| |
| /* pad request bytes into a usable size -- internal version */ |
| |
| #define request2size(req) \ |
| (((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK < MINSIZE) ? \ |
| MINSIZE : \ |
| ((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) |
| |
| /* Same, except also perform argument check */ |
| |
| #define checked_request2size(req, sz) \ |
| if (REQUEST_OUT_OF_RANGE(req)) { \ |
| errno = ENOMEM; \ |
| return 0; \ |
| } \ |
| (sz) = request2size(req); |
| |
| /* |
| --------------- Physical chunk operations --------------- |
| */ |
| |
| |
| /* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ |
| #define PREV_INUSE 0x1 |
| |
| /* extract inuse bit of previous chunk */ |
| #define prev_inuse(p) ((p)->size & PREV_INUSE) |
| |
| |
| /* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ |
| #define IS_MMAPPED 0x2 |
| |
| /* check for mmap()'ed chunk */ |
| #define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) |
| |
| /* Bits to mask off when extracting size |
| |
| Note: IS_MMAPPED is intentionally not masked off from size field in |
| macros for which mmapped chunks should never be seen. This should |
| cause helpful core dumps to occur if it is tried by accident by |
| people extending or adapting this malloc. |
| */ |
| #define SIZE_BITS (PREV_INUSE|IS_MMAPPED) |
| |
| /* Get size, ignoring use bits */ |
| #define chunksize(p) ((p)->size & ~(SIZE_BITS)) |
| |
| |
| /* Ptr to next physical malloc_chunk. */ |
| #define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) )) |
| |
| /* Ptr to previous physical malloc_chunk */ |
| #define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) |
| |
| /* Treat space at ptr + offset as a chunk */ |
| #define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) |
| |
| /* extract p's inuse bit */ |
| #define inuse(p)\ |
| ((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE) |
| |
| /* set/clear chunk as being inuse without otherwise disturbing */ |
| #define set_inuse(p)\ |
| ((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE |
| |
| #define clear_inuse(p)\ |
| ((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE) |
| |
| |
| /* check/set/clear inuse bits in known places */ |
| #define inuse_bit_at_offset(p, s)\ |
| (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) |
| |
| #define set_inuse_bit_at_offset(p, s)\ |
| (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) |
| |
| #define clear_inuse_bit_at_offset(p, s)\ |
| (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) |
| |
| |
| /* Set size at head, without disturbing its use bit */ |
| #define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s))) |
| |
| /* Set size/use field */ |
| #define set_head(p, s) ((p)->size = (s)) |
| |
| /* Set size at footer (only when chunk is not in use) */ |
| #define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) |
| |
| |
| /* -------------------- Internal data structures -------------------- */ |
| |
| /* |
| Bins |
| |
| An array of bin headers for free chunks. Each bin is doubly |
| linked. The bins are approximately proportionally (log) spaced. |
| There are a lot of these bins (128). This may look excessive, but |
| works very well in practice. Most bins hold sizes that are |
| unusual as malloc request sizes, but are more usual for fragments |
| and consolidated sets of chunks, which is what these bins hold, so |
| they can be found quickly. All procedures maintain the invariant |
| that no consolidated chunk physically borders another one, so each |
| chunk in a list is known to be preceeded and followed by either |
| inuse chunks or the ends of memory. |
| |
| Chunks in bins are kept in size order, with ties going to the |
| approximately least recently used chunk. Ordering isn't needed |
| for the small bins, which all contain the same-sized chunks, but |
| facilitates best-fit allocation for larger chunks. These lists |
| are just sequential. Keeping them in order almost never requires |
| enough traversal to warrant using fancier ordered data |
| structures. |
| |
| Chunks of the same size are linked with the most |
| recently freed at the front, and allocations are taken from the |
| back. This results in LRU (FIFO) allocation order, which tends |
| to give each chunk an equal opportunity to be consolidated with |
| adjacent freed chunks, resulting in larger free chunks and less |
| fragmentation. |
| |
| To simplify use in double-linked lists, each bin header acts |
| as a malloc_chunk. This avoids special-casing for headers. |
| But to conserve space and improve locality, we allocate |
| only the fd/bk pointers of bins, and then use repositioning tricks |
| to treat these as the fields of a malloc_chunk*. |
| */ |
| |
| typedef struct malloc_chunk* mbinptr; |
| |
| /* addressing -- note that bin_at(0) does not exist */ |
| #define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - ((sizeof(size_t))<<1))) |
| |
| /* analog of ++bin */ |
| #define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) |
| |
| /* Reminders about list directionality within bins */ |
| #define first(b) ((b)->fd) |
| #define last(b) ((b)->bk) |
| |
| /* Take a chunk off a bin list */ |
| #define unlink(P, BK, FD) { \ |
| FD = P->fd; \ |
| BK = P->bk; \ |
| if (FD->bk != P || BK->fd != P) \ |
| abort(); \ |
| FD->bk = BK; \ |
| BK->fd = FD; \ |
| } |
| |
| /* |
| Indexing |
| |
| Bins for sizes < 512 bytes contain chunks of all the same size, spaced |
| 8 bytes apart. Larger bins are approximately logarithmically spaced: |
| |
| 64 bins of size 8 |
| 32 bins of size 64 |
| 16 bins of size 512 |
| 8 bins of size 4096 |
| 4 bins of size 32768 |
| 2 bins of size 262144 |
| 1 bin of size what's left |
| |
| The bins top out around 1MB because we expect to service large |
| requests via mmap. |
| */ |
| |
| #define NBINS 96 |
| #define NSMALLBINS 32 |
| #define SMALLBIN_WIDTH 8 |
| #define MIN_LARGE_SIZE 256 |
| |
| #define in_smallbin_range(sz) \ |
| ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) |
| |
| #define smallbin_index(sz) (((unsigned)(sz)) >> 3) |
| |
| #define bin_index(sz) \ |
| ((in_smallbin_range(sz)) ? smallbin_index(sz) : __malloc_largebin_index(sz)) |
| |
| /* |
| FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the |
| first bin that is maintained in sorted order. This must |
| be the smallest size corresponding to a given bin. |
| |
| Normally, this should be MIN_LARGE_SIZE. But you can weaken |
| best fit guarantees to sometimes speed up malloc by increasing value. |
| Doing this means that malloc may choose a chunk that is |
| non-best-fitting by up to the width of the bin. |
| |
| Some useful cutoff values: |
| 512 - all bins sorted |
| 2560 - leaves bins <= 64 bytes wide unsorted |
| 12288 - leaves bins <= 512 bytes wide unsorted |
| 65536 - leaves bins <= 4096 bytes wide unsorted |
| 262144 - leaves bins <= 32768 bytes wide unsorted |
| -1 - no bins sorted (not recommended!) |
| */ |
| |
| #define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE |
| /* #define FIRST_SORTED_BIN_SIZE 65536 */ |
| |
| /* |
| Unsorted chunks |
| |
| All remainders from chunk splits, as well as all returned chunks, |
| are first placed in the "unsorted" bin. They are then placed |
| in regular bins after malloc gives them ONE chance to be used before |
| binning. So, basically, the unsorted_chunks list acts as a queue, |
| with chunks being placed on it in free (and __malloc_consolidate), |
| and taken off (to be either used or placed in bins) in malloc. |
| */ |
| |
| /* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ |
| #define unsorted_chunks(M) (bin_at(M, 1)) |
| |
| /* |
| Top |
| |
| The top-most available chunk (i.e., the one bordering the end of |
| available memory) is treated specially. It is never included in |
| any bin, is used only if no other chunk is available, and is |
| released back to the system if it is very large (see |
| M_TRIM_THRESHOLD). Because top initially |
| points to its own bin with initial zero size, thus forcing |
| extension on the first malloc request, we avoid having any special |
| code in malloc to check whether it even exists yet. But we still |
| need to do so when getting memory from system, so we make |
| initial_top treat the bin as a legal but unusable chunk during the |
| interval between initialization and the first call to |
| __malloc_alloc. (This is somewhat delicate, since it relies on |
| the 2 preceding words to be zero during this interval as well.) |
| */ |
| |
| /* Conveniently, the unsorted bin can be used as dummy top on first call */ |
| #define initial_top(M) (unsorted_chunks(M)) |
| |
| /* |
| Binmap |
| |
| To help compensate for the large number of bins, a one-level index |
| structure is used for bin-by-bin searching. `binmap' is a |
| bitvector recording whether bins are definitely empty so they can |
| be skipped over during during traversals. The bits are NOT always |
| cleared as soon as bins are empty, but instead only |
| when they are noticed to be empty during traversal in malloc. |
| */ |
| |
| /* Conservatively use 32 bits per map word, even if on 64bit system */ |
| #define BINMAPSHIFT 5 |
| #define BITSPERMAP (1U << BINMAPSHIFT) |
| #define BINMAPSIZE (NBINS / BITSPERMAP) |
| |
| #define idx2block(i) ((i) >> BINMAPSHIFT) |
| #define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) |
| |
| #define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) |
| #define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) |
| #define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i)) |
| |
| /* |
| Fastbins |
| |
| An array of lists holding recently freed small chunks. Fastbins |
| are not doubly linked. It is faster to single-link them, and |
| since chunks are never removed from the middles of these lists, |
| double linking is not necessary. Also, unlike regular bins, they |
| are not even processed in FIFO order (they use faster LIFO) since |
| ordering doesn't much matter in the transient contexts in which |
| fastbins are normally used. |
| |
| Chunks in fastbins keep their inuse bit set, so they cannot |
| be consolidated with other free chunks. __malloc_consolidate |
| releases all chunks in fastbins and consolidates them with |
| other free chunks. |
| */ |
| |
| typedef struct malloc_chunk* mfastbinptr; |
| |
| /* offset 2 to use otherwise unindexable first 2 bins */ |
| #define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2) |
| |
| /* The maximum fastbin request size we support */ |
| #define MAX_FAST_SIZE 80 |
| |
| #define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) |
| |
| /* |
| FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() |
| that triggers automatic consolidation of possibly-surrounding |
| fastbin chunks. This is a heuristic, so the exact value should not |
| matter too much. It is defined at half the default trim threshold as a |
| compromise heuristic to only attempt consolidation if it is likely |
| to lead to trimming. However, it is not dynamically tunable, since |
| consolidation reduces fragmentation surrounding loarge chunks even |
| if trimming is not used. |
| */ |
| |
| #define FASTBIN_CONSOLIDATION_THRESHOLD \ |
| ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1) |
| |
| /* |
| Since the lowest 2 bits in max_fast don't matter in size comparisons, |
| they are used as flags. |
| */ |
| |
| /* |
| ANYCHUNKS_BIT held in max_fast indicates that there may be any |
| freed chunks at all. It is set true when entering a chunk into any |
| bin. |
| */ |
| |
| #define ANYCHUNKS_BIT (1U) |
| |
| #define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT)) |
| #define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT) |
| #define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT) |
| |
| /* |
| FASTCHUNKS_BIT held in max_fast indicates that there are probably |
| some fastbin chunks. It is set true on entering a chunk into any |
| fastbin, and cleared only in __malloc_consolidate. |
| */ |
| |
| #define FASTCHUNKS_BIT (2U) |
| |
| #define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT)) |
| #define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT|ANYCHUNKS_BIT)) |
| #define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT)) |
| |
| /* Set value of max_fast. Use impossibly small value if 0. */ |
| #define set_max_fast(M, s) \ |
| (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ |
| ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT)) |
| |
| #define get_max_fast(M) \ |
| ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT)) |
| |
| |
| /* |
| morecore_properties is a status word holding dynamically discovered |
| or controlled properties of the morecore function |
| */ |
| |
| #define MORECORE_CONTIGUOUS_BIT (1U) |
| |
| #define contiguous(M) \ |
| (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT)) |
| #define noncontiguous(M) \ |
| (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0) |
| #define set_contiguous(M) \ |
| ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT) |
| #define set_noncontiguous(M) \ |
| ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT) |
| |
| |
| /* |
| ----------- Internal state representation and initialization ----------- |
| */ |
| |
| struct malloc_state { |
| |
| /* The maximum chunk size to be eligible for fastbin */ |
| size_t max_fast; /* low 2 bits used as flags */ |
| |
| /* Fastbins */ |
| mfastbinptr fastbins[NFASTBINS]; |
| |
| /* Base of the topmost chunk -- not otherwise kept in a bin */ |
| mchunkptr top; |
| |
| /* The remainder from the most recent split of a small request */ |
| mchunkptr last_remainder; |
| |
| /* Normal bins packed as described above */ |
| mchunkptr bins[NBINS * 2]; |
| |
| /* Bitmap of bins. Trailing zero map handles cases of largest binned size */ |
| unsigned int binmap[BINMAPSIZE+1]; |
| |
| /* Tunable parameters */ |
| unsigned long trim_threshold; |
| size_t top_pad; |
| size_t mmap_threshold; |
| |
| /* Memory map support */ |
| int n_mmaps; |
| int n_mmaps_max; |
| int max_n_mmaps; |
| |
| /* Cache malloc_getpagesize */ |
| unsigned int pagesize; |
| |
| /* Track properties of MORECORE */ |
| unsigned int morecore_properties; |
| |
| /* Statistics */ |
| size_t mmapped_mem; |
| size_t sbrked_mem; |
| size_t max_sbrked_mem; |
| size_t max_mmapped_mem; |
| size_t max_total_mem; |
| }; |
| |
| typedef struct malloc_state *mstate; |
| |
| /* |
| There is exactly one instance of this struct in this malloc. |
| If you are adapting this malloc in a way that does NOT use a static |
| malloc_state, you MUST explicitly zero-fill it before using. This |
| malloc relies on the property that malloc_state is initialized to |
| all zeroes (as is true of C statics). |
| */ |
| extern struct malloc_state __malloc_state; /* never directly referenced */ |
| |
| /* |
| All uses of av_ are via get_malloc_state(). |
| At most one "call" to get_malloc_state is made per invocation of |
| the public versions of malloc and free, but other routines |
| that in turn invoke malloc and/or free may call more then once. |
| Also, it is called in check* routines if __UCLIBC_MALLOC_DEBUGGING__ is set. |
| */ |
| |
| #define get_malloc_state() (&(__malloc_state)) |
| |
| /* External internal utilities operating on mstates */ |
| void __malloc_consolidate(mstate) attribute_hidden; |
| |
| |
| /* Debugging support */ |
| #ifndef __UCLIBC_MALLOC_DEBUGGING__ |
| |
| #define check_chunk(P) |
| #define check_free_chunk(P) |
| #define check_inuse_chunk(P) |
| #define check_remalloced_chunk(P,N) |
| #define check_malloced_chunk(P,N) |
| #define check_malloc_state() |
| #define assert(x) ((void)0) |
| |
| |
| #else |
| |
| #define check_chunk(P) __do_check_chunk(P) |
| #define check_free_chunk(P) __do_check_free_chunk(P) |
| #define check_inuse_chunk(P) __do_check_inuse_chunk(P) |
| #define check_remalloced_chunk(P,N) __do_check_remalloced_chunk(P,N) |
| #define check_malloced_chunk(P,N) __do_check_malloced_chunk(P,N) |
| #define check_malloc_state() __do_check_malloc_state() |
| |
| extern void __do_check_chunk(mchunkptr p); |
| extern void __do_check_free_chunk(mchunkptr p); |
| extern void __do_check_inuse_chunk(mchunkptr p); |
| extern void __do_check_remalloced_chunk(mchunkptr p, size_t s); |
| extern void __do_check_malloced_chunk(mchunkptr p, size_t s); |
| extern void __do_check_malloc_state(void); |
| |
| #include <assert.h> |
| |
| #endif |