| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  bootmem - A boot-time physical memory allocator and configurator | 
|  | 3 | * | 
|  | 4 | *  Copyright (C) 1999 Ingo Molnar | 
|  | 5 | *                1999 Kanoj Sarcar, SGI | 
|  | 6 | *                2008 Johannes Weiner | 
|  | 7 | * | 
|  | 8 | * Access to this subsystem has to be serialized externally (which is true | 
|  | 9 | * for the boot process anyway). | 
|  | 10 | */ | 
|  | 11 | #include <linux/init.h> | 
|  | 12 | #include <linux/pfn.h> | 
|  | 13 | #include <linux/slab.h> | 
|  | 14 | #include <linux/bootmem.h> | 
|  | 15 | #include <linux/export.h> | 
|  | 16 | #include <linux/kmemleak.h> | 
|  | 17 | #include <linux/range.h> | 
|  | 18 | #include <linux/memblock.h> | 
|  | 19 |  | 
|  | 20 | #include <asm/bug.h> | 
|  | 21 | #include <asm/io.h> | 
|  | 22 | #include <asm/processor.h> | 
|  | 23 |  | 
|  | 24 | #include "internal.h" | 
|  | 25 |  | 
|  | 26 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 
|  | 27 | struct pglist_data __refdata contig_page_data = { | 
|  | 28 | .bdata = &bootmem_node_data[0] | 
|  | 29 | }; | 
|  | 30 | EXPORT_SYMBOL(contig_page_data); | 
|  | 31 | #endif | 
|  | 32 |  | 
|  | 33 | unsigned long max_low_pfn; | 
|  | 34 | unsigned long min_low_pfn; | 
|  | 35 | unsigned long max_pfn; | 
|  | 36 |  | 
|  | 37 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; | 
|  | 38 |  | 
|  | 39 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); | 
|  | 40 |  | 
|  | 41 | static int bootmem_debug; | 
|  | 42 |  | 
|  | 43 | static int __init bootmem_debug_setup(char *buf) | 
|  | 44 | { | 
|  | 45 | bootmem_debug = 1; | 
|  | 46 | return 0; | 
|  | 47 | } | 
|  | 48 | early_param("bootmem_debug", bootmem_debug_setup); | 
|  | 49 |  | 
/*
 * bdebug - print a KERN_INFO line prefixed with "bootmem::<function> ",
 * but only when bootmem_debug is set.  Must be used as a statement.
 */
#define bdebug(fmt, args...) do {					\
	if (unlikely(bootmem_debug))					\
		printk(KERN_INFO "bootmem::%s " fmt,			\
		       __func__, ## args);				\
} while (0)
|  | 56 |  | 
|  | 57 | static unsigned long __init bootmap_bytes(unsigned long pages) | 
|  | 58 | { | 
|  | 59 | unsigned long bytes = DIV_ROUND_UP(pages, 8); | 
|  | 60 |  | 
|  | 61 | return ALIGN(bytes, sizeof(long)); | 
|  | 62 | } | 
|  | 63 |  | 
|  | 64 | /** | 
|  | 65 | * bootmem_bootmap_pages - calculate bitmap size in pages | 
|  | 66 | * @pages: number of pages the bitmap has to represent | 
|  | 67 | */ | 
|  | 68 | unsigned long __init bootmem_bootmap_pages(unsigned long pages) | 
|  | 69 | { | 
|  | 70 | unsigned long bytes = bootmap_bytes(pages); | 
|  | 71 |  | 
|  | 72 | return PAGE_ALIGN(bytes) >> PAGE_SHIFT; | 
|  | 73 | } | 
|  | 74 |  | 
|  | 75 | /* | 
|  | 76 | * link bdata in order | 
|  | 77 | */ | 
|  | 78 | static void __init link_bootmem(bootmem_data_t *bdata) | 
|  | 79 | { | 
|  | 80 | struct list_head *iter; | 
|  | 81 |  | 
|  | 82 | list_for_each(iter, &bdata_list) { | 
|  | 83 | bootmem_data_t *ent; | 
|  | 84 |  | 
|  | 85 | ent = list_entry(iter, bootmem_data_t, list); | 
|  | 86 | if (bdata->node_min_pfn < ent->node_min_pfn) | 
|  | 87 | break; | 
|  | 88 | } | 
|  | 89 | list_add_tail(&bdata->list, iter); | 
|  | 90 | } | 
|  | 91 |  | 
|  | 92 | /* | 
|  | 93 | * Called once to set up the allocator itself. | 
|  | 94 | */ | 
|  | 95 | static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, | 
|  | 96 | unsigned long mapstart, unsigned long start, unsigned long end) | 
|  | 97 | { | 
|  | 98 | unsigned long mapsize; | 
|  | 99 |  | 
|  | 100 | mminit_validate_memmodel_limits(&start, &end); | 
|  | 101 | bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); | 
|  | 102 | bdata->node_min_pfn = start; | 
|  | 103 | bdata->node_low_pfn = end; | 
|  | 104 | link_bootmem(bdata); | 
|  | 105 |  | 
|  | 106 | /* | 
|  | 107 | * Initially all pages are reserved - setup_arch() has to | 
|  | 108 | * register free RAM areas explicitly. | 
|  | 109 | */ | 
|  | 110 | mapsize = bootmap_bytes(end - start); | 
|  | 111 | memset(bdata->node_bootmem_map, 0xff, mapsize); | 
|  | 112 |  | 
|  | 113 | bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", | 
|  | 114 | bdata - bootmem_node_data, start, mapstart, end, mapsize); | 
|  | 115 |  | 
|  | 116 | return mapsize; | 
|  | 117 | } | 
|  | 118 |  | 
|  | 119 | /** | 
|  | 120 | * init_bootmem_node - register a node as boot memory | 
|  | 121 | * @pgdat: node to register | 
|  | 122 | * @freepfn: pfn where the bitmap for this node is to be placed | 
|  | 123 | * @startpfn: first pfn on the node | 
|  | 124 | * @endpfn: first pfn after the node | 
|  | 125 | * | 
|  | 126 | * Returns the number of bytes needed to hold the bitmap for this node. | 
|  | 127 | */ | 
|  | 128 | unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, | 
|  | 129 | unsigned long startpfn, unsigned long endpfn) | 
|  | 130 | { | 
|  | 131 | return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); | 
|  | 132 | } | 
|  | 133 |  | 
|  | 134 | /** | 
|  | 135 | * init_bootmem - register boot memory | 
|  | 136 | * @start: pfn where the bitmap is to be placed | 
|  | 137 | * @pages: number of available physical pages | 
|  | 138 | * | 
|  | 139 | * Returns the number of bytes needed to hold the bitmap. | 
|  | 140 | */ | 
|  | 141 | unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | 
|  | 142 | { | 
|  | 143 | max_low_pfn = pages; | 
|  | 144 | min_low_pfn = start; | 
|  | 145 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); | 
|  | 146 | } | 
|  | 147 |  | 
|  | 148 | /* | 
|  | 149 | * free_bootmem_late - free bootmem pages directly to page allocator | 
|  | 150 | * @addr: starting address of the range | 
|  | 151 | * @size: size of the range in bytes | 
|  | 152 | * | 
|  | 153 | * This is only useful when the bootmem allocator has already been torn | 
|  | 154 | * down, but we are still initializing the system.  Pages are given directly | 
|  | 155 | * to the page allocator, no bootmem metadata is updated because it is gone. | 
|  | 156 | */ | 
|  | 157 | void __init free_bootmem_late(unsigned long addr, unsigned long size) | 
|  | 158 | { | 
|  | 159 | unsigned long cursor, end; | 
|  | 160 |  | 
|  | 161 | kmemleak_free_part(__va(addr), size); | 
|  | 162 |  | 
|  | 163 | cursor = PFN_UP(addr); | 
|  | 164 | end = PFN_DOWN(addr + size); | 
|  | 165 |  | 
|  | 166 | for (; cursor < end; cursor++) { | 
|  | 167 | __free_pages_bootmem(pfn_to_page(cursor), 0); | 
|  | 168 | totalram_pages++; | 
|  | 169 | } | 
|  | 170 | } | 
|  | 171 |  | 
|  | 172 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | 
|  | 173 | { | 
|  | 174 | struct page *page; | 
|  | 175 | unsigned long start, end, pages, count = 0; | 
|  | 176 |  | 
|  | 177 | if (!bdata->node_bootmem_map) | 
|  | 178 | return 0; | 
|  | 179 |  | 
|  | 180 | start = bdata->node_min_pfn; | 
|  | 181 | end = bdata->node_low_pfn; | 
|  | 182 |  | 
|  | 183 | bdebug("nid=%td start=%lx end=%lx\n", | 
|  | 184 | bdata - bootmem_node_data, start, end); | 
|  | 185 |  | 
|  | 186 | while (start < end) { | 
|  | 187 | unsigned long *map, idx, vec; | 
|  | 188 |  | 
|  | 189 | map = bdata->node_bootmem_map; | 
|  | 190 | idx = start - bdata->node_min_pfn; | 
|  | 191 | vec = ~map[idx / BITS_PER_LONG]; | 
|  | 192 | /* | 
|  | 193 | * If we have a properly aligned and fully unreserved | 
|  | 194 | * BITS_PER_LONG block of pages in front of us, free | 
|  | 195 | * it in one go. | 
|  | 196 | */ | 
|  | 197 | if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) { | 
|  | 198 | int order = ilog2(BITS_PER_LONG); | 
|  | 199 |  | 
|  | 200 | __free_pages_bootmem(pfn_to_page(start), order); | 
|  | 201 | count += BITS_PER_LONG; | 
|  | 202 | start += BITS_PER_LONG; | 
|  | 203 | } else { | 
|  | 204 | unsigned long off = 0; | 
|  | 205 |  | 
|  | 206 | while (vec && off < BITS_PER_LONG) { | 
|  | 207 | if (vec & 1) { | 
|  | 208 | page = pfn_to_page(start + off); | 
|  | 209 | __free_pages_bootmem(page, 0); | 
|  | 210 | count++; | 
|  | 211 | } | 
|  | 212 | vec >>= 1; | 
|  | 213 | off++; | 
|  | 214 | } | 
|  | 215 | start = ALIGN(start + 1, BITS_PER_LONG); | 
|  | 216 | } | 
|  | 217 | } | 
|  | 218 |  | 
|  | 219 | page = virt_to_page(bdata->node_bootmem_map); | 
|  | 220 | pages = bdata->node_low_pfn - bdata->node_min_pfn; | 
|  | 221 | pages = bootmem_bootmap_pages(pages); | 
|  | 222 | count += pages; | 
|  | 223 | while (pages--) | 
|  | 224 | __free_pages_bootmem(page++, 0); | 
|  | 225 |  | 
|  | 226 | bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); | 
|  | 227 |  | 
|  | 228 | return count; | 
|  | 229 | } | 
|  | 230 |  | 
|  | 231 | /** | 
|  | 232 | * free_all_bootmem_node - release a node's free pages to the buddy allocator | 
|  | 233 | * @pgdat: node to be released | 
|  | 234 | * | 
|  | 235 | * Returns the number of pages actually released. | 
|  | 236 | */ | 
|  | 237 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | 
|  | 238 | { | 
|  | 239 | register_page_bootmem_info_node(pgdat); | 
|  | 240 | return free_all_bootmem_core(pgdat->bdata); | 
|  | 241 | } | 
|  | 242 |  | 
|  | 243 | /** | 
|  | 244 | * free_all_bootmem - release free pages to the buddy allocator | 
|  | 245 | * | 
|  | 246 | * Returns the number of pages actually released. | 
|  | 247 | */ | 
|  | 248 | unsigned long __init free_all_bootmem(void) | 
|  | 249 | { | 
|  | 250 | unsigned long total_pages = 0; | 
|  | 251 | bootmem_data_t *bdata; | 
|  | 252 |  | 
|  | 253 | list_for_each_entry(bdata, &bdata_list, list) | 
|  | 254 | total_pages += free_all_bootmem_core(bdata); | 
|  | 255 |  | 
|  | 256 | return total_pages; | 
|  | 257 | } | 
|  | 258 |  | 
|  | 259 | static void __init __free(bootmem_data_t *bdata, | 
|  | 260 | unsigned long sidx, unsigned long eidx) | 
|  | 261 | { | 
|  | 262 | unsigned long idx; | 
|  | 263 |  | 
|  | 264 | bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, | 
|  | 265 | sidx + bdata->node_min_pfn, | 
|  | 266 | eidx + bdata->node_min_pfn); | 
|  | 267 |  | 
|  | 268 | if (bdata->hint_idx > sidx) | 
|  | 269 | bdata->hint_idx = sidx; | 
|  | 270 |  | 
|  | 271 | for (idx = sidx; idx < eidx; idx++) | 
|  | 272 | if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) | 
|  | 273 | BUG(); | 
|  | 274 | } | 
|  | 275 |  | 
|  | 276 | static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, | 
|  | 277 | unsigned long eidx, int flags) | 
|  | 278 | { | 
|  | 279 | unsigned long idx; | 
|  | 280 | int exclusive = flags & BOOTMEM_EXCLUSIVE; | 
|  | 281 |  | 
|  | 282 | bdebug("nid=%td start=%lx end=%lx flags=%x\n", | 
|  | 283 | bdata - bootmem_node_data, | 
|  | 284 | sidx + bdata->node_min_pfn, | 
|  | 285 | eidx + bdata->node_min_pfn, | 
|  | 286 | flags); | 
|  | 287 |  | 
|  | 288 | for (idx = sidx; idx < eidx; idx++) | 
|  | 289 | if (test_and_set_bit(idx, bdata->node_bootmem_map)) { | 
|  | 290 | if (exclusive) { | 
|  | 291 | __free(bdata, sidx, idx); | 
|  | 292 | return -EBUSY; | 
|  | 293 | } | 
|  | 294 | bdebug("silent double reserve of PFN %lx\n", | 
|  | 295 | idx + bdata->node_min_pfn); | 
|  | 296 | } | 
|  | 297 | return 0; | 
|  | 298 | } | 
|  | 299 |  | 
|  | 300 | static int __init mark_bootmem_node(bootmem_data_t *bdata, | 
|  | 301 | unsigned long start, unsigned long end, | 
|  | 302 | int reserve, int flags) | 
|  | 303 | { | 
|  | 304 | unsigned long sidx, eidx; | 
|  | 305 |  | 
|  | 306 | bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", | 
|  | 307 | bdata - bootmem_node_data, start, end, reserve, flags); | 
|  | 308 |  | 
|  | 309 | BUG_ON(start < bdata->node_min_pfn); | 
|  | 310 | BUG_ON(end > bdata->node_low_pfn); | 
|  | 311 |  | 
|  | 312 | sidx = start - bdata->node_min_pfn; | 
|  | 313 | eidx = end - bdata->node_min_pfn; | 
|  | 314 |  | 
|  | 315 | if (reserve) | 
|  | 316 | return __reserve(bdata, sidx, eidx, flags); | 
|  | 317 | else | 
|  | 318 | __free(bdata, sidx, eidx); | 
|  | 319 | return 0; | 
|  | 320 | } | 
|  | 321 |  | 
|  | 322 | static int __init mark_bootmem(unsigned long start, unsigned long end, | 
|  | 323 | int reserve, int flags) | 
|  | 324 | { | 
|  | 325 | unsigned long pos; | 
|  | 326 | bootmem_data_t *bdata; | 
|  | 327 |  | 
|  | 328 | pos = start; | 
|  | 329 | list_for_each_entry(bdata, &bdata_list, list) { | 
|  | 330 | int err; | 
|  | 331 | unsigned long max; | 
|  | 332 |  | 
|  | 333 | if (pos < bdata->node_min_pfn || | 
|  | 334 | pos >= bdata->node_low_pfn) { | 
|  | 335 | BUG_ON(pos != start); | 
|  | 336 | continue; | 
|  | 337 | } | 
|  | 338 |  | 
|  | 339 | max = min(bdata->node_low_pfn, end); | 
|  | 340 |  | 
|  | 341 | err = mark_bootmem_node(bdata, pos, max, reserve, flags); | 
|  | 342 | if (reserve && err) { | 
|  | 343 | mark_bootmem(start, pos, 0, 0); | 
|  | 344 | return err; | 
|  | 345 | } | 
|  | 346 |  | 
|  | 347 | if (max == end) | 
|  | 348 | return 0; | 
|  | 349 | pos = bdata->node_low_pfn; | 
|  | 350 | } | 
|  | 351 | BUG(); | 
|  | 352 | } | 
|  | 353 |  | 
|  | 354 | /** | 
|  | 355 | * free_bootmem_node - mark a page range as usable | 
|  | 356 | * @pgdat: node the range resides on | 
|  | 357 | * @physaddr: starting address of the range | 
|  | 358 | * @size: size of the range in bytes | 
|  | 359 | * | 
|  | 360 | * Partial pages will be considered reserved and left as they are. | 
|  | 361 | * | 
|  | 362 | * The range must reside completely on the specified node. | 
|  | 363 | */ | 
|  | 364 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 
|  | 365 | unsigned long size) | 
|  | 366 | { | 
|  | 367 | unsigned long start, end; | 
|  | 368 |  | 
|  | 369 | kmemleak_free_part(__va(physaddr), size); | 
|  | 370 |  | 
|  | 371 | start = PFN_UP(physaddr); | 
|  | 372 | end = PFN_DOWN(physaddr + size); | 
|  | 373 |  | 
|  | 374 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | /** | 
|  | 378 | * free_bootmem - mark a page range as usable | 
|  | 379 | * @addr: starting address of the range | 
|  | 380 | * @size: size of the range in bytes | 
|  | 381 | * | 
|  | 382 | * Partial pages will be considered reserved and left as they are. | 
|  | 383 | * | 
|  | 384 | * The range must be contiguous but may span node boundaries. | 
|  | 385 | */ | 
|  | 386 | void __init free_bootmem(unsigned long addr, unsigned long size) | 
|  | 387 | { | 
|  | 388 | unsigned long start, end; | 
|  | 389 |  | 
|  | 390 | kmemleak_free_part(__va(addr), size); | 
|  | 391 |  | 
|  | 392 | start = PFN_UP(addr); | 
|  | 393 | end = PFN_DOWN(addr + size); | 
|  | 394 |  | 
|  | 395 | mark_bootmem(start, end, 0, 0); | 
|  | 396 | } | 
|  | 397 |  | 
|  | 398 | /** | 
|  | 399 | * reserve_bootmem_node - mark a page range as reserved | 
|  | 400 | * @pgdat: node the range resides on | 
|  | 401 | * @physaddr: starting address of the range | 
|  | 402 | * @size: size of the range in bytes | 
|  | 403 | * @flags: reservation flags (see linux/bootmem.h) | 
|  | 404 | * | 
|  | 405 | * Partial pages will be reserved. | 
|  | 406 | * | 
|  | 407 | * The range must reside completely on the specified node. | 
|  | 408 | */ | 
|  | 409 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 
|  | 410 | unsigned long size, int flags) | 
|  | 411 | { | 
|  | 412 | unsigned long start, end; | 
|  | 413 |  | 
|  | 414 | start = PFN_DOWN(physaddr); | 
|  | 415 | end = PFN_UP(physaddr + size); | 
|  | 416 |  | 
|  | 417 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); | 
|  | 418 | } | 
|  | 419 |  | 
|  | 420 | /** | 
|  | 421 | * reserve_bootmem - mark a page range as usable | 
|  | 422 | * @addr: starting address of the range | 
|  | 423 | * @size: size of the range in bytes | 
|  | 424 | * @flags: reservation flags (see linux/bootmem.h) | 
|  | 425 | * | 
|  | 426 | * Partial pages will be reserved. | 
|  | 427 | * | 
|  | 428 | * The range must be contiguous but may span node boundaries. | 
|  | 429 | */ | 
|  | 430 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 
|  | 431 | int flags) | 
|  | 432 | { | 
|  | 433 | unsigned long start, end; | 
|  | 434 |  | 
|  | 435 | start = PFN_DOWN(addr); | 
|  | 436 | end = PFN_UP(addr + size); | 
|  | 437 |  | 
|  | 438 | return mark_bootmem(start, end, 1, flags); | 
|  | 439 | } | 
|  | 440 |  | 
|  | 441 | int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 
|  | 442 | int flags) | 
|  | 443 | { | 
|  | 444 | return reserve_bootmem(phys, len, flags); | 
|  | 445 | } | 
|  | 446 |  | 
|  | 447 | static unsigned long __init align_idx(struct bootmem_data *bdata, | 
|  | 448 | unsigned long idx, unsigned long step) | 
|  | 449 | { | 
|  | 450 | unsigned long base = bdata->node_min_pfn; | 
|  | 451 |  | 
|  | 452 | /* | 
|  | 453 | * Align the index with respect to the node start so that the | 
|  | 454 | * combination of both satisfies the requested alignment. | 
|  | 455 | */ | 
|  | 456 |  | 
|  | 457 | return ALIGN(base + idx, step) - base; | 
|  | 458 | } | 
|  | 459 |  | 
|  | 460 | static unsigned long __init align_off(struct bootmem_data *bdata, | 
|  | 461 | unsigned long off, unsigned long align) | 
|  | 462 | { | 
|  | 463 | unsigned long base = PFN_PHYS(bdata->node_min_pfn); | 
|  | 464 |  | 
|  | 465 | /* Same as align_idx for byte offsets */ | 
|  | 466 |  | 
|  | 467 | return ALIGN(base + off, align) - base; | 
|  | 468 | } | 
|  | 469 |  | 
|  | 470 | static void * __init alloc_bootmem_core(struct bootmem_data *bdata, | 
|  | 471 | unsigned long size, unsigned long align, | 
|  | 472 | unsigned long goal, unsigned long limit) | 
|  | 473 | { | 
|  | 474 | unsigned long fallback = 0; | 
|  | 475 | unsigned long min, max, start, sidx, midx, step; | 
|  | 476 |  | 
|  | 477 | bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", | 
|  | 478 | bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, | 
|  | 479 | align, goal, limit); | 
|  | 480 |  | 
|  | 481 | BUG_ON(!size); | 
|  | 482 | BUG_ON(align & (align - 1)); | 
|  | 483 | BUG_ON(limit && goal + size > limit); | 
|  | 484 |  | 
|  | 485 | if (!bdata->node_bootmem_map) | 
|  | 486 | return NULL; | 
|  | 487 |  | 
|  | 488 | min = bdata->node_min_pfn; | 
|  | 489 | max = bdata->node_low_pfn; | 
|  | 490 |  | 
|  | 491 | goal >>= PAGE_SHIFT; | 
|  | 492 | limit >>= PAGE_SHIFT; | 
|  | 493 |  | 
|  | 494 | if (limit && max > limit) | 
|  | 495 | max = limit; | 
|  | 496 | if (max <= min) | 
|  | 497 | return NULL; | 
|  | 498 |  | 
|  | 499 | step = max(align >> PAGE_SHIFT, 1UL); | 
|  | 500 |  | 
|  | 501 | if (goal && min < goal && goal < max) | 
|  | 502 | start = ALIGN(goal, step); | 
|  | 503 | else | 
|  | 504 | start = ALIGN(min, step); | 
|  | 505 |  | 
|  | 506 | sidx = start - bdata->node_min_pfn; | 
|  | 507 | midx = max - bdata->node_min_pfn; | 
|  | 508 |  | 
|  | 509 | if (bdata->hint_idx > sidx) { | 
|  | 510 | /* | 
|  | 511 | * Handle the valid case of sidx being zero and still | 
|  | 512 | * catch the fallback below. | 
|  | 513 | */ | 
|  | 514 | fallback = sidx + 1; | 
|  | 515 | sidx = align_idx(bdata, bdata->hint_idx, step); | 
|  | 516 | } | 
|  | 517 |  | 
|  | 518 | while (1) { | 
|  | 519 | int merge; | 
|  | 520 | void *region; | 
|  | 521 | unsigned long eidx, i, start_off, end_off; | 
|  | 522 | find_block: | 
|  | 523 | sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); | 
|  | 524 | sidx = align_idx(bdata, sidx, step); | 
|  | 525 | eidx = sidx + PFN_UP(size); | 
|  | 526 |  | 
|  | 527 | if (sidx >= midx || eidx > midx) | 
|  | 528 | break; | 
|  | 529 |  | 
|  | 530 | for (i = sidx; i < eidx; i++) | 
|  | 531 | if (test_bit(i, bdata->node_bootmem_map)) { | 
|  | 532 | sidx = align_idx(bdata, i, step); | 
|  | 533 | if (sidx == i) | 
|  | 534 | sidx += step; | 
|  | 535 | goto find_block; | 
|  | 536 | } | 
|  | 537 |  | 
|  | 538 | if (bdata->last_end_off & (PAGE_SIZE - 1) && | 
|  | 539 | PFN_DOWN(bdata->last_end_off) + 1 == sidx) | 
|  | 540 | start_off = align_off(bdata, bdata->last_end_off, align); | 
|  | 541 | else | 
|  | 542 | start_off = PFN_PHYS(sidx); | 
|  | 543 |  | 
|  | 544 | merge = PFN_DOWN(start_off) < sidx; | 
|  | 545 | end_off = start_off + size; | 
|  | 546 |  | 
|  | 547 | bdata->last_end_off = end_off; | 
|  | 548 | bdata->hint_idx = PFN_UP(end_off); | 
|  | 549 |  | 
|  | 550 | /* | 
|  | 551 | * Reserve the area now: | 
|  | 552 | */ | 
|  | 553 | if (__reserve(bdata, PFN_DOWN(start_off) + merge, | 
|  | 554 | PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) | 
|  | 555 | BUG(); | 
|  | 556 |  | 
|  | 557 | region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + | 
|  | 558 | start_off); | 
|  | 559 | memset(region, 0, size); | 
|  | 560 | /* | 
|  | 561 | * The min_count is set to 0 so that bootmem allocated blocks | 
|  | 562 | * are never reported as leaks. | 
|  | 563 | */ | 
|  | 564 | kmemleak_alloc(region, size, 0, 0); | 
|  | 565 | return region; | 
|  | 566 | } | 
|  | 567 |  | 
|  | 568 | if (fallback) { | 
|  | 569 | sidx = align_idx(bdata, fallback - 1, step); | 
|  | 570 | fallback = 0; | 
|  | 571 | goto find_block; | 
|  | 572 | } | 
|  | 573 |  | 
|  | 574 | return NULL; | 
|  | 575 | } | 
|  | 576 |  | 
|  | 577 | static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, | 
|  | 578 | unsigned long size, unsigned long align, | 
|  | 579 | unsigned long goal, unsigned long limit) | 
|  | 580 | { | 
|  | 581 | if (WARN_ON_ONCE(slab_is_available())) | 
|  | 582 | return kzalloc(size, GFP_NOWAIT); | 
|  | 583 |  | 
|  | 584 | #ifdef CONFIG_HAVE_ARCH_BOOTMEM | 
|  | 585 | { | 
|  | 586 | bootmem_data_t *p_bdata; | 
|  | 587 |  | 
|  | 588 | p_bdata = bootmem_arch_preferred_node(bdata, size, align, | 
|  | 589 | goal, limit); | 
|  | 590 | if (p_bdata) | 
|  | 591 | return alloc_bootmem_core(p_bdata, size, align, | 
|  | 592 | goal, limit); | 
|  | 593 | } | 
|  | 594 | #endif | 
|  | 595 | return NULL; | 
|  | 596 | } | 
|  | 597 |  | 
|  | 598 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, | 
|  | 599 | unsigned long align, | 
|  | 600 | unsigned long goal, | 
|  | 601 | unsigned long limit) | 
|  | 602 | { | 
|  | 603 | bootmem_data_t *bdata; | 
|  | 604 | void *region; | 
|  | 605 |  | 
|  | 606 | restart: | 
|  | 607 | region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); | 
|  | 608 | if (region) | 
|  | 609 | return region; | 
|  | 610 |  | 
|  | 611 | list_for_each_entry(bdata, &bdata_list, list) { | 
|  | 612 | if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) | 
|  | 613 | continue; | 
|  | 614 | if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) | 
|  | 615 | break; | 
|  | 616 |  | 
|  | 617 | region = alloc_bootmem_core(bdata, size, align, goal, limit); | 
|  | 618 | if (region) | 
|  | 619 | return region; | 
|  | 620 | } | 
|  | 621 |  | 
|  | 622 | if (goal) { | 
|  | 623 | goal = 0; | 
|  | 624 | goto restart; | 
|  | 625 | } | 
|  | 626 |  | 
|  | 627 | return NULL; | 
|  | 628 | } | 
|  | 629 |  | 
|  | 630 | /** | 
|  | 631 | * __alloc_bootmem_nopanic - allocate boot memory without panicking | 
|  | 632 | * @size: size of the request in bytes | 
|  | 633 | * @align: alignment of the region | 
|  | 634 | * @goal: preferred starting address of the region | 
|  | 635 | * | 
|  | 636 | * The goal is dropped if it can not be satisfied and the allocation will | 
|  | 637 | * fall back to memory below @goal. | 
|  | 638 | * | 
|  | 639 | * Allocation may happen on any node in the system. | 
|  | 640 | * | 
|  | 641 | * Returns NULL on failure. | 
|  | 642 | */ | 
|  | 643 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, | 
|  | 644 | unsigned long goal) | 
|  | 645 | { | 
|  | 646 | unsigned long limit = 0; | 
|  | 647 |  | 
|  | 648 | return ___alloc_bootmem_nopanic(size, align, goal, limit); | 
|  | 649 | } | 
|  | 650 |  | 
|  | 651 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | 
|  | 652 | unsigned long goal, unsigned long limit) | 
|  | 653 | { | 
|  | 654 | void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit); | 
|  | 655 |  | 
|  | 656 | if (mem) | 
|  | 657 | return mem; | 
|  | 658 | /* | 
|  | 659 | * Whoops, we cannot satisfy the allocation request. | 
|  | 660 | */ | 
|  | 661 | printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); | 
|  | 662 | panic("Out of memory"); | 
|  | 663 | return NULL; | 
|  | 664 | } | 
|  | 665 |  | 
|  | 666 | /** | 
|  | 667 | * __alloc_bootmem - allocate boot memory | 
|  | 668 | * @size: size of the request in bytes | 
|  | 669 | * @align: alignment of the region | 
|  | 670 | * @goal: preferred starting address of the region | 
|  | 671 | * | 
|  | 672 | * The goal is dropped if it can not be satisfied and the allocation will | 
|  | 673 | * fall back to memory below @goal. | 
|  | 674 | * | 
|  | 675 | * Allocation may happen on any node in the system. | 
|  | 676 | * | 
|  | 677 | * The function panics if the request can not be satisfied. | 
|  | 678 | */ | 
|  | 679 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, | 
|  | 680 | unsigned long goal) | 
|  | 681 | { | 
|  | 682 | unsigned long limit = 0; | 
|  | 683 |  | 
|  | 684 | return ___alloc_bootmem(size, align, goal, limit); | 
|  | 685 | } | 
|  | 686 |  | 
|  | 687 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | 
|  | 688 | unsigned long size, unsigned long align, | 
|  | 689 | unsigned long goal, unsigned long limit) | 
|  | 690 | { | 
|  | 691 | void *ptr; | 
|  | 692 |  | 
|  | 693 | ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); | 
|  | 694 | if (ptr) | 
|  | 695 | return ptr; | 
|  | 696 |  | 
|  | 697 | ptr = alloc_bootmem_core(bdata, size, align, goal, limit); | 
|  | 698 | if (ptr) | 
|  | 699 | return ptr; | 
|  | 700 |  | 
|  | 701 | return ___alloc_bootmem(size, align, goal, limit); | 
|  | 702 | } | 
|  | 703 |  | 
|  | 704 | /** | 
|  | 705 | * __alloc_bootmem_node - allocate boot memory from a specific node | 
|  | 706 | * @pgdat: node to allocate from | 
|  | 707 | * @size: size of the request in bytes | 
|  | 708 | * @align: alignment of the region | 
|  | 709 | * @goal: preferred starting address of the region | 
|  | 710 | * | 
|  | 711 | * The goal is dropped if it can not be satisfied and the allocation will | 
|  | 712 | * fall back to memory below @goal. | 
|  | 713 | * | 
|  | 714 | * Allocation may fall back to any node in the system if the specified node | 
|  | 715 | * can not hold the requested memory. | 
|  | 716 | * | 
|  | 717 | * The function panics if the request can not be satisfied. | 
|  | 718 | */ | 
|  | 719 | void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | 
|  | 720 | unsigned long align, unsigned long goal) | 
|  | 721 | { | 
|  | 722 | if (WARN_ON_ONCE(slab_is_available())) | 
|  | 723 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 
|  | 724 |  | 
|  | 725 | return  ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); | 
|  | 726 | } | 
|  | 727 |  | 
|  | 728 | void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, | 
|  | 729 | unsigned long align, unsigned long goal) | 
|  | 730 | { | 
|  | 731 | #ifdef MAX_DMA32_PFN | 
|  | 732 | unsigned long end_pfn; | 
|  | 733 |  | 
|  | 734 | if (WARN_ON_ONCE(slab_is_available())) | 
|  | 735 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 
|  | 736 |  | 
|  | 737 | /* update goal according ...MAX_DMA32_PFN */ | 
|  | 738 | end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; | 
|  | 739 |  | 
|  | 740 | if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && | 
|  | 741 | (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { | 
|  | 742 | void *ptr; | 
|  | 743 | unsigned long new_goal; | 
|  | 744 |  | 
|  | 745 | new_goal = MAX_DMA32_PFN << PAGE_SHIFT; | 
|  | 746 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, | 
|  | 747 | new_goal, 0); | 
|  | 748 | if (ptr) | 
|  | 749 | return ptr; | 
|  | 750 | } | 
|  | 751 | #endif | 
|  | 752 |  | 
|  | 753 | return __alloc_bootmem_node(pgdat, size, align, goal); | 
|  | 754 |  | 
|  | 755 | } | 
|  | 756 |  | 
#ifdef CONFIG_SPARSEMEM
/**
 * alloc_bootmem_section - allocate boot memory inside a sparsemem section
 * @size: size of the request in bytes
 * @section_nr: number of the section to allocate from
 *
 * The section start is used as goal and the section end as limit.  The
 * limit is dropped if the request is larger than the section.  The
 * allocation is made from the node that early_pfn_to_nid() reports for
 * the section start, with SMP_CACHE_BYTES alignment.
 *
 * Returns NULL on failure.
 */
void * __init alloc_bootmem_section(unsigned long size,
				    unsigned long section_nr)
{
	unsigned long first_pfn = section_nr_to_pfn(section_nr);
	unsigned long goal = first_pfn << PAGE_SHIFT;
	unsigned long limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
	bootmem_data_t *bdata = &bootmem_node_data[early_pfn_to_nid(first_pfn)];

	/* a request that does not fit must not run into the limit check */
	if (goal + size > limit)
		limit = 0;

	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
}
#endif
|  | 782 |  | 
|  | 783 | void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, | 
|  | 784 | unsigned long align, unsigned long goal) | 
|  | 785 | { | 
|  | 786 | void *ptr; | 
|  | 787 |  | 
|  | 788 | if (WARN_ON_ONCE(slab_is_available())) | 
|  | 789 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 
|  | 790 |  | 
|  | 791 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); | 
|  | 792 | if (ptr) | 
|  | 793 | return ptr; | 
|  | 794 |  | 
|  | 795 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); | 
|  | 796 | if (ptr) | 
|  | 797 | return ptr; | 
|  | 798 |  | 
|  | 799 | return __alloc_bootmem_nopanic(size, align, goal); | 
|  | 800 | } | 
|  | 801 |  | 
/* Address limit for the "low" allocators, unless already defined. */
#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
#endif
|  | 805 |  | 
|  | 806 | /** | 
|  | 807 | * __alloc_bootmem_low - allocate low boot memory | 
|  | 808 | * @size: size of the request in bytes | 
|  | 809 | * @align: alignment of the region | 
|  | 810 | * @goal: preferred starting address of the region | 
|  | 811 | * | 
|  | 812 | * The goal is dropped if it can not be satisfied and the allocation will | 
|  | 813 | * fall back to memory below @goal. | 
|  | 814 | * | 
|  | 815 | * Allocation may happen on any node in the system. | 
|  | 816 | * | 
|  | 817 | * The function panics if the request can not be satisfied. | 
|  | 818 | */ | 
|  | 819 | void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, | 
|  | 820 | unsigned long goal) | 
|  | 821 | { | 
|  | 822 | return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); | 
|  | 823 | } | 
|  | 824 |  | 
|  | 825 | /** | 
|  | 826 | * __alloc_bootmem_low_node - allocate low boot memory from a specific node | 
|  | 827 | * @pgdat: node to allocate from | 
|  | 828 | * @size: size of the request in bytes | 
|  | 829 | * @align: alignment of the region | 
|  | 830 | * @goal: preferred starting address of the region | 
|  | 831 | * | 
|  | 832 | * The goal is dropped if it can not be satisfied and the allocation will | 
|  | 833 | * fall back to memory below @goal. | 
|  | 834 | * | 
|  | 835 | * Allocation may fall back to any node in the system if the specified node | 
|  | 836 | * can not hold the requested memory. | 
|  | 837 | * | 
|  | 838 | * The function panics if the request can not be satisfied. | 
|  | 839 | */ | 
|  | 840 | void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, | 
|  | 841 | unsigned long align, unsigned long goal) | 
|  | 842 | { | 
|  | 843 | if (WARN_ON_ONCE(slab_is_available())) | 
|  | 844 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 
|  | 845 |  | 
|  | 846 | return ___alloc_bootmem_node(pgdat->bdata, size, align, | 
|  | 847 | goal, ARCH_LOW_ADDRESS_LIMIT); | 
|  | 848 | } |