1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/init.h>
16#include <linux/bitops.h>
17#include <linux/poison.h>
18#include <linux/pfn.h>
19#include <linux/debugfs.h>
20#include <linux/kmemleak.h>
21#include <linux/seq_file.h>
22#include <linux/memblock.h>
23#include <linux/bootmem.h>
24
25#include <asm/sections.h>
26#include <linux/io.h>
27
28#include "internal.h"
29
30/**
31 * DOC: memblock overview
32 *
33 * Memblock is a method of managing memory regions during the early
34 * boot period when the usual kernel memory allocators are not up and
35 * running.
36 *
37 * Memblock views the system memory as collections of contiguous
38 * regions. There are several types of these collections:
39 *
40 * * ``memory`` - describes the physical memory available to the
41 * kernel; this may differ from the actual physical memory installed
42 * in the system, for instance when the memory is restricted with
43 * ``mem=`` command line parameter
44 * * ``reserved`` - describes the regions that were allocated
45 * * ``physmap`` - describes the actual physical memory regardless of
46 * the possible restrictions; the ``physmap`` type is only available
47 * on some architectures.
48 *
49 * Each region is represented by :c:type:`struct memblock_region` that
50 * defines the region extents, its attributes and NUMA node id on NUMA
51 * systems. Every memory type is described by the :c:type:`struct
52 * memblock_type` which contains an array of memory regions along with
53 * the allocator metadata. The memory types are nicely wrapped with
 54 * :c:type:`struct memblock`. This structure is statically initialized
55 * at build time. The region arrays for the "memory" and "reserved"
56 * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the
57 * "physmap" type to %INIT_PHYSMEM_REGIONS.
58 * The :c:func:`memblock_allow_resize` enables automatic resizing of
59 * the region arrays during addition of new regions. This feature
60 * should be used with care so that memory allocated for the region
61 * array will not overlap with areas that should be reserved, for
62 * example initrd.
63 *
64 * The early architecture setup should tell memblock what the physical
65 * memory layout is by using :c:func:`memblock_add` or
66 * :c:func:`memblock_add_node` functions. The first function does not
67 * assign the region to a NUMA node and it is appropriate for UMA
68 * systems. Yet, it is possible to use it on NUMA systems as well and
69 * assign the region to a NUMA node later in the setup process using
70 * :c:func:`memblock_set_node`. The :c:func:`memblock_add_node`
71 * performs such an assignment directly.
72 *
 73 * Once memblock is set up, memory can be allocated using either the
 74 * memblock or the bootmem APIs.
75 *
76 * As the system boot progresses, the architecture specific
77 * :c:func:`mem_init` function frees all the memory to the buddy page
78 * allocator.
79 *
 80 * If an architecture enables %CONFIG_ARCH_DISCARD_MEMBLOCK, the
 81 * memblock data structures will be discarded after the system
 82 * initialization completes.
83 */
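/*
 * Illustrative sketch (not part of this file): how early architecture setup
 * code might use the interfaces documented above. The base address and size
 * are made-up values; real platforms take them from firmware tables or the
 * device tree, and the _text/_end symbols come from <asm/sections.h>.
 *
 *	void __init example_arch_memblock_setup(void)
 *	{
 *		memblock_add(0x80000000, 0x20000000);	// 512 MiB of RAM, made up
 *		memblock_reserve(__pa_symbol(_text),	// keep the kernel image out
 *				 __pa_symbol(_end) - __pa_symbol(_text));
 *		memblock_allow_resize();		// reservations are known now
 *	}
 */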
84
85static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
86static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
87#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
88static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
89#endif
90
91struct memblock memblock __initdata_memblock = {
92 .memory.regions = memblock_memory_init_regions,
93 .memory.cnt = 1, /* empty dummy entry */
94 .memory.max = INIT_MEMBLOCK_REGIONS,
95 .memory.name = "memory",
96
97 .reserved.regions = memblock_reserved_init_regions,
98 .reserved.cnt = 1, /* empty dummy entry */
99 .reserved.max = INIT_MEMBLOCK_REGIONS,
100 .reserved.name = "reserved",
101
102#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
103 .physmem.regions = memblock_physmem_init_regions,
104 .physmem.cnt = 1, /* empty dummy entry */
105 .physmem.max = INIT_PHYSMEM_REGIONS,
106 .physmem.name = "physmem",
107#endif
108
109 .bottom_up = false,
110 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
111};
112
113int memblock_debug __initdata_memblock;
114static bool system_has_some_mirror __initdata_memblock = false;
115static int memblock_can_resize __initdata_memblock;
116static int memblock_memory_in_slab __initdata_memblock = 0;
117static int memblock_reserved_in_slab __initdata_memblock = 0;
118
119enum memblock_flags __init_memblock choose_memblock_flags(void)
120{
121 return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
122}
123
124/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
125static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
126{
127 return *size = min(*size, PHYS_ADDR_MAX - base);
128}
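/*
 * Worked example for memblock_cap_size() (made-up numbers): with
 * base = PHYS_ADDR_MAX - 0xfff and *size = 0x2000 the range would wrap past
 * the end of the physical address space, so *size is clamped to
 * PHYS_ADDR_MAX - base = 0xfff and that clamped value is returned; with it,
 * base + *size == PHYS_ADDR_MAX and no overflow can occur.
 */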
129
130/*
131 * Address comparison utilities
132 */
133static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
134 phys_addr_t base2, phys_addr_t size2)
135{
136 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
137}
138
139bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
140 phys_addr_t base, phys_addr_t size)
141{
142 unsigned long i;
143
144 for (i = 0; i < type->cnt; i++)
145 if (memblock_addrs_overlap(base, size, type->regions[i].base,
146 type->regions[i].size))
147 break;
148 return i < type->cnt;
149}
150
151/**
152 * __memblock_find_range_bottom_up - find free area utility in bottom-up
153 * @start: start of candidate range
154 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
155 * %MEMBLOCK_ALLOC_ACCESSIBLE
156 * @size: size of free area to find
157 * @align: alignment of free area to find
158 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
159 * @flags: pick from blocks based on memory attributes
160 *
161 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
162 *
163 * Return:
164 * Found address on success, 0 on failure.
165 */
166static phys_addr_t __init_memblock
167__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
168 phys_addr_t size, phys_addr_t align, int nid,
169 enum memblock_flags flags)
170{
171 phys_addr_t this_start, this_end, cand;
172 u64 i;
173
174 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
175 this_start = clamp(this_start, start, end);
176 this_end = clamp(this_end, start, end);
177
178 cand = round_up(this_start, align);
179 if (cand < this_end && this_end - cand >= size)
180 return cand;
181 }
182
183 return 0;
184}
185
186/**
187 * __memblock_find_range_top_down - find free area utility, in top-down
188 * @start: start of candidate range
189 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
190 * %MEMBLOCK_ALLOC_ACCESSIBLE
191 * @size: size of free area to find
192 * @align: alignment of free area to find
193 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
194 * @flags: pick from blocks based on memory attributes
195 *
196 * Utility called from memblock_find_in_range_node(), find free area top-down.
197 *
198 * Return:
199 * Found address on success, 0 on failure.
200 */
201static phys_addr_t __init_memblock
202__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
203 phys_addr_t size, phys_addr_t align, int nid,
204 enum memblock_flags flags)
205{
206 phys_addr_t this_start, this_end, cand;
207 u64 i;
208
209 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
210 NULL) {
211 this_start = clamp(this_start, start, end);
212 this_end = clamp(this_end, start, end);
213
214 if (this_end < size)
215 continue;
216
217 cand = round_down(this_end - size, align);
218 if (cand >= this_start)
219 return cand;
220 }
221
222 return 0;
223}
224
225/**
226 * memblock_find_in_range_node - find free area in given range and node
227 * @size: size of free area to find
228 * @align: alignment of free area to find
229 * @start: start of candidate range
230 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
231 * %MEMBLOCK_ALLOC_ACCESSIBLE
232 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
233 * @flags: pick from blocks based on memory attributes
234 *
235 * Find @size free area aligned to @align in the specified range and node.
236 *
237 * When allocation direction is bottom-up, the @start should be greater
238 * than the end of the kernel image. Otherwise, it will be trimmed. The
239 * reason is that we want the bottom-up allocation just near the kernel
240 * image so it is highly likely that the allocated memory and the kernel
241 * will reside in the same node.
242 *
243 * If bottom-up allocation failed, will try to allocate memory top-down.
244 *
245 * Return:
246 * Found address on success, 0 on failure.
247 */
248phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
249 phys_addr_t align, phys_addr_t start,
250 phys_addr_t end, int nid,
251 enum memblock_flags flags)
252{
253 phys_addr_t kernel_end, ret;
254
255 /* pump up @end */
256 if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
257 end == MEMBLOCK_ALLOC_KASAN)
258 end = memblock.current_limit;
259
260 /* avoid allocating the first page */
261 start = max_t(phys_addr_t, start, PAGE_SIZE);
262 end = max(start, end);
263 kernel_end = __pa_symbol(_end);
264
265 /*
266 * try bottom-up allocation only when bottom-up mode
267 * is set and @end is above the kernel image.
268 */
269 if (memblock_bottom_up() && end > kernel_end) {
270 phys_addr_t bottom_up_start;
271
272 /* make sure we will allocate above the kernel */
273 bottom_up_start = max(start, kernel_end);
274
275 /* ok, try bottom-up allocation first */
276 ret = __memblock_find_range_bottom_up(bottom_up_start, end,
277 size, align, nid, flags);
278 if (ret)
279 return ret;
280
281 /*
282 * we always limit bottom-up allocation above the kernel,
283 * but top-down allocation doesn't have the limit, so
284 * retrying top-down allocation may succeed when bottom-up
285 * allocation failed.
286 *
 287 * bottom-up allocation is expected to fail very rarely,
 288 * so we use WARN_ONCE() here to see the stack trace if
 289 * a failure happens.
290 */
291 WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
292 "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
293 }
294
295 return __memblock_find_range_top_down(start, end, size, align, nid,
296 flags);
297}
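/*
 * Usage sketch (illustrative, made-up sizes): to locate a 1 MiB, 2 MiB-aligned
 * block anywhere below the current limit, on any node and without special
 * flags, one could call
 *
 *	phys_addr_t where;
 *
 *	where = memblock_find_in_range_node(0x100000, 0x200000, 0,
 *					    MEMBLOCK_ALLOC_ACCESSIBLE,
 *					    NUMA_NO_NODE, MEMBLOCK_NONE);
 *
 * This only searches; the caller still has to memblock_reserve() the returned
 * range (memblock_alloc_range_nid() below combines the two steps). In
 * bottom-up mode the search starts just above the kernel image instead of
 * from the top of memory, as described above.
 */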
298
299/**
300 * memblock_find_in_range - find free area in given range
301 * @start: start of candidate range
302 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
303 * %MEMBLOCK_ALLOC_ACCESSIBLE
304 * @size: size of free area to find
305 * @align: alignment of free area to find
306 *
307 * Find @size free area aligned to @align in the specified range.
308 *
309 * Return:
310 * Found address on success, 0 on failure.
311 */
312phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
313 phys_addr_t end, phys_addr_t size,
314 phys_addr_t align)
315{
316 phys_addr_t ret;
317 enum memblock_flags flags = choose_memblock_flags();
318
319again:
320 ret = memblock_find_in_range_node(size, align, start, end,
321 NUMA_NO_NODE, flags);
322
323 if (!ret && (flags & MEMBLOCK_MIRROR)) {
324 pr_warn("Could not allocate %pap bytes of mirrored memory\n",
325 &size);
326 flags &= ~MEMBLOCK_MIRROR;
327 goto again;
328 }
329
330 return ret;
331}
332
333static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
334{
335 type->total_size -= type->regions[r].size;
336 memmove(&type->regions[r], &type->regions[r + 1],
337 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
338 type->cnt--;
339
340 /* Special case for empty arrays */
341 if (type->cnt == 0) {
342 WARN_ON(type->total_size != 0);
343 type->cnt = 1;
344 type->regions[0].base = 0;
345 type->regions[0].size = 0;
346 type->regions[0].flags = 0;
347 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
348 }
349}
350
351#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
352/**
353 * memblock_discard - discard memory and reserved arrays if they were allocated
354 */
355void __init memblock_discard(void)
356{
357 phys_addr_t addr, size;
358
359 if (memblock.reserved.regions != memblock_reserved_init_regions) {
360 addr = __pa(memblock.reserved.regions);
361 size = PAGE_ALIGN(sizeof(struct memblock_region) *
362 memblock.reserved.max);
363 __memblock_free_late(addr, size);
364 }
365
366 if (memblock.memory.regions != memblock_memory_init_regions) {
367 addr = __pa(memblock.memory.regions);
368 size = PAGE_ALIGN(sizeof(struct memblock_region) *
369 memblock.memory.max);
370 __memblock_free_late(addr, size);
371 }
372}
373#endif
374
375/**
376 * memblock_double_array - double the size of the memblock regions array
377 * @type: memblock type of the regions array being doubled
378 * @new_area_start: starting address of memory range to avoid overlap with
379 * @new_area_size: size of memory range to avoid overlap with
380 *
381 * Double the size of the @type regions array. If memblock is being used to
382 * allocate memory for a new reserved regions array and there is a previously
383 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
384 * waiting to be reserved, ensure the memory used by the new array does
385 * not overlap.
386 *
387 * Return:
388 * 0 on success, -1 on failure.
389 */
390static int __init_memblock memblock_double_array(struct memblock_type *type,
391 phys_addr_t new_area_start,
392 phys_addr_t new_area_size)
393{
394 struct memblock_region *new_array, *old_array;
395 phys_addr_t old_alloc_size, new_alloc_size;
396 phys_addr_t old_size, new_size, addr, new_end;
397 int use_slab = slab_is_available();
398 int *in_slab;
399
400 /* We don't allow resizing until we know about the reserved regions
401 * of memory that aren't suitable for allocation
402 */
403 if (!memblock_can_resize)
404 return -1;
405
406 /* Calculate new doubled size */
407 old_size = type->max * sizeof(struct memblock_region);
408 new_size = old_size << 1;
409 /*
 410 * We need to allocate the new array aligned to PAGE_SIZE,
 411 * so we can free it completely later.
412 */
413 old_alloc_size = PAGE_ALIGN(old_size);
414 new_alloc_size = PAGE_ALIGN(new_size);
415
416 /* Retrieve the slab flag */
417 if (type == &memblock.memory)
418 in_slab = &memblock_memory_in_slab;
419 else
420 in_slab = &memblock_reserved_in_slab;
421
422 /* Try to find some space for it.
423 *
 424 * WARNING: We assume that either slab_is_available() is true and we use it,
 425 * or we use MEMBLOCK for allocations. That means that this is unsafe to
426 * use when bootmem is currently active (unless bootmem itself is
427 * implemented on top of MEMBLOCK which isn't the case yet)
428 *
429 * This should however not be an issue for now, as we currently only
430 * call into MEMBLOCK while it's still active, or much later when slab
431 * is active for memory hotplug operations
432 */
433 if (use_slab) {
434 new_array = kmalloc(new_size, GFP_KERNEL);
435 addr = new_array ? __pa(new_array) : 0;
436 } else {
437 /* only exclude range when trying to double reserved.regions */
438 if (type != &memblock.reserved)
439 new_area_start = new_area_size = 0;
440
441 addr = memblock_find_in_range(new_area_start + new_area_size,
442 memblock.current_limit,
443 new_alloc_size, PAGE_SIZE);
444 if (!addr && new_area_size)
445 addr = memblock_find_in_range(0,
446 min(new_area_start, memblock.current_limit),
447 new_alloc_size, PAGE_SIZE);
448
449 new_array = addr ? __va(addr) : NULL;
450 }
451 if (!addr) {
452 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
453 type->name, type->max, type->max * 2);
454 return -1;
455 }
456
457 new_end = addr + new_size - 1;
 458 memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]\n",
459 type->name, type->max * 2, &addr, &new_end);
460
461 /*
462 * Found space, we now need to move the array over before we add the
463 * reserved region since it may be our reserved array itself that is
464 * full.
465 */
466 memcpy(new_array, type->regions, old_size);
467 memset(new_array + type->max, 0, old_size);
468 old_array = type->regions;
469 type->regions = new_array;
470 type->max <<= 1;
471
472 /* Free old array. We needn't free it if the array is the static one */
473 if (*in_slab)
474 kfree(old_array);
475 else if (old_array != memblock_memory_init_regions &&
476 old_array != memblock_reserved_init_regions)
477 memblock_free(__pa(old_array), old_alloc_size);
478
479 /*
480 * Reserve the new array if that comes from the memblock. Otherwise, we
481 * needn't do it
482 */
483 if (!use_slab)
484 BUG_ON(memblock_reserve(addr, new_alloc_size));
485
486 /* Update slab flag */
487 *in_slab = use_slab;
488
489 return 0;
490}
491
492/**
493 * memblock_merge_regions - merge neighboring compatible regions
494 * @type: memblock type to scan
495 *
496 * Scan @type and merge neighboring compatible regions.
497 */
498static void __init_memblock memblock_merge_regions(struct memblock_type *type)
499{
500 int i = 0;
501
502 /* cnt never goes below 1 */
503 while (i < type->cnt - 1) {
504 struct memblock_region *this = &type->regions[i];
505 struct memblock_region *next = &type->regions[i + 1];
506
507 if (this->base + this->size != next->base ||
508 memblock_get_region_node(this) !=
509 memblock_get_region_node(next) ||
510 this->flags != next->flags) {
511 BUG_ON(this->base + this->size > next->base);
512 i++;
513 continue;
514 }
515
516 this->size += next->size;
517 /* move forward from next + 1, index of which is i + 2 */
518 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
519 type->cnt--;
520 }
521}
522
523/**
524 * memblock_insert_region - insert new memblock region
525 * @type: memblock type to insert into
526 * @idx: index for the insertion point
527 * @base: base address of the new region
528 * @size: size of the new region
529 * @nid: node id of the new region
530 * @flags: flags of the new region
531 *
532 * Insert new memblock region [@base, @base + @size) into @type at @idx.
533 * @type must already have extra room to accommodate the new region.
534 */
535static void __init_memblock memblock_insert_region(struct memblock_type *type,
536 int idx, phys_addr_t base,
537 phys_addr_t size,
538 int nid,
539 enum memblock_flags flags)
540{
541 struct memblock_region *rgn = &type->regions[idx];
542
543 BUG_ON(type->cnt >= type->max);
544 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
545 rgn->base = base;
546 rgn->size = size;
547 rgn->flags = flags;
548 memblock_set_region_node(rgn, nid);
549 type->cnt++;
550 type->total_size += size;
551}
552
553/**
554 * memblock_add_range - add new memblock region
555 * @type: memblock type to add new region into
556 * @base: base address of the new region
557 * @size: size of the new region
558 * @nid: nid of the new region
559 * @flags: flags of the new region
560 *
561 * Add new memblock region [@base, @base + @size) into @type. The new region
562 * is allowed to overlap with existing ones - overlaps don't affect already
563 * existing regions. @type is guaranteed to be minimal (all neighbouring
564 * compatible regions are merged) after the addition.
565 *
566 * Return:
567 * 0 on success, -errno on failure.
568 */
569int __init_memblock memblock_add_range(struct memblock_type *type,
570 phys_addr_t base, phys_addr_t size,
571 int nid, enum memblock_flags flags)
572{
573 bool insert = false;
574 phys_addr_t obase = base;
575 phys_addr_t end = base + memblock_cap_size(base, &size);
576 int idx, nr_new;
577 struct memblock_region *rgn;
578
579 if (!size)
580 return 0;
581
582 /* special case for empty array */
583 if (type->regions[0].size == 0) {
584 WARN_ON(type->cnt != 1 || type->total_size);
585 type->regions[0].base = base;
586 type->regions[0].size = size;
587 type->regions[0].flags = flags;
588 memblock_set_region_node(&type->regions[0], nid);
589 type->total_size = size;
590 return 0;
591 }
592repeat:
593 /*
594 * The following is executed twice. Once with %false @insert and
595 * then with %true. The first counts the number of regions needed
596 * to accommodate the new area. The second actually inserts them.
597 */
598 base = obase;
599 nr_new = 0;
600
601 for_each_memblock_type(idx, type, rgn) {
602 phys_addr_t rbase = rgn->base;
603 phys_addr_t rend = rbase + rgn->size;
604
605 if (rbase >= end)
606 break;
607 if (rend <= base)
608 continue;
609 /*
610 * @rgn overlaps. If it separates the lower part of new
611 * area, insert that portion.
612 */
613 if (rbase > base) {
614#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
615 WARN_ON(nid != memblock_get_region_node(rgn));
616#endif
617 WARN_ON(flags != rgn->flags);
618 nr_new++;
619 if (insert)
620 memblock_insert_region(type, idx++, base,
621 rbase - base, nid,
622 flags);
623 }
624 /* area below @rend is dealt with, forget about it */
625 base = min(rend, end);
626 }
627
628 /* insert the remaining portion */
629 if (base < end) {
630 nr_new++;
631 if (insert)
632 memblock_insert_region(type, idx, base, end - base,
633 nid, flags);
634 }
635
636 if (!nr_new)
637 return 0;
638
639 /*
640 * If this was the first round, resize array and repeat for actual
641 * insertions; otherwise, merge and return.
642 */
643 if (!insert) {
644 while (type->cnt + nr_new > type->max)
645 if (memblock_double_array(type, obase, size) < 0)
646 return -ENOMEM;
647 insert = true;
648 goto repeat;
649 } else {
650 memblock_merge_regions(type);
651 return 0;
652 }
653}
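/*
 * Behaviour sketch with made-up addresses: starting from an empty "memory"
 * type, the calls
 *
 *	memblock_add_range(&memblock.memory, 0x1000, 0x1000, MAX_NUMNODES, 0);
 *	memblock_add_range(&memblock.memory, 0x3000, 0x1000, MAX_NUMNODES, 0);
 *	memblock_add_range(&memblock.memory, 0x1800, 0x2000, MAX_NUMNODES, 0);
 *
 * leave a single region [0x1000, 0x4000) behind: the third call inserts only
 * the piece that was not already present ([0x2000, 0x3000)), and
 * memblock_merge_regions() then folds the neighbouring compatible regions
 * together.
 */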
654
655/**
656 * memblock_add_node - add new memblock region within a NUMA node
657 * @base: base address of the new region
658 * @size: size of the new region
659 * @nid: nid of the new region
660 *
661 * Add new memblock region [@base, @base + @size) to the "memory"
 662 * type. See memblock_add_range() description for more details.
663 *
664 * Return:
665 * 0 on success, -errno on failure.
666 */
667int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
668 int nid)
669{
670 return memblock_add_range(&memblock.memory, base, size, nid, 0);
671}
672
673/**
674 * memblock_add - add new memblock region
675 * @base: base address of the new region
676 * @size: size of the new region
677 *
678 * Add new memblock region [@base, @base + @size) to the "memory"
 679 * type. See memblock_add_range() description for more details.
680 *
681 * Return:
682 * 0 on success, -errno on failure.
683 */
684int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
685{
686 phys_addr_t end = base + size - 1;
687
688 memblock_dbg("memblock_add: [%pa-%pa] %pF\n",
689 &base, &end, (void *)_RET_IP_);
690
691 return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
692}
693
694/**
695 * memblock_isolate_range - isolate given range into disjoint memblocks
696 * @type: memblock type to isolate range for
697 * @base: base of range to isolate
698 * @size: size of range to isolate
699 * @start_rgn: out parameter for the start of isolated region
700 * @end_rgn: out parameter for the end of isolated region
701 *
702 * Walk @type and ensure that regions don't cross the boundaries defined by
703 * [@base, @base + @size). Crossing regions are split at the boundaries,
704 * which may create at most two more regions. The index of the first
705 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
706 *
707 * Return:
708 * 0 on success, -errno on failure.
709 */
710static int __init_memblock memblock_isolate_range(struct memblock_type *type,
711 phys_addr_t base, phys_addr_t size,
712 int *start_rgn, int *end_rgn)
713{
714 phys_addr_t end = base + memblock_cap_size(base, &size);
715 int idx;
716 struct memblock_region *rgn;
717
718 *start_rgn = *end_rgn = 0;
719
720 if (!size)
721 return 0;
722
723 /* we'll create at most two more regions */
724 while (type->cnt + 2 > type->max)
725 if (memblock_double_array(type, base, size) < 0)
726 return -ENOMEM;
727
728 for_each_memblock_type(idx, type, rgn) {
729 phys_addr_t rbase = rgn->base;
730 phys_addr_t rend = rbase + rgn->size;
731
732 if (rbase >= end)
733 break;
734 if (rend <= base)
735 continue;
736
737 if (rbase < base) {
738 /*
739 * @rgn intersects from below. Split and continue
740 * to process the next region - the new top half.
741 */
742 rgn->base = base;
743 rgn->size -= base - rbase;
744 type->total_size -= base - rbase;
745 memblock_insert_region(type, idx, rbase, base - rbase,
746 memblock_get_region_node(rgn),
747 rgn->flags);
748 } else if (rend > end) {
749 /*
750 * @rgn intersects from above. Split and redo the
751 * current region - the new bottom half.
752 */
753 rgn->base = end;
754 rgn->size -= end - rbase;
755 type->total_size -= end - rbase;
756 memblock_insert_region(type, idx--, rbase, end - rbase,
757 memblock_get_region_node(rgn),
758 rgn->flags);
759 } else {
760 /* @rgn is fully contained, record it */
761 if (!*end_rgn)
762 *start_rgn = idx;
763 *end_rgn = idx + 1;
764 }
765 }
766
767 return 0;
768}
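/*
 * Worked example with made-up numbers: if "memory" holds one region
 * [0x0, 0x10000) and memblock_isolate_range() is called with base = 0x4000,
 * size = 0x8000, the region is split into [0x0, 0x4000), [0x4000, 0xc000)
 * and [0xc000, 0x10000). @start_rgn/@end_rgn then select only the middle
 * region, so callers such as memblock_remove_range() and
 * memblock_setclr_flag() can operate on exactly the requested range.
 */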
769
770static int __init_memblock memblock_remove_range(struct memblock_type *type,
771 phys_addr_t base, phys_addr_t size)
772{
773 int start_rgn, end_rgn;
774 int i, ret;
775
776 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
777 if (ret)
778 return ret;
779
780 for (i = end_rgn - 1; i >= start_rgn; i--)
781 memblock_remove_region(type, i);
782 return 0;
783}
784
785int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
786{
787 phys_addr_t end = base + size - 1;
788
789 memblock_dbg("memblock_remove: [%pa-%pa] %pS\n",
790 &base, &end, (void *)_RET_IP_);
791
792 return memblock_remove_range(&memblock.memory, base, size);
793}
794
795
796int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
797{
798 phys_addr_t end = base + size - 1;
799
800 memblock_dbg(" memblock_free: [%pa-%pa] %pF\n",
801 &base, &end, (void *)_RET_IP_);
802
803 kmemleak_free_part_phys(base, size);
804 return memblock_remove_range(&memblock.reserved, base, size);
805}
806
807int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
808{
809 phys_addr_t end = base + size - 1;
810
811 memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n",
812 &base, &end, (void *)_RET_IP_);
813
814 return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
815}
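/*
 * Typical usage sketch (illustrative): boot code reserves a range it must not
 * hand out, for example an initrd image, and frees it again once the contents
 * have been consumed:
 *
 *	memblock_reserve(initrd_start_phys, initrd_size);
 *	...
 *	memblock_free(initrd_start_phys, initrd_size);
 *
 * initrd_start_phys/initrd_size are placeholder names here; architectures
 * keep the real values in their own variables.
 */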
816
817/**
818 * memblock_setclr_flag - set or clear flag for a memory region
819 * @base: base address of the region
820 * @size: size of the region
821 * @set: set or clear the flag
 822 * @flag: the flag to update
 823 *
 824 * This function isolates the region [@base, @base + @size) and sets/clears the flag.
825 *
826 * Return: 0 on success, -errno on failure.
827 */
828static int __init_memblock memblock_setclr_flag(phys_addr_t base,
829 phys_addr_t size, int set, int flag)
830{
831 struct memblock_type *type = &memblock.memory;
832 int i, ret, start_rgn, end_rgn;
833
834 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
835 if (ret)
836 return ret;
837
838 for (i = start_rgn; i < end_rgn; i++)
839 if (set)
840 memblock_set_region_flags(&type->regions[i], flag);
841 else
842 memblock_clear_region_flags(&type->regions[i], flag);
843
844 memblock_merge_regions(type);
845 return 0;
846}
847
848/**
849 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
850 * @base: the base phys addr of the region
851 * @size: the size of the region
852 *
853 * Return: 0 on success, -errno on failure.
854 */
855int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
856{
857 return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG);
858}
859
860/**
861 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
862 * @base: the base phys addr of the region
863 * @size: the size of the region
864 *
865 * Return: 0 on success, -errno on failure.
866 */
867int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
868{
869 return memblock_setclr_flag(base, size, 0, MEMBLOCK_HOTPLUG);
870}
871
872/**
873 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
874 * @base: the base phys addr of the region
875 * @size: the size of the region
876 *
877 * Return: 0 on success, -errno on failure.
878 */
879int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
880{
881 system_has_some_mirror = true;
882
883 return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
884}
885
886/**
887 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
888 * @base: the base phys addr of the region
889 * @size: the size of the region
890 *
891 * Return: 0 on success, -errno on failure.
892 */
893int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
894{
895 return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
896}
897
898/**
899 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
900 * @base: the base phys addr of the region
901 * @size: the size of the region
902 *
903 * Return: 0 on success, -errno on failure.
904 */
905int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
906{
907 return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
908}
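/*
 * Flag usage sketch (illustrative, placeholder names): firmware-owned ranges
 * that must stay out of the linear mapping are typically tagged right after
 * being added:
 *
 *	memblock_add(fw_region_base, fw_region_size);
 *	memblock_mark_nomap(fw_region_base, fw_region_size);
 *
 * The iterators below then skip such regions unless MEMBLOCK_NOMAP is passed
 * in @flags, see __next_mem_range().
 */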
909
910/**
911 * __next_reserved_mem_region - next function for for_each_reserved_region()
912 * @idx: pointer to u64 loop variable
913 * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
914 * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
915 *
916 * Iterate over all reserved memory regions.
917 */
918void __init_memblock __next_reserved_mem_region(u64 *idx,
919 phys_addr_t *out_start,
920 phys_addr_t *out_end)
921{
922 struct memblock_type *type = &memblock.reserved;
923
924 if (*idx < type->cnt) {
925 struct memblock_region *r = &type->regions[*idx];
926 phys_addr_t base = r->base;
927 phys_addr_t size = r->size;
928
929 if (out_start)
930 *out_start = base;
931 if (out_end)
932 *out_end = base + size - 1;
933
934 *idx += 1;
935 return;
936 }
937
938 /* signal end of iteration */
939 *idx = ULLONG_MAX;
940}
941
942/**
 943 * __next_mem_range - next function for for_each_free_mem_range() etc.
944 * @idx: pointer to u64 loop variable
945 * @nid: node selector, %NUMA_NO_NODE for all nodes
946 * @flags: pick from blocks based on memory attributes
947 * @type_a: pointer to memblock_type from where the range is taken
948 * @type_b: pointer to memblock_type which excludes memory from being taken
949 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
950 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
951 * @out_nid: ptr to int for nid of the range, can be %NULL
952 *
953 * Find the first area from *@idx which matches @nid, fill the out
954 * parameters, and update *@idx for the next iteration. The lower 32bit of
955 * *@idx contains index into type_a and the upper 32bit indexes the
956 * areas before each region in type_b. For example, if type_b regions
957 * look like the following,
958 *
959 * 0:[0-16), 1:[32-48), 2:[128-130)
960 *
961 * The upper 32bit indexes the following regions.
962 *
963 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
964 *
965 * As both region arrays are sorted, the function advances the two indices
966 * in lockstep and returns each intersection.
967 */
968void __init_memblock __next_mem_range(u64 *idx, int nid,
969 enum memblock_flags flags,
970 struct memblock_type *type_a,
971 struct memblock_type *type_b,
972 phys_addr_t *out_start,
973 phys_addr_t *out_end, int *out_nid)
974{
975 int idx_a = *idx & 0xffffffff;
976 int idx_b = *idx >> 32;
977
978 if (WARN_ONCE(nid == MAX_NUMNODES,
979 "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
980 nid = NUMA_NO_NODE;
981
982 for (; idx_a < type_a->cnt; idx_a++) {
983 struct memblock_region *m = &type_a->regions[idx_a];
984
985 phys_addr_t m_start = m->base;
986 phys_addr_t m_end = m->base + m->size;
987 int m_nid = memblock_get_region_node(m);
988
989 /* only memory regions are associated with nodes, check it */
990 if (nid != NUMA_NO_NODE && nid != m_nid)
991 continue;
992
993 /* skip hotpluggable memory regions if needed */
994 if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
995 continue;
996
997 /* if we want mirror memory skip non-mirror memory regions */
998 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
999 continue;
1000
1001 /* skip nomap memory unless we were asked for it explicitly */
1002 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
1003 continue;
1004
1005 if (!type_b) {
1006 if (out_start)
1007 *out_start = m_start;
1008 if (out_end)
1009 *out_end = m_end;
1010 if (out_nid)
1011 *out_nid = m_nid;
1012 idx_a++;
1013 *idx = (u32)idx_a | (u64)idx_b << 32;
1014 return;
1015 }
1016
1017 /* scan areas before each reservation */
1018 for (; idx_b < type_b->cnt + 1; idx_b++) {
1019 struct memblock_region *r;
1020 phys_addr_t r_start;
1021 phys_addr_t r_end;
1022
1023 r = &type_b->regions[idx_b];
1024 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1025 r_end = idx_b < type_b->cnt ?
1026 r->base : PHYS_ADDR_MAX;
1027
1028 /*
1029 * if idx_b advanced past idx_a,
1030 * break out to advance idx_a
1031 */
1032 if (r_start >= m_end)
1033 break;
1034 /* if the two regions intersect, we're done */
1035 if (m_start < r_end) {
1036 if (out_start)
1037 *out_start =
1038 max(m_start, r_start);
1039 if (out_end)
1040 *out_end = min(m_end, r_end);
1041 if (out_nid)
1042 *out_nid = m_nid;
1043 /*
1044 * The region which ends first is
1045 * advanced for the next iteration.
1046 */
1047 if (m_end <= r_end)
1048 idx_a++;
1049 else
1050 idx_b++;
1051 *idx = (u32)idx_a | (u64)idx_b << 32;
1052 return;
1053 }
1054 }
1055 }
1056
1057 /* signal end of iteration */
1058 *idx = ULLONG_MAX;
1059}
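/*
 * Usage sketch for the iterator above (illustrative): walking all free
 * (i.e. "memory" minus "reserved") ranges on any node could look like
 *
 *	phys_addr_t start, end;
 *	u64 i;
 *
 *	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
 *				&start, &end, NULL)
 *		pr_info("free range: [%pa-%pa]\n", &start, &end);
 *
 * The loop cursor @i is the packed index described above: its low 32 bits
 * walk memblock.memory and its high 32 bits walk the gaps between
 * memblock.reserved entries.
 */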
1060
1061/**
1062 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
1063 *
1064 * @idx: pointer to u64 loop variable
1065 * @nid: node selector, %NUMA_NO_NODE for all nodes
1066 * @flags: pick from blocks based on memory attributes
1067 * @type_a: pointer to memblock_type from where the range is taken
1068 * @type_b: pointer to memblock_type which excludes memory from being taken
1069 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
1070 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
1071 * @out_nid: ptr to int for nid of the range, can be %NULL
1072 *
1073 * Finds the next range from type_a which is not marked as unsuitable
1074 * in type_b.
1075 *
1076 * Reverse of __next_mem_range().
1077 */
1078void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
1079 enum memblock_flags flags,
1080 struct memblock_type *type_a,
1081 struct memblock_type *type_b,
1082 phys_addr_t *out_start,
1083 phys_addr_t *out_end, int *out_nid)
1084{
1085 int idx_a = *idx & 0xffffffff;
1086 int idx_b = *idx >> 32;
1087
1088 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
1089 nid = NUMA_NO_NODE;
1090
1091 if (*idx == (u64)ULLONG_MAX) {
1092 idx_a = type_a->cnt - 1;
1093 if (type_b != NULL)
1094 idx_b = type_b->cnt;
1095 else
1096 idx_b = 0;
1097 }
1098
1099 for (; idx_a >= 0; idx_a--) {
1100 struct memblock_region *m = &type_a->regions[idx_a];
1101
1102 phys_addr_t m_start = m->base;
1103 phys_addr_t m_end = m->base + m->size;
1104 int m_nid = memblock_get_region_node(m);
1105
1106 /* only memory regions are associated with nodes, check it */
1107 if (nid != NUMA_NO_NODE && nid != m_nid)
1108 continue;
1109
1110 /* skip hotpluggable memory regions if needed */
1111 if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
1112 continue;
1113
1114 /* if we want mirror memory skip non-mirror memory regions */
1115 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
1116 continue;
1117
1118 /* skip nomap memory unless we were asked for it explicitly */
1119 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
1120 continue;
1121
1122 if (!type_b) {
1123 if (out_start)
1124 *out_start = m_start;
1125 if (out_end)
1126 *out_end = m_end;
1127 if (out_nid)
1128 *out_nid = m_nid;
1129 idx_a--;
1130 *idx = (u32)idx_a | (u64)idx_b << 32;
1131 return;
1132 }
1133
1134 /* scan areas before each reservation */
1135 for (; idx_b >= 0; idx_b--) {
1136 struct memblock_region *r;
1137 phys_addr_t r_start;
1138 phys_addr_t r_end;
1139
1140 r = &type_b->regions[idx_b];
1141 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1142 r_end = idx_b < type_b->cnt ?
1143 r->base : PHYS_ADDR_MAX;
1144 /*
1145 * if idx_b advanced past idx_a,
1146 * break out to advance idx_a
1147 */
1148
1149 if (r_end <= m_start)
1150 break;
1151 /* if the two regions intersect, we're done */
1152 if (m_end > r_start) {
1153 if (out_start)
1154 *out_start = max(m_start, r_start);
1155 if (out_end)
1156 *out_end = min(m_end, r_end);
1157 if (out_nid)
1158 *out_nid = m_nid;
1159 if (m_start >= r_start)
1160 idx_a--;
1161 else
1162 idx_b--;
1163 *idx = (u32)idx_a | (u64)idx_b << 32;
1164 return;
1165 }
1166 }
1167 }
1168 /* signal end of iteration */
1169 *idx = ULLONG_MAX;
1170}
1171
1172#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
1173/*
1174 * Common iterator interface used to define for_each_mem_range().
1175 */
1176void __init_memblock __next_mem_pfn_range(int *idx, int nid,
1177 unsigned long *out_start_pfn,
1178 unsigned long *out_end_pfn, int *out_nid)
1179{
1180 struct memblock_type *type = &memblock.memory;
1181 struct memblock_region *r;
1182
1183 while (++*idx < type->cnt) {
1184 r = &type->regions[*idx];
1185
1186 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
1187 continue;
1188 if (nid == MAX_NUMNODES || nid == r->nid)
1189 break;
1190 }
1191 if (*idx >= type->cnt) {
1192 *idx = -1;
1193 return;
1194 }
1195
1196 if (out_start_pfn)
1197 *out_start_pfn = PFN_UP(r->base);
1198 if (out_end_pfn)
1199 *out_end_pfn = PFN_DOWN(r->base + r->size);
1200 if (out_nid)
1201 *out_nid = r->nid;
1202}
1203
1204/**
1205 * memblock_set_node - set node ID on memblock regions
1206 * @base: base of area to set node ID for
1207 * @size: size of area to set node ID for
1208 * @type: memblock type to set node ID for
1209 * @nid: node ID to set
1210 *
1211 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
1212 * Regions which cross the area boundaries are split as necessary.
1213 *
1214 * Return:
1215 * 0 on success, -errno on failure.
1216 */
1217int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
1218 struct memblock_type *type, int nid)
1219{
1220 int start_rgn, end_rgn;
1221 int i, ret;
1222
1223 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
1224 if (ret)
1225 return ret;
1226
1227 for (i = start_rgn; i < end_rgn; i++)
1228 memblock_set_region_node(&type->regions[i], nid);
1229
1230 memblock_merge_regions(type);
1231 return 0;
1232}
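/*
 * Usage sketch (illustrative, placeholder variables): UMA-style early code
 * can add memory without node information and attach the node id later, once
 * it is known:
 *
 *	memblock_add(base, size);
 *	...
 *	memblock_set_node(base, size, &memblock.memory, nid);
 *
 * This is the two-step alternative to memblock_add_node() mentioned in the
 * overview at the top of this file.
 */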
1233#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
1234
1235static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
1236 phys_addr_t align, phys_addr_t start,
1237 phys_addr_t end, int nid,
1238 enum memblock_flags flags)
1239{
1240 phys_addr_t found;
1241
1242 if (!align)
1243 align = SMP_CACHE_BYTES;
1244
1245 found = memblock_find_in_range_node(size, align, start, end, nid,
1246 flags);
1247 if (found && !memblock_reserve(found, size)) {
1248 /*
1249 * The min_count is set to 0 so that memblock allocations are
1250 * never reported as leaks.
1251 */
1252 kmemleak_alloc_phys(found, size, 0, 0);
1253 return found;
1254 }
1255 return 0;
1256}
1257
1258phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
1259 phys_addr_t start, phys_addr_t end,
1260 enum memblock_flags flags)
1261{
1262 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
1263 flags);
1264}
1265
1266phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
1267 phys_addr_t align, phys_addr_t max_addr,
1268 int nid, enum memblock_flags flags)
1269{
1270 return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
1271}
1272
1273phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
1274{
1275 enum memblock_flags flags = choose_memblock_flags();
1276 phys_addr_t ret;
1277
1278again:
1279 ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
1280 nid, flags);
1281
1282 if (!ret && (flags & MEMBLOCK_MIRROR)) {
1283 flags &= ~MEMBLOCK_MIRROR;
1284 goto again;
1285 }
1286 return ret;
1287}
1288
1289phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
1290{
1291 return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,
1292 MEMBLOCK_NONE);
1293}
1294
1295phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
1296{
1297 phys_addr_t alloc;
1298
1299 alloc = __memblock_alloc_base(size, align, max_addr);
1300
1301 if (alloc == 0)
1302 panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
1303 &size, &max_addr);
1304
1305 return alloc;
1306}
1307
1308phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
1309{
1310 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
1311}
1312
1313phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
1314{
1315 phys_addr_t res = memblock_alloc_nid(size, align, nid);
1316
1317 if (res)
1318 return res;
1319 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
1320}
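/*
 * Usage sketch for the physical allocators above (illustrative): a caller
 * that needs a page-aligned scratch page early in boot, preferably on a given
 * node, might do
 *
 *	phys_addr_t buf = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, nid);
 *	void *virt = phys_to_virt(buf);
 *
 * memblock_alloc_try_nid() prefers @nid but falls back to any node, and like
 * memblock_alloc() it ends up panicking in memblock_alloc_base() if nothing
 * can be found at all. These helpers return physical addresses and do not
 * zero the memory; the zeroing, virtual-address variants follow below.
 */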
1321
1322#if defined(CONFIG_NO_BOOTMEM)
1323/**
1324 * memblock_virt_alloc_internal - allocate boot memory block
1325 * @size: size of memory block to be allocated in bytes
1326 * @align: alignment of the region and block's size
1327 * @min_addr: the lower bound of the memory region to allocate (phys address)
1328 * @max_addr: the upper bound of the memory region to allocate (phys address)
1329 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1330 *
1331 * The @min_addr limit is dropped if it can not be satisfied and the allocation
1332 * will fall back to memory below @min_addr. Also, allocation may fall back
1333 * to any node in the system if the specified node can not
1334 * hold the requested memory.
1335 *
1336 * The allocation is performed from memory region limited by
1337 * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
1338 *
1339 * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0.
1340 *
1341 * The phys address of allocated boot memory block is converted to virtual and
1342 * allocated memory is reset to 0.
1343 *
1344 * In addition, function sets the min_count to 0 using kmemleak_alloc for
1345 * allocated boot memory block, so that it is never reported as leaks.
1346 *
1347 * Return:
1348 * Virtual address of allocated memory block on success, NULL on failure.
1349 */
1350static void * __init memblock_virt_alloc_internal(
1351 phys_addr_t size, phys_addr_t align,
1352 phys_addr_t min_addr, phys_addr_t max_addr,
1353 int nid)
1354{
1355 phys_addr_t alloc;
1356 void *ptr;
1357 enum memblock_flags flags = choose_memblock_flags();
1358
1359 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
1360 nid = NUMA_NO_NODE;
1361
1362 /*
1363 * Detect any accidental use of these APIs after slab is ready, as at
1364 * this moment memblock may be deinitialized already and its
1365 * internal data may be destroyed (after execution of free_all_bootmem)
1366 */
1367 if (WARN_ON_ONCE(slab_is_available()))
1368 return kzalloc_node(size, GFP_NOWAIT, nid);
1369
1370 if (!align)
1371 align = SMP_CACHE_BYTES;
1372
1373 if (max_addr > memblock.current_limit)
1374 max_addr = memblock.current_limit;
1375again:
1376 alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
1377 nid, flags);
1378 if (alloc && !memblock_reserve(alloc, size))
1379 goto done;
1380
1381 if (nid != NUMA_NO_NODE) {
1382 alloc = memblock_find_in_range_node(size, align, min_addr,
1383 max_addr, NUMA_NO_NODE,
1384 flags);
1385 if (alloc && !memblock_reserve(alloc, size))
1386 goto done;
1387 }
1388
1389 if (min_addr) {
1390 min_addr = 0;
1391 goto again;
1392 }
1393
1394 if (flags & MEMBLOCK_MIRROR) {
1395 flags &= ~MEMBLOCK_MIRROR;
1396 pr_warn("Could not allocate %pap bytes of mirrored memory\n",
1397 &size);
1398 goto again;
1399 }
1400
1401 return NULL;
1402done:
1403 ptr = phys_to_virt(alloc);
1404
1405 /* Skip kmemleak for kasan_init() due to high volume. */
1406 if (max_addr != MEMBLOCK_ALLOC_KASAN)
1407 /*
1408 * The min_count is set to 0 so that bootmem allocated
1409 * blocks are never reported as leaks. This is because many
1410 * of these blocks are only referred via the physical
1411 * address which is not looked up by kmemleak.
1412 */
1413 kmemleak_alloc(ptr, size, 0, 0);
1414
1415 return ptr;
1416}
1417
1418/**
1419 * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing
1420 * memory and without panicking
1421 * @size: size of memory block to be allocated in bytes
1422 * @align: alignment of the region and block's size
1423 * @min_addr: the lower bound of the memory region from where the allocation
1424 * is preferred (phys address)
1425 * @max_addr: the upper bound of the memory region from where the allocation
1426 * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
1427 * allocate only from memory limited by memblock.current_limit value
1428 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1429 *
1430 * Public function, provides additional debug information (including caller
1431 * info), if enabled. Does not zero allocated memory, does not panic if request
1432 * cannot be satisfied.
1433 *
1434 * Return:
1435 * Virtual address of allocated memory block on success, NULL on failure.
1436 */
1437void * __init memblock_virt_alloc_try_nid_raw(
1438 phys_addr_t size, phys_addr_t align,
1439 phys_addr_t min_addr, phys_addr_t max_addr,
1440 int nid)
1441{
1442 void *ptr;
1443
1444 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
1445 __func__, (u64)size, (u64)align, nid, &min_addr,
1446 &max_addr, (void *)_RET_IP_);
1447
1448 ptr = memblock_virt_alloc_internal(size, align,
1449 min_addr, max_addr, nid);
1450#ifdef CONFIG_DEBUG_VM
1451 if (ptr && size > 0)
1452 memset(ptr, PAGE_POISON_PATTERN, size);
1453#endif
1454 return ptr;
1455}
1456
1457/**
1458 * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
1459 * @size: size of memory block to be allocated in bytes
1460 * @align: alignment of the region and block's size
1461 * @min_addr: the lower bound of the memory region from where the allocation
1462 * is preferred (phys address)
1463 * @max_addr: the upper bound of the memory region from where the allocation
1464 * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
1465 * allocate only from memory limited by memblock.current_limit value
1466 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1467 *
1468 * Public function, provides additional debug information (including caller
1469 * info), if enabled. This function zeroes the allocated memory.
1470 *
1471 * Return:
1472 * Virtual address of allocated memory block on success, NULL on failure.
1473 */
1474void * __init memblock_virt_alloc_try_nid_nopanic(
1475 phys_addr_t size, phys_addr_t align,
1476 phys_addr_t min_addr, phys_addr_t max_addr,
1477 int nid)
1478{
1479 void *ptr;
1480
1481 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
1482 __func__, (u64)size, (u64)align, nid, &min_addr,
1483 &max_addr, (void *)_RET_IP_);
1484
1485 ptr = memblock_virt_alloc_internal(size, align,
1486 min_addr, max_addr, nid);
1487 if (ptr)
1488 memset(ptr, 0, size);
1489 return ptr;
1490}
1491
1492/**
1493 * memblock_virt_alloc_try_nid - allocate boot memory block with panicking
1494 * @size: size of memory block to be allocated in bytes
1495 * @align: alignment of the region and block's size
1496 * @min_addr: the lower bound of the memory region from where the allocation
1497 * is preferred (phys address)
1498 * @max_addr: the upper bound of the memory region from where the allocation
1499 * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
1500 * allocate only from memory limited by memblock.current_limit value
1501 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1502 *
1503 * Public panicking version of memblock_virt_alloc_try_nid_nopanic()
1504 * which provides debug information (including caller info), if enabled,
1505 * and panics if the request can not be satisfied.
1506 *
1507 * Return:
1508 * Virtual address of allocated memory block on success, NULL on failure.
1509 */
1510void * __init memblock_virt_alloc_try_nid(
1511 phys_addr_t size, phys_addr_t align,
1512 phys_addr_t min_addr, phys_addr_t max_addr,
1513 int nid)
1514{
1515 void *ptr;
1516
1517 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
1518 __func__, (u64)size, (u64)align, nid, &min_addr,
1519 &max_addr, (void *)_RET_IP_);
1520 ptr = memblock_virt_alloc_internal(size, align,
1521 min_addr, max_addr, nid);
1522 if (ptr) {
1523 memset(ptr, 0, size);
1524 return ptr;
1525 }
1526
1527 panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa\n",
1528 __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr);
1529 return NULL;
1530}
1531#endif
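/*
 * Usage sketch for the virtual allocators above (illustrative, placeholder
 * names): early code that needs a zeroed table of @nr entries anywhere in
 * mapped memory can use the non-panicking variant and handle failure itself:
 *
 *	struct foo *tbl;
 *
 *	tbl = memblock_virt_alloc_try_nid_nopanic(nr * sizeof(*tbl),
 *						  SMP_CACHE_BYTES, 0,
 *						  BOOTMEM_ALLOC_ACCESSIBLE,
 *						  NUMA_NO_NODE);
 *	if (!tbl)
 *		return -ENOMEM;
 *
 * "struct foo" and @nr are placeholders. A zero min_addr places no lower
 * bound, and %BOOTMEM_ALLOC_ACCESSIBLE caps the search at
 * memblock.current_limit as described in memblock_virt_alloc_internal().
 */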
1532
1533/**
1534 * __memblock_free_early - free boot memory block
1535 * @base: phys starting address of the boot memory block
1536 * @size: size of the boot memory block in bytes
1537 *
1538 * Free boot memory block previously allocated by memblock_virt_alloc_xx() API.
 1539 * The freed memory will not be released to the buddy allocator.
1540 */
1541void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
1542{
1543 memblock_free(base, size);
1544}
1545
1546/**
1547 * __memblock_free_late - free bootmem block pages directly to buddy allocator
1548 * @base: phys starting address of the boot memory block
1549 * @size: size of the boot memory block in bytes
1550 *
1551 * This is only useful when the bootmem allocator has already been torn
1552 * down, but we are still initializing the system. Pages are released directly
1553 * to the buddy allocator, no bootmem metadata is updated because it is gone.
1554 */
1555void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
1556{
1557 phys_addr_t cursor, end;
1558
1559 end = base + size - 1;
1560 memblock_dbg("%s: [%pa-%pa] %pF\n",
1561 __func__, &base, &end, (void *)_RET_IP_);
1562 kmemleak_free_part_phys(base, size);
1563 cursor = PFN_UP(base);
1564 end = PFN_DOWN(base + size);
1565
1566 for (; cursor < end; cursor++) {
1567 __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
1568 totalram_pages++;
1569 }
1570}
1571
1572/*
1573 * Remaining API functions
1574 */
1575
1576phys_addr_t __init_memblock memblock_phys_mem_size(void)
1577{
1578 return memblock.memory.total_size;
1579}
1580
1581phys_addr_t __init_memblock memblock_reserved_size(void)
1582{
1583 return memblock.reserved.total_size;
1584}
1585
1586phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
1587{
1588 unsigned long pages = 0;
1589 struct memblock_region *r;
1590 unsigned long start_pfn, end_pfn;
1591
1592 for_each_memblock(memory, r) {
1593 start_pfn = memblock_region_memory_base_pfn(r);
1594 end_pfn = memblock_region_memory_end_pfn(r);
1595 start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
1596 end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
1597 pages += end_pfn - start_pfn;
1598 }
1599
1600 return PFN_PHYS(pages);
1601}
1602
1603/* lowest address */
1604phys_addr_t __init_memblock memblock_start_of_DRAM(void)
1605{
1606 return memblock.memory.regions[0].base;
1607}
1608
1609phys_addr_t __init_memblock memblock_end_of_DRAM(void)
1610{
1611 int idx = memblock.memory.cnt - 1;
1612
1613 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
1614}
1615
1616static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
1617{
1618 phys_addr_t max_addr = PHYS_ADDR_MAX;
1619 struct memblock_region *r;
1620
1621 /*
1622 * translate the memory @limit size into the max address within one of
1623 * the memory memblock regions, if the @limit exceeds the total size
1624 * of those regions, max_addr will keep original value PHYS_ADDR_MAX
1625 */
1626 for_each_memblock(memory, r) {
1627 if (limit <= r->size) {
1628 max_addr = r->base + limit;
1629 break;
1630 }
1631 limit -= r->size;
1632 }
1633
1634 return max_addr;
1635}
1636
1637void __init memblock_enforce_memory_limit(phys_addr_t limit)
1638{
1639 phys_addr_t max_addr = PHYS_ADDR_MAX;
1640
1641 if (!limit)
1642 return;
1643
1644 max_addr = __find_max_addr(limit);
1645
1646 /* @limit exceeds the total size of the memory, do nothing */
1647 if (max_addr == PHYS_ADDR_MAX)
1648 return;
1649
1650 /* truncate both memory and reserved regions */
1651 memblock_remove_range(&memblock.memory, max_addr,
1652 PHYS_ADDR_MAX);
1653 memblock_remove_range(&memblock.reserved, max_addr,
1654 PHYS_ADDR_MAX);
1655}
1656
1657void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
1658{
1659 int start_rgn, end_rgn;
1660 int i, ret;
1661
1662 if (!size)
1663 return;
1664
1665 ret = memblock_isolate_range(&memblock.memory, base, size,
1666 &start_rgn, &end_rgn);
1667 if (ret)
1668 return;
1669
1670 /* remove all the MAP regions */
1671 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
1672 if (!memblock_is_nomap(&memblock.memory.regions[i]))
1673 memblock_remove_region(&memblock.memory, i);
1674
1675 for (i = start_rgn - 1; i >= 0; i--)
1676 if (!memblock_is_nomap(&memblock.memory.regions[i]))
1677 memblock_remove_region(&memblock.memory, i);
1678
1679 /* truncate the reserved regions */
1680 memblock_remove_range(&memblock.reserved, 0, base);
1681 memblock_remove_range(&memblock.reserved,
1682 base + size, PHYS_ADDR_MAX);
1683}
1684
1685void __init memblock_mem_limit_remove_map(phys_addr_t limit)
1686{
1687 phys_addr_t max_addr;
1688
1689 if (!limit)
1690 return;
1691
1692 max_addr = __find_max_addr(limit);
1693
1694 /* @limit exceeds the total size of the memory, do nothing */
1695 if (max_addr == PHYS_ADDR_MAX)
1696 return;
1697
1698 memblock_cap_memory_range(0, max_addr);
1699}
1700
1701static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
1702{
1703 unsigned int left = 0, right = type->cnt;
1704
1705 do {
1706 unsigned int mid = (right + left) / 2;
1707
1708 if (addr < type->regions[mid].base)
1709 right = mid;
1710 else if (addr >= (type->regions[mid].base +
1711 type->regions[mid].size))
1712 left = mid + 1;
1713 else
1714 return mid;
1715 } while (left < right);
1716 return -1;
1717}
1718
1719bool __init memblock_is_reserved(phys_addr_t addr)
1720{
1721 return memblock_search(&memblock.reserved, addr) != -1;
1722}
1723
1724bool __init_memblock memblock_is_memory(phys_addr_t addr)
1725{
1726 return memblock_search(&memblock.memory, addr) != -1;
1727}
1728
1729bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
1730{
1731 int i = memblock_search(&memblock.memory, addr);
1732
1733 if (i == -1)
1734 return false;
1735 return !memblock_is_nomap(&memblock.memory.regions[i]);
1736}
1737
1738#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
1739int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
1740 unsigned long *start_pfn, unsigned long *end_pfn)
1741{
1742 struct memblock_type *type = &memblock.memory;
1743 int mid = memblock_search(type, PFN_PHYS(pfn));
1744
1745 if (mid == -1)
1746 return -1;
1747
1748 *start_pfn = PFN_DOWN(type->regions[mid].base);
1749 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
1750
1751 return type->regions[mid].nid;
1752}
1753#endif
1754
1755/**
1756 * memblock_is_region_memory - check if a region is a subset of memory
1757 * @base: base of region to check
1758 * @size: size of region to check
1759 *
1760 * Check if the region [@base, @base + @size) is a subset of a memory block.
1761 *
1762 * Return:
 1763 * %true if [@base, @base + @size) is a subset of a memory block, %false otherwise.
1764 */
1765bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
1766{
1767 int idx = memblock_search(&memblock.memory, base);
1768 phys_addr_t end = base + memblock_cap_size(base, &size);
1769
1770 if (idx == -1)
1771 return false;
1772 return (memblock.memory.regions[idx].base +
1773 memblock.memory.regions[idx].size) >= end;
1774}
1775
1776/**
1777 * memblock_is_region_reserved - check if a region intersects reserved memory
1778 * @base: base of region to check
1779 * @size: size of region to check
1780 *
1781 * Check if the region [@base, @base + @size) intersects a reserved
1782 * memory block.
1783 *
1784 * Return:
1785 * True if they intersect, false if not.
1786 */
1787bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
1788{
1789 memblock_cap_size(base, &size);
1790 return memblock_overlaps_region(&memblock.reserved, base, size);
1791}
1792
1793void __init_memblock memblock_trim_memory(phys_addr_t align)
1794{
1795 phys_addr_t start, end, orig_start, orig_end;
1796 struct memblock_region *r;
1797
1798 for_each_memblock(memory, r) {
1799 orig_start = r->base;
1800 orig_end = r->base + r->size;
1801 start = round_up(orig_start, align);
1802 end = round_down(orig_end, align);
1803
1804 if (start == orig_start && end == orig_end)
1805 continue;
1806
1807 if (start < end) {
1808 r->base = start;
1809 r->size = end - start;
1810 } else {
1811 memblock_remove_region(&memblock.memory,
1812 r - memblock.memory.regions);
1813 r--;
1814 }
1815 }
1816}
1817
1818void __init_memblock memblock_set_current_limit(phys_addr_t limit)
1819{
1820 memblock.current_limit = limit;
1821}
1822
1823phys_addr_t __init_memblock memblock_get_current_limit(void)
1824{
1825 return memblock.current_limit;
1826}
1827
1828static void __init_memblock memblock_dump(struct memblock_type *type)
1829{
1830 phys_addr_t base, end, size;
1831 enum memblock_flags flags;
1832 int idx;
1833 struct memblock_region *rgn;
1834
1835 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt);
1836
1837 for_each_memblock_type(idx, type, rgn) {
1838 char nid_buf[32] = "";
1839
1840 base = rgn->base;
1841 size = rgn->size;
1842 end = base + size - 1;
1843 flags = rgn->flags;
1844#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
1845 if (memblock_get_region_node(rgn) != MAX_NUMNODES)
1846 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
1847 memblock_get_region_node(rgn));
1848#endif
1849 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n",
1850 type->name, idx, &base, &end, &size, nid_buf, flags);
1851 }
1852}
1853
1854void __init_memblock __memblock_dump_all(void)
1855{
1856 pr_info("MEMBLOCK configuration:\n");
1857 pr_info(" memory size = %pa reserved size = %pa\n",
1858 &memblock.memory.total_size,
1859 &memblock.reserved.total_size);
1860
1861 memblock_dump(&memblock.memory);
1862 memblock_dump(&memblock.reserved);
1863#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
1864 memblock_dump(&memblock.physmem);
1865#endif
1866}
1867
1868void __init memblock_allow_resize(void)
1869{
1870 memblock_can_resize = 1;
1871}
1872
1873static int __init early_memblock(char *p)
1874{
1875 if (p && strstr(p, "debug"))
1876 memblock_debug = 1;
1877 return 0;
1878}
1879early_param("memblock", early_memblock);
1880
1881#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
1882
1883static int memblock_debug_show(struct seq_file *m, void *private)
1884{
1885 struct memblock_type *type = m->private;
1886 struct memblock_region *reg;
1887 int i;
1888 phys_addr_t end;
1889
1890 for (i = 0; i < type->cnt; i++) {
1891 reg = &type->regions[i];
1892 end = reg->base + reg->size - 1;
1893
1894 seq_printf(m, "%4d: ", i);
1895 seq_printf(m, "%pa..%pa\n", &reg->base, &end);
1896 }
1897 return 0;
1898}
1899DEFINE_SHOW_ATTRIBUTE(memblock_debug);
1900
1901static int __init memblock_init_debugfs(void)
1902{
1903 struct dentry *root = debugfs_create_dir("memblock", NULL);
1904 if (!root)
1905 return -ENXIO;
1906 debugfs_create_file("memory", 0444, root,
1907 &memblock.memory, &memblock_debug_fops);
1908 debugfs_create_file("reserved", 0444, root,
1909 &memblock.reserved, &memblock_debug_fops);
1910#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
1911 debugfs_create_file("physmem", 0444, root,
1912 &memblock.physmem, &memblock_debug_fops);
1913#endif
1914
1915 return 0;
1916}
1917__initcall(memblock_init_debugfs);
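/*
 * Reading these files (illustrative, made-up contents) shows one region per
 * line in the format produced by memblock_debug_show() above, assuming
 * debugfs is mounted at /sys/kernel/debug:
 *
 *	# cat /sys/kernel/debug/memblock/memory
 *	   0: 0x0000000080000000..0x000000009fffffff
 *	   1: 0x00000000a0000000..0x00000000bfffffff
 */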
1918
1919#endif /* CONFIG_DEBUG_FS */