Blame - ap/os/linux/linux-3.4.x/mm/vmalloc.c - T106_DC

blob: 42eee4cd48916987500c969600b24c0924bd02e0 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* linux/mm/vmalloc.c
				3	*
				4	* Copyright (C) 1993 Linus Torvalds
				5	* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
				6	* SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
				7	* Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
				8	* Numa awareness, Christoph Lameter, SGI, June 2005
				9	*/
				10
				11	#include <linux/vmalloc.h>
				12	#include <linux/mm.h>
				13	#include <linux/module.h>
				14	#include <linux/highmem.h>
				15	#include <linux/sched.h>
				16	#include <linux/slab.h>
				17	#include <linux/spinlock.h>
				18	#include <linux/interrupt.h>
				19	#include <linux/proc_fs.h>
				20	#include <linux/seq_file.h>
				21	#include <linux/debugobjects.h>
				22	#include <linux/kallsyms.h>
				23	#include <linux/list.h>
				24	#include <linux/rbtree.h>
				25	#include <linux/radix-tree.h>
				26	#include <linux/rcupdate.h>
				27	#include <linux/pfn.h>
				28	#include <linux/kmemleak.h>
				29	#include <linux/atomic.h>
				30	#include <asm/uaccess.h>
				31	#include <asm/tlbflush.h>
				32	#include <asm/shmparam.h>
				33
				34	/* Page table manipulation functions */
				35
				36	static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
				37	{
				38	pte_t *pte;
				39
				40	pte = pte_offset_kernel(pmd, addr);
				41	do {
				42	pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
				43	WARN_ON(!pte_none(ptent) && !pte_present(ptent));
				44	} while (pte++, addr += PAGE_SIZE, addr != end);
				45	}
				46
				47	static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
				48	{
				49	pmd_t *pmd;
				50	unsigned long next;
				51
				52	pmd = pmd_offset(pud, addr);
				53	do {
				54	next = pmd_addr_end(addr, end);
				55	if (pmd_none_or_clear_bad(pmd))
				56	continue;
				57	vunmap_pte_range(pmd, addr, next);
				58	} while (pmd++, addr = next, addr != end);
				59	}
				60
				61	static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
				62	{
				63	pud_t *pud;
				64	unsigned long next;
				65
				66	pud = pud_offset(pgd, addr);
				67	do {
				68	next = pud_addr_end(addr, end);
				69	if (pud_none_or_clear_bad(pud))
				70	continue;
				71	vunmap_pmd_range(pud, addr, next);
				72	} while (pud++, addr = next, addr != end);
				73	}
				74
				75	static void vunmap_page_range(unsigned long addr, unsigned long end)
				76	{
				77	pgd_t *pgd;
				78	unsigned long next;
				79
				80	BUG_ON(addr >= end);
				81	pgd = pgd_offset_k(addr);
				82	do {
				83	next = pgd_addr_end(addr, end);
				84	if (pgd_none_or_clear_bad(pgd))
				85	continue;
				86	vunmap_pud_range(pgd, addr, next);
				87	} while (pgd++, addr = next, addr != end);
				88	}
				89
				90	static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
				91	unsigned long end, pgprot_t prot, struct page *pages, int nr)
				92	{
				93	pte_t *pte;
				94
				95	/*
				96	* nr is a running index into the array which helps higher level
				97	* callers keep track of where we're up to.
				98	*/
				99
				100	pte = pte_alloc_kernel(pmd, addr);
				101	if (!pte)
				102	return -ENOMEM;
				103	do {
				104	struct page page = pages[nr];
				105
				106	if (WARN_ON(!pte_none(*pte)))
				107	return -EBUSY;
				108	if (WARN_ON(!page))
				109	return -ENOMEM;
				110	set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
				111	(*nr)++;
				112	} while (pte++, addr += PAGE_SIZE, addr != end);
				113	return 0;
				114	}
				115
				116	static int vmap_pmd_range(pud_t *pud, unsigned long addr,
				117	unsigned long end, pgprot_t prot, struct page *pages, int nr)
				118	{
				119	pmd_t *pmd;
				120	unsigned long next;
				121
				122	pmd = pmd_alloc(&init_mm, pud, addr);
				123	if (!pmd)
				124	return -ENOMEM;
				125	do {
				126	next = pmd_addr_end(addr, end);
				127	if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
				128	return -ENOMEM;
				129	} while (pmd++, addr = next, addr != end);
				130	return 0;
				131	}
				132
				133	static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
				134	unsigned long end, pgprot_t prot, struct page *pages, int nr)
				135	{
				136	pud_t *pud;
				137	unsigned long next;
				138
				139	pud = pud_alloc(&init_mm, pgd, addr);
				140	if (!pud)
				141	return -ENOMEM;
				142	do {
				143	next = pud_addr_end(addr, end);
				144	if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
				145	return -ENOMEM;
				146	} while (pud++, addr = next, addr != end);
				147	return 0;
				148	}
				149
				150	/*
				151	* Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
				152	* will have pfns corresponding to the "pages" array.
				153	*
				154	* Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
				155	*/
				156	static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				157	pgprot_t prot, struct page **pages)
				158	{
				159	pgd_t *pgd;
				160	unsigned long next;
				161	unsigned long addr = start;
				162	int err = 0;
				163	int nr = 0;
				164
				165	BUG_ON(addr >= end);
				166	pgd = pgd_offset_k(addr);
				167	do {
				168	next = pgd_addr_end(addr, end);
				169	err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
				170	if (err)
				171	return err;
				172	} while (pgd++, addr = next, addr != end);
				173
				174	return nr;
				175	}
				176
				177	static int vmap_page_range(unsigned long start, unsigned long end,
				178	pgprot_t prot, struct page **pages)
				179	{
				180	int ret;
				181
				182	ret = vmap_page_range_noflush(start, end, prot, pages);
				183	flush_cache_vmap(start, end);
				184	return ret;
				185	}
				186
				/*
				 * Report whether @x lies in module space or in the vmalloc area.
				 *
				 * Returns 1 for an address in [MODULES_VADDR, MODULES_END) when the
				 * configuration defines a dedicated module region, otherwise whatever
				 * is_vmalloc_addr() returns.
				 */
				int is_vmalloc_or_module_addr(const void *x)
				{
					/*
					 * ARM, x86-64 and sparc64 give modules their own region and only
					 * fall back on vmalloc() when that fails; other architectures
					 * simply place modules in the vmalloc space.
					 */
				#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
					unsigned long va = (unsigned long)x;

					if (MODULES_VADDR <= va && va < MODULES_END)
						return 1;
				#endif
					return is_vmalloc_addr(x);
				}
				201
				202	/*
				203	* Walk a vmap address to the struct page it maps.
				204	*/
				205	struct page vmalloc_to_page(const void vmalloc_addr)
				206	{
				207	unsigned long addr = (unsigned long) vmalloc_addr;
				208	struct page *page = NULL;
				209	pgd_t *pgd = pgd_offset_k(addr);
				210
				211	/*
				212	* XXX we might need to change this if we add VIRTUAL_BUG_ON for
				213	* architectures that do not vmalloc module space
				214	*/
				215	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
				216
				217	if (!pgd_none(*pgd)) {
				218	pud_t *pud = pud_offset(pgd, addr);
				219	if (!pud_none(*pud)) {
				220	pmd_t *pmd = pmd_offset(pud, addr);
				221	if (!pmd_none(*pmd)) {
				222	pte_t *ptep, pte;
				223
				224	ptep = pte_offset_map(pmd, addr);
				225	pte = *ptep;
				226	if (pte_present(pte))
				227	page = pte_page(pte);
				228	pte_unmap(ptep);
				229	}
				230	}
				231	}
				232	return page;
				233	}
				234	EXPORT_SYMBOL(vmalloc_to_page);
				235
				/*
				 * Translate a vmalloc()-space virtual address into the page frame number
				 * of the page that backs it, via vmalloc_to_page().
				 */
				unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
				{
					struct page *page = vmalloc_to_page(vmalloc_addr);

					return page_to_pfn(page);
				}
				EXPORT_SYMBOL(vmalloc_to_pfn);
				244
				245
				246	/* Global kva allocator */
				247
				248	#define VM_LAZY_FREE 0x01
				249	#define VM_LAZY_FREEING 0x02
				250	#define VM_VM_AREA 0x04
				251
				252	struct vmap_area {
				253	unsigned long va_start;
				254	unsigned long va_end;
				255	unsigned long flags;
				256	struct rb_node rb_node; /* address sorted rbtree */
				257	struct list_head list; /* address sorted list */
				258	struct list_head purge_list; /* "lazy purge" list */
				259	struct vm_struct *vm;
				260	struct rcu_head rcu_head;
				261	};
				262
				263	static DEFINE_SPINLOCK(vmap_area_lock);
				264	static LIST_HEAD(vmap_area_list);
				265	static struct rb_root vmap_area_root = RB_ROOT;
				266
				267	/* The vmap cache globals are protected by vmap_area_lock */
				268	static struct rb_node *free_vmap_cache;
				269	static unsigned long cached_hole_size;
				270	static unsigned long cached_vstart;
				271	static unsigned long cached_align;
				272
				273	static unsigned long vmap_area_pcpu_hole;
				274
				275	static struct vmap_area *__find_vmap_area(unsigned long addr)
				276	{
				277	struct rb_node *n = vmap_area_root.rb_node;
				278
				279	while (n) {
				280	struct vmap_area *va;
				281
				282	va = rb_entry(n, struct vmap_area, rb_node);
				283	if (addr < va->va_start)
				284	n = n->rb_left;
				285	else if (addr > va->va_start)
				286	n = n->rb_right;
				287	else
				288	return va;
				289	}
				290
				291	return NULL;
				292	}
				293
				294	static void __insert_vmap_area(struct vmap_area *va)
				295	{
				296	struct rb_node **p = &vmap_area_root.rb_node;
				297	struct rb_node *parent = NULL;
				298	struct rb_node *tmp;
				299
				300	while (*p) {
				301	struct vmap_area *tmp_va;
				302
				303	parent = *p;
				304	tmp_va = rb_entry(parent, struct vmap_area, rb_node);
				305	if (va->va_start < tmp_va->va_end)
				306	p = &(*p)->rb_left;
				307	else if (va->va_end > tmp_va->va_start)
				308	p = &(*p)->rb_right;
				309	else
				310	BUG();
				311	}
				312
				313	rb_link_node(&va->rb_node, parent, p);
				314	rb_insert_color(&va->rb_node, &vmap_area_root);
				315
				316	/* address-sort this list so it is usable like the vmlist */
				317	tmp = rb_prev(&va->rb_node);
				318	if (tmp) {
				319	struct vmap_area *prev;
				320	prev = rb_entry(tmp, struct vmap_area, rb_node);
				321	list_add_rcu(&va->list, &prev->list);
				322	} else
				323	list_add_rcu(&va->list, &vmap_area_list);
				324	}
				325
				326	static void purge_vmap_area_lazy(void);
				327
				328	/*
				329	* Allocate a region of KVA of the specified size and alignment, within the
				330	* vstart and vend.
				331	*/
				332	static struct vmap_area *alloc_vmap_area(unsigned long size,
				333	unsigned long align,
				334	unsigned long vstart, unsigned long vend,
				335	int node, gfp_t gfp_mask)
				336	{
				337	struct vmap_area *va;
				338	struct rb_node *n;
				339	unsigned long addr;
				340	int purged = 0;
				341	struct vmap_area *first;
				342
				343	BUG_ON(!size);
				344	BUG_ON(size & ~PAGE_MASK);
				345	BUG_ON(!is_power_of_2(align));
				346
				347	va = kmalloc_node(sizeof(struct vmap_area),
				348	gfp_mask & GFP_RECLAIM_MASK, node);
				349	if (unlikely(!va))
				350	return ERR_PTR(-ENOMEM);
				351
				352	/*
				353	* Only scan the relevant parts containing pointers to other objects
				354	* to avoid false negatives.
				355	*/
				356	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
				357
				358	retry:
				359	spin_lock(&vmap_area_lock);
				360	/*
				361	* Invalidate cache if we have more permissive parameters.
				362	* cached_hole_size notes the largest hole noticed _below_
				363	* the vmap_area cached in free_vmap_cache: if size fits
				364	* into that hole, we want to scan from vstart to reuse
				365	* the hole instead of allocating above free_vmap_cache.
				366	* Note that __free_vmap_area may update free_vmap_cache
				367	* without updating cached_hole_size or cached_align.
				368	*/
				369	if (!free_vmap_cache \|\|
				370	size < cached_hole_size \|\|
				371	vstart < cached_vstart \|\|
				372	align < cached_align) {
				373	nocache:
				374	cached_hole_size = 0;
				375	free_vmap_cache = NULL;
				376	}
				377	/* record if we encounter less permissive parameters */
				378	cached_vstart = vstart;
				379	cached_align = align;
				380
				381	/* find starting point for our search */
				382	if (free_vmap_cache) {
				383	first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
				384	addr = ALIGN(first->va_end, align);
				385	if (addr < vstart)
				386	goto nocache;
				387	if (addr + size - 1 < addr)
				388	goto overflow;
				389
				390	} else {
				391	addr = ALIGN(vstart, align);
				392	if (addr + size - 1 < addr)
				393	goto overflow;
				394
				395	n = vmap_area_root.rb_node;
				396	first = NULL;
				397
				398	while (n) {
				399	struct vmap_area *tmp;
				400	tmp = rb_entry(n, struct vmap_area, rb_node);
				401	if (tmp->va_end >= addr) {
				402	first = tmp;
				403	if (tmp->va_start <= addr)
				404	break;
				405	n = n->rb_left;
				406	} else
				407	n = n->rb_right;
				408	}
				409
				410	if (!first)
				411	goto found;
				412	}
				413
				414	/* from the starting point, walk areas until a suitable hole is found */
				415	while (addr + size > first->va_start && addr + size <= vend) {
				416	if (addr + cached_hole_size < first->va_start)
				417	cached_hole_size = first->va_start - addr;
				418	addr = ALIGN(first->va_end, align);
				419	if (addr + size - 1 < addr)
				420	goto overflow;
				421
				422	n = rb_next(&first->rb_node);
				423	if (n)
				424	first = rb_entry(n, struct vmap_area, rb_node);
				425	else
				426	goto found;
				427	}
				428
				429	found:
				430	if (addr + size > vend)
				431	goto overflow;
				432
				433	va->va_start = addr;
				434	va->va_end = addr + size;
				435	va->flags = 0;
				436	__insert_vmap_area(va);
				437	free_vmap_cache = &va->rb_node;
				438	spin_unlock(&vmap_area_lock);
				439
				440	BUG_ON(va->va_start & (align-1));
				441	BUG_ON(va->va_start < vstart);
				442	BUG_ON(va->va_end > vend);
				443
				444	return va;
				445
				446	overflow:
				447	spin_unlock(&vmap_area_lock);
				448	if (!purged) {
				449	purge_vmap_area_lazy();
				450	purged = 1;
				451	goto retry;
				452	}
				453	if (printk_ratelimit())
				454	printk(KERN_WARNING
				455	"vmap allocation for size %lu failed: "
				456	"use vmalloc=<size> to increase size.\n", size);
				457	kfree(va);
				458	return ERR_PTR(-EBUSY);
				459	}
				460
				461	static void __free_vmap_area(struct vmap_area *va)
				462	{
				463	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
				464
				465	if (free_vmap_cache) {
				466	if (va->va_end < cached_vstart) {
				467	free_vmap_cache = NULL;
				468	} else {
				469	struct vmap_area *cache;
				470	cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
				471	if (va->va_start <= cache->va_start) {
				472	free_vmap_cache = rb_prev(&va->rb_node);
				473	/*
				474	* We don't try to update cached_hole_size or
				475	* cached_align, but it won't go very wrong.
				476	*/
				477	}
				478	}
				479	}
				480	rb_erase(&va->rb_node, &vmap_area_root);
				481	RB_CLEAR_NODE(&va->rb_node);
				482	list_del_rcu(&va->list);
				483
				484	/*
				485	* Track the highest possible candidate for pcpu area
				486	* allocation. Areas outside of vmalloc area can be returned
				487	* here too, consider only end addresses which fall inside
				488	* vmalloc area proper.
				489	*/
				490	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
				491	vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
				492
				493	kfree_rcu(va, rcu_head);
				494	}
				495
				496	/*
				497	* Free a region of KVA allocated by alloc_vmap_area
				498	*/
				499	static void free_vmap_area(struct vmap_area *va)
				500	{
				501	spin_lock(&vmap_area_lock);
				502	__free_vmap_area(va);
				503	spin_unlock(&vmap_area_lock);
				504	}
				505
				506	/*
				507	* Clear the pagetable entries of a given vmap_area
				508	*/
				509	static void unmap_vmap_area(struct vmap_area *va)
				510	{
				511	vunmap_page_range(va->va_start, va->va_end);
				512	}
				513
				/*
				 * Debug aid: with CONFIG_DEBUG_PAGEALLOC, unmap [start, end) and flush
				 * the kernel TLB for it immediately.  Compiles to nothing otherwise.
				 */
				static void vmap_debug_free_range(unsigned long start, unsigned long end)
				{
					/*
					 * Unmapping and flushing right away catches use after free bugs
					 * much like the linear kernel virtual address space does once a
					 * page has been freed.
					 *
					 * The lazy freeing logic is all kept in place so that this
					 * debugging feature stays as unintrusive as possible.
					 *
					 * Expect this to be slow: linear kernel virtual address
					 * debugging doesn't do a broadcast TLB flush, so it is a lot
					 * faster.
					 */
				#ifdef CONFIG_DEBUG_PAGEALLOC
					vunmap_page_range(start, end);
					flush_tlb_kernel_range(start, end);
				#endif
				}
				534
				535	/*
				536	* lazy_max_pages is the maximum amount of virtual address space we gather up
				537	* before attempting to purge with a TLB flush.
				538	*
				539	* There is a tradeoff here: a larger number will cover more kernel page tables
				540	* and take slightly longer to purge, but it will linearly reduce the number of
				541	* global TLB flushes that must be performed. It would seem natural to scale
				542	* this number up linearly with the number of CPUs (because vmapping activity
				543	* could also scale linearly with the number of CPUs), however it is likely
				544	* that in practice, workloads might be constrained in other ways that mean
				545	* vmap activity will not scale linearly with CPUs. Also, I want to be
				546	* conservative and not introduce a big latency on huge systems, so go with
				547	* a less aggressive log scale. It will still be an improvement over the old
				548	* code, and it will be simple to change the scale factor if we find that it
				549	* becomes a problem on bigger systems.
				550	*/
				551	static unsigned long lazy_max_pages(void)
				552	{
				553	unsigned int log;
				554
				555	log = fls(num_online_cpus());
				556
				557	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
				558	}
				559
				560	static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
				561
				562	/* for per-CPU blocks */
				563	static void purge_fragmented_blocks_allcpus(void);
				564
				565	/*
				566	* called before a call to iounmap() if the caller wants vm_area_struct's
				567	* immediately freed.
				568	*/
				569	void set_iounmap_nonlazy(void)
				570	{
				571	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
				572	}
				573
				574	/*
				575	* Purges all lazily-freed vmap areas.
				576	*
				577	* If sync is 0 then don't purge if there is already a purge in progress.
				578	* If force_flush is 1, then flush kernel TLBs between start and end even
				579	* if we found no lazy vmap areas to unmap (callers can use this to optimise
				580	* their own TLB flushing).
				581	* Returns with start = min(start, lowest purged address)
				582	* end = max(end, highest purged address)
				583	*/
				584	static void __purge_vmap_area_lazy(unsigned long start, unsigned long end,
				585	int sync, int force_flush)
				586	{
				587	static DEFINE_SPINLOCK(purge_lock);
				588	LIST_HEAD(valist);
				589	struct vmap_area *va;
				590	struct vmap_area *n_va;
				591	int nr = 0;
				592
				593	/*
				594	* If sync is 0 but force_flush is 1, we'll go sync anyway but callers
				595	* should not expect such behaviour. This just simplifies locking for
				596	* the case that isn't actually used at the moment anyway.
				597	*/
				598	if (!sync && !force_flush) {
				599	if (!spin_trylock(&purge_lock))
				600	return;
				601	} else
				602	spin_lock(&purge_lock);
				603
				604	if (sync)
				605	purge_fragmented_blocks_allcpus();
				606
				607	rcu_read_lock();
				608	list_for_each_entry_rcu(va, &vmap_area_list, list) {
				609	if (va->flags & VM_LAZY_FREE) {
				610	if (va->va_start < *start)
				611	*start = va->va_start;
				612	if (va->va_end > *end)
				613	*end = va->va_end;
				614	nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
				615	list_add_tail(&va->purge_list, &valist);
				616	va->flags \|= VM_LAZY_FREEING;
				617	va->flags &= ~VM_LAZY_FREE;
				618	}
				619	}
				620	rcu_read_unlock();
				621
				622	if (nr)
				623	atomic_sub(nr, &vmap_lazy_nr);
				624
				625	if (nr \|\| force_flush)
				626	flush_tlb_kernel_range(start, end);
				627
				628	if (nr) {
				629	spin_lock(&vmap_area_lock);
				630	list_for_each_entry_safe(va, n_va, &valist, purge_list)
				631	__free_vmap_area(va);
				632	spin_unlock(&vmap_area_lock);
				633	}
				634	spin_unlock(&purge_lock);
				635	}
				636
				637	/*
				638	* Kick off a purge of the outstanding lazy areas. Don't bother if somebody
				639	* is already purging.
				640	*/
				641	static void try_purge_vmap_area_lazy(void)
				642	{
				643	unsigned long start = ULONG_MAX, end = 0;
				644
				645	__purge_vmap_area_lazy(&start, &end, 0, 0);
				646	}
				647
				648	/*
				649	* Kick off a purge of the outstanding lazy areas.
				650	*/
				651	static void purge_vmap_area_lazy(void)
				652	{
				653	unsigned long start = ULONG_MAX, end = 0;
				654
				655	__purge_vmap_area_lazy(&start, &end, 1, 0);
				656	}
				657
				658	/*
				659	* Free a vmap area, caller ensuring that the area has been unmapped
				660	* and flush_cache_vunmap had been called for the correct range
				661	* previously.
				662	*/
				663	static void free_vmap_area_noflush(struct vmap_area *va)
				664	{
				665	va->flags \|= VM_LAZY_FREE;
				666	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
				667	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
				668	try_purge_vmap_area_lazy();
				669	}
				670
				/*
				 * Unmap a vmap area and queue it for lazy freeing.  The caller must
				 * already have called flush_cache_vunmap for the correct range.
				 */
				static void free_unmap_vmap_area_noflush(struct vmap_area *va)
				{
					unmap_vmap_area(va);
					free_vmap_area_noflush(va);
				}
				680
				681	/*
				682	* Free and unmap a vmap area
				683	*/
				684	static void free_unmap_vmap_area(struct vmap_area *va)
				685	{
				686	flush_cache_vunmap(va->va_start, va->va_end);
				687	free_unmap_vmap_area_noflush(va);
				688	}
				689
				690	static struct vmap_area *find_vmap_area(unsigned long addr)
				691	{
				692	struct vmap_area *va;
				693
				694	spin_lock(&vmap_area_lock);
				695	va = __find_vmap_area(addr);
				696	spin_unlock(&vmap_area_lock);
				697
				698	return va;
				699	}
				700
				/*
				 * Look up the vmap area that starts at @addr, then free and unmap it.
				 * BUG()s when no such area exists.
				 */
				static void free_unmap_vmap_area_addr(unsigned long addr)
				{
					struct vmap_area *area = find_vmap_area(addr);

					BUG_ON(!area);
					free_unmap_vmap_area(area);
				}
				709
				710
				711	/* Per cpu kva allocator */
				712
				/*
				 * vmap space is limited especially on 32 bit architectures. Ensure there is
				 * room for at least 16 percpu vmap blocks per CPU.
				 */
				/*
				 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
				 * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
				 * instead (we just need a rough idea)
				 */
				#if BITS_PER_LONG == 32
				#define VMALLOC_SPACE		(128UL*1024*1024)
				#else
				#define VMALLOC_SPACE		(128UL*1024*1024*1024)
				#endif

				#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
				#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
				#define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
				#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
				#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
				#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
				/*
				 * Pages per vmap block: VMALLOC_PAGES divided over the rounded-up CPU
				 * count and the 16 blocks per CPU, clamped to
				 * [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
				 */
				#define VMAP_BBMAP_BITS		\
						VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
						VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
							VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

				#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
				740
				741	static bool vmap_initialized __read_mostly = false;
				742
				743	struct vmap_block_queue {
				744	spinlock_t lock;
				745	struct list_head free;
				746	};
				747
				748	struct vmap_block {
				749	spinlock_t lock;
				750	struct vmap_area *va;
				751	struct vmap_block_queue *vbq;
				752	unsigned long free, dirty;
				753	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
				754	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
				755	struct list_head free_list;
				756	struct rcu_head rcu_head;
				757	struct list_head purge;
				758	};
				759
				760	/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
				761	static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
				762
				763	/*
				764	* Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
				765	* in the free path. Could get rid of this if we change the API to return a
				766	* "cookie" from alloc, to be passed to free. But no big deal yet.
				767	*/
				768	static DEFINE_SPINLOCK(vmap_block_tree_lock);
				769	static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
				770
				771	/*
				772	* We should probably have a fallback mechanism to allocate virtual memory
				773	* out of partially filled vmap blocks. However vmap block sizing should be
				774	* fairly reasonable according to the vmalloc size, so it shouldn't be a
				775	* big problem.
				776	*/
				777
				778	static unsigned long addr_to_vb_idx(unsigned long addr)
				779	{
				780	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
				781	addr /= VMAP_BLOCK_SIZE;
				782	return addr;
				783	}
				784
				785	static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
				786	{
				787	struct vmap_block_queue *vbq;
				788	struct vmap_block *vb;
				789	struct vmap_area *va;
				790	unsigned long vb_idx;
				791	int node, err, cpu;
				792
				793	node = numa_node_id();
				794
				795	vb = kmalloc_node(sizeof(struct vmap_block),
				796	gfp_mask & GFP_RECLAIM_MASK, node);
				797	if (unlikely(!vb))
				798	return ERR_PTR(-ENOMEM);
				799
				800	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
				801	VMALLOC_START, VMALLOC_END,
				802	node, gfp_mask);
				803	if (IS_ERR(va)) {
				804	kfree(vb);
				805	return ERR_CAST(va);
				806	}
				807
				808	err = radix_tree_preload(gfp_mask);
				809	if (unlikely(err)) {
				810	kfree(vb);
				811	free_vmap_area(va);
				812	return ERR_PTR(err);
				813	}
				814
				815	spin_lock_init(&vb->lock);
				816	vb->va = va;
				817	vb->free = VMAP_BBMAP_BITS;
				818	vb->dirty = 0;
				819	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
				820	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
				821	INIT_LIST_HEAD(&vb->free_list);
				822
				823	vb_idx = addr_to_vb_idx(va->va_start);
				824	spin_lock(&vmap_block_tree_lock);
				825	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
				826	spin_unlock(&vmap_block_tree_lock);
				827	BUG_ON(err);
				828	radix_tree_preload_end();
				829
				830	cpu = get_cpu_light();
				831	vbq = &__get_cpu_var(vmap_block_queue);
				832	vb->vbq = vbq;
				833	spin_lock(&vbq->lock);
				834	list_add_rcu(&vb->free_list, &vbq->free);
				835	spin_unlock(&vbq->lock);
				836	put_cpu_light();
				837
				838	return vb;
				839	}
				840
				841	static void free_vmap_block(struct vmap_block *vb)
				842	{
				843	struct vmap_block *tmp;
				844	unsigned long vb_idx;
				845
				846	vb_idx = addr_to_vb_idx(vb->va->va_start);
				847	spin_lock(&vmap_block_tree_lock);
				848	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
				849	spin_unlock(&vmap_block_tree_lock);
				850	BUG_ON(tmp != vb);
				851
				852	free_vmap_area_noflush(vb->va);
				853	kfree_rcu(vb, rcu_head);
				854	}
				855
				856	static void purge_fragmented_blocks(int cpu)
				857	{
				858	LIST_HEAD(purge);
				859	struct vmap_block *vb;
				860	struct vmap_block *n_vb;
				861	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
				862
				863	rcu_read_lock();
				864	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
				865
				866	if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
				867	continue;
				868
				869	spin_lock(&vb->lock);
				870	if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
				871	vb->free = 0; /* prevent further allocs after releasing lock */
				872	vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
				873	bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
				874	bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
				875	spin_lock(&vbq->lock);
				876	list_del_rcu(&vb->free_list);
				877	spin_unlock(&vbq->lock);
				878	spin_unlock(&vb->lock);
				879	list_add_tail(&vb->purge, &purge);
				880	} else
				881	spin_unlock(&vb->lock);
				882	}
				883	rcu_read_unlock();
				884
				885	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
				886	list_del(&vb->purge);
				887	free_vmap_block(vb);
				888	}
				889	}
				890
				/* Purge the fragmented blocks on the queue of the CPU we are running on. */
				static void purge_fragmented_blocks_thiscpu(void)
				{
					int this_cpu = smp_processor_id();

					purge_fragmented_blocks(this_cpu);
				}
				895
				896	static void purge_fragmented_blocks_allcpus(void)
				897	{
				898	int cpu;
				899
				900	for_each_possible_cpu(cpu)
				901	purge_fragmented_blocks(cpu);
				902	}
				903
				904	static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
				905	{
				906	struct vmap_block_queue *vbq;
				907	struct vmap_block *vb;
				908	unsigned long addr = 0;
				909	unsigned int order;
				910	int purge = 0, cpu;
				911
				912	BUG_ON(size & ~PAGE_MASK);
				913	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
				914	order = get_order(size);
				915
				916	again:
				917	rcu_read_lock();
				918	cpu = get_cpu_light();
				919	vbq = &__get_cpu_var(vmap_block_queue);
				920	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
				921	int i;
				922
				923	spin_lock(&vb->lock);
				924	if (vb->free < 1UL << order)
				925	goto next;
				926
				927	i = bitmap_find_free_region(vb->alloc_map,
				928	VMAP_BBMAP_BITS, order);
				929
				930	if (i < 0) {
				931	if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
				932	/* fragmented and no outstanding allocations */
				933	BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
				934	purge = 1;
				935	}
				936	goto next;
				937	}
				938	addr = vb->va->va_start + (i << PAGE_SHIFT);
				939	BUG_ON(addr_to_vb_idx(addr) !=
				940	addr_to_vb_idx(vb->va->va_start));
				941	vb->free -= 1UL << order;
				942	if (vb->free == 0) {
				943	spin_lock(&vbq->lock);
				944	list_del_rcu(&vb->free_list);
				945	spin_unlock(&vbq->lock);
				946	}
				947	spin_unlock(&vb->lock);
				948	break;
				949	next:
				950	spin_unlock(&vb->lock);
				951	}
				952
				953	if (purge)
				954	purge_fragmented_blocks_thiscpu();
				955
				956	put_cpu_light();
				957	rcu_read_unlock();
				958
				959	if (!addr) {
				960	vb = new_vmap_block(gfp_mask);
				961	if (IS_ERR(vb))
				962	return vb;
				963	goto again;
				964	}
				965
				966	return (void *)addr;
				967	}
				968
				969	static void vb_free(const void *addr, unsigned long size)
				970	{
				971	unsigned long offset;
				972	unsigned long vb_idx;
				973	unsigned int order;
				974	struct vmap_block *vb;
				975
				976	BUG_ON(size & ~PAGE_MASK);
				977	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
				978
				979	flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
				980
				981	order = get_order(size);
				982
				983	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
				984
				985	vb_idx = addr_to_vb_idx((unsigned long)addr);
				986	rcu_read_lock();
				987	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
				988	rcu_read_unlock();
				989	BUG_ON(!vb);
				990
				991	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
				992
				993	spin_lock(&vb->lock);
				994	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
				995
				996	vb->dirty += 1UL << order;
				997	if (vb->dirty == VMAP_BBMAP_BITS) {
				998	BUG_ON(vb->free);
				999	spin_unlock(&vb->lock);
				1000	free_vmap_block(vb);
				1001	} else
				1002	spin_unlock(&vb->lock);
				1003	}
				1004
				1005	/**
				1006	* vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
				1007	*
				1008	* The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
				1009	* to amortize TLB flushing overheads. What this means is that any page you
				1010	* have now, may, in a former life, have been mapped into kernel virtual
				1011	* address by the vmap layer and so there might be some CPUs with TLB entries
				1012	* still referencing that page (additional to the regular 1:1 kernel mapping).
				1013	*
				1014	* vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
				1015	* be sure that none of the pages we have control over will have any aliases
				1016	* from the vmap layer.
				1017	*/
				1018	void vm_unmap_aliases(void)
				1019	{
				1020	unsigned long start = ULONG_MAX, end = 0;
				1021	int cpu;
				1022	int flush = 0;
				1023
				1024	if (unlikely(!vmap_initialized))
				1025	return;
				1026
				1027	for_each_possible_cpu(cpu) {
				1028	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
				1029	struct vmap_block *vb;
				1030
				1031	rcu_read_lock();
				1032	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
				1033	int i;
				1034
				1035	spin_lock(&vb->lock);
				1036	i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
				1037	while (i < VMAP_BBMAP_BITS) {
				1038	unsigned long s, e;
				1039	int j;
				1040	j = find_next_zero_bit(vb->dirty_map,
				1041	VMAP_BBMAP_BITS, i);
				1042
				1043	s = vb->va->va_start + (i << PAGE_SHIFT);
				1044	e = vb->va->va_start + (j << PAGE_SHIFT);
				1045	flush = 1;
				1046
				1047	if (s < start)
				1048	start = s;
				1049	if (e > end)
				1050	end = e;
				1051
				1052	i = j;
				1053	i = find_next_bit(vb->dirty_map,
				1054	VMAP_BBMAP_BITS, i);
				1055	}
				1056	spin_unlock(&vb->lock);
				1057	}
				1058	rcu_read_unlock();
				1059	}
				1060
				1061	__purge_vmap_area_lazy(&start, &end, 1, flush);
				1062	}
				1063	EXPORT_SYMBOL_GPL(vm_unmap_aliases);
				1064
				1065	/**
				1066	* vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
				1067	* @mem: the pointer returned by vm_map_ram
				1068	* @count: the count passed to that vm_map_ram call (cannot unmap partial)
				1069	*/
				1070	void vm_unmap_ram(const void *mem, unsigned int count)
				1071	{
				1072	unsigned long size = count << PAGE_SHIFT;
				1073	unsigned long addr = (unsigned long)mem;
				1074
				1075	BUG_ON(!addr);
				1076	BUG_ON(addr < VMALLOC_START);
				1077	BUG_ON(addr > VMALLOC_END);
				1078	BUG_ON(addr & (PAGE_SIZE-1));
				1079
				1080	debug_check_no_locks_freed(mem, size);
				1081	vmap_debug_free_range(addr, addr+size);
				1082
				1083	if (likely(count <= VMAP_MAX_ALLOC))
				1084	vb_free(mem, size);
				1085	else
				1086	free_unmap_vmap_area_addr(addr);
				1087	}
				1088	EXPORT_SYMBOL(vm_unmap_ram);
				1089
				1090	/**
				1091	* vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
				1092	* @pages: an array of pointers to the pages to be mapped
				1093	* @count: number of pages
				1094	* @node: prefer to allocate data structures on this node
				1095	* @prot: memory protection to use. PAGE_KERNEL for regular RAM
				1096	*
				1097	* Returns: a pointer to the address that has been mapped, or %NULL on failure
				1098	*/
				1099	void vm_map_ram(struct page *pages, unsigned int count, int node, pgprot_t prot)
				1100	{
				1101	unsigned long size = count << PAGE_SHIFT;
				1102	unsigned long addr;
				1103	void *mem;
				1104
				1105	if (likely(count <= VMAP_MAX_ALLOC)) {
				1106	mem = vb_alloc(size, GFP_KERNEL);
				1107	if (IS_ERR(mem))
				1108	return NULL;
				1109	addr = (unsigned long)mem;
				1110	} else {
				1111	struct vmap_area *va;
				1112	va = alloc_vmap_area(size, PAGE_SIZE,
				1113	VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
				1114	if (IS_ERR(va))
				1115	return NULL;
				1116
				1117	addr = va->va_start;
				1118	mem = (void *)addr;
				1119	}
				1120	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
				1121	vm_unmap_ram(mem, count);
				1122	return NULL;
				1123	}
				1124	return mem;
				1125	}
				1126	EXPORT_SYMBOL(vm_map_ram);
				1127
				1128	/**
				1129	* vm_area_add_early - add vmap area early during boot
				1130	* @vm: vm_struct to add
				1131	*
				1132	* This function is used to add fixed kernel vm area to vmlist before
				1133	* vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
				1134	* should contain proper values and the other fields should be zero.
				1135	*
				1136	* DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
				1137	*/
				1138	void __init vm_area_add_early(struct vm_struct *vm)
				1139	{
				1140	struct vm_struct tmp, *p;
				1141
				1142	BUG_ON(vmap_initialized);
				1143	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
				1144	if (tmp->addr >= vm->addr) {
				1145	BUG_ON(tmp->addr < vm->addr + vm->size);
				1146	break;
				1147	} else
				1148	BUG_ON(tmp->addr + tmp->size > vm->addr);
				1149	}
				1150	vm->next = *p;
				1151	*p = vm;
				1152	}
				1153
				1154	/**
				1155	* vm_area_register_early - register vmap area early during boot
				1156	* @vm: vm_struct to register
				1157	* @align: requested alignment
				1158	*
				1159	* This function is used to register kernel vm area before
				1160	* vmalloc_init() is called. @vm->size and @vm->flags should contain
				1161	* proper values on entry and other fields should be zero. On return,
				1162	* vm->addr contains the allocated address.
				1163	*
				1164	* DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
				1165	*/
				1166	void __init vm_area_register_early(struct vm_struct *vm, size_t align)
				1167	{
				1168	static size_t vm_init_off __initdata;
				1169	unsigned long addr;
				1170
				1171	addr = ALIGN(VMALLOC_START + vm_init_off, align);
				1172	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
				1173
				1174	vm->addr = (void *)addr;
				1175
				1176	vm_area_add_early(vm);
				1177	}
				1178
				1179	void __init vmalloc_init(void)
				1180	{
				1181	struct vmap_area *va;
				1182	struct vm_struct *tmp;
				1183	int i;
				1184
				1185	for_each_possible_cpu(i) {
				1186	struct vmap_block_queue *vbq;
				1187
				1188	vbq = &per_cpu(vmap_block_queue, i);
				1189	spin_lock_init(&vbq->lock);
				1190	INIT_LIST_HEAD(&vbq->free);
				1191	}
				1192
				1193	/* Import existing vmlist entries. */
				1194	for (tmp = vmlist; tmp; tmp = tmp->next) {
				1195	va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
				1196	va->flags = VM_VM_AREA;
				1197	va->va_start = (unsigned long)tmp->addr;
				1198	va->va_end = va->va_start + tmp->size;
				1199	va->vm = tmp;
				1200	__insert_vmap_area(va);
				1201	}
				1202
				1203	vmap_area_pcpu_hole = VMALLOC_END;
				1204
				1205	vmap_initialized = true;
				1206	}
				1207
				1208	/**
				1209	* map_kernel_range_noflush - map kernel VM area with the specified pages
				1210	* @addr: start of the VM area to map
				1211	* @size: size of the VM area to map
				1212	* @prot: page protection flags to use
				1213	* @pages: pages to map
				1214	*
				1215	* Map PFN_UP(@size) pages at @addr. The VM area @addr and @size
				1216	* specify should have been allocated using get_vm_area() and its
				1217	* friends.
				1218	*
				1219	* NOTE:
				1220	* This function does NOT do any cache flushing. The caller is
				1221	* responsible for calling flush_cache_vmap() on to-be-mapped areas
				1222	* before calling this function.
				1223	*
				1224	* RETURNS:
				1225	* The number of pages mapped on success, -errno on failure.
				1226	*/
				1227	int map_kernel_range_noflush(unsigned long addr, unsigned long size,
				1228	pgprot_t prot, struct page **pages)
				1229	{
				1230	return vmap_page_range_noflush(addr, addr + size, prot, pages);
				1231	}
				1232
				1233	/**
				1234	* unmap_kernel_range_noflush - unmap kernel VM area
				1235	* @addr: start of the VM area to unmap
				1236	* @size: size of the VM area to unmap
				1237	*
				1238	* Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size
				1239	* specify should have been allocated using get_vm_area() and its
				1240	* friends.
				1241	*
				1242	* NOTE:
				1243	* This function does NOT do any cache flushing. The caller is
				1244	* responsible for calling flush_cache_vunmap() on to-be-mapped areas
				1245	* before calling this function and flush_tlb_kernel_range() after.
				1246	*/
				1247	void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
				1248	{
				1249	vunmap_page_range(addr, addr + size);
				1250	}
				1251	EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
				1252
				1253	/**
				1254	* unmap_kernel_range - unmap kernel VM area and flush cache and TLB
				1255	* @addr: start of the VM area to unmap
				1256	* @size: size of the VM area to unmap
				1257	*
				1258	* Similar to unmap_kernel_range_noflush() but flushes vcache before
				1259	* the unmapping and tlb after.
				1260	*/
				1261	void unmap_kernel_range(unsigned long addr, unsigned long size)
				1262	{
				1263	unsigned long end = addr + size;
				1264
				1265	flush_cache_vunmap(addr, end);
				1266	vunmap_page_range(addr, end);
				1267	flush_tlb_kernel_range(addr, end);
				1268	}
				1269
				1270	int map_vm_area(struct vm_struct area, pgprot_t prot, struct page **pages)
				1271	{
				1272	unsigned long addr = (unsigned long)area->addr;
				1273	unsigned long end = addr + area->size - PAGE_SIZE;
				1274	int err;
				1275
				1276	err = vmap_page_range(addr, end, prot, *pages);
				1277	if (err > 0) {
				1278	*pages += err;
				1279	err = 0;
				1280	}
				1281
				1282	return err;
				1283	}
				1284	EXPORT_SYMBOL_GPL(map_vm_area);
				1285
				1286	/* Old vmalloc interfaces: vmlist is the list of vm_structs, kept in ascending address order; it is modified under write_lock(&vmlist_lock) */
				1287	DEFINE_RWLOCK(vmlist_lock);
				1288	struct vm_struct *vmlist;
				1289
				1290	static void setup_vmalloc_vm(struct vm_struct vm, struct vmap_area va,
				1291	unsigned long flags, void *caller)
				1292	{
				1293	vm->flags = flags;
				1294	vm->addr = (void *)va->va_start;
				1295	vm->size = va->va_end - va->va_start;
				1296	vm->caller = caller;
				1297	va->vm = vm;
				1298	va->flags \|= VM_VM_AREA;
				1299	}
				1300
				1301	static void insert_vmalloc_vmlist(struct vm_struct *vm)
				1302	{
				1303	struct vm_struct tmp, *p;
				1304
				1305	vm->flags &= ~VM_UNLIST;
				1306	write_lock(&vmlist_lock);
				1307	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
				1308	if (tmp->addr >= vm->addr)
				1309	break;
				1310	}
				1311	vm->next = *p;
				1312	*p = vm;
				1313	write_unlock(&vmlist_lock);
				1314	}
				1315
				1316	static void insert_vmalloc_vm(struct vm_struct vm, struct vmap_area va,
				1317	unsigned long flags, void *caller)
				1318	{
				1319	setup_vmalloc_vm(vm, va, flags, caller);
				1320	insert_vmalloc_vmlist(vm);
				1321	}
				1322
				1323	static struct vm_struct *__get_vm_area_node(unsigned long size,
				1324	unsigned long align, unsigned long flags, unsigned long start,
				1325	unsigned long end, int node, gfp_t gfp_mask, void *caller)
				1326	{
				1327	struct vmap_area *va;
				1328	struct vm_struct *area;
				1329
				1330	BUG_ON(in_interrupt());
				1331	if (flags & VM_IOREMAP) {
				1332	int bit = fls(size);
				1333
				1334	if (bit > IOREMAP_MAX_ORDER)
				1335	bit = IOREMAP_MAX_ORDER;
				1336	else if (bit < PAGE_SHIFT)
				1337	bit = PAGE_SHIFT;
				1338
				1339	align = 1ul << bit;
				1340	}
				1341
				1342	size = PAGE_ALIGN(size);
				1343	if (unlikely(!size))
				1344	return NULL;
				1345
				1346	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
				1347	if (unlikely(!area))
				1348	return NULL;
				1349
				1350	/*
				1351	* We always allocate a guard page.
				1352	*/
				1353	size += PAGE_SIZE;
				1354
				1355	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
				1356	if (IS_ERR(va)) {
				1357	kfree(area);
				1358	return NULL;
				1359	}
				1360
				1361	/*
				1362	* When this function is called from __vmalloc_node_range,
				1363	* we do not add vm_struct to vmlist here to avoid
				1364	* accessing uninitialized members of vm_struct such as
				1365	* pages and nr_pages fields. They will be set later.
				1366	* To distinguish it from others, we use a VM_UNLIST flag.
				1367	*/
				1368	if (flags & VM_UNLIST)
				1369	setup_vmalloc_vm(area, va, flags, caller);
				1370	else
				1371	insert_vmalloc_vm(area, va, flags, caller);
				1372
				1373	return area;
				1374	}
				1375
				1376	struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				1377	unsigned long start, unsigned long end)
				1378	{
				1379	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				1380	__builtin_return_address(0));
				1381	}
				1382	EXPORT_SYMBOL_GPL(__get_vm_area);
				1383
				1384	struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				1385	unsigned long start, unsigned long end,
				1386	void *caller)
				1387	{
				1388	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				1389	caller);
				1390	}
				1391
				1392	/**
				1393	* get_vm_area - reserve a contiguous kernel virtual area
				1394	* @size: size of the area
				1395	* @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
				1396	*
				1397	* Search an area of @size in the kernel virtual mapping area,
				1398	* and reserved it for out purposes. Returns the area descriptor
				1399	* on success or %NULL on failure.
				1400	*/
				1401	struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
				1402	{
				1403	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				1404	-1, GFP_KERNEL, __builtin_return_address(0));
				1405	}
				1406
				1407	struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				1408	void *caller)
				1409	{
				1410	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				1411	-1, GFP_KERNEL, caller);
				1412	}
				1413
				1414	static struct vm_struct find_vm_area(const void addr)
				1415	{
				1416	struct vmap_area *va;
				1417
				1418	va = find_vmap_area((unsigned long)addr);
				1419	if (va && va->flags & VM_VM_AREA)
				1420	return va->vm;
				1421
				1422	return NULL;
				1423	}
				1424
				1425	/**
				1426	* remove_vm_area - find and remove a continuous kernel virtual area
				1427	* @addr: base address
				1428	*
				1429	* Search for the kernel VM area starting at @addr, and remove it.
				1430	* This function returns the found VM area, but using it is NOT safe
				1431	* on SMP machines, except for its size or flags.
				1432	*/
				1433	struct vm_struct remove_vm_area(const void addr)
				1434	{
				1435	struct vmap_area *va;
				1436
				1437	va = find_vmap_area((unsigned long)addr);
				1438	if (va && va->flags & VM_VM_AREA) {
				1439	struct vm_struct *vm = va->vm;
				1440
				1441	if (!(vm->flags & VM_UNLIST)) {
				1442	struct vm_struct tmp, *p;
				1443	/*
				1444	* remove from list and disallow access to
				1445	* this vm_struct before unmap. (address range
				1446	* confliction is maintained by vmap.)
				1447	*/
				1448	write_lock(&vmlist_lock);
				1449	for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
				1450	;
				1451	*p = tmp->next;
				1452	write_unlock(&vmlist_lock);
				1453	}
				1454
				1455	vmap_debug_free_range(va->va_start, va->va_end);
				1456	free_unmap_vmap_area(va);
				1457	vm->size -= PAGE_SIZE;
				1458
				1459	return vm;
				1460	}
				1461	return NULL;
				1462	}
				1463
				1464	static void __vunmap(const void *addr, int deallocate_pages)
				1465	{
				1466	struct vm_struct *area;
				1467
				1468	if (!addr)
				1469	return;
				1470
				1471	if ((PAGE_SIZE-1) & (unsigned long)addr) {
				1472	WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
				1473	return;
				1474	}
				1475
				1476	area = remove_vm_area(addr);
				1477	if (unlikely(!area)) {
				1478	WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				1479	addr);
				1480	return;
				1481	}
				1482
				1483	debug_check_no_locks_freed(addr, area->size);
				1484	debug_check_no_obj_freed(addr, area->size);
				1485
				1486	if (deallocate_pages) {
				1487	int i;
				1488
				1489	for (i = 0; i < area->nr_pages; i++) {
				1490	struct page *page = area->pages[i];
				1491
				1492	BUG_ON(!page);
				1493	__free_page(page);
				1494	}
				1495
				1496	if (area->flags & VM_VPAGES)
				1497	vfree(area->pages);
				1498	else
				1499	kfree(area->pages);
				1500	}
				1501
				1502	kfree(area);
				1503	return;
				1504	}
				1505
				1506	/**
				1507	* vfree - release memory allocated by vmalloc()
				1508	* @addr: memory base address
				1509	*
				1510	* Free the virtually continuous memory area starting at @addr, as
				1511	* obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
				1512	* NULL, no operation is performed.
				1513	*
				1514	* Must not be called in interrupt context.
				1515	*/
				1516	void vfree(const void *addr)
				1517	{
				1518	BUG_ON(in_interrupt());
				1519
				1520	kmemleak_free(addr);
				1521
				1522	__vunmap(addr, 1);
				1523	}
				1524	EXPORT_SYMBOL(vfree);
				1525
				1526	#ifdef CONFIG_MODEM_CODE_IS_MAPPING
				1527	void vfree_modem_section(unsigned long start,unsigned long end)
				1528	{
				1529	struct vmap_area *va;
				1530	if (!start)
				1531	return;
				1532	if ((PAGE_SIZE-1) & (unsigned long)start)
				1533	{
				1534	WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", start);
				1535	return;
				1536	}
				1537
				1538	va = find_vmap_area(MODULES_VADDR);
				1539	if (va && va->flags & VM_VM_AREA)
				1540	{
				1541	struct vm_struct *vm = va->vm;
				1542	int i = 0;
				1543	unsigned long nr_pages = 0;
				1544	flush_icache_range(start,end);
				1545	flush_cache_vunmap(start, end);
				1546	vunmap_page_range(start, end);
				1547	flush_tlb_kernel_range(start, end);
				1548
				1549	nr_pages = (end-MODULES_VADDR) >> PAGE_SHIFT;
				1550	for (i = (start-MODULES_VADDR) >> PAGE_SHIFT; i < nr_pages; i++)
				1551	{
				1552	struct page *page = vm->pages[i];
				1553	BUG_ON(!page);
				1554	__free_page(page);
				1555	}
				1556	}
				1557	}
				1558	EXPORT_SYMBOL(vfree_modem_section);
				1559	#endif
				1560	/**
				1561	* vunmap - release virtual mapping obtained by vmap()
				1562	* @addr: memory base address
				1563	*
				1564	* Free the virtually contiguous memory area starting at @addr,
				1565	* which was created from the page array passed to vmap().
				1566	*
				1567	* Must not be called in interrupt context.
				1568	*/
				1569	void vunmap(const void *addr)
				1570	{
				1571	BUG_ON(in_interrupt());
				1572	might_sleep();
				1573	__vunmap(addr, 0);
				1574	}
				1575	EXPORT_SYMBOL(vunmap);
				1576
				1577	/**
				1578	* vmap - map an array of pages into virtually contiguous space
				1579	* @pages: array of page pointers
				1580	* @count: number of pages to map
				1581	* @flags: vm_area->flags
				1582	* @prot: page protection for the mapping
				1583	*
				1584	* Maps @count pages from @pages into contiguous kernel virtual
				1585	* space.
				1586	*/
				1587	void vmap(struct page *pages, unsigned int count,
				1588	unsigned long flags, pgprot_t prot)
				1589	{
				1590	struct vm_struct *area;
				1591
				1592	might_sleep();
				1593
				1594	if (count > totalram_pages)
				1595	return NULL;
				1596
				1597	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
				1598	__builtin_return_address(0));
				1599	if (!area)
				1600	return NULL;
				1601
				1602	if (map_vm_area(area, prot, &pages)) {
				1603	vunmap(area->addr);
				1604	return NULL;
				1605	}
				1606
				1607	return area->addr;
				1608	}
				1609	EXPORT_SYMBOL(vmap);
				1610
				1611	static void *__vmalloc_node(unsigned long size, unsigned long align,
				1612	gfp_t gfp_mask, pgprot_t prot,
				1613	int node, void *caller);
				1614	static void __vmalloc_area_node(struct vm_struct area, gfp_t gfp_mask,
				1615	pgprot_t prot, int node, void *caller)
				1616	{
				1617	const int order = 0;
				1618	struct page **pages;
				1619	unsigned int nr_pages, array_size, i;
				1620	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) \| __GFP_ZERO;
				1621
				1622	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
				1623	array_size = (nr_pages * sizeof(struct page *));
				1624
				1625	area->nr_pages = nr_pages;
				1626	/* Please note that the recursion is strictly bounded. */
				1627	if (array_size > PAGE_SIZE) {
				1628	pages = __vmalloc_node(array_size, 1, nested_gfp\|__GFP_HIGHMEM,
				1629	PAGE_KERNEL, node, caller);
				1630	area->flags \|= VM_VPAGES;
				1631	} else {
				1632	pages = kmalloc_node(array_size, nested_gfp, node);
				1633	}
				1634	area->pages = pages;
				1635	area->caller = caller;
				1636	if (!area->pages) {
				1637	remove_vm_area(area->addr);
				1638	kfree(area);
				1639	return NULL;
				1640	}
				1641
				1642	for (i = 0; i < area->nr_pages; i++) {
				1643	struct page *page;
				1644	gfp_t tmp_mask = gfp_mask \| __GFP_NOWARN;
				1645
				1646	if (node < 0)
				1647	page = alloc_page(tmp_mask);
				1648	else
				1649	page = alloc_pages_node(node, tmp_mask, order);
				1650
				1651	if (unlikely(!page)) {
				1652	/* Successfully allocated i pages, free them in __vunmap() */
				1653	area->nr_pages = i;
				1654	goto fail;
				1655	}
				1656	area->pages[i] = page;
				1657	}
				1658
				1659	if (map_vm_area(area, prot, &pages))
				1660	goto fail;
				1661	return area->addr;
				1662
				1663	fail:
				1664	warn_alloc_failed(gfp_mask, order,
				1665	"vmalloc: allocation failure, allocated %ld of %ld bytes\n",
				1666	(area->nr_pages*PAGE_SIZE), area->size);
				1667	vfree(area->addr);
				1668	return NULL;
				1669	}
				1670
				1671	/**
				1672	* __vmalloc_node_range - allocate virtually contiguous memory
				1673	* @size: allocation size
				1674	* @align: desired alignment
				1675	* @start: vm area range start
				1676	* @end: vm area range end
				1677	* @gfp_mask: flags for the page level allocator
				1678	* @prot: protection mask for the allocated pages
				1679	* @node: node to use for allocation or -1
				1680	* @caller: caller's return address
				1681	*
				1682	* Allocate enough pages to cover @size from the page level
				1683	* allocator with @gfp_mask flags. Map them into contiguous
				1684	* kernel virtual space, using a pagetable protection of @prot.
				1685	*/
				1686	void *__vmalloc_node_range(unsigned long size, unsigned long align,
				1687	unsigned long start, unsigned long end, gfp_t gfp_mask,
				1688	pgprot_t prot, int node, void *caller)
				1689	{
				1690	struct vm_struct *area;
				1691	void *addr;
				1692	unsigned long real_size = size;
				1693
				1694	#ifdef CONFIG_MEM_CHECK
				1695	if (size > CONFIG_MEM_CHECK_SIZE) {
				1696	printk(KERN_ALERT"memcheck_vmalloc %d %s (%pS)\n", size, current->comm, __builtin_return_address(0));
				1697	if (strcmp(current->comm,MEM_CHECK_THREAD_NAME)==0)
				1698	dump_stack();
				1699	}
				1700	#endif
				1701
				1702	size = PAGE_ALIGN(size);
				1703	if (!size \|\| (size >> PAGE_SHIFT) > totalram_pages)
				1704	goto fail;
				1705
				1706	area = __get_vm_area_node(size, align, VM_ALLOC \| VM_UNLIST,
				1707	start, end, node, gfp_mask, caller);
				1708	if (!area)
				1709	goto fail;
				1710
				1711	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
				1712	if (!addr)
				1713	return NULL;
				1714
				1715	/*
				1716	* In this function, newly allocated vm_struct is not added
				1717	* to vmlist at __get_vm_area_node(). so, it is added here.
				1718	*/
				1719	insert_vmalloc_vmlist(area);
				1720
				1721	/*
				1722	* A ref_count = 2 is needed because vm_struct allocated in
				1723	* __get_vm_area_node() contains a reference to the virtual address of
				1724	* the vmalloc'ed block.
				1725	*/
				1726	kmemleak_alloc(addr, real_size, 2, gfp_mask);
				1727
				1728	return addr;
				1729
				1730	fail:
				1731	warn_alloc_failed(gfp_mask, 0,
				1732	"vmalloc: allocation failure: %lu bytes\n",
				1733	real_size);
				1734	return NULL;
				1735	}
				1736
				1737	/**
				1738	* __vmalloc_node - allocate virtually contiguous memory
				1739	* @size: allocation size
				1740	* @align: desired alignment
				1741	* @gfp_mask: flags for the page level allocator
				1742	* @prot: protection mask for the allocated pages
				1743	* @node: node to use for allocation or -1
				1744	* @caller: caller's return address
				1745	*
				1746	* Allocate enough pages to cover @size from the page level
				1747	* allocator with @gfp_mask flags. Map them into contiguous
				1748	* kernel virtual space, using a pagetable protection of @prot.
				1749	*/
				1750	static void *__vmalloc_node(unsigned long size, unsigned long align,
				1751	gfp_t gfp_mask, pgprot_t prot,
				1752	int node, void *caller)
				1753	{
				1754	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
				1755	gfp_mask, prot, node, caller);
				1756	}
				1757
				1758	void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
				1759	{
				1760	return __vmalloc_node(size, 1, gfp_mask, prot, -1,
				1761	__builtin_return_address(0));
				1762	}
				1763	EXPORT_SYMBOL(__vmalloc);
				1764
				1765	static inline void *__vmalloc_node_flags(unsigned long size,
				1766	int node, gfp_t flags)
				1767	{
				1768	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
				1769	node, __builtin_return_address(0));
				1770	}
				1771
				1772	/**
				1773	* vmalloc - allocate virtually contiguous memory
				1774	* @size: allocation size
				1775	* Allocate enough pages to cover @size from the page level
				1776	* allocator and map them into contiguous kernel virtual space.
				1777	*
				1778	* For tight control over page level allocator and protection flags
				1779	* use __vmalloc() instead.
				1780	*/
				1781	void *vmalloc(unsigned long size)
				1782	{
				1783	return __vmalloc_node_flags(size, -1, GFP_KERNEL \| __GFP_HIGHMEM);
				1784	}
				1785	EXPORT_SYMBOL(vmalloc);
				1786
				1787	/**
				1788	* vzalloc - allocate virtually contiguous memory with zero fill
				1789	* @size: allocation size
				1790	* Allocate enough pages to cover @size from the page level
				1791	* allocator and map them into contiguous kernel virtual space.
				1792	* The memory allocated is set to zero.
				1793	*
				1794	* For tight control over page level allocator and protection flags
				1795	* use __vmalloc() instead.
				1796	*/
				1797	void *vzalloc(unsigned long size)
				1798	{
				1799	return __vmalloc_node_flags(size, -1,
				1800	GFP_KERNEL \| __GFP_HIGHMEM \| __GFP_ZERO);
				1801	}
				1802	EXPORT_SYMBOL(vzalloc);
				1803
				1804	/**
				1805	* vmalloc_user - allocate zeroed virtually contiguous memory for userspace
				1806	* @size: allocation size
				1807	*
				1808	* The resulting memory area is zeroed so it can be mapped to userspace
				1809	* without leaking data.
				1810	*/
				1811	void *vmalloc_user(unsigned long size)
				1812	{
				1813	struct vm_struct *area;
				1814	void *ret;
				1815
				1816	ret = __vmalloc_node(size, SHMLBA,
				1817	GFP_KERNEL \| __GFP_HIGHMEM \| __GFP_ZERO,
				1818	PAGE_KERNEL, -1, __builtin_return_address(0));
				1819	if (ret) {
				1820	area = find_vm_area(ret);
				1821	if (area)
				1822	area->flags \|= VM_USERMAP;
				1823	}
				1824	return ret;
				1825	}
				1826	EXPORT_SYMBOL(vmalloc_user);
				1827
				1828	/**
				1829	* vmalloc_node - allocate memory on a specific node
				1830	* @size: allocation size
				1831	* @node: numa node
				1832	*
				1833	* Allocate enough pages to cover @size from the page level
				1834	* allocator and map them into contiguous kernel virtual space.
				1835	*
				1836	* For tight control over page level allocator and protection flags
				1837	* use __vmalloc() instead.
				1838	*/
				1839	void *vmalloc_node(unsigned long size, int node)
				1840	{
				1841	return __vmalloc_node(size, 1, GFP_KERNEL \| __GFP_HIGHMEM, PAGE_KERNEL,
				1842	node, __builtin_return_address(0));
				1843	}
				1844	EXPORT_SYMBOL(vmalloc_node);
				1845
				1846	/**
				1847	* vzalloc_node - allocate memory on a specific node with zero fill
				1848	* @size: allocation size
				1849	* @node: numa node
				1850	*
				1851	* Allocate enough pages to cover @size from the page level
				1852	* allocator and map them into contiguous kernel virtual space.
				1853	* The memory allocated is set to zero.
				1854	*
				1855	* For tight control over page level allocator and protection flags
				1856	* use __vmalloc_node() instead.
				1857	*/
				1858	void *vzalloc_node(unsigned long size, int node)
				1859	{
				1860	return __vmalloc_node_flags(size, node,
				1861	GFP_KERNEL \| __GFP_HIGHMEM \| __GFP_ZERO);
				1862	}
				1863	EXPORT_SYMBOL(vzalloc_node);
				1864
				1865	#ifndef PAGE_KERNEL_EXEC
				1866	# define PAGE_KERNEL_EXEC PAGE_KERNEL
				1867	#endif
				1868
				1869	/**
				1870	* vmalloc_exec - allocate virtually contiguous, executable memory
				1871	* @size: allocation size
				1872	*
				1873	* Kernel-internal function to allocate enough pages to cover @size
				1874	* the page level allocator and map them into contiguous and
				1875	* executable kernel virtual space.
				1876	*
				1877	* For tight control over page level allocator and protection flags
				1878	* use __vmalloc() instead.
				1879	*/
				1880
				1881	void *vmalloc_exec(unsigned long size)
				1882	{
				1883	return __vmalloc_node(size, 1, GFP_KERNEL \| __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
				1884	-1, __builtin_return_address(0));
				1885	}
				1886
				1887	#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
				1888	#define GFP_VMALLOC32 GFP_DMA32 \| GFP_KERNEL
				1889	#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
				1890	#define GFP_VMALLOC32 GFP_DMA \| GFP_KERNEL
				1891	#else
				1892	#define GFP_VMALLOC32 GFP_KERNEL
				1893	#endif
				1894
				1895	/**
				1896	* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
				1897	* @size: allocation size
				1898	*
				1899	* Allocate enough 32bit PA addressable pages to cover @size from the
				1900	* page level allocator and map them into contiguous kernel virtual space.
				1901	*/
				1902	void *vmalloc_32(unsigned long size)
				1903	{
				1904	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
				1905	-1, __builtin_return_address(0));
				1906	}
				1907	EXPORT_SYMBOL(vmalloc_32);
				1908
				1909	/**
				1910	* vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
				1911	* @size: allocation size
				1912	*
				1913	* The resulting memory area is 32bit addressable and zeroed so it can be
				1914	* mapped to userspace without leaking data.
				1915	*/
				1916	void *vmalloc_32_user(unsigned long size)
				1917	{
				1918	struct vm_struct *area;
				1919	void *ret;
				1920
				1921	ret = __vmalloc_node(size, 1, GFP_VMALLOC32 \| __GFP_ZERO, PAGE_KERNEL,
				1922	-1, __builtin_return_address(0));
				1923	if (ret) {
				1924	area = find_vm_area(ret);
				1925	if (area)
				1926	area->flags \|= VM_USERMAP;
				1927	}
				1928	return ret;
				1929	}
				1930	EXPORT_SYMBOL(vmalloc_32_user);
				1931
				1932	/*
				1933	* small helper routine , copy contents to buf from addr.
				1934	* If the page is not present, fill zero.
				1935	*/
				1936
				1937	static int aligned_vread(char buf, char addr, unsigned long count)
				1938	{
				1939	struct page *p;
				1940	int copied = 0;
				1941
				1942	while (count) {
				1943	unsigned long offset, length;
				1944
				1945	offset = (unsigned long)addr & ~PAGE_MASK;
				1946	length = PAGE_SIZE - offset;
				1947	if (length > count)
				1948	length = count;
				1949	p = vmalloc_to_page(addr);
				1950	/*
				1951	* To do safe access to this _mapped_ area, we need
				1952	* lock. But adding lock here means that we need to add
				1953	* overhead of vmalloc()/vfree() calles for this _debug_
				1954	* interface, rarely used. Instead of that, we'll use
				1955	* kmap() and get small overhead in this access function.
				1956	*/
				1957	if (p) {
				1958	/*
				1959	* we can expect USER0 is not used (see vread/vwrite's
				1960	* function description)
				1961	*/
				1962	void *map = kmap_atomic(p);
				1963	memcpy(buf, map + offset, length);
				1964	kunmap_atomic(map);
				1965	} else
				1966	memset(buf, 0, length);
				1967
				1968	addr += length;
				1969	buf += length;
				1970	copied += length;
				1971	count -= length;
				1972	}
				1973	return copied;
				1974	}
				1975
				1976	static int aligned_vwrite(char buf, char addr, unsigned long count)
				1977	{
				1978	struct page *p;
				1979	int copied = 0;
				1980
				1981	while (count) {
				1982	unsigned long offset, length;
				1983
				1984	offset = (unsigned long)addr & ~PAGE_MASK;
				1985	length = PAGE_SIZE - offset;
				1986	if (length > count)
				1987	length = count;
				1988	p = vmalloc_to_page(addr);
				1989	/*
				1990	* To do safe access to this _mapped_ area, we need
				1991	* lock. But adding lock here means that we need to add
				1992	* overhead of vmalloc()/vfree() calles for this _debug_
				1993	* interface, rarely used. Instead of that, we'll use
				1994	* kmap() and get small overhead in this access function.
				1995	*/
				1996	if (p) {
				1997	/*
				1998	* we can expect USER0 is not used (see vread/vwrite's
				1999	* function description)
				2000	*/
				2001	void *map = kmap_atomic(p);
				2002	memcpy(map + offset, buf, length);
				2003	kunmap_atomic(map);
				2004	}
				2005	addr += length;
				2006	buf += length;
				2007	copied += length;
				2008	count -= length;
				2009	}
				2010	return copied;
				2011	}
				2012
				2013	/**
				2014	* vread() - read vmalloc area in a safe way.
				2015	* @buf: buffer for reading data
				2016	* @addr: vm address.
				2017	* @count: number of bytes to be read.
				2018	*
				2019	* Returns # of bytes which addr and buf should be increased.
				2020	* (same number to @count). Returns 0 if [addr...addr+count) doesn't
				2021	* includes any intersect with alive vmalloc area.
				2022	*
				2023	* This function checks that addr is a valid vmalloc'ed area, and
				2024	* copy data from that area to a given buffer. If the given memory range
				2025	* of [addr...addr+count) includes some valid address, data is copied to
				2026	* proper area of @buf. If there are memory holes, they'll be zero-filled.
				2027	* IOREMAP area is treated as memory hole and no copy is done.
				2028	*
				2029	* If [addr...addr+count) doesn't includes any intersects with alive
				2030	* vm_struct area, returns 0.
				2031	* @buf should be kernel's buffer. Because this function uses KM_USER0,
				2032	* the caller should guarantee KM_USER0 is not used.
				2033	*
				2034	* Note: In usual ops, vread() is never necessary because the caller
				2035	* should know vmalloc() area is valid and can use memcpy().
				2036	* This is for routines which have to access vmalloc area without
				2037	* any informaion, as /dev/kmem.
				2038	*
				2039	*/
				2040
				2041	long vread(char buf, char addr, unsigned long count)
				2042	{
				2043	struct vm_struct *tmp;
				2044	char vaddr, buf_start = buf;
				2045	unsigned long buflen = count;
				2046	unsigned long n;
				2047
				2048	/* Don't allow overflow */
				2049	if ((unsigned long) addr + count < count)
				2050	count = -(unsigned long) addr;
				2051
				2052	read_lock(&vmlist_lock);
				2053	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
				2054	vaddr = (char *) tmp->addr;
				2055	if (addr >= vaddr + tmp->size - PAGE_SIZE)
				2056	continue;
				2057	while (addr < vaddr) {
				2058	if (count == 0)
				2059	goto finished;
				2060	*buf = '\0';
				2061	buf++;
				2062	addr++;
				2063	count--;
				2064	}
				2065	n = vaddr + tmp->size - PAGE_SIZE - addr;
				2066	if (n > count)
				2067	n = count;
				2068	if (!(tmp->flags & VM_IOREMAP))
				2069	aligned_vread(buf, addr, n);
				2070	else /* IOREMAP area is treated as memory hole */
				2071	memset(buf, 0, n);
				2072	buf += n;
				2073	addr += n;
				2074	count -= n;
				2075	}
				2076	finished:
				2077	read_unlock(&vmlist_lock);
				2078
				2079	if (buf == buf_start)
				2080	return 0;
				2081	/* zero-fill memory holes */
				2082	if (buf != buf_start + buflen)
				2083	memset(buf, 0, buflen - (buf - buf_start));
				2084
				2085	return buflen;
				2086	}
				2087
				2088	/**
				2089	* vwrite() - write vmalloc area in a safe way.
				2090	* @buf: buffer for source data
				2091	* @addr: vm address.
				2092	* @count: number of bytes to be read.
				2093	*
				2094	* Returns # of bytes which addr and buf should be incresed.
				2095	* (same number to @count).
				2096	* If [addr...addr+count) doesn't includes any intersect with valid
				2097	* vmalloc area, returns 0.
				2098	*
				2099	* This function checks that addr is a valid vmalloc'ed area, and
				2100	* copy data from a buffer to the given addr. If specified range of
				2101	* [addr...addr+count) includes some valid address, data is copied from
				2102	* proper area of @buf. If there are memory holes, no copy to hole.
				2103	* IOREMAP area is treated as memory hole and no copy is done.
				2104	*
				2105	* If [addr...addr+count) doesn't includes any intersects with alive
				2106	* vm_struct area, returns 0.
				2107	* @buf should be kernel's buffer. Because this function uses KM_USER0,
				2108	* the caller should guarantee KM_USER0 is not used.
				2109	*
				2110	* Note: In usual ops, vwrite() is never necessary because the caller
				2111	* should know vmalloc() area is valid and can use memcpy().
				2112	* This is for routines which have to access vmalloc area without
				2113	* any informaion, as /dev/kmem.
				2114	*/
				2115
				2116	long vwrite(char buf, char addr, unsigned long count)
				2117	{
				2118	struct vm_struct *tmp;
				2119	char *vaddr;
				2120	unsigned long n, buflen;
				2121	int copied = 0;
				2122
				2123	/* Don't allow overflow */
				2124	if ((unsigned long) addr + count < count)
				2125	count = -(unsigned long) addr;
				2126	buflen = count;
				2127
				2128	read_lock(&vmlist_lock);
				2129	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
				2130	vaddr = (char *) tmp->addr;
				2131	if (addr >= vaddr + tmp->size - PAGE_SIZE)
				2132	continue;
				2133	while (addr < vaddr) {
				2134	if (count == 0)
				2135	goto finished;
				2136	buf++;
				2137	addr++;
				2138	count--;
				2139	}
				2140	n = vaddr + tmp->size - PAGE_SIZE - addr;
				2141	if (n > count)
				2142	n = count;
				2143	if (!(tmp->flags & VM_IOREMAP)) {
				2144	aligned_vwrite(buf, addr, n);
				2145	copied++;
				2146	}
				2147	buf += n;
				2148	addr += n;
				2149	count -= n;
				2150	}
				2151	finished:
				2152	read_unlock(&vmlist_lock);
				2153	if (!copied)
				2154	return 0;
				2155	return buflen;
				2156	}
				2157
				2158	/**
				2159	* remap_vmalloc_range - map vmalloc pages to userspace
				2160	* @vma: vma to cover (map full range of vma)
				2161	* @addr: vmalloc memory
				2162	* @pgoff: number of pages into addr before first page to map
				2163	*
				2164	* Returns: 0 for success, -Exxx on failure
				2165	*
				2166	* This function checks that addr is a valid vmalloc'ed area, and
				2167	* that it is big enough to cover the vma. Will return failure if
				2168	* that criteria isn't met.
				2169	*
				2170	* Similar to remap_pfn_range() (see mm/memory.c)
				2171	*/
				2172	int remap_vmalloc_range(struct vm_area_struct vma, void addr,
				2173	unsigned long pgoff)
				2174	{
				2175	struct vm_struct *area;
				2176	unsigned long uaddr = vma->vm_start;
				2177	unsigned long usize = vma->vm_end - vma->vm_start;
				2178
				2179	if ((PAGE_SIZE-1) & (unsigned long)addr)
				2180	return -EINVAL;
				2181
				2182	area = find_vm_area(addr);
				2183	if (!area)
				2184	return -EINVAL;
				2185
				2186	if (!(area->flags & VM_USERMAP))
				2187	return -EINVAL;
				2188
				2189	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
				2190	return -EINVAL;
				2191
				2192	addr += pgoff << PAGE_SHIFT;
				2193	do {
				2194	struct page *page = vmalloc_to_page(addr);
				2195	int ret;
				2196
				2197	ret = vm_insert_page(vma, uaddr, page);
				2198	if (ret)
				2199	return ret;
				2200
				2201	uaddr += PAGE_SIZE;
				2202	addr += PAGE_SIZE;
				2203	usize -= PAGE_SIZE;
				2204	} while (usize > 0);
				2205
				2206	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
				2207	vma->vm_flags \|= VM_RESERVED;
				2208
				2209	return 0;
				2210	}
				2211	EXPORT_SYMBOL(remap_vmalloc_range);
				2212
				2213	/*
				2214	* Implement a stub for vmalloc_sync_all() if the architecture chose not to
				2215	* have one.
				2216	*/
				2217	void __attribute__((weak)) vmalloc_sync_all(void)
				2218	{
				2219	}
				2220
				2221
				2222	static int f(pte_t pte, pgtable_t table, unsigned long addr, void data)
				2223	{
				2224	pte_t ***p = data;
				2225
				2226	if (p) {
				2227	(p) = pte;
				2228	(*p)++;
				2229	}
				2230	return 0;
				2231	}
				2232
				2233	/**
				2234	* alloc_vm_area - allocate a range of kernel address space
				2235	* @size: size of the area
				2236	* @ptes: returns the PTEs for the address space
				2237	*
				2238	* Returns: NULL on failure, vm_struct on success
				2239	*
				2240	* This function reserves a range of kernel address space, and
				2241	* allocates pagetables to map that range. No actual mappings
				2242	* are created.
				2243	*
				2244	* If @ptes is non-NULL, pointers to the PTEs (in init_mm)
				2245	* allocated for the VM area are returned.
				2246	*/
				2247	struct vm_struct alloc_vm_area(size_t size, pte_t *ptes)
				2248	{
				2249	struct vm_struct *area;
				2250
				2251	area = get_vm_area_caller(size, VM_IOREMAP,
				2252	__builtin_return_address(0));
				2253	if (area == NULL)
				2254	return NULL;
				2255
				2256	/*
				2257	* This ensures that page tables are constructed for this region
				2258	* of kernel virtual address space and mapped into init_mm.
				2259	*/
				2260	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				2261	size, f, ptes ? &ptes : NULL)) {
				2262	free_vm_area(area);
				2263	return NULL;
				2264	}
				2265
				2266	return area;
				2267	}
				2268	EXPORT_SYMBOL_GPL(alloc_vm_area);
				2269
				2270	void free_vm_area(struct vm_struct *area)
				2271	{
				2272	struct vm_struct *ret;
				2273	ret = remove_vm_area(area->addr);
				2274	BUG_ON(ret != area);
				2275	kfree(area);
				2276	}
				2277	EXPORT_SYMBOL_GPL(free_vm_area);
				2278
				2279	#ifdef CONFIG_SMP
				2280	static struct vmap_area node_to_va(struct rb_node n)
				2281	{
				2282	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
				2283	}
				2284
				2285	/**
				2286	* pvm_find_next_prev - find the next and prev vmap_area surrounding @end
				2287	* @end: target address
				2288	* @pnext: out arg for the next vmap_area
				2289	* @pprev: out arg for the previous vmap_area
				2290	*
				2291	* Returns: %true if either or both of next and prev are found,
				2292	* %false if no vmap_area exists
				2293	*
				2294	* Find vmap_areas end addresses of which enclose @end. ie. if not
				2295	* NULL, pnext->va_end > @end and pprev->va_end <= @end.
				2296	*/
				2297	static bool pvm_find_next_prev(unsigned long end,
				2298	struct vmap_area **pnext,
				2299	struct vmap_area **pprev)
				2300	{
				2301	struct rb_node *n = vmap_area_root.rb_node;
				2302	struct vmap_area *va = NULL;
				2303
				2304	while (n) {
				2305	va = rb_entry(n, struct vmap_area, rb_node);
				2306	if (end < va->va_end)
				2307	n = n->rb_left;
				2308	else if (end > va->va_end)
				2309	n = n->rb_right;
				2310	else
				2311	break;
				2312	}
				2313
				2314	if (!va)
				2315	return false;
				2316
				2317	if (va->va_end > end) {
				2318	*pnext = va;
				2319	pprev = node_to_va(rb_prev(&(pnext)->rb_node));
				2320	} else {
				2321	*pprev = va;
				2322	pnext = node_to_va(rb_next(&(pprev)->rb_node));
				2323	}
				2324	return true;
				2325	}
				2326
				2327	/**
				2328	* pvm_determine_end - find the highest aligned address between two vmap_areas
				2329	* @pnext: in/out arg for the next vmap_area
				2330	* @pprev: in/out arg for the previous vmap_area
				2331	* @align: alignment
				2332	*
				2333	* Returns: determined end address
				2334	*
				2335	* Find the highest aligned address between @pnext and @pprev below
				2336	* VMALLOC_END. @pnext and @pprev are adjusted so that the aligned
				2337	* down address is between the end addresses of the two vmap_areas.
				2338	*
				2339	* Please note that the address returned by this function may fall
				2340	* inside *@pnext vmap_area. The caller is responsible for checking
				2341	* that.
				2342	*/
				2343	static unsigned long pvm_determine_end(struct vmap_area **pnext,
				2344	struct vmap_area **pprev,
				2345	unsigned long align)
				2346	{
				2347	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
				2348	unsigned long addr;
				2349
				2350	if (*pnext)
				2351	addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
				2352	else
				2353	addr = vmalloc_end;
				2354
				2355	while (pprev && (pprev)->va_end > addr) {
				2356	pnext = pprev;
				2357	pprev = node_to_va(rb_prev(&(pnext)->rb_node));
				2358	}
				2359
				2360	return addr;
				2361	}
				2362
				2363	/**
				2364	* pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
				2365	* @offsets: array containing offset of each area
				2366	* @sizes: array containing size of each area
				2367	* @nr_vms: the number of areas to allocate
				2368	* @align: alignment, all entries in @offsets and @sizes must be aligned to this
				2369	*
				2370	* Returns: kmalloc'd vm_struct pointer array pointing to allocated
				2371	* vm_structs on success, %NULL on failure
				2372	*
				2373	* Percpu allocator wants to use congruent vm areas so that it can
				2374	* maintain the offsets among percpu areas. This function allocates
				2375	* congruent vmalloc areas for it with GFP_KERNEL. These areas tend to
				2376	* be scattered pretty far, distance between two areas easily going up
				2377	* to gigabytes. To avoid interacting with regular vmallocs, these
				2378	* areas are allocated from top.
				2379	*
				2380	* Despite its complicated look, this allocator is rather simple. It
				2381	* does everything top-down and scans areas from the end looking for
				2382	* matching slot. While scanning, if any of the areas overlaps with
				2383	* existing vmap_area, the base address is pulled down to fit the
				2384	* area. Scanning is repeated till all the areas fit and then all
				2385	* necessary data structres are inserted and the result is returned.
				2386	*/
				2387	struct vm_struct *pcpu_get_vm_areas(const unsigned long offsets,
				2388	const size_t *sizes, int nr_vms,
				2389	size_t align)
				2390	{
				2391	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
				2392	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
				2393	struct vmap_area *vas, prev, *next;
				2394	struct vm_struct **vms;
				2395	int area, area2, last_area, term_area;
				2396	unsigned long base, start, end, last_end;
				2397	bool purged = false;
				2398
				2399	/* verify parameters and allocate data structures */
				2400	BUG_ON(align & ~PAGE_MASK \|\| !is_power_of_2(align));
				2401	for (last_area = 0, area = 0; area < nr_vms; area++) {
				2402	start = offsets[area];
				2403	end = start + sizes[area];
				2404
				2405	/* is everything aligned properly? */
				2406	BUG_ON(!IS_ALIGNED(offsets[area], align));
				2407	BUG_ON(!IS_ALIGNED(sizes[area], align));
				2408
				2409	/* detect the area with the highest address */
				2410	if (start > offsets[last_area])
				2411	last_area = area;
				2412
				2413	for (area2 = 0; area2 < nr_vms; area2++) {
				2414	unsigned long start2 = offsets[area2];
				2415	unsigned long end2 = start2 + sizes[area2];
				2416
				2417	if (area2 == area)
				2418	continue;
				2419
				2420	BUG_ON(start2 >= start && start2 < end);
				2421	BUG_ON(end2 <= end && end2 > start);
				2422	}
				2423	}
				2424	last_end = offsets[last_area] + sizes[last_area];
				2425
				2426	if (vmalloc_end - vmalloc_start < last_end) {
				2427	WARN_ON(true);
				2428	return NULL;
				2429	}
				2430
				2431	vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
				2432	vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
				2433	if (!vas \|\| !vms)
				2434	goto err_free2;
				2435
				2436	for (area = 0; area < nr_vms; area++) {
				2437	vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
				2438	vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
				2439	if (!vas[area] \|\| !vms[area])
				2440	goto err_free;
				2441	}
				2442	retry:
				2443	spin_lock(&vmap_area_lock);
				2444
				2445	/* start scanning - we scan from the top, begin with the last area */
				2446	area = term_area = last_area;
				2447	start = offsets[area];
				2448	end = start + sizes[area];
				2449
				2450	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
				2451	base = vmalloc_end - last_end;
				2452	goto found;
				2453	}
				2454	base = pvm_determine_end(&next, &prev, align) - end;
				2455
				2456	while (true) {
				2457	BUG_ON(next && next->va_end <= base + end);
				2458	BUG_ON(prev && prev->va_end > base + end);
				2459
				2460	/*
				2461	* base might have underflowed, add last_end before
				2462	* comparing.
				2463	*/
				2464	if (base + last_end < vmalloc_start + last_end) {
				2465	spin_unlock(&vmap_area_lock);
				2466	if (!purged) {
				2467	purge_vmap_area_lazy();
				2468	purged = true;
				2469	goto retry;
				2470	}
				2471	goto err_free;
				2472	}
				2473
				2474	/*
				2475	* If next overlaps, move base downwards so that it's
				2476	* right below next and then recheck.
				2477	*/
				2478	if (next && next->va_start < base + end) {
				2479	base = pvm_determine_end(&next, &prev, align) - end;
				2480	term_area = area;
				2481	continue;
				2482	}
				2483
				2484	/*
				2485	* If prev overlaps, shift down next and prev and move
				2486	* base so that it's right below new next and then
				2487	* recheck.
				2488	*/
				2489	if (prev && prev->va_end > base + start) {
				2490	next = prev;
				2491	prev = node_to_va(rb_prev(&next->rb_node));
				2492	base = pvm_determine_end(&next, &prev, align) - end;
				2493	term_area = area;
				2494	continue;
				2495	}
				2496
				2497	/*
				2498	* This area fits, move on to the previous one. If
				2499	* the previous one is the terminal one, we're done.
				2500	*/
				2501	area = (area + nr_vms - 1) % nr_vms;
				2502	if (area == term_area)
				2503	break;
				2504	start = offsets[area];
				2505	end = start + sizes[area];
				2506	pvm_find_next_prev(base + end, &next, &prev);
				2507	}
				2508	found:
				2509	/* we've found a fitting base, insert all va's */
				2510	for (area = 0; area < nr_vms; area++) {
				2511	struct vmap_area *va = vas[area];
				2512
				2513	va->va_start = base + offsets[area];
				2514	va->va_end = va->va_start + sizes[area];
				2515	__insert_vmap_area(va);
				2516	}
				2517
				2518	vmap_area_pcpu_hole = base + offsets[last_area];
				2519
				2520	spin_unlock(&vmap_area_lock);
				2521
				2522	/* insert all vm's */
				2523	for (area = 0; area < nr_vms; area++)
				2524	insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
				2525	pcpu_get_vm_areas);
				2526
				2527	kfree(vas);
				2528	return vms;
				2529
				2530	err_free:
				2531	for (area = 0; area < nr_vms; area++) {
				2532	kfree(vas[area]);
				2533	kfree(vms[area]);
				2534	}
				2535	err_free2:
				2536	kfree(vas);
				2537	kfree(vms);
				2538	return NULL;
				2539	}
				2540
				2541	/**
				2542	* pcpu_free_vm_areas - free vmalloc areas for percpu allocator
				2543	* @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
				2544	* @nr_vms: the number of allocated areas
				2545	*
				2546	* Free vm_structs and the array allocated by pcpu_get_vm_areas().
				2547	*/
				2548	void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
				2549	{
				2550	int i;
				2551
				2552	for (i = 0; i < nr_vms; i++)
				2553	free_vm_area(vms[i]);
				2554	kfree(vms);
				2555	}
				2556	#endif /* CONFIG_SMP */
				2557
				2558	#ifdef CONFIG_PROC_FS
				2559	static void s_start(struct seq_file m, loff_t *pos)
				2560	__acquires(&vmlist_lock)
				2561	{
				2562	loff_t n = *pos;
				2563	struct vm_struct *v;
				2564
				2565	read_lock(&vmlist_lock);
				2566	v = vmlist;
				2567	while (n > 0 && v) {
				2568	n--;
				2569	v = v->next;
				2570	}
				2571	if (!n)
				2572	return v;
				2573
				2574	return NULL;
				2575
				2576	}
				2577
				2578	static void s_next(struct seq_file m, void p, loff_t pos)
				2579	{
				2580	struct vm_struct *v = p;
				2581
				2582	++*pos;
				2583	return v->next;
				2584	}
				2585
				2586	static void s_stop(struct seq_file m, void p)
				2587	__releases(&vmlist_lock)
				2588	{
				2589	read_unlock(&vmlist_lock);
				2590	}
				2591
				2592	static void show_numa_info(struct seq_file m, struct vm_struct v)
				2593	{
				2594	if (NUMA_BUILD) {
				2595	unsigned int nr, *counters = m->private;
				2596
				2597	if (!counters)
				2598	return;
				2599
				2600	memset(counters, 0, nr_node_ids * sizeof(unsigned int));
				2601
				2602	for (nr = 0; nr < v->nr_pages; nr++)
				2603	counters[page_to_nid(v->pages[nr])]++;
				2604
				2605	for_each_node_state(nr, N_HIGH_MEMORY)
				2606	if (counters[nr])
				2607	seq_printf(m, " N%u=%u", nr, counters[nr]);
				2608	}
				2609	}
				2610
				2611	static int s_show(struct seq_file m, void p)
				2612	{
				2613	struct vm_struct *v = p;
				2614
				2615	seq_printf(m, "0x%p-0x%p %7ld",
				2616	v->addr, v->addr + v->size, v->size);
				2617
				2618	if (v->caller)
				2619	seq_printf(m, " %pS", v->caller);
				2620
				2621	if (v->nr_pages)
				2622	seq_printf(m, " pages=%d", v->nr_pages);
				2623
				2624	if (v->phys_addr)
				2625	seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
				2626
				2627	if (v->flags & VM_IOREMAP)
				2628	seq_printf(m, " ioremap");
				2629
				2630	if (v->flags & VM_ALLOC)
				2631	seq_printf(m, " vmalloc");
				2632
				2633	if (v->flags & VM_MAP)
				2634	seq_printf(m, " vmap");
				2635
				2636	if (v->flags & VM_USERMAP)
				2637	seq_printf(m, " user");
				2638
				2639	if (v->flags & VM_VPAGES)
				2640	seq_printf(m, " vpages");
				2641
				2642	show_numa_info(m, v);
				2643	seq_putc(m, '\n');
				2644	return 0;
				2645	}
				2646
				2647	static const struct seq_operations vmalloc_op = {
				2648	.start = s_start,
				2649	.next = s_next,
				2650	.stop = s_stop,
				2651	.show = s_show,
				2652	};
				2653
				2654	static int vmalloc_open(struct inode inode, struct file file)
				2655	{
				2656	unsigned int *ptr = NULL;
				2657	int ret;
				2658
				2659	if (NUMA_BUILD) {
				2660	ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
				2661	if (ptr == NULL)
				2662	return -ENOMEM;
				2663	}
				2664	ret = seq_open(file, &vmalloc_op);
				2665	if (!ret) {
				2666	struct seq_file *m = file->private_data;
				2667	m->private = ptr;
				2668	} else
				2669	kfree(ptr);
				2670	return ret;
				2671	}
				2672
				2673	static const struct file_operations proc_vmalloc_operations = {
				2674	.open = vmalloc_open,
				2675	.read = seq_read,
				2676	.llseek = seq_lseek,
				2677	.release = seq_release_private,
				2678	};
				2679
				2680	static int __init proc_vmalloc_init(void)
				2681	{
				2682	if (IS_ENABLED(CONFIG_PROC_STRIPPED))
				2683	return 0;
				2684
				2685	proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
				2686	return 0;
				2687	}
				2688	module_init(proc_vmalloc_init);
				2689	#endif
				2690