/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 *
 * This file is #included by mm/percpu.c rather than built on its own;
 * percpu-km.c provides the alternative allocator for configurations
 * that select CONFIG_NEED_PER_CPU_KM.
 */

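/* return the struct page backing @chunk's area for @cpu at @page_idx */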
static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx().  Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(void)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

	if (!pages)
		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
	return pages;
}
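/*
 * Layout sketch of the temp array (assuming pcpu_page_idx() in mm/percpu.c
 * computes pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx): one slot per
 * (unit, page) pair, with each unit's pages stored contiguously, i.e.
 *
 *	pages[] = { unit0/page0, unit0/page1, ..., unit1/page0, ... }
 *
 * for a total of pcpu_nr_units * pcpu_unit_pages entries.
 */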

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 * @gfp: allocation flags passed to the underlying allocator
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end,
			    gfp_t gfp)
{
	unsigned int cpu, tcpu;
	int i;

	/* the pages are mapped into vmalloc space, so highmem is fine */
	gfp |= __GFP_HIGHMEM;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
	/* free the pages already allocated for the failing cpu ... */
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	/* ... and everything allocated for the cpus handled before it */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flush can be very expensive,
 * flush the whole region at once rather than doing it for each cpu.
 * This may be overkill but is more scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

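/*
 * Unmap @nr_pages worth of kernel mappings starting at @addr without any
 * flushing; cache and TLB flushing are left to pcpu_pre_unmap_flush() and
 * pcpu_post_unmap_tlb_flush() respectively.
 */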
static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array used to return the unmapped pages
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * The unmapped pages are stored in the corresponding elements of
 * @pages so that pcpu_free_pages(), which is called after all unmaps
 * are finished, can free them.  The caller should call proper
 * pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

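/*
 * Map @nr_pages pages from @pages into the kernel page tables at @addr
 * without any flushing; callers are expected to flush caches afterwards
 * via pcpu_post_map_flush().
 */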
static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		/* record the owning chunk for addr -> chunk reverse lookup */
		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
	/* unmap whatever was mapped for the cpus handled before the failure */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 * @gfp: allocation flags passed to the underlying memory allocator
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end, gfp_t gfp)
{
	struct page **pages;

	pages = pcpu_get_pages();
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}
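/*
 * Rough caller-side sketch (the real callers live in mm/percpu.c and exact
 * names/signatures may differ between kernel versions): the allocator core
 * fills in missing pages for an allocation roughly like
 *
 *	if (!pcpu_populate_chunk(chunk, rs, re, gfp))
 *		pcpu_chunk_populated(chunk, rs, re);
 *
 * with pcpu_alloc_mutex held, so the chunk's populated bitmap and counters
 * stay in sync with the actual mappings.
 */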

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages();
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

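/*
 * Allocate a new chunk and reserve vmalloc address space for each percpu
 * group via pcpu_get_vm_areas().  No pages are allocated or mapped here;
 * that happens later through pcpu_populate_chunk().
 */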
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk(gfp);
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(chunk->base_addr);

	return chunk;
}

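/*
 * Tear down a chunk created by pcpu_create_chunk(): release the vmalloc
 * areas stashed in chunk->data, then free the chunk itself.  Safe to call
 * on a NULL or partially constructed chunk.
 */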
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;

	pcpu_stats_chunk_dealloc();
	trace_percpu_destroy_chunk(chunk->base_addr);

	if (chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

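/*
 * Translate a percpu address to its backing struct page; since chunks live
 * in vmalloc space here, this is just vmalloc_to_page().
 */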
static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

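/*
 * The vmalloc-based allocator places no extra constraints on the first
 * chunk layout, hence the unconditional success here.
 */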
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}