Blame - marvell/linux/mm/slub.c - T108

blob: 32955a181081c67e9e240b1167fb1c9d4fdbc9cf [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* SLUB: A slab allocator that limits cache line use instead of queuing
				4	* objects in per cpu and per node lists.
				5	*
				6	* The allocator synchronizes using per slab locks or atomic operations
				7	* and only uses a centralized lock to manage a pool of partial slabs.
				8	*
				9	* (C) 2007 SGI, Christoph Lameter
				10	* (C) 2011 Linux Foundation, Christoph Lameter
				11	*/
				12
				13	#include <linux/mm.h>
				14	#include <linux/swap.h> /* struct reclaim_state */
				15	#include <linux/module.h>
				16	#include <linux/bit_spinlock.h>
				17	#include <linux/interrupt.h>
				18	#include <linux/swab.h>
				19	#include <linux/bitops.h>
				20	#include <linux/slab.h>
				21	#include "slab.h"
				22	#include <linux/proc_fs.h>
				23	#include <linux/seq_file.h>
				24	#include <linux/kasan.h>
				25	#include <linux/cpu.h>
				26	#include <linux/cpuset.h>
				27	#include <linux/mempolicy.h>
				28	#include <linux/ctype.h>
				29	#include <linux/debugobjects.h>
				30	#include <linux/kallsyms.h>
				31	#include <linux/kfence.h>
				32	#include <linux/memory.h>
				33	#include <linux/math64.h>
				34	#include <linux/fault-inject.h>
				35	#include <linux/stacktrace.h>
				36	#include <linux/prefetch.h>
				37	#include <linux/memcontrol.h>
				38	#include <linux/random.h>
				39
				40	#include <trace/events/kmem.h>
				41
				42	#include "internal.h"
				43
				44	/*
				45	* Lock order:
				46	* 1. slab_mutex (Global Mutex)
				47	* 2. node->list_lock
				48	* 3. slab_lock(page) (Only on some arches and for debugging)
				49	*
				50	* slab_mutex
				51	*
				52	* The role of the slab_mutex is to protect the list of all the slabs
				53	* and to synchronize major metadata changes to slab cache structures.
				54	*
				55	* The slab_lock is only used for debugging and on arches that do not
				56	* have the ability to do a cmpxchg_double. It only protects:
				57	* A. page->freelist -> List of object free in a page
				58	* B. page->inuse -> Number of objects in use
				59	* C. page->objects -> Number of objects in page
				60	* D. page->frozen -> frozen state
				61	*
				62	* If a slab is frozen then it is exempt from list management. It is not
				63	* on any list except per cpu partial list. The processor that froze the
				64	* slab is the one who can perform list operations on the page. Other
				65	* processors may put objects onto the freelist but the processor that
				66	* froze the slab is the only one that can retrieve the objects from the
				67	* page's freelist.
				68	*
				69	* The list_lock protects the partial and full list on each node and
				70	* the partial slab counter. If taken then no new slabs may be added or
				71	* removed from the lists nor make the number of partial slabs be modified.
				72	* (Note that the total number of slabs is an atomic value that may be
				73	* modified without taking the list lock).
				74	*
				75	* The list_lock is a centralized lock and thus we avoid taking it as
				76	* much as possible. As long as SLUB does not have to handle partial
				77	* slabs, operations can continue without any centralized lock. F.e.
				78	* allocating a long series of objects that fill up slabs does not require
				79	* the list lock.
				80	* Interrupts are disabled during allocation and deallocation in order to
				81	* make the slab allocator safe to use in the context of an irq. In addition
				82	* interrupts are disabled to ensure that the processor does not change
				83	* while handling per_cpu slabs, due to kernel preemption.
				84	*
				85	* SLUB assigns one slab for allocation to each processor.
				86	* Allocations only occur from these slabs called cpu slabs.
				87	*
				88	* Slabs with free elements are kept on a partial list and during regular
				89	* operations no list for full slabs is used. If an object in a full slab is
				90	* freed then the slab will show up again on the partial lists.
				91	* We track full slabs for debugging purposes though because otherwise we
				92	* cannot scan all objects.
				93	*
				94	* Slabs are freed when they become empty. Teardown and setup is
				95	* minimal so we rely on the page allocators per cpu caches for
				96	* fast frees and allocs.
				97	*
				98	* Overloading of page flags that are otherwise used for LRU management.
				99	*
				100	* PageActive The slab is frozen and exempt from list processing.
				101	* This means that the slab is dedicated to a purpose
				102	* such as satisfying allocations for a specific
				103	* processor. Objects may be freed in the slab while
				104	* it is frozen but slab_free will then skip the usual
				105	* list operations. It is up to the processor holding
				106	* the slab to integrate the slab into the slab lists
				107	* when the slab is no longer needed.
				108	*
				109	* One use of this flag is to mark slabs that are
				110	* used for allocations. Then such a slab becomes a cpu
				111	* slab. The cpu slab may be equipped with an additional
				112	* freelist that allows lockless access to
				113	* free objects in addition to the regular freelist
				114	* that requires the slab lock.
				115	*
				116	* PageError Slab requires special handling due to debug
				117	* options set. This moves slab handling out of
				118	* the fast path and disables lockless freelists.
				119	*/
				120
				121	static inline int kmem_cache_debug(struct kmem_cache *s)
				122	{
				123	#ifdef CONFIG_SLUB_DEBUG
				124	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
				125	#else
				126	return 0;
				127	#endif
				128	}
				129
				130	void fixup_red_left(struct kmem_cache s, void *p)
				131	{
				132	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
				133	p += s->red_left_pad;
				134
				135	return p;
				136	}
				137
				138	static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
				139	{
				140	#ifdef CONFIG_SLUB_CPU_PARTIAL
				141	return !kmem_cache_debug(s);
				142	#else
				143	return false;
				144	#endif
				145	}
				146
				147	/*
				148	* Issues still to be resolved:
				149	*
				150	* - Support PAGE_ALLOC_DEBUG. Should be easy to do.
				151	*
				152	* - Variable sizing of the per node arrays
				153	*/
				154
				155	/* Enable to test recovery from slab corruption on boot */
				156	#undef SLUB_RESILIENCY_TEST
				157
				158	/* Enable to log cmpxchg failures */
				159	#undef SLUB_DEBUG_CMPXCHG
				160
				161	/*
				162	* Mininum number of partial slabs. These will be left on the partial
				163	* lists even if they are empty. kmem_cache_shrink may reclaim them.
				164	*/
				165	#define MIN_PARTIAL 5
				166
				167	/*
				168	* Maximum number of desirable partial slabs.
				169	* The existence of more partial slabs makes kmem_cache_shrink
				170	* sort the partial list by the number of objects in use.
				171	*/
				172	#define MAX_PARTIAL 10
				173
				174	#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS \| SLAB_RED_ZONE \| \
				175	SLAB_POISON \| SLAB_STORE_USER)
				176
				177	/*
				178	* These debug flags cannot use CMPXCHG because there might be consistency
				179	* issues when checking or reading debug information
				180	*/
				181	#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS \| SLAB_STORE_USER \| \
				182	SLAB_TRACE)
				183
				184
				185	/*
				186	* Debugging flags that require metadata to be stored in the slab. These get
				187	* disabled when slub_debug=O is used and a cache's min order increases with
				188	* metadata.
				189	*/
				190	#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE \| SLAB_POISON \| SLAB_STORE_USER)
				191
				192	#define OO_SHIFT 16
				193	#define OO_MASK ((1 << OO_SHIFT) - 1)
				194	#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
				195
				196	/* Internal SLUB flags */
				197	/* Poison object */
				198	#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
				199	/* Use cmpxchg_double */
				200	#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
				201
				202	/*
				203	* Tracking user of a slab.
				204	*/
				205	#define TRACK_ADDRS_COUNT 16
				206	struct track {
				207	unsigned long addr; /* Called from address */
				208	#ifdef CONFIG_STACKTRACE
				209	unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
				210	#endif
				211	int cpu; /* Was running on cpu */
				212	int pid; /* Pid context */
				213	unsigned long when; /* When did the operation occur */
				214	};
				215
				216	enum track_item { TRACK_ALLOC, TRACK_FREE };
				217
				218	#ifdef CONFIG_SYSFS
				219	static int sysfs_slab_add(struct kmem_cache *);
				220	static int sysfs_slab_alias(struct kmem_cache , const char );
				221	static void memcg_propagate_slab_attrs(struct kmem_cache *s);
				222	static void sysfs_slab_remove(struct kmem_cache *s);
				223	#else
				224	static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
				225	static inline int sysfs_slab_alias(struct kmem_cache s, const char p)
				226	{ return 0; }
				227	static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
				228	static inline void sysfs_slab_remove(struct kmem_cache *s) { }
				229	#endif
				230
				231	static inline void stat(const struct kmem_cache *s, enum stat_item si)
				232	{
				233	#ifdef CONFIG_SLUB_STATS
				234	/*
				235	* The rmw is racy on a preemptible kernel but this is acceptable, so
				236	* avoid this_cpu_add()'s irq-disable overhead.
				237	*/
				238	raw_cpu_inc(s->cpu_slab->stat[si]);
				239	#endif
				240	}
				241
				242	/********************************************************************
				243	* Core slab cache functions
				244	*******************************************************************/
				245
				246	/*
				247	* Returns freelist pointer (ptr). With hardening, this is obfuscated
				248	* with an XOR of the address where the pointer is held and a per-cache
				249	* random number.
				250	*/
				251	static inline void freelist_ptr(const struct kmem_cache s, void *ptr,
				252	unsigned long ptr_addr)
				253	{
				254	#ifdef CONFIG_SLAB_FREELIST_HARDENED
				255	/*
				256	* When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
				257	* Normally, this doesn't cause any issues, as both set_freepointer()
				258	* and get_freepointer() are called with a pointer with the same tag.
				259	* However, there are some issues with CONFIG_SLUB_DEBUG code. For
				260	* example, when __free_slub() iterates over objects in a cache, it
				261	* passes untagged pointers to check_object(). check_object() in turns
				262	* calls get_freepointer() with an untagged pointer, which causes the
				263	* freepointer to be restored incorrectly.
				264	*/
				265	return (void *)((unsigned long)ptr ^ s->random ^
				266	swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
				267	#else
				268	return ptr;
				269	#endif
				270	}
				271
				272	/* Returns the freelist pointer recorded at location ptr_addr. */
				273	static inline void freelist_dereference(const struct kmem_cache s,
				274	void *ptr_addr)
				275	{
				276	return freelist_ptr(s, (void )(unsigned long *)(ptr_addr),
				277	(unsigned long)ptr_addr);
				278	}
				279
				280	static inline void get_freepointer(struct kmem_cache s, void *object)
				281	{
				282	return freelist_dereference(s, object + s->offset);
				283	}
				284
				285	static void prefetch_freepointer(const struct kmem_cache s, void object)
				286	{
				287	prefetch(object + s->offset);
				288	}
				289
				290	static inline void get_freepointer_safe(struct kmem_cache s, void *object)
				291	{
				292	unsigned long freepointer_addr;
				293	void *p;
				294
				295	if (!debug_pagealloc_enabled_static())
				296	return get_freepointer(s, object);
				297
				298	freepointer_addr = (unsigned long)object + s->offset;
				299	probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
				300	return freelist_ptr(s, p, freepointer_addr);
				301	}
				302
				303	static inline void set_freepointer(struct kmem_cache s, void object, void *fp)
				304	{
				305	unsigned long freeptr_addr = (unsigned long)object + s->offset;
				306
				307	#ifdef CONFIG_SLAB_FREELIST_HARDENED
				308	BUG_ON(object == fp); /* naive detection of double free or corruption */
				309	#endif
				310
				311	(void *)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
				312	}
				313
				314	/* Loop over all objects in a slab */
				315	#define for_each_object(__p, __s, __addr, __objects) \
				316	for (__p = fixup_red_left(__s, __addr); \
				317	__p < (__addr) + (__objects) * (__s)->size; \
				318	__p += (__s)->size)
				319
				320	/* Determine object index from a given position */
				321	static inline unsigned int slab_index(void p, struct kmem_cache s, void *addr)
				322	{
				323	return (kasan_reset_tag(p) - addr) / s->size;
				324	}
				325
				326	static inline unsigned int order_objects(unsigned int order, unsigned int size)
				327	{
				328	return ((unsigned int)PAGE_SIZE << order) / size;
				329	}
				330
				331	static inline struct kmem_cache_order_objects oo_make(unsigned int order,
				332	unsigned int size)
				333	{
				334	struct kmem_cache_order_objects x = {
				335	(order << OO_SHIFT) + order_objects(order, size)
				336	};
				337
				338	return x;
				339	}
				340
				341	static inline unsigned int oo_order(struct kmem_cache_order_objects x)
				342	{
				343	return x.x >> OO_SHIFT;
				344	}
				345
				346	static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
				347	{
				348	return x.x & OO_MASK;
				349	}
				350
				351	/*
				352	* Per slab locking using the pagelock
				353	*/
				354	static __always_inline void slab_lock(struct page *page)
				355	{
				356	VM_BUG_ON_PAGE(PageTail(page), page);
				357	bit_spin_lock(PG_locked, &page->flags);
				358	}
				359
				360	static __always_inline void slab_unlock(struct page *page)
				361	{
				362	VM_BUG_ON_PAGE(PageTail(page), page);
				363	__bit_spin_unlock(PG_locked, &page->flags);
				364	}
				365
				366	/* Interrupts must be disabled (for the fallback code to work right) */
				367	static inline bool __cmpxchg_double_slab(struct kmem_cache s, struct page page,
				368	void *freelist_old, unsigned long counters_old,
				369	void *freelist_new, unsigned long counters_new,
				370	const char *n)
				371	{
				372	VM_BUG_ON(!irqs_disabled());
				373	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				374	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				375	if (s->flags & __CMPXCHG_DOUBLE) {
				376	if (cmpxchg_double(&page->freelist, &page->counters,
				377	freelist_old, counters_old,
				378	freelist_new, counters_new))
				379	return true;
				380	} else
				381	#endif
				382	{
				383	slab_lock(page);
				384	if (page->freelist == freelist_old &&
				385	page->counters == counters_old) {
				386	page->freelist = freelist_new;
				387	page->counters = counters_new;
				388	slab_unlock(page);
				389	return true;
				390	}
				391	slab_unlock(page);
				392	}
				393
				394	cpu_relax();
				395	stat(s, CMPXCHG_DOUBLE_FAIL);
				396
				397	#ifdef SLUB_DEBUG_CMPXCHG
				398	pr_info("%s %s: cmpxchg double redo ", n, s->name);
				399	#endif
				400
				401	return false;
				402	}
				403
				404	static inline bool cmpxchg_double_slab(struct kmem_cache s, struct page page,
				405	void *freelist_old, unsigned long counters_old,
				406	void *freelist_new, unsigned long counters_new,
				407	const char *n)
				408	{
				409	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				410	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				411	if (s->flags & __CMPXCHG_DOUBLE) {
				412	if (cmpxchg_double(&page->freelist, &page->counters,
				413	freelist_old, counters_old,
				414	freelist_new, counters_new))
				415	return true;
				416	} else
				417	#endif
				418	{
				419	unsigned long flags;
				420
				421	local_irq_save(flags);
				422	slab_lock(page);
				423	if (page->freelist == freelist_old &&
				424	page->counters == counters_old) {
				425	page->freelist = freelist_new;
				426	page->counters = counters_new;
				427	slab_unlock(page);
				428	local_irq_restore(flags);
				429	return true;
				430	}
				431	slab_unlock(page);
				432	local_irq_restore(flags);
				433	}
				434
				435	cpu_relax();
				436	stat(s, CMPXCHG_DOUBLE_FAIL);
				437
				438	#ifdef SLUB_DEBUG_CMPXCHG
				439	pr_info("%s %s: cmpxchg double redo ", n, s->name);
				440	#endif
				441
				442	return false;
				443	}
				444
				445	#ifdef CONFIG_SLUB_DEBUG
				446	/*
				447	* Determine a map of object in use on a page.
				448	*
				449	* Node listlock must be held to guarantee that the page does
				450	* not vanish from under us.
				451	*/
				452	static void get_map(struct kmem_cache s, struct page page, unsigned long *map)
				453	{
				454	void *p;
				455	void *addr = page_address(page);
				456
				457	for (p = page->freelist; p; p = get_freepointer(s, p))
				458	set_bit(slab_index(p, s, addr), map);
				459	}
				460
				461	static inline unsigned int size_from_object(struct kmem_cache *s)
				462	{
				463	if (s->flags & SLAB_RED_ZONE)
				464	return s->size - s->red_left_pad;
				465
				466	return s->size;
				467	}
				468
				469	static inline void restore_red_left(struct kmem_cache s, void *p)
				470	{
				471	if (s->flags & SLAB_RED_ZONE)
				472	p -= s->red_left_pad;
				473
				474	return p;
				475	}
				476
				477	/*
				478	* Debug settings:
				479	*/
				480	#if defined(CONFIG_SLUB_DEBUG_ON)
				481	static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
				482	#else
				483	static slab_flags_t slub_debug;
				484	#endif
				485
				486	static char *slub_debug_slabs;
				487	static int disable_higher_order_debug;
				488
				489	/*
				490	* slub is about to manipulate internal object metadata. This memory lies
				491	* outside the range of the allocated object, so accessing it would normally
				492	* be reported by kasan as a bounds error. metadata_access_enable() is used
				493	* to tell kasan that these accesses are OK.
				494	*/
				495	static inline void metadata_access_enable(void)
				496	{
				497	kasan_disable_current();
				498	}
				499
				500	static inline void metadata_access_disable(void)
				501	{
				502	kasan_enable_current();
				503	}
				504
				505	/*
				506	* Object debugging
				507	*/
				508
				509	/* Verify that a pointer has an address that is valid within a slab page */
				510	static inline int check_valid_pointer(struct kmem_cache *s,
				511	struct page page, void object)
				512	{
				513	void *base;
				514
				515	if (!object)
				516	return 1;
				517
				518	base = page_address(page);
				519	object = kasan_reset_tag(object);
				520	object = restore_red_left(s, object);
				521	if (object < base \|\| object >= base + page->objects * s->size \|\|
				522	(object - base) % s->size) {
				523	return 0;
				524	}
				525
				526	return 1;
				527	}
				528
				529	static void print_section(char level, char text, u8 *addr,
				530	unsigned int length)
				531	{
				532	metadata_access_enable();
				533	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
				534	length, 1);
				535	metadata_access_disable();
				536	}
				537
				538	/*
				539	* See comment in calculate_sizes().
				540	*/
				541	static inline bool freeptr_outside_object(struct kmem_cache *s)
				542	{
				543	return s->offset >= s->inuse;
				544	}
				545
				546	/*
				547	* Return offset of the end of info block which is inuse + free pointer if
				548	* not overlapping with object.
				549	*/
				550	static inline unsigned int get_info_end(struct kmem_cache *s)
				551	{
				552	if (freeptr_outside_object(s))
				553	return s->inuse + sizeof(void *);
				554	else
				555	return s->inuse;
				556	}
				557
				558	static struct track get_track(struct kmem_cache s, void *object,
				559	enum track_item alloc)
				560	{
				561	struct track *p;
				562
				563	p = object + get_info_end(s);
				564
				565	return p + alloc;
				566	}
				567
				568	static void set_track(struct kmem_cache s, void object,
				569	enum track_item alloc, unsigned long addr)
				570	{
				571	struct track *p = get_track(s, object, alloc);
				572
				573	if (addr) {
				574	#ifdef CONFIG_STACKTRACE
				575	unsigned int nr_entries;
				576
				577	metadata_access_enable();
				578	nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
				579	metadata_access_disable();
				580
				581	if (nr_entries < TRACK_ADDRS_COUNT)
				582	p->addrs[nr_entries] = 0;
				583	#endif
				584	p->addr = addr;
				585	p->cpu = smp_processor_id();
				586	p->pid = current->pid;
				587	p->when = jiffies;
				588	} else {
				589	memset(p, 0, sizeof(struct track));
				590	}
				591	}
				592
				593	static void init_tracking(struct kmem_cache s, void object)
				594	{
				595	if (!(s->flags & SLAB_STORE_USER))
				596	return;
				597
				598	set_track(s, object, TRACK_FREE, 0UL);
				599	set_track(s, object, TRACK_ALLOC, 0UL);
				600	}
				601
				602	static void print_track(const char s, struct track t, unsigned long pr_time)
				603	{
				604	if (!t->addr)
				605	return;
				606
				607	pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
				608	s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
				609	#ifdef CONFIG_STACKTRACE
				610	{
				611	int i;
				612	for (i = 0; i < TRACK_ADDRS_COUNT; i++)
				613	if (t->addrs[i])
				614	pr_err("\t%pS\n", (void *)t->addrs[i]);
				615	else
				616	break;
				617	}
				618	#endif
				619	}
				620
				621	static void print_tracking(struct kmem_cache s, void object)
				622	{
				623	unsigned long pr_time = jiffies;
				624	if (!(s->flags & SLAB_STORE_USER))
				625	return;
				626
				627	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
				628	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
				629	}
				630
				631	static void print_page_info(struct page *page)
				632	{
				633	pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
				634	page, page->objects, page->inuse, page->freelist, page->flags);
				635
				636	}
				637
				638	static void slab_bug(struct kmem_cache s, char fmt, ...)
				639	{
				640	struct va_format vaf;
				641	va_list args;
				642
				643	va_start(args, fmt);
				644	vaf.fmt = fmt;
				645	vaf.va = &args;
				646	pr_err("=============================================================================\n");
				647	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
				648	pr_err("-----------------------------------------------------------------------------\n\n");
				649
				650	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
				651	va_end(args);
				652	}
				653
				654	static void slab_fix(struct kmem_cache s, char fmt, ...)
				655	{
				656	struct va_format vaf;
				657	va_list args;
				658
				659	va_start(args, fmt);
				660	vaf.fmt = fmt;
				661	vaf.va = &args;
				662	pr_err("FIX %s: %pV\n", s->name, &vaf);
				663	va_end(args);
				664	}
				665
				666	static bool freelist_corrupted(struct kmem_cache s, struct page page,
				667	void *freelist, void nextfree)
				668	{
				669	if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
				670	!check_valid_pointer(s, page, nextfree) && freelist) {
				671	object_err(s, page, *freelist, "Freechain corrupt");
				672	*freelist = NULL;
				673	slab_fix(s, "Isolate corrupted freechain");
				674	return true;
				675	}
				676
				677	return false;
				678	}
				679
				680	static void print_trailer(struct kmem_cache s, struct page page, u8 *p)
				681	{
				682	unsigned int off; /* Offset of last byte */
				683	u8 *addr = page_address(page);
				684
				685	print_tracking(s, p);
				686
				687	print_page_info(page);
				688
				689	pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
				690	p, p - addr, get_freepointer(s, p));
				691
				692	if (s->flags & SLAB_RED_ZONE)
				693	print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
				694	s->red_left_pad);
				695	else if (p > addr + 16)
				696	print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
				697
				698	print_section(KERN_ERR, "Object ", p,
				699	min_t(unsigned int, s->object_size, PAGE_SIZE));
				700	if (s->flags & SLAB_RED_ZONE)
				701	print_section(KERN_ERR, "Redzone ", p + s->object_size,
				702	s->inuse - s->object_size);
				703
				704	off = get_info_end(s);
				705
				706	if (s->flags & SLAB_STORE_USER)
				707	off += 2 * sizeof(struct track);
				708
				709	off += kasan_metadata_size(s);
				710
				711	if (off != size_from_object(s))
				712	/* Beginning of the filler is the free pointer */
				713	print_section(KERN_ERR, "Padding ", p + off,
				714	size_from_object(s) - off);
				715
				716	dump_stack();
				717	}
				718
				719	void object_err(struct kmem_cache s, struct page page,
				720	u8 object, char reason)
				721	{
				722	slab_bug(s, "%s", reason);
				723	print_trailer(s, page, object);
				724	}
				725
				726	static __printf(3, 4) void slab_err(struct kmem_cache s, struct page page,
				727	const char *fmt, ...)
				728	{
				729	va_list args;
				730	char buf[100];
				731
				732	va_start(args, fmt);
				733	vsnprintf(buf, sizeof(buf), fmt, args);
				734	va_end(args);
				735	slab_bug(s, "%s", buf);
				736	print_page_info(page);
				737	dump_stack();
				738	}
				739
				740	static void init_object(struct kmem_cache s, void object, u8 val)
				741	{
				742	u8 *p = object;
				743
				744	if (s->flags & SLAB_RED_ZONE)
				745	memset(p - s->red_left_pad, val, s->red_left_pad);
				746
				747	if (s->flags & __OBJECT_POISON) {
				748	memset(p, POISON_FREE, s->object_size - 1);
				749	p[s->object_size - 1] = POISON_END;
				750	}
				751
				752	if (s->flags & SLAB_RED_ZONE)
				753	memset(p + s->object_size, val, s->inuse - s->object_size);
				754	}
				755
				756	static void restore_bytes(struct kmem_cache s, char message, u8 data,
				757	void from, void to)
				758	{
				759	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
				760	memset(from, data, to - from);
				761	}
				762
				763	static int check_bytes_and_report(struct kmem_cache s, struct page page,
				764	u8 object, char what,
				765	u8 *start, unsigned int value, unsigned int bytes)
				766	{
				767	u8 *fault;
				768	u8 *end;
				769
				770	metadata_access_enable();
				771	fault = memchr_inv(start, value, bytes);
				772	metadata_access_disable();
				773	if (!fault)
				774	return 1;
				775
				776	end = start + bytes;
				777	while (end > fault && end[-1] == value)
				778	end--;
				779
				780	slab_bug(s, "%s overwritten", what);
				781	pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
				782	fault, end - 1, fault[0], value);
				783	print_trailer(s, page, object);
				784
				785	restore_bytes(s, what, value, fault, end);
				786	return 0;
				787	}
				788
				789	/*
				790	* Object layout:
				791	*
				792	* object address
				793	* Bytes of the object to be managed.
				794	* If the freepointer may overlay the object then the free
				795	* pointer is at the middle of the object.
				796	*
				797	* Poisoning uses 0x6b (POISON_FREE) and the last byte is
				798	* 0xa5 (POISON_END)
				799	*
				800	* object + s->object_size
				801	* Padding to reach word boundary. This is also used for Redzoning.
				802	* Padding is extended by another word if Redzoning is enabled and
				803	* object_size == inuse.
				804	*
				805	* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
				806	* 0xcc (RED_ACTIVE) for objects in use.
				807	*
				808	* object + s->inuse
				809	* Meta data starts here.
				810	*
				811	* A. Free pointer (if we cannot overwrite object on free)
				812	* B. Tracking data for SLAB_STORE_USER
				813	* C. Padding to reach required alignment boundary or at minimum
				814	* one word if debugging is on to be able to detect writes
				815	* before the word boundary.
				816	*
				817	* Padding is done using 0x5a (POISON_INUSE)
				818	*
				819	* object + s->size
				820	* Nothing is used beyond s->size.
				821	*
				822	* If slabcaches are merged then the object_size and inuse boundaries are mostly
				823	* ignored. And therefore no slab options that rely on these boundaries
				824	* may be used with merged slabcaches.
				825	*/
				826
				827	static int check_pad_bytes(struct kmem_cache s, struct page page, u8 *p)
				828	{
				829	unsigned long off = get_info_end(s); /* The end of info */
				830
				831	if (s->flags & SLAB_STORE_USER)
				832	/* We also have user information there */
				833	off += 2 * sizeof(struct track);
				834
				835	off += kasan_metadata_size(s);
				836
				837	if (size_from_object(s) == off)
				838	return 1;
				839
				840	return check_bytes_and_report(s, page, p, "Object padding",
				841	p + off, POISON_INUSE, size_from_object(s) - off);
				842	}
				843
				844	/* Check the pad bytes at the end of a slab page */
				845	static int slab_pad_check(struct kmem_cache s, struct page page)
				846	{
				847	u8 *start;
				848	u8 *fault;
				849	u8 *end;
				850	u8 *pad;
				851	int length;
				852	int remainder;
				853
				854	if (!(s->flags & SLAB_POISON))
				855	return 1;
				856
				857	start = page_address(page);
				858	length = page_size(page);
				859	end = start + length;
				860	remainder = length % s->size;
				861	if (!remainder)
				862	return 1;
				863
				864	pad = end - remainder;
				865	metadata_access_enable();
				866	fault = memchr_inv(pad, POISON_INUSE, remainder);
				867	metadata_access_disable();
				868	if (!fault)
				869	return 1;
				870	while (end > fault && end[-1] == POISON_INUSE)
				871	end--;
				872
				873	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
				874	print_section(KERN_ERR, "Padding ", pad, remainder);
				875
				876	restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
				877	return 0;
				878	}
				879
				880	static int check_object(struct kmem_cache s, struct page page,
				881	void *object, u8 val)
				882	{
				883	u8 *p = object;
				884	u8 *endobject = object + s->object_size;
				885
				886	if (s->flags & SLAB_RED_ZONE) {
				887	if (!check_bytes_and_report(s, page, object, "Left Redzone",
				888	object - s->red_left_pad, val, s->red_left_pad))
				889	return 0;
				890
				891	if (!check_bytes_and_report(s, page, object, "Right Redzone",
				892	endobject, val, s->inuse - s->object_size))
				893	return 0;
				894	} else {
				895	if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
				896	check_bytes_and_report(s, page, p, "Alignment padding",
				897	endobject, POISON_INUSE,
				898	s->inuse - s->object_size);
				899	}
				900	}
				901
				902	if (s->flags & SLAB_POISON) {
				903	if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
				904	(!check_bytes_and_report(s, page, p, "Poison", p,
				905	POISON_FREE, s->object_size - 1) \|\|
				906	!check_bytes_and_report(s, page, p, "End Poison",
				907	p + s->object_size - 1, POISON_END, 1)))
				908	return 0;
				909	/*
				910	* check_pad_bytes cleans up on its own.
				911	*/
				912	check_pad_bytes(s, page, p);
				913	}
				914
				915	if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
				916	/*
				917	* Object and freepointer overlap. Cannot check
				918	* freepointer while object is allocated.
				919	*/
				920	return 1;
				921
				922	/* Check free pointer validity */
				923	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
				924	object_err(s, page, p, "Freepointer corrupt");
				925	/*
				926	* No choice but to zap it and thus lose the remainder
				927	* of the free objects in this slab. May cause
				928	* another error because the object count is now wrong.
				929	*/
				930	set_freepointer(s, p, NULL);
				931	return 0;
				932	}
				933	return 1;
				934	}
				935
				936	static int check_slab(struct kmem_cache s, struct page page)
				937	{
				938	int maxobj;
				939
				940	VM_BUG_ON(!irqs_disabled());
				941
				942	if (!PageSlab(page)) {
				943	slab_err(s, page, "Not a valid slab page");
				944	return 0;
				945	}
				946
				947	maxobj = order_objects(compound_order(page), s->size);
				948	if (page->objects > maxobj) {
				949	slab_err(s, page, "objects %u > max %u",
				950	page->objects, maxobj);
				951	return 0;
				952	}
				953	if (page->inuse > page->objects) {
				954	slab_err(s, page, "inuse %u > max %u",
				955	page->inuse, page->objects);
				956	return 0;
				957	}
				958	/* Slab_pad_check fixes things up after itself */
				959	slab_pad_check(s, page);
				960	return 1;
				961	}
				962
				963	/*
				964	* Determine if a certain object on a page is on the freelist. Must hold the
				965	* slab lock to guarantee that the chains are in a consistent state.
				966	*/
				967	static int on_freelist(struct kmem_cache s, struct page page, void *search)
				968	{
				969	int nr = 0;
				970	void *fp;
				971	void *object = NULL;
				972	int max_objects;
				973
				974	fp = page->freelist;
				975	while (fp && nr <= page->objects) {
				976	if (fp == search)
				977	return 1;
				978	if (!check_valid_pointer(s, page, fp)) {
				979	if (object) {
				980	object_err(s, page, object,
				981	"Freechain corrupt");
				982	set_freepointer(s, object, NULL);
				983	} else {
				984	slab_err(s, page, "Freepointer corrupt");
				985	page->freelist = NULL;
				986	page->inuse = page->objects;
				987	slab_fix(s, "Freelist cleared");
				988	return 0;
				989	}
				990	break;
				991	}
				992	object = fp;
				993	fp = get_freepointer(s, object);
				994	nr++;
				995	}
				996
				997	max_objects = order_objects(compound_order(page), s->size);
				998	if (max_objects > MAX_OBJS_PER_PAGE)
				999	max_objects = MAX_OBJS_PER_PAGE;
				1000
				1001	if (page->objects != max_objects) {
				1002	slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
				1003	page->objects, max_objects);
				1004	page->objects = max_objects;
				1005	slab_fix(s, "Number of objects adjusted.");
				1006	}
				1007	if (page->inuse != page->objects - nr) {
				1008	slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
				1009	page->inuse, page->objects - nr);
				1010	page->inuse = page->objects - nr;
				1011	slab_fix(s, "Object count adjusted.");
				1012	}
				1013	return search == NULL;
				1014	}
				1015
				1016	static void trace(struct kmem_cache s, struct page page, void *object,
				1017	int alloc)
				1018	{
				1019	if (s->flags & SLAB_TRACE) {
				1020	pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
				1021	s->name,
				1022	alloc ? "alloc" : "free",
				1023	object, page->inuse,
				1024	page->freelist);
				1025
				1026	if (!alloc)
				1027	print_section(KERN_INFO, "Object ", (void *)object,
				1028	s->object_size);
				1029
				1030	dump_stack();
				1031	}
				1032	}
				1033
				1034	/*
				1035	* Tracking of fully allocated slabs for debugging purposes.
				1036	*/
				1037	static void add_full(struct kmem_cache *s,
				1038	struct kmem_cache_node n, struct page page)
				1039	{
				1040	if (!(s->flags & SLAB_STORE_USER))
				1041	return;
				1042
				1043	lockdep_assert_held(&n->list_lock);
				1044	list_add(&page->slab_list, &n->full);
				1045	}
				1046
				1047	static void remove_full(struct kmem_cache s, struct kmem_cache_node n, struct page *page)
				1048	{
				1049	if (!(s->flags & SLAB_STORE_USER))
				1050	return;
				1051
				1052	lockdep_assert_held(&n->list_lock);
				1053	list_del(&page->slab_list);
				1054	}
				1055
				1056	/* Tracking of the number of slabs for debugging purposes */
				1057	static inline unsigned long slabs_node(struct kmem_cache *s, int node)
				1058	{
				1059	struct kmem_cache_node *n = get_node(s, node);
				1060
				1061	return atomic_long_read(&n->nr_slabs);
				1062	}
				1063
				1064	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
				1065	{
				1066	return atomic_long_read(&n->nr_slabs);
				1067	}
				1068
				1069	static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
				1070	{
				1071	struct kmem_cache_node *n = get_node(s, node);
				1072
				1073	/*
				1074	* May be called early in order to allocate a slab for the
				1075	* kmem_cache_node structure. Solve the chicken-egg
				1076	* dilemma by deferring the increment of the count during
				1077	* bootstrap (see early_kmem_cache_node_alloc).
				1078	*/
				1079	if (likely(n)) {
				1080	atomic_long_inc(&n->nr_slabs);
				1081	atomic_long_add(objects, &n->total_objects);
				1082	}
				1083	}
				1084	static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
				1085	{
				1086	struct kmem_cache_node *n = get_node(s, node);
				1087
				1088	atomic_long_dec(&n->nr_slabs);
				1089	atomic_long_sub(objects, &n->total_objects);
				1090	}
				1091
				1092	/* Object debug checks for alloc/free paths */
				1093	static void setup_object_debug(struct kmem_cache s, struct page page,
				1094	void *object)
				1095	{
				1096	if (!(s->flags & (SLAB_STORE_USER\|SLAB_RED_ZONE\|__OBJECT_POISON)))
				1097	return;
				1098
				1099	init_object(s, object, SLUB_RED_INACTIVE);
				1100	init_tracking(s, object);
				1101	}
				1102
				1103	static
				1104	void setup_page_debug(struct kmem_cache s, struct page page, void *addr)
				1105	{
				1106	if (!(s->flags & SLAB_POISON))
				1107	return;
				1108
				1109	metadata_access_enable();
				1110	memset(addr, POISON_INUSE, page_size(page));
				1111	metadata_access_disable();
				1112	}
				1113
				1114	static inline int alloc_consistency_checks(struct kmem_cache *s,
				1115	struct page page, void object)
				1116	{
				1117	if (!check_slab(s, page))
				1118	return 0;
				1119
				1120	if (!check_valid_pointer(s, page, object)) {
				1121	object_err(s, page, object, "Freelist Pointer check fails");
				1122	return 0;
				1123	}
				1124
				1125	if (!check_object(s, page, object, SLUB_RED_INACTIVE))
				1126	return 0;
				1127
				1128	return 1;
				1129	}
				1130
				1131	static noinline int alloc_debug_processing(struct kmem_cache *s,
				1132	struct page *page,
				1133	void *object, unsigned long addr)
				1134	{
				1135	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1136	if (!alloc_consistency_checks(s, page, object))
				1137	goto bad;
				1138	}
				1139
				1140	/* Success perform special debug activities for allocs */
				1141	if (s->flags & SLAB_STORE_USER)
				1142	set_track(s, object, TRACK_ALLOC, addr);
				1143	trace(s, page, object, 1);
				1144	init_object(s, object, SLUB_RED_ACTIVE);
				1145	return 1;
				1146
				1147	bad:
				1148	if (PageSlab(page)) {
				1149	/*
				1150	* If this is a slab page then lets do the best we can
				1151	* to avoid issues in the future. Marking all objects
				1152	* as used avoids touching the remaining objects.
				1153	*/
				1154	slab_fix(s, "Marking all objects used");
				1155	page->inuse = page->objects;
				1156	page->freelist = NULL;
				1157	}
				1158	return 0;
				1159	}
				1160
				1161	static inline int free_consistency_checks(struct kmem_cache *s,
				1162	struct page page, void object, unsigned long addr)
				1163	{
				1164	if (!check_valid_pointer(s, page, object)) {
				1165	slab_err(s, page, "Invalid object pointer 0x%p", object);
				1166	return 0;
				1167	}
				1168
				1169	if (on_freelist(s, page, object)) {
				1170	object_err(s, page, object, "Object already free");
				1171	return 0;
				1172	}
				1173
				1174	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
				1175	return 0;
				1176
				1177	if (unlikely(s != page->slab_cache)) {
				1178	if (!PageSlab(page)) {
				1179	slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
				1180	object);
				1181	} else if (!page->slab_cache) {
				1182	pr_err("SLUB <none>: no slab for object 0x%p.\n",
				1183	object);
				1184	dump_stack();
				1185	} else
				1186	object_err(s, page, object,
				1187	"page slab pointer corrupt.");
				1188	return 0;
				1189	}
				1190	return 1;
				1191	}
				1192
				1193	/* Supports checking bulk free of a constructed freelist */
				1194	static noinline int free_debug_processing(
				1195	struct kmem_cache s, struct page page,
				1196	void head, void tail, int bulk_cnt,
				1197	unsigned long addr)
				1198	{
				1199	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				1200	void *object = head;
				1201	int cnt = 0;
				1202	unsigned long flags;
				1203	int ret = 0;
				1204
				1205	spin_lock_irqsave(&n->list_lock, flags);
				1206	slab_lock(page);
				1207
				1208	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1209	if (!check_slab(s, page))
				1210	goto out;
				1211	}
				1212
				1213	next_object:
				1214	cnt++;
				1215
				1216	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1217	if (!free_consistency_checks(s, page, object, addr))
				1218	goto out;
				1219	}
				1220
				1221	if (s->flags & SLAB_STORE_USER)
				1222	set_track(s, object, TRACK_FREE, addr);
				1223	trace(s, page, object, 0);
				1224	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
				1225	init_object(s, object, SLUB_RED_INACTIVE);
				1226
				1227	/* Reached end of constructed freelist yet? */
				1228	if (object != tail) {
				1229	object = get_freepointer(s, object);
				1230	goto next_object;
				1231	}
				1232	ret = 1;
				1233
				1234	out:
				1235	if (cnt != bulk_cnt)
				1236	slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
				1237	bulk_cnt, cnt);
				1238
				1239	slab_unlock(page);
				1240	spin_unlock_irqrestore(&n->list_lock, flags);
				1241	if (!ret)
				1242	slab_fix(s, "Object at 0x%p not freed", object);
				1243	return ret;
				1244	}
				1245
				1246	static int __init setup_slub_debug(char *str)
				1247	{
				1248	slub_debug = DEBUG_DEFAULT_FLAGS;
				1249	if (str++ != '=' \|\| !str)
				1250	/*
				1251	* No options specified. Switch on full debugging.
				1252	*/
				1253	goto out;
				1254
				1255	if (*str == ',')
				1256	/*
				1257	* No options but restriction on slabs. This means full
				1258	* debugging for slabs matching a pattern.
				1259	*/
				1260	goto check_slabs;
				1261
				1262	slub_debug = 0;
				1263	if (*str == '-')
				1264	/*
				1265	* Switch off all debugging measures.
				1266	*/
				1267	goto out;
				1268
				1269	/*
				1270	* Determine which debug features should be switched on
				1271	*/
				1272	for (; str && str != ','; str++) {
				1273	switch (tolower(*str)) {
				1274	case 'f':
				1275	slub_debug \|= SLAB_CONSISTENCY_CHECKS;
				1276	break;
				1277	case 'z':
				1278	slub_debug \|= SLAB_RED_ZONE;
				1279	break;
				1280	case 'p':
				1281	slub_debug \|= SLAB_POISON;
				1282	break;
				1283	case 'u':
				1284	slub_debug \|= SLAB_STORE_USER;
				1285	break;
				1286	case 't':
				1287	slub_debug \|= SLAB_TRACE;
				1288	break;
				1289	case 'a':
				1290	slub_debug \|= SLAB_FAILSLAB;
				1291	break;
				1292	case 'o':
				1293	/*
				1294	* Avoid enabling debugging on caches if its minimum
				1295	* order would increase as a result.
				1296	*/
				1297	disable_higher_order_debug = 1;
				1298	break;
				1299	default:
				1300	pr_err("slub_debug option '%c' unknown. skipped\n",
				1301	*str);
				1302	}
				1303	}
				1304
				1305	check_slabs:
				1306	if (*str == ',')
				1307	slub_debug_slabs = str + 1;
				1308	out:
				1309	if ((static_branch_unlikely(&init_on_alloc) \|\|
				1310	static_branch_unlikely(&init_on_free)) &&
				1311	(slub_debug & SLAB_POISON))
				1312	pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
				1313	return 1;
				1314	}
				1315
				1316	__setup("slub_debug", setup_slub_debug);
				1317
				1318	/*
				1319	* kmem_cache_flags - apply debugging options to the cache
				1320	* @object_size: the size of an object without meta data
				1321	* @flags: flags to set
				1322	* @name: name of the cache
				1323	* @ctor: constructor function
				1324	*
				1325	* Debug option(s) are applied to @flags. In addition to the debug
				1326	* option(s), if a slab name (or multiple) is specified i.e.
				1327	* slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
				1328	* then only the select slabs will receive the debug option(s).
				1329	*/
				1330	slab_flags_t kmem_cache_flags(unsigned int object_size,
				1331	slab_flags_t flags, const char *name,
				1332	void (ctor)(void ))
				1333	{
				1334	char *iter;
				1335	size_t len;
				1336
				1337	/* If slub_debug = 0, it folds into the if conditional. */
				1338	if (!slub_debug_slabs)
				1339	return flags \| slub_debug;
				1340
				1341	len = strlen(name);
				1342	iter = slub_debug_slabs;
				1343	while (*iter) {
				1344	char end, glob;
				1345	size_t cmplen;
				1346
				1347	end = strchrnul(iter, ',');
				1348
				1349	glob = strnchr(iter, end - iter, '*');
				1350	if (glob)
				1351	cmplen = glob - iter;
				1352	else
				1353	cmplen = max_t(size_t, len, (end - iter));
				1354
				1355	if (!strncmp(name, iter, cmplen)) {
				1356	flags \|= slub_debug;
				1357	break;
				1358	}
				1359
				1360	if (!*end)
				1361	break;
				1362	iter = end + 1;
				1363	}
				1364
				1365	return flags;
				1366	}
				1367	#else /* !CONFIG_SLUB_DEBUG */
				1368	static inline void setup_object_debug(struct kmem_cache *s,
				1369	struct page page, void object) {}
				1370	static inline
				1371	void setup_page_debug(struct kmem_cache s, struct page page, void *addr) {}
				1372
				1373	static inline int alloc_debug_processing(struct kmem_cache *s,
				1374	struct page page, void object, unsigned long addr) { return 0; }
				1375
				1376	static inline int free_debug_processing(
				1377	struct kmem_cache s, struct page page,
				1378	void head, void tail, int bulk_cnt,
				1379	unsigned long addr) { return 0; }
				1380
				1381	static inline int slab_pad_check(struct kmem_cache s, struct page page)
				1382	{ return 1; }
				1383	static inline int check_object(struct kmem_cache s, struct page page,
				1384	void *object, u8 val) { return 1; }
				1385	static inline void add_full(struct kmem_cache s, struct kmem_cache_node n,
				1386	struct page *page) {}
				1387	static inline void remove_full(struct kmem_cache s, struct kmem_cache_node n,
				1388	struct page *page) {}
				1389	slab_flags_t kmem_cache_flags(unsigned int object_size,
				1390	slab_flags_t flags, const char *name,
				1391	void (ctor)(void ))
				1392	{
				1393	return flags;
				1394	}
				1395	#define slub_debug 0
				1396
				1397	#define disable_higher_order_debug 0
				1398
				1399	static inline unsigned long slabs_node(struct kmem_cache *s, int node)
				1400	{ return 0; }
				1401	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
				1402	{ return 0; }
				1403	static inline void inc_slabs_node(struct kmem_cache *s, int node,
				1404	int objects) {}
				1405	static inline void dec_slabs_node(struct kmem_cache *s, int node,
				1406	int objects) {}
				1407
				1408	static bool freelist_corrupted(struct kmem_cache s, struct page page,
				1409	void *freelist, void nextfree)
				1410	{
				1411	return false;
				1412	}
				1413	#endif /* CONFIG_SLUB_DEBUG */
				1414
				1415	/*
				1416	* Hooks for other subsystems that check memory allocations. In a typical
				1417	* production configuration these hooks all should produce no code at all.
				1418	*/
				1419	static inline void kmalloc_large_node_hook(void ptr, size_t size, gfp_t flags)
				1420	{
				1421	ptr = kasan_kmalloc_large(ptr, size, flags);
				1422	/* As ptr might get tagged, call kmemleak hook after KASAN. */
				1423	kmemleak_alloc(ptr, size, 1, flags);
				1424	return ptr;
				1425	}
				1426
				1427	static __always_inline void kfree_hook(void *x)
				1428	{
				1429	kmemleak_free(x);
				1430	kasan_kfree_large(x, _RET_IP_);
				1431	}
				1432
				1433	static __always_inline bool slab_free_hook(struct kmem_cache s, void x)
				1434	{
				1435	kmemleak_free_recursive(x, s->flags);
				1436
				1437	/*
				1438	* Trouble is that we may no longer disable interrupts in the fast path
				1439	* So in order to make the debug calls that expect irqs to be
				1440	* disabled we need to disable interrupts temporarily.
				1441	*/
				1442	#ifdef CONFIG_LOCKDEP
				1443	{
				1444	unsigned long flags;
				1445
				1446	local_irq_save(flags);
				1447	debug_check_no_locks_freed(x, s->object_size);
				1448	local_irq_restore(flags);
				1449	}
				1450	#endif
				1451	if (!(s->flags & SLAB_DEBUG_OBJECTS))
				1452	debug_check_no_obj_freed(x, s->object_size);
				1453
				1454	/* KASAN might put x into memory quarantine, delaying its reuse */
				1455	return kasan_slab_free(s, x, _RET_IP_);
				1456	}
				1457
				1458	static inline bool slab_free_freelist_hook(struct kmem_cache *s,
				1459	void head, void tail,
				1460	int *cnt)
				1461	{
				1462
				1463	void *object;
				1464	void next = head;
				1465	void old_tail = tail ? tail : head;
				1466	int rsize;
				1467
				1468	if (is_kfence_address(next)) {
				1469	slab_free_hook(s, next);
				1470	return true;
				1471	}
				1472
				1473	/* Head and tail of the reconstructed freelist */
				1474	*head = NULL;
				1475	*tail = NULL;
				1476
				1477	do {
				1478	object = next;
				1479	next = get_freepointer(s, object);
				1480
				1481	if (slab_want_init_on_free(s)) {
				1482	/*
				1483	* Clear the object and the metadata, but don't touch
				1484	* the redzone.
				1485	*/
				1486	memset(object, 0, s->object_size);
				1487	rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
				1488	: 0;
				1489	memset((char *)object + s->inuse, 0,
				1490	s->size - s->inuse - rsize);
				1491
				1492	}
				1493	/* If object's reuse doesn't have to be delayed */
				1494	if (!slab_free_hook(s, object)) {
				1495	/* Move object to the new freelist */
				1496	set_freepointer(s, object, *head);
				1497	*head = object;
				1498	if (!*tail)
				1499	*tail = object;
				1500	} else {
				1501	/*
				1502	* Adjust the reconstructed freelist depth
				1503	* accordingly if object's reuse is delayed.
				1504	*/
				1505	--(*cnt);
				1506	}
				1507	} while (object != old_tail);
				1508
				1509	if (head == tail)
				1510	*tail = NULL;
				1511
				1512	return *head != NULL;
				1513	}
				1514
				1515	static void setup_object(struct kmem_cache s, struct page *page,
				1516	void *object)
				1517	{
				1518	setup_object_debug(s, page, object);
				1519	object = kasan_init_slab_obj(s, object);
				1520	if (unlikely(s->ctor)) {
				1521	kasan_unpoison_object_data(s, object);
				1522	s->ctor(object);
				1523	kasan_poison_object_data(s, object);
				1524	}
				1525	return object;
				1526	}
				1527
				1528	/*
				1529	* Slab allocation and freeing
				1530	*/
				1531	static inline struct page alloc_slab_page(struct kmem_cache s,
				1532	gfp_t flags, int node, struct kmem_cache_order_objects oo)
				1533	{
				1534	struct page *page;
				1535	unsigned int order = oo_order(oo);
				1536
				1537	if (node == NUMA_NO_NODE)
				1538	page = alloc_pages(flags, order);
				1539	else
				1540	page = __alloc_pages_node(node, flags, order);
				1541
				1542	if (page && charge_slab_page(page, flags, order, s)) {
				1543	__free_pages(page, order);
				1544	page = NULL;
				1545	}
				1546
				1547	return page;
				1548	}
				1549
				1550	#ifdef CONFIG_SLAB_FREELIST_RANDOM
				1551	/* Pre-initialize the random sequence cache */
				1552	static int init_cache_random_seq(struct kmem_cache *s)
				1553	{
				1554	unsigned int count = oo_objects(s->oo);
				1555	int err;
				1556
				1557	/* Bailout if already initialised */
				1558	if (s->random_seq)
				1559	return 0;
				1560
				1561	err = cache_random_seq_create(s, count, GFP_KERNEL);
				1562	if (err) {
				1563	pr_err("SLUB: Unable to initialize free list for %s\n",
				1564	s->name);
				1565	return err;
				1566	}
				1567
				1568	/* Transform to an offset on the set of pages */
				1569	if (s->random_seq) {
				1570	unsigned int i;
				1571
				1572	for (i = 0; i < count; i++)
				1573	s->random_seq[i] *= s->size;
				1574	}
				1575	return 0;
				1576	}
				1577
				1578	/* Initialize each random sequence freelist per cache */
				1579	static void __init init_freelist_randomization(void)
				1580	{
				1581	struct kmem_cache *s;
				1582
				1583	mutex_lock(&slab_mutex);
				1584
				1585	list_for_each_entry(s, &slab_caches, list)
				1586	init_cache_random_seq(s);
				1587
				1588	mutex_unlock(&slab_mutex);
				1589	}
				1590
				1591	/* Get the next entry on the pre-computed freelist randomized */
				1592	static void next_freelist_entry(struct kmem_cache s, struct page *page,
				1593	unsigned long pos, void start,
				1594	unsigned long page_limit,
				1595	unsigned long freelist_count)
				1596	{
				1597	unsigned int idx;
				1598
				1599	/*
				1600	* If the target page allocation failed, the number of objects on the
				1601	* page might be smaller than the usual size defined by the cache.
				1602	*/
				1603	do {
				1604	idx = s->random_seq[*pos];
				1605	*pos += 1;
				1606	if (*pos >= freelist_count)
				1607	*pos = 0;
				1608	} while (unlikely(idx >= page_limit));
				1609
				1610	return (char *)start + idx;
				1611	}
				1612
				1613	/* Shuffle the single linked freelist based on a random pre-computed sequence */
				1614	static bool shuffle_freelist(struct kmem_cache s, struct page page)
				1615	{
				1616	void *start;
				1617	void *cur;
				1618	void *next;
				1619	unsigned long idx, pos, page_limit, freelist_count;
				1620
				1621	if (page->objects < 2 \|\| !s->random_seq)
				1622	return false;
				1623
				1624	freelist_count = oo_objects(s->oo);
				1625	pos = get_random_int() % freelist_count;
				1626
				1627	page_limit = page->objects * s->size;
				1628	start = fixup_red_left(s, page_address(page));
				1629
				1630	/* First entry is used as the base of the freelist */
				1631	cur = next_freelist_entry(s, page, &pos, start, page_limit,
				1632	freelist_count);
				1633	cur = setup_object(s, page, cur);
				1634	page->freelist = cur;
				1635
				1636	for (idx = 1; idx < page->objects; idx++) {
				1637	next = next_freelist_entry(s, page, &pos, start, page_limit,
				1638	freelist_count);
				1639	next = setup_object(s, page, next);
				1640	set_freepointer(s, cur, next);
				1641	cur = next;
				1642	}
				1643	set_freepointer(s, cur, NULL);
				1644
				1645	return true;
				1646	}
				1647	#else
				/*
				 * Stubs used when CONFIG_SLAB_FREELIST_RANDOM is disabled: nothing to
				 * initialise, and shuffle_freelist() reports that no shuffle happened.
				 */
				static inline int init_cache_random_seq(struct kmem_cache *s)
				{
					return 0;
				}
				static inline void init_freelist_randomization(void) { }
				static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
				{
					return false;
				}
				1657	#endif /* CONFIG_SLAB_FREELIST_RANDOM */
				1658
				1659	static struct page allocate_slab(struct kmem_cache s, gfp_t flags, int node)
				1660	{
				1661	struct page *page;
				1662	struct kmem_cache_order_objects oo = s->oo;
				1663	gfp_t alloc_gfp;
				1664	void start, p, *next;
				1665	int idx;
				1666	bool shuffle;
				1667
				1668	flags &= gfp_allowed_mask;
				1669
				1670	if (gfpflags_allow_blocking(flags))
				1671	local_irq_enable();
				1672
				1673	flags \|= s->allocflags;
				1674
				1675	/*
				1676	* Let the initial higher-order allocation fail under memory pressure
				1677	* so we fall-back to the minimum order allocation.
				1678	*/
				1679	alloc_gfp = (flags \| __GFP_NOWARN \| __GFP_NORETRY) & ~__GFP_NOFAIL;
				1680	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
				1681	alloc_gfp = (alloc_gfp \| __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM\|__GFP_NOFAIL);
				1682
				1683	page = alloc_slab_page(s, alloc_gfp, node, oo);
				1684	if (unlikely(!page)) {
				1685	oo = s->min;
				1686	alloc_gfp = flags;
				1687	/*
				1688	* Allocation may have failed due to fragmentation.
				1689	* Try a lower order alloc if possible
				1690	*/
				1691	page = alloc_slab_page(s, alloc_gfp, node, oo);
				1692	if (unlikely(!page))
				1693	goto out;
				1694	stat(s, ORDER_FALLBACK);
				1695	}
				1696
				1697	page->objects = oo_objects(oo);
				1698
				1699	page->slab_cache = s;
				1700	__SetPageSlab(page);
				1701	if (page_is_pfmemalloc(page))
				1702	SetPageSlabPfmemalloc(page);
				1703
				1704	kasan_poison_slab(page);
				1705
				1706	start = page_address(page);
				1707
				1708	setup_page_debug(s, page, start);
				1709
				1710	shuffle = shuffle_freelist(s, page);
				1711
				1712	if (!shuffle) {
				1713	start = fixup_red_left(s, start);
				1714	start = setup_object(s, page, start);
				1715	page->freelist = start;
				1716	for (idx = 0, p = start; idx < page->objects - 1; idx++) {
				1717	next = p + s->size;
				1718	next = setup_object(s, page, next);
				1719	set_freepointer(s, p, next);
				1720	p = next;
				1721	}
				1722	set_freepointer(s, p, NULL);
				1723	}
				1724
				1725	page->inuse = page->objects;
				1726	page->frozen = 1;
				1727
				1728	out:
				1729	if (gfpflags_allow_blocking(flags))
				1730	local_irq_disable();
				1731	if (!page)
				1732	return NULL;
				1733
				1734	inc_slabs_node(s, page_to_nid(page), page->objects);
				1735
				1736	return page;
				1737	}
				1738
				1739	static struct page new_slab(struct kmem_cache s, gfp_t flags, int node)
				1740	{
				1741	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
				1742	gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
				1743	flags &= ~GFP_SLAB_BUG_MASK;
				1744	pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
				1745	invalid_mask, &invalid_mask, flags, &flags);
				1746	dump_stack();
				1747	}
				1748
				1749	return allocate_slab(s,
				1750	flags & (GFP_RECLAIM_MASK \| GFP_CONSTRAINT_MASK), node);
				1751	}
				1752
				1753	static void __free_slab(struct kmem_cache s, struct page page)
				1754	{
				1755	int order = compound_order(page);
				1756	int pages = 1 << order;
				1757
				1758	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1759	void *p;
				1760
				1761	slab_pad_check(s, page);
				1762	for_each_object(p, s, page_address(page),
				1763	page->objects)
				1764	check_object(s, page, p, SLUB_RED_INACTIVE);
				1765	}
				1766
				1767	__ClearPageSlabPfmemalloc(page);
				1768	__ClearPageSlab(page);
				1769
				1770	page->mapping = NULL;
				1771	if (current->reclaim_state)
				1772	current->reclaim_state->reclaimed_slab += pages;
				1773	uncharge_slab_page(page, order, s);
				1774	__free_pages(page, order);
				1775	}
				1776
				1777	static void rcu_free_slab(struct rcu_head *h)
				1778	{
				1779	struct page *page = container_of(h, struct page, rcu_head);
				1780
				1781	__free_slab(page->slab_cache, page);
				1782	}
				1783
				1784	static void free_slab(struct kmem_cache s, struct page page)
				1785	{
				1786	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
				1787	call_rcu(&page->rcu_head, rcu_free_slab);
				1788	} else
				1789	__free_slab(s, page);
				1790	}
				1791
				1792	static void discard_slab(struct kmem_cache s, struct page page)
				1793	{
				1794	dec_slabs_node(s, page_to_nid(page), page->objects);
				1795	free_slab(s, page);
				1796	}
				1797
				1798	/*
				1799	* Management of partially allocated slabs.
				1800	*/
				1801	static inline void
				1802	__add_partial(struct kmem_cache_node n, struct page page, int tail)
				1803	{
				1804	n->nr_partial++;
				1805	if (tail == DEACTIVATE_TO_TAIL)
				1806	list_add_tail(&page->slab_list, &n->partial);
				1807	else
				1808	list_add(&page->slab_list, &n->partial);
				1809	}
				1810
				1811	static inline void add_partial(struct kmem_cache_node *n,
				1812	struct page *page, int tail)
				1813	{
				1814	lockdep_assert_held(&n->list_lock);
				1815	__add_partial(n, page, tail);
				1816	}
				1817
				1818	static inline void remove_partial(struct kmem_cache_node *n,
				1819	struct page *page)
				1820	{
				1821	lockdep_assert_held(&n->list_lock);
				1822	list_del(&page->slab_list);
				1823	n->nr_partial--;
				1824	}
				1825
				1826	/*
				1827	* Remove slab from the partial list, freeze it and
				1828	* return the pointer to the freelist.
				1829	*
				1830	* Returns a list of objects or NULL if it fails.
				1831	*/
				1832	static inline void acquire_slab(struct kmem_cache s,
				1833	struct kmem_cache_node n, struct page page,
				1834	int mode, int *objects)
				1835	{
				1836	void *freelist;
				1837	unsigned long counters;
				1838	struct page new;
				1839
				1840	lockdep_assert_held(&n->list_lock);
				1841
				1842	/*
				1843	* Zap the freelist and set the frozen bit.
				1844	* The old freelist is the list of objects for the
				1845	* per cpu allocation list.
				1846	*/
				1847	freelist = page->freelist;
				1848	counters = page->counters;
				1849	new.counters = counters;
				1850	*objects = new.objects - new.inuse;
				1851	if (mode) {
				1852	new.inuse = page->objects;
				1853	new.freelist = NULL;
				1854	} else {
				1855	new.freelist = freelist;
				1856	}
				1857
				1858	VM_BUG_ON(new.frozen);
				1859	new.frozen = 1;
				1860
				1861	if (!__cmpxchg_double_slab(s, page,
				1862	freelist, counters,
				1863	new.freelist, new.counters,
				1864	"acquire_slab"))
				1865	return NULL;
				1866
				1867	remove_partial(n, page);
				1868	WARN_ON(!freelist);
				1869	return freelist;
				1870	}
				1871
				1872	static void put_cpu_partial(struct kmem_cache s, struct page page, int drain);
				1873	static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
				1874
				1875	/*
				1876	* Try to allocate a partial slab from a specific node.
				1877	*/
				1878	static void get_partial_node(struct kmem_cache s, struct kmem_cache_node *n,
				1879	struct kmem_cache_cpu *c, gfp_t flags)
				1880	{
				1881	struct page page, page2;
				1882	void *object = NULL;
				1883	unsigned int available = 0;
				1884	int objects;
				1885
				1886	/*
				1887	* Racy check. If we mistakenly see no partial slabs then we
				1888	* just allocate an empty slab. If we mistakenly try to get a
				1889	* partial slab and there is none available then get_partials()
				1890	* will return NULL.
				1891	*/
				1892	if (!n \|\| !n->nr_partial)
				1893	return NULL;
				1894
				1895	spin_lock(&n->list_lock);
				1896	list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
				1897	void *t;
				1898
				1899	if (!pfmemalloc_match(page, flags))
				1900	continue;
				1901
				1902	t = acquire_slab(s, n, page, object == NULL, &objects);
				1903	if (!t)
				1904	break;
				1905
				1906	available += objects;
				1907	if (!object) {
				1908	c->page = page;
				1909	stat(s, ALLOC_FROM_PARTIAL);
				1910	object = t;
				1911	} else {
				1912	put_cpu_partial(s, page, 0);
				1913	stat(s, CPU_PARTIAL_NODE);
				1914	}
				1915	if (!kmem_cache_has_cpu_partial(s)
				1916	\|\| available > slub_cpu_partial(s) / 2)
				1917	break;
				1918
				1919	}
				1920	spin_unlock(&n->list_lock);
				1921	return object;
				1922	}
				1923
				1924	/*
				1925	* Get a page from somewhere. Search in increasing NUMA distances.
				1926	*/
				1927	static void get_any_partial(struct kmem_cache s, gfp_t flags,
				1928	struct kmem_cache_cpu *c)
				1929	{
				1930	#ifdef CONFIG_NUMA
				1931	struct zonelist *zonelist;
				1932	struct zoneref *z;
				1933	struct zone *zone;
				1934	enum zone_type high_zoneidx = gfp_zone(flags);
				1935	void *object;
				1936	unsigned int cpuset_mems_cookie;
				1937
				1938	/*
				1939	* The defrag ratio allows a configuration of the tradeoffs between
				1940	* inter node defragmentation and node local allocations. A lower
				1941	* defrag_ratio increases the tendency to do local allocations
				1942	* instead of attempting to obtain partial slabs from other nodes.
				1943	*
				1944	* If the defrag_ratio is set to 0 then kmalloc() always
				1945	* returns node local objects. If the ratio is higher then kmalloc()
				1946	* may return off node objects because partial slabs are obtained
				1947	* from other nodes and filled up.
				1948	*
				1949	* If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
				1950	* (which makes defrag_ratio = 1000) then every (well almost)
				1951	* allocation will first attempt to defrag slab caches on other nodes.
				1952	* This means scanning over all nodes to look for partial slabs which
				1953	* may be expensive if we do it every time we are trying to find a slab
				1954	* with available objects.
				1955	*/
				1956	if (!s->remote_node_defrag_ratio \|\|
				1957	get_cycles() % 1024 > s->remote_node_defrag_ratio)
				1958	return NULL;
				1959
				1960	do {
				1961	cpuset_mems_cookie = read_mems_allowed_begin();
				1962	zonelist = node_zonelist(mempolicy_slab_node(), flags);
				1963	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
				1964	struct kmem_cache_node *n;
				1965
				1966	n = get_node(s, zone_to_nid(zone));
				1967
				1968	if (n && cpuset_zone_allowed(zone, flags) &&
				1969	n->nr_partial > s->min_partial) {
				1970	object = get_partial_node(s, n, c, flags);
				1971	if (object) {
				1972	/*
				1973	* Don't check read_mems_allowed_retry()
				1974	* here - if mems_allowed was updated in
				1975	* parallel, that was a harmless race
				1976	* between allocation and the cpuset
				1977	* update
				1978	*/
				1979	return object;
				1980	}
				1981	}
				1982	}
				1983	} while (read_mems_allowed_retry(cpuset_mems_cookie));
				1984	#endif /* CONFIG_NUMA */
				1985	return NULL;
				1986	}
				1987
				1988	/*
				1989	* Get a partial page, lock it and return it.
				1990	*/
				1991	static void get_partial(struct kmem_cache s, gfp_t flags, int node,
				1992	struct kmem_cache_cpu *c)
				1993	{
				1994	void *object;
				1995	int searchnode = node;
				1996
				1997	if (node == NUMA_NO_NODE)
				1998	searchnode = numa_mem_id();
				1999
				2000	object = get_partial_node(s, get_node(s, searchnode), c, flags);
				2001	if (object \|\| node != NUMA_NO_NODE)
				2002	return object;
				2003
				2004	return get_any_partial(s, flags, c);
				2005	}
				2006
				2007	#ifdef CONFIG_PREEMPT
				2008	/*
				2009	* Calculate the next globally unique transaction for disambiguiation
				2010	* during cmpxchg. The transactions start with the cpu number and are then
				2011	* incremented by CONFIG_NR_CPUS.
				2012	*/
				2013	#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
				2014	#else
				2015	/*
				2016	* No preemption supported therefore also no need to check for
				2017	* different cpus.
				2018	*/
				2019	#define TID_STEP 1
				2020	#endif
				2021
				2022	static inline unsigned long next_tid(unsigned long tid)
				2023	{
				2024	return tid + TID_STEP;
				2025	}
				2026
				2027	#ifdef SLUB_DEBUG_CMPXCHG
				2028	static inline unsigned int tid_to_cpu(unsigned long tid)
				2029	{
				2030	return tid % TID_STEP;
				2031	}
				2032
				2033	static inline unsigned long tid_to_event(unsigned long tid)
				2034	{
				2035	return tid / TID_STEP;
				2036	}
				2037	#endif
				2038
				2039	static inline unsigned int init_tid(int cpu)
				2040	{
				2041	return cpu;
				2042	}
				2043
				2044	static inline void note_cmpxchg_failure(const char *n,
				2045	const struct kmem_cache *s, unsigned long tid)
				2046	{
				2047	#ifdef SLUB_DEBUG_CMPXCHG
				2048	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
				2049
				2050	pr_info("%s %s: cmpxchg redo ", n, s->name);
				2051
				2052	#ifdef CONFIG_PREEMPT
				2053	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
				2054	pr_warn("due to cpu change %d -> %d\n",
				2055	tid_to_cpu(tid), tid_to_cpu(actual_tid));
				2056	else
				2057	#endif
				2058	if (tid_to_event(tid) != tid_to_event(actual_tid))
				2059	pr_warn("due to cpu running other code. Event %ld->%ld\n",
				2060	tid_to_event(tid), tid_to_event(actual_tid));
				2061	else
				2062	pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
				2063	actual_tid, tid, next_tid(tid));
				2064	#endif
				2065	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
				2066	}
				2067
				2068	static void init_kmem_cache_cpus(struct kmem_cache *s)
				2069	{
				2070	int cpu;
				2071
				2072	for_each_possible_cpu(cpu)
				2073	per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
				2074	}
				2075
				2076	/*
				2077	* Remove the cpu slab
				2078	*/
				2079	static void deactivate_slab(struct kmem_cache s, struct page page,
				2080	void freelist, struct kmem_cache_cpu c)
				2081	{
				2082	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
				2083	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				2084	int lock = 0;
				2085	enum slab_modes l = M_NONE, m = M_NONE;
				2086	void *nextfree;
				2087	int tail = DEACTIVATE_TO_HEAD;
				2088	struct page new;
				2089	struct page old;
				2090
				2091	if (page->freelist) {
				2092	stat(s, DEACTIVATE_REMOTE_FREES);
				2093	tail = DEACTIVATE_TO_TAIL;
				2094	}
				2095
				2096	/*
				2097	* Stage one: Free all available per cpu objects back
				2098	* to the page freelist while it is still frozen. Leave the
				2099	* last one.
				2100	*
				2101	* There is no need to take the list->lock because the page
				2102	* is still frozen.
				2103	*/
				2104	while (freelist && (nextfree = get_freepointer(s, freelist))) {
				2105	void *prior;
				2106	unsigned long counters;
				2107
				2108	/*
				2109	* If 'nextfree' is invalid, it is possible that the object at
				2110	* 'freelist' is already corrupted. So isolate all objects
				2111	* starting at 'freelist'.
				2112	*/
				2113	if (freelist_corrupted(s, page, &freelist, nextfree))
				2114	break;
				2115
				2116	do {
				2117	prior = page->freelist;
				2118	counters = page->counters;
				2119	set_freepointer(s, freelist, prior);
				2120	new.counters = counters;
				2121	new.inuse--;
				2122	VM_BUG_ON(!new.frozen);
				2123
				2124	} while (!__cmpxchg_double_slab(s, page,
				2125	prior, counters,
				2126	freelist, new.counters,
				2127	"drain percpu freelist"));
				2128
				2129	freelist = nextfree;
				2130	}
				2131
				2132	/*
				2133	* Stage two: Ensure that the page is unfrozen while the
				2134	* list presence reflects the actual number of objects
				2135	* during unfreeze.
				2136	*
				2137	* We setup the list membership and then perform a cmpxchg
				2138	* with the count. If there is a mismatch then the page
				2139	* is not unfrozen but the page is on the wrong list.
				2140	*
				2141	* Then we restart the process which may have to remove
				2142	* the page from the list that we just put it on again
				2143	* because the number of objects in the slab may have
				2144	* changed.
				2145	*/
				2146	redo:
				2147
				2148	old.freelist = page->freelist;
				2149	old.counters = page->counters;
				2150	VM_BUG_ON(!old.frozen);
				2151
				2152	/* Determine target state of the slab */
				2153	new.counters = old.counters;
				2154	if (freelist) {
				2155	new.inuse--;
				2156	set_freepointer(s, freelist, old.freelist);
				2157	new.freelist = freelist;
				2158	} else
				2159	new.freelist = old.freelist;
				2160
				2161	new.frozen = 0;
				2162
				2163	if (!new.inuse && n->nr_partial >= s->min_partial)
				2164	m = M_FREE;
				2165	else if (new.freelist) {
				2166	m = M_PARTIAL;
				2167	if (!lock) {
				2168	lock = 1;
				2169	/*
				2170	* Taking the spinlock removes the possibility
				2171	* that acquire_slab() will see a slab page that
				2172	* is frozen
				2173	*/
				2174	spin_lock(&n->list_lock);
				2175	}
				2176	} else {
				2177	m = M_FULL;
				2178	if (kmem_cache_debug(s) && !lock) {
				2179	lock = 1;
				2180	/*
				2181	* This also ensures that the scanning of full
				2182	* slabs from diagnostic functions will not see
				2183	* any frozen slabs.
				2184	*/
				2185	spin_lock(&n->list_lock);
				2186	}
				2187	}
				2188
				2189	if (l != m) {
				2190	if (l == M_PARTIAL)
				2191	remove_partial(n, page);
				2192	else if (l == M_FULL)
				2193	remove_full(s, n, page);
				2194
				2195	if (m == M_PARTIAL)
				2196	add_partial(n, page, tail);
				2197	else if (m == M_FULL)
				2198	add_full(s, n, page);
				2199	}
				2200
				2201	l = m;
				2202	if (!__cmpxchg_double_slab(s, page,
				2203	old.freelist, old.counters,
				2204	new.freelist, new.counters,
				2205	"unfreezing slab"))
				2206	goto redo;
				2207
				2208	if (lock)
				2209	spin_unlock(&n->list_lock);
				2210
				2211	if (m == M_PARTIAL)
				2212	stat(s, tail);
				2213	else if (m == M_FULL)
				2214	stat(s, DEACTIVATE_FULL);
				2215	else if (m == M_FREE) {
				2216	stat(s, DEACTIVATE_EMPTY);
				2217	discard_slab(s, page);
				2218	stat(s, FREE_SLAB);
				2219	}
				2220
				2221	c->page = NULL;
				2222	c->freelist = NULL;
				2223	c->tid = next_tid(c->tid);
				2224	}
				2225
				2226	/*
				2227	* Unfreeze all the cpu partial slabs.
				2228	*
				2229	* This function must be called with interrupts disabled
				2230	* for the cpu using c (or some other guarantee must be there
				2231	* to guarantee no concurrent accesses).
				2232	*/
				2233	static void unfreeze_partials(struct kmem_cache *s,
				2234	struct kmem_cache_cpu *c)
				2235	{
				2236	#ifdef CONFIG_SLUB_CPU_PARTIAL
				2237	struct kmem_cache_node n = NULL, n2 = NULL;
				2238	struct page page, discard_page = NULL;
				2239
				2240	while ((page = c->partial)) {
				2241	struct page new;
				2242	struct page old;
				2243
				2244	c->partial = page->next;
				2245
				2246	n2 = get_node(s, page_to_nid(page));
				2247	if (n != n2) {
				2248	if (n)
				2249	spin_unlock(&n->list_lock);
				2250
				2251	n = n2;
				2252	spin_lock(&n->list_lock);
				2253	}
				2254
				2255	do {
				2256
				2257	old.freelist = page->freelist;
				2258	old.counters = page->counters;
				2259	VM_BUG_ON(!old.frozen);
				2260
				2261	new.counters = old.counters;
				2262	new.freelist = old.freelist;
				2263
				2264	new.frozen = 0;
				2265
				2266	} while (!__cmpxchg_double_slab(s, page,
				2267	old.freelist, old.counters,
				2268	new.freelist, new.counters,
				2269	"unfreezing slab"));
				2270
				2271	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
				2272	page->next = discard_page;
				2273	discard_page = page;
				2274	} else {
				2275	add_partial(n, page, DEACTIVATE_TO_TAIL);
				2276	stat(s, FREE_ADD_PARTIAL);
				2277	}
				2278	}
				2279
				2280	if (n)
				2281	spin_unlock(&n->list_lock);
				2282
				2283	while (discard_page) {
				2284	page = discard_page;
				2285	discard_page = discard_page->next;
				2286
				2287	stat(s, DEACTIVATE_EMPTY);
				2288	discard_slab(s, page);
				2289	stat(s, FREE_SLAB);
				2290	}
				2291	#endif /* CONFIG_SLUB_CPU_PARTIAL */
				2292	}
				2293
				2294	/*
				2295	* Put a page that was just frozen (in __slab_free\|get_partial_node) into a
				2296	* partial page slot if available.
				2297	*
				2298	* If we did not find a slot then simply move all the partials to the
				2299	* per node partial list.
				2300	*/
				2301	static void put_cpu_partial(struct kmem_cache s, struct page page, int drain)
				2302	{
				2303	#ifdef CONFIG_SLUB_CPU_PARTIAL
				2304	struct page *oldpage;
				2305	int pages;
				2306	int pobjects;
				2307
				2308	preempt_disable();
				2309	do {
				2310	pages = 0;
				2311	pobjects = 0;
				2312	oldpage = this_cpu_read(s->cpu_slab->partial);
				2313
				2314	if (oldpage) {
				2315	pobjects = oldpage->pobjects;
				2316	pages = oldpage->pages;
				2317	if (drain && pobjects > s->cpu_partial) {
				2318	unsigned long flags;
				2319	/*
				2320	* partial array is full. Move the existing
				2321	* set to the per node partial list.
				2322	*/
				2323	local_irq_save(flags);
				2324	unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
				2325	local_irq_restore(flags);
				2326	oldpage = NULL;
				2327	pobjects = 0;
				2328	pages = 0;
				2329	stat(s, CPU_PARTIAL_DRAIN);
				2330	}
				2331	}
				2332
				2333	pages++;
				2334	pobjects += page->objects - page->inuse;
				2335
				2336	page->pages = pages;
				2337	page->pobjects = pobjects;
				2338	page->next = oldpage;
				2339
				2340	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
				2341	!= oldpage);
				2342	if (unlikely(!s->cpu_partial)) {
				2343	unsigned long flags;
				2344
				2345	local_irq_save(flags);
				2346	unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
				2347	local_irq_restore(flags);
				2348	}
				2349	preempt_enable();
				2350	#endif /* CONFIG_SLUB_CPU_PARTIAL */
				2351	}
				2352
				2353	static inline void flush_slab(struct kmem_cache s, struct kmem_cache_cpu c)
				2354	{
				2355	stat(s, CPUSLAB_FLUSH);
				2356	deactivate_slab(s, c->page, c->freelist, c);
				2357	}
				2358
				2359	/*
				2360	* Flush cpu slab.
				2361	*
				2362	* Called from IPI handler with interrupts disabled.
				2363	*/
				2364	static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
				2365	{
				2366	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
				2367
				2368	if (c->page)
				2369	flush_slab(s, c);
				2370
				2371	unfreeze_partials(s, c);
				2372	}
				2373
				2374	static void flush_cpu_slab(void *d)
				2375	{
				2376	struct kmem_cache *s = d;
				2377
				2378	__flush_cpu_slab(s, smp_processor_id());
				2379	}
				2380
				2381	static bool has_cpu_slab(int cpu, void *info)
				2382	{
				2383	struct kmem_cache *s = info;
				2384	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
				2385
				2386	return c->page \|\| slub_percpu_partial(c);
				2387	}
				2388
				2389	static void flush_all(struct kmem_cache *s)
				2390	{
				2391	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
				2392	}
				2393
				2394	/*
				2395	* Use the cpu notifier to insure that the cpu slabs are flushed when
				2396	* necessary.
				2397	*/
				2398	static int slub_cpu_dead(unsigned int cpu)
				2399	{
				2400	struct kmem_cache *s;
				2401	unsigned long flags;
				2402
				2403	mutex_lock(&slab_mutex);
				2404	list_for_each_entry(s, &slab_caches, list) {
				2405	local_irq_save(flags);
				2406	__flush_cpu_slab(s, cpu);
				2407	local_irq_restore(flags);
				2408	}
				2409	mutex_unlock(&slab_mutex);
				2410	return 0;
				2411	}
				2412
				2413	/*
				2414	* Check if the objects in a per cpu structure fit numa
				2415	* locality expectations.
				2416	*/
				2417	static inline int node_match(struct page *page, int node)
				2418	{
				2419	#ifdef CONFIG_NUMA
				2420	if (node != NUMA_NO_NODE && page_to_nid(page) != node)
				2421	return 0;
				2422	#endif
				2423	return 1;
				2424	}
				2425
				2426	#ifdef CONFIG_SLUB_DEBUG
				2427	static int count_free(struct page *page)
				2428	{
				2429	return page->objects - page->inuse;
				2430	}
				2431
				2432	static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
				2433	{
				2434	return atomic_long_read(&n->total_objects);
				2435	}
				2436	#endif /* CONFIG_SLUB_DEBUG */
				2437
				2438	#if defined(CONFIG_SLUB_DEBUG) \|\| defined(CONFIG_SYSFS)
				2439	static unsigned long count_partial(struct kmem_cache_node *n,
				2440	int (get_count)(struct page ))
				2441	{
				2442	unsigned long flags;
				2443	unsigned long x = 0;
				2444	struct page *page;
				2445
				2446	spin_lock_irqsave(&n->list_lock, flags);
				2447	list_for_each_entry(page, &n->partial, slab_list)
				2448	x += get_count(page);
				2449	spin_unlock_irqrestore(&n->list_lock, flags);
				2450	return x;
				2451	}
				2452	#endif /* CONFIG_SLUB_DEBUG \|\| CONFIG_SYSFS */
				2453
				2454	static noinline void
				2455	slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
				2456	{
				2457	#ifdef CONFIG_SLUB_DEBUG
				2458	static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				2459	DEFAULT_RATELIMIT_BURST);
				2460	int node;
				2461	struct kmem_cache_node *n;
				2462
				2463	if ((gfpflags & __GFP_NOWARN) \|\| !__ratelimit(&slub_oom_rs))
				2464	return;
				2465
				2466	pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
				2467	nid, gfpflags, &gfpflags);
				2468	pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
				2469	s->name, s->object_size, s->size, oo_order(s->oo),
				2470	oo_order(s->min));
				2471
				2472	if (oo_order(s->min) > get_order(s->object_size))
				2473	pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
				2474	s->name);
				2475
				2476	for_each_kmem_cache_node(s, node, n) {
				2477	unsigned long nr_slabs;
				2478	unsigned long nr_objs;
				2479	unsigned long nr_free;
				2480
				2481	nr_free = count_partial(n, count_free);
				2482	nr_slabs = node_nr_slabs(n);
				2483	nr_objs = node_nr_objs(n);
				2484
				2485	pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
				2486	node, nr_slabs, nr_objs, nr_free);
				2487	}
				2488	#endif
				2489	}
				2490
				2491	static inline void new_slab_objects(struct kmem_cache s, gfp_t flags,
				2492	int node, struct kmem_cache_cpu **pc)
				2493	{
				2494	void *freelist;
				2495	struct kmem_cache_cpu c = pc;
				2496	struct page *page;
				2497
				2498	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
				2499
				2500	freelist = get_partial(s, flags, node, c);
				2501
				2502	if (freelist)
				2503	return freelist;
				2504
				2505	page = new_slab(s, flags, node);
				2506	if (page) {
				2507	c = raw_cpu_ptr(s->cpu_slab);
				2508	if (c->page)
				2509	flush_slab(s, c);
				2510
				2511	/*
				2512	* No other reference to the page yet so we can
				2513	* muck around with it freely without cmpxchg
				2514	*/
				2515	freelist = page->freelist;
				2516	page->freelist = NULL;
				2517
				2518	stat(s, ALLOC_SLAB);
				2519	c->page = page;
				2520	*pc = c;
				2521	}
				2522
				2523	return freelist;
				2524	}
				2525
				2526	static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
				2527	{
				2528	if (unlikely(PageSlabPfmemalloc(page)))
				2529	return gfp_pfmemalloc_allowed(gfpflags);
				2530
				2531	return true;
				2532	}
				2533
				2534	/*
				2535	* Check the page->freelist of a page and either transfer the freelist to the
				2536	* per cpu freelist or deactivate the page.
				2537	*
				2538	* The page is still frozen if the return value is not NULL.
				2539	*
				2540	* If this function returns NULL then the page has been unfrozen.
				2541	*
				2542	* This function must be called with interrupt disabled.
				2543	*/
				2544	static inline void get_freelist(struct kmem_cache s, struct page *page)
				2545	{
				2546	struct page new;
				2547	unsigned long counters;
				2548	void *freelist;
				2549
				2550	do {
				2551	freelist = page->freelist;
				2552	counters = page->counters;
				2553
				2554	new.counters = counters;
				2555	VM_BUG_ON(!new.frozen);
				2556
				2557	new.inuse = page->objects;
				2558	new.frozen = freelist != NULL;
				2559
				2560	} while (!__cmpxchg_double_slab(s, page,
				2561	freelist, counters,
				2562	NULL, new.counters,
				2563	"get_freelist"));
				2564
				2565	return freelist;
				2566	}
				2567
				2568	#ifndef CONFIG_SLUB_DEBUG
				2569	/*
				2570	* reduced version of check_valid_pointer when CONFIG_SLUB_DEBUG is not defined
				2571	* Verify that a pointer has an address that is valid within a slab page
				2572	*/
				2573	static inline int check_valid_pointer_simple(struct kmem_cache *s,
				2574	struct page page, void object)
				2575	{
				2576	void *base;
				2577
				2578	if (!object)
				2579	goto out_warn;
				2580
				2581	base = page_address(page);
				2582	if (object < base \|\| object >= base + page->objects * s->size \|\|
				2583	(object - base) % s->size) {
				2584	goto out_warn;
				2585	}
				2586	return 1;
				2587
				2588	out_warn:
				2589	WARN(1, "slub error obj: 0x%lx, pg: 0x%lx, base: 0x%lx, objs: %d, sz: %d\n",
				2590	(ulong)object, (ulong)page, (ulong)base, page->objects, s->size);
				2591	return 0;
				2592	}
				2593	#endif
				2594
				2595	/*
				2596	* Slow path. The lockless freelist is empty or we need to perform
				2597	* debugging duties.
				2598	*
				2599	* Processing is still very fast if new objects have been freed to the
				2600	* regular freelist. In that case we simply take over the regular freelist
				2601	* as the lockless freelist and zap the regular freelist.
				2602	*
				2603	* If that is not working then we fall back to the partial lists. We take the
				2604	* first element of the freelist as the object to allocate now and move the
				2605	* rest of the freelist to the lockless freelist.
				2606	*
				2607	* And if we were unable to get a new slab from the partial slab lists then
				2608	* we need to allocate a new slab. This is the slowest path since it involves
				2609	* a call to the page allocator and the setup of a new slab.
				2610	*
				2611	* Version of __slab_alloc to use when we know that interrupts are
				2612	* already disabled (which is the case for bulk allocation).
				2613	*/
				2614	static void ___slab_alloc(struct kmem_cache s, gfp_t gfpflags, int node,
				2615	unsigned long addr, struct kmem_cache_cpu *c)
				2616	{
				2617	void *freelist;
				2618	struct page *page;
				2619
				2620	page = c->page;
				2621	if (!page) {
				2622	/*
				2623	* if the node is not online or has no normal memory, just
				2624	* ignore the node constraint
				2625	*/
				2626	if (unlikely(node != NUMA_NO_NODE &&
				2627	!node_state(node, N_NORMAL_MEMORY)))
				2628	node = NUMA_NO_NODE;
				2629	goto new_slab;
				2630	}
				2631	redo:
				2632
				2633	if (unlikely(!node_match(page, node))) {
				2634	/*
				2635	* same as above but node_match() being false already
				2636	* implies node != NUMA_NO_NODE
				2637	*/
				2638	if (!node_state(node, N_NORMAL_MEMORY)) {
				2639	node = NUMA_NO_NODE;
				2640	goto redo;
				2641	} else {
				2642	stat(s, ALLOC_NODE_MISMATCH);
				2643	deactivate_slab(s, page, c->freelist, c);
				2644	goto new_slab;
				2645	}
				2646	}
				2647
				2648	/*
				2649	* By rights, we should be searching for a slab page that was
				2650	* PFMEMALLOC but right now, we are losing the pfmemalloc
				2651	* information when the page leaves the per-cpu allocator
				2652	*/
				2653	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
				2654	deactivate_slab(s, page, c->freelist, c);
				2655	goto new_slab;
				2656	}
				2657
				2658	/* must check again c->freelist in case of cpu migration or IRQ */
				2659	freelist = c->freelist;
				2660	if (freelist)
				2661	goto load_freelist;
				2662
				2663	freelist = get_freelist(s, page);
				2664
				2665	if (!freelist) {
				2666	c->page = NULL;
				2667	c->tid = next_tid(c->tid);
				2668	stat(s, DEACTIVATE_BYPASS);
				2669	goto new_slab;
				2670	}
				2671
				2672	stat(s, ALLOC_REFILL);
				2673
				2674	load_freelist:
				2675	/*
				2676	* freelist is pointing to the list of objects to be used.
				2677	* page is pointing to the page from which the objects are obtained.
				2678	* That page must be frozen for per cpu allocations to work.
				2679	*/
				2680	VM_BUG_ON(!c->page->frozen);
				2681	c->freelist = get_freepointer(s, freelist);
				2682	#if defined(CONFIG_CPU_ASR18XX) \|\| defined(CONFIG_CPU_ASR1901)
				2683	if (unlikely((((u32)c->freelist) < PAGE_OFFSET) && (c->freelist != NULL))) {
				2684	pr_err("wrong freelist: 0x%x c: 0x%x freelist: %x, freeptr: 0x%x\n",
				2685	(u32)c->freelist, (u32)c, (u32)freelist, (u32)get_freepointer(s, freelist));
				2686	BUG();
				2687	}
				2688	if (unlikely((s->size & 0x3) == 0 && ((ulong)freelist & 0x3))) {
				2689	pr_err("wrong freelist addr: 0x%x c: 0x%x freelist: %x, freeptr: 0x%x\n",
				2690	(u32)c->freelist, (u32)c, (u32)freelist, (u32)get_freepointer(s, freelist));
				2691	WARN_ON(1);
				2692	}
				2693	#endif
				2694	c->tid = next_tid(c->tid);
				2695	return freelist;
				2696
				2697	new_slab:
				2698
				2699	if (slub_percpu_partial(c)) {
				2700	page = c->page = slub_percpu_partial(c);
				2701	slub_set_percpu_partial(c, page);
				2702	stat(s, CPU_PARTIAL_ALLOC);
				2703	goto redo;
				2704	}
				2705
				2706	freelist = new_slab_objects(s, gfpflags, node, &c);
				2707
				2708	if (unlikely(!freelist)) {
				2709	slab_out_of_memory(s, gfpflags, node);
				2710	return NULL;
				2711	}
				2712
				2713	page = c->page;
				2714	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
				2715	goto load_freelist;
				2716
				2717	/* Only entered in the debug case */
				2718	if (kmem_cache_debug(s) &&
				2719	!alloc_debug_processing(s, page, freelist, addr))
				2720	goto new_slab; /* Slab failed checks. Next slab needed */
				2721
				2722	deactivate_slab(s, page, get_freepointer(s, freelist), c);
				2723	return freelist;
				2724	}
				2725
				2726	/*
				2727	* Another one that disabled interrupt and compensates for possible
				2728	* cpu changes by refetching the per cpu area pointer.
				2729	*/
				2730	static void __slab_alloc(struct kmem_cache s, gfp_t gfpflags, int node,
				2731	unsigned long addr, struct kmem_cache_cpu *c)
				2732	{
				2733	void *p;
				2734	unsigned long flags;
				2735
				2736	local_irq_save(flags);
				2737	#ifdef CONFIG_PREEMPT
				2738	/*
				2739	* We may have been preempted and rescheduled on a different
				2740	* cpu before disabling interrupts. Need to reload cpu area
				2741	* pointer.
				2742	*/
				2743	c = this_cpu_ptr(s->cpu_slab);
				2744	#endif
				2745
				2746	p = ___slab_alloc(s, gfpflags, node, addr, c);
				2747	local_irq_restore(flags);
				2748	return p;
				2749	}
				2750
				2751	/*
				2752	* If the object has been wiped upon free, make sure it's fully initialized by
				2753	* zeroing out freelist pointer.
				2754	*/
				2755	static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
				2756	void *obj)
				2757	{
				2758	if (unlikely(slab_want_init_on_free(s)) && obj)
				2759	memset((void )((char )obj + s->offset), 0, sizeof(void *));
				2760	}
				2761
				2762	/*
				2763	* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
				2764	* have the fastpath folded into their functions. So no function call
				2765	* overhead for requests that can be satisfied on the fastpath.
				2766	*
				2767	* The fastpath works by first checking if the lockless freelist can be used.
				2768	* If not then __slab_alloc is called for slow processing.
				2769	*
				2770	* Otherwise we can simply pick the next object from the lockless free list.
				2771	*/
				2772	static __always_inline void slab_alloc_node(struct kmem_cache s,
				2773	gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
				2774	{
				2775	void *object;
				2776	struct kmem_cache_cpu *c;
				2777	struct page *page;
				2778	unsigned long tid;
				2779	struct kmem_cache *root_s = s;
				2780
				2781	s = slab_pre_alloc_hook(s, gfpflags);
				2782	if (!s)
				2783	return NULL;
				2784
				2785	/*
				2786	* 5.4 note: passing in original cachep to avoid problems with memcg
				2787	* accounting. Making KFENCE properly work with memcgs on older kernels
				2788	* is not worth the effort.
				2789	*/
				2790	object = kfence_alloc(root_s, orig_size, gfpflags);
				2791	if (unlikely(object))
				2792	goto out;
				2793
				2794	redo:
				2795	/*
				2796	* Must read kmem_cache cpu data via this cpu ptr. Preemption is
				2797	* enabled. We may switch back and forth between cpus while
				2798	* reading from one cpu area. That does not matter as long
				2799	* as we end up on the original cpu again when doing the cmpxchg.
				2800	*
				2801	* We should guarantee that tid and kmem_cache are retrieved on
				2802	* the same cpu. It could be different if CONFIG_PREEMPT so we need
				2803	* to check if it is matched or not.
				2804	*/
				2805	do {
				2806	tid = this_cpu_read(s->cpu_slab->tid);
				2807	c = raw_cpu_ptr(s->cpu_slab);
				2808	} while (IS_ENABLED(CONFIG_PREEMPT) &&
				2809	unlikely(tid != READ_ONCE(c->tid)));
				2810
				2811	/*
				2812	* Irqless object alloc/free algorithm used here depends on sequence
				2813	* of fetching cpu_slab's data. tid should be fetched before anything
				2814	* on c to guarantee that object and page associated with previous tid
				2815	* won't be used with current tid. If we fetch tid first, object and
				2816	* page could be one associated with next tid and our alloc/free
				2817	* request will be failed. In this case, we will retry. So, no problem.
				2818	*/
				2819	barrier();
				2820
				2821	/*
				2822	* The transaction ids are globally unique per cpu and per operation on
				2823	* a per cpu queue. Thus they can be guarantee that the cmpxchg_double
				2824	* occurs on the right processor and that there was no operation on the
				2825	* linked list in between.
				2826	*/
				2827
				2828	object = c->freelist;
				2829	page = c->page;
				2830	if (unlikely(!object \|\| !page \|\| !node_match(page, node))) {
				2831	object = __slab_alloc(s, gfpflags, node, addr, c);
				2832	stat(s, ALLOC_SLOWPATH);
				2833	} else {
				2834	void *next_object = get_freepointer_safe(s, object);
				2835
				2836	/*
				2837	* The cmpxchg will only match if there was no additional
				2838	* operation and if we are on the right processor.
				2839	*
				2840	* The cmpxchg does the following atomically (without lock
				2841	* semantics!)
				2842	* 1. Relocate first pointer to the current per cpu area.
				2843	* 2. Verify that tid and freelist have not been changed
				2844	* 3. If they were not changed replace tid and freelist
				2845	*
				2846	* Since this is without lock semantics the protection is only
				2847	* against code executing on this cpu not from access by
				2848	* other cpus.
				2849	*/
				2850	if (unlikely(!this_cpu_cmpxchg_double(
				2851	s->cpu_slab->freelist, s->cpu_slab->tid,
				2852	object, tid,
				2853	next_object, next_tid(tid)))) {
				2854
				2855	note_cmpxchg_failure("slab_alloc", s, tid);
				2856	goto redo;
				2857	}
				2858	#if defined(CONFIG_CPU_ASR18XX) \|\| defined(CONFIG_CPU_ASR1901)
				2859	if (unlikely((((u32)next_object) < PAGE_OFFSET) && (next_object != NULL))) {
				2860	pr_err("wrong freelist: 0x%x cache: 0x%x next_object: %x, object: 0x%x\n",
				2861	(u32)c->freelist, (u32)c, (u32)next_object, (u32)object);
				2862	BUG();
				2863	}
				2864	if (unlikely((s->size & 0x3) == 0 && ((ulong)object & 0x3))) {
				2865	pr_err("wrong freelist addr: 0x%x cache: 0x%x next_object: %x, object: 0x%x\n",
				2866	(u32)c->freelist, (u32)c, (u32)next_object, (u32)object);
				2867	WARN_ON(1);
				2868	}
				2869	#endif
				2870	prefetch_freepointer(s, next_object);
				2871	stat(s, ALLOC_FASTPATH);
				2872	}
				2873
				2874	maybe_wipe_obj_freeptr(s, object);
				2875
				2876	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
				2877	memset(object, 0, s->object_size);
				2878
				2879	out:
				2880	slab_post_alloc_hook(s, gfpflags, 1, &object);
				2881
				2882	return object;
				2883	}
				2884
				2885	static __always_inline void slab_alloc(struct kmem_cache s,
				2886	gfp_t gfpflags, unsigned long addr, size_t orig_size)
				2887	{
				2888	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
				2889	}
				2890
				2891	void kmem_cache_alloc(struct kmem_cache s, gfp_t gfpflags)
				2892	{
				2893	void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
				2894
				2895	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
				2896	s->size, gfpflags);
				2897
				2898	return ret;
				2899	}
				2900	EXPORT_SYMBOL(kmem_cache_alloc);
				2901
				#ifdef CONFIG_TRACING
				/*
				 * Traced variant of kmem_cache_alloc() for callers that requested @size
				 * bytes from cache @s.
				 *
				 * Emits a kmalloc trace event carrying the requested @size and the cache
				 * slot size, then passes the object through kasan_kmalloc() and returns
				 * the pointer that hook hands back.
				 */
				void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
				{
					void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);

					trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
					ret = kasan_kmalloc(s, ret, size, gfpflags);
					return ret;
				}
				EXPORT_SYMBOL(kmem_cache_alloc_trace);
				#endif
				2912
				2913	#ifdef CONFIG_NUMA
				2914	void kmem_cache_alloc_node(struct kmem_cache s, gfp_t gfpflags, int node)
				2915	{
				2916	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
				2917
				2918	trace_kmem_cache_alloc_node(_RET_IP_, ret,
				2919	s->object_size, s->size, gfpflags, node);
				2920
				2921	return ret;
				2922	}
				2923	EXPORT_SYMBOL(kmem_cache_alloc_node);
				2924
				#ifdef CONFIG_TRACING
				/*
				 * Traced, node-aware allocation of @size bytes from cache @s.
				 *
				 * Emits a kmalloc_node trace event with the requested @size and the cache
				 * slot size, then passes the object through kasan_kmalloc() and returns
				 * the pointer that hook hands back.
				 */
				void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
								    gfp_t gfpflags,
								    int node, size_t size)
				{
					void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);

					trace_kmalloc_node(_RET_IP_, ret,
							   size, s->size, gfpflags, node);

					ret = kasan_kmalloc(s, ret, size, gfpflags);
					return ret;
				}
				EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
				#endif
				2940	#endif /* CONFIG_NUMA */
				2941
				2942	/*
				2943	* Slow path handling. This may still be called frequently since objects
				2944	* have a longer lifetime than the cpu slabs in most processing loads.
				2945	*
				2946	* So we still attempt to reduce cache line usage. Just take the slab
				2947	* lock and free the item. If there is no additional partial page
				2948	* handling required then we can return immediately.
				2949	*/
				2950	static void __slab_free(struct kmem_cache s, struct page page,
				2951	void head, void tail, int cnt,
				2952	unsigned long addr)
				2953
				2954	{
				2955	void *prior;
				2956	int was_frozen;
				2957	struct page new;
				2958	unsigned long counters;
				2959	struct kmem_cache_node *n = NULL;
				2960	unsigned long flags;
				2961
				2962	stat(s, FREE_SLOWPATH);
				2963
				2964	if (kfence_free(head))
				2965	return;
				2966
				2967	#ifdef CONFIG_SLUB_DEBUG
				2968	if (kmem_cache_debug(s) &&
				2969	!free_debug_processing(s, page, head, tail, cnt, addr))
				2970	return;
				2971	#else
				2972	check_valid_pointer_simple(s, page, head);
				2973	#endif
				2974	do {
				2975	if (unlikely(n)) {
				2976	spin_unlock_irqrestore(&n->list_lock, flags);
				2977	n = NULL;
				2978	}
				2979	prior = page->freelist;
				2980	counters = page->counters;
				2981	set_freepointer(s, tail, prior);
				2982	new.counters = counters;
				2983	was_frozen = new.frozen;
				2984	new.inuse -= cnt;
				2985	if ((!new.inuse \|\| !prior) && !was_frozen) {
				2986
				2987	if (kmem_cache_has_cpu_partial(s) && !prior) {
				2988
				2989	/*
				2990	* Slab was on no list before and will be
				2991	* partially empty
				2992	* We can defer the list move and instead
				2993	* freeze it.
				2994	*/
				2995	new.frozen = 1;
				2996
				2997	} else { /* Needs to be taken off a list */
				2998
				2999	n = get_node(s, page_to_nid(page));
				3000	/*
				3001	* Speculatively acquire the list_lock.
				3002	* If the cmpxchg does not succeed then we may
				3003	* drop the list_lock without any processing.
				3004	*
				3005	* Otherwise the list_lock will synchronize with
				3006	* other processors updating the list of slabs.
				3007	*/
				3008	spin_lock_irqsave(&n->list_lock, flags);
				3009
				3010	}
				3011	}
				3012
				3013	} while (!cmpxchg_double_slab(s, page,
				3014	prior, counters,
				3015	head, new.counters,
				3016	"__slab_free"));
				3017
				3018	#if defined(CONFIG_CPU_ASR18XX) \|\| defined(CONFIG_CPU_ASR1901)
				3019	if (unlikely((((u32)head) < PAGE_OFFSET) && (head != NULL))) {
				3020	pr_err("wrong freelist: 0x%x page: 0x%x head: %x, tail: 0x%x, addr: 0x%x\n",
				3021	(u32)page->freelist, (u32)page, (u32)head, (u32)tail, (u32)addr);
				3022	BUG();
				3023	}
				3024	#endif
				3025
				3026	if (likely(!n)) {
				3027
				3028	/*
				3029	* If we just froze the page then put it onto the
				3030	* per cpu partial list.
				3031	*/
				3032	if (new.frozen && !was_frozen) {
				3033	put_cpu_partial(s, page, 1);
				3034	stat(s, CPU_PARTIAL_FREE);
				3035	}
				3036	/*
				3037	* The list lock was not taken therefore no list
				3038	* activity can be necessary.
				3039	*/
				3040	if (was_frozen)
				3041	stat(s, FREE_FROZEN);
				3042	return;
				3043	}
				3044
				3045	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
				3046	goto slab_empty;
				3047
				3048	/*
				3049	* Objects left in the slab. If it was not on the partial list before
				3050	* then add it.
				3051	*/
				3052	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
				3053	remove_full(s, n, page);
				3054	add_partial(n, page, DEACTIVATE_TO_TAIL);
				3055	stat(s, FREE_ADD_PARTIAL);
				3056	}
				3057	spin_unlock_irqrestore(&n->list_lock, flags);
				3058	return;
				3059
				3060	slab_empty:
				3061	if (prior) {
				3062	/*
				3063	* Slab on the partial list.
				3064	*/
				3065	remove_partial(n, page);
				3066	stat(s, FREE_REMOVE_PARTIAL);
				3067	} else {
				3068	/* Slab must be on the full list */
				3069	remove_full(s, n, page);
				3070	}
				3071
				3072	spin_unlock_irqrestore(&n->list_lock, flags);
				3073	stat(s, FREE_SLAB);
				3074	discard_slab(s, page);
				3075	}
				3076
				3077	/*
				3078	* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
				3079	* can perform fastpath freeing without additional function calls.
				3080	*
				3081	* The fastpath is only possible if we are freeing to the current cpu slab
				3082	* of this processor. This typically the case if we have just allocated
				3083	* the item before.
				3084	*
				3085	* If fastpath is not possible then fall back to __slab_free where we deal
				3086	* with all sorts of special processing.
				3087	*
				3088	* Bulk free of a freelist with several objects (all pointing to the
				3089	* same page) possible by specifying head and tail ptr, plus objects
				3090	* count (cnt). Bulk free indicated by tail pointer being set.
				3091	*/
				3092	static __always_inline void do_slab_free(struct kmem_cache *s,
				3093	struct page page, void head, void *tail,
				3094	int cnt, unsigned long addr)
				3095	{
				3096	void *tail_obj = tail ? : head;
				3097	struct kmem_cache_cpu *c;
				3098	unsigned long tid;
				3099	redo:
				3100	/*
				3101	* Determine the currently cpus per cpu slab.
				3102	* The cpu may change afterward. However that does not matter since
				3103	* data is retrieved via this pointer. If we are on the same cpu
				3104	* during the cmpxchg then the free will succeed.
				3105	*/
				3106	do {
				3107	tid = this_cpu_read(s->cpu_slab->tid);
				3108	c = raw_cpu_ptr(s->cpu_slab);
				3109	} while (IS_ENABLED(CONFIG_PREEMPT) &&
				3110	unlikely(tid != READ_ONCE(c->tid)));
				3111
				3112	/* Same with comment on barrier() in slab_alloc_node() */
				3113	barrier();
				3114
				3115	if (likely(page == c->page)) {
				3116	void **freelist = READ_ONCE(c->freelist);
				3117
				3118	#ifndef CONFIG_SLUB_DEBUG
				3119	check_valid_pointer_simple(s, page, head);
				3120	#endif
				3121	set_freepointer(s, tail_obj, freelist);
				3122
				3123	if (unlikely(!this_cpu_cmpxchg_double(
				3124	s->cpu_slab->freelist, s->cpu_slab->tid,
				3125	freelist, tid,
				3126	head, next_tid(tid)))) {
				3127
				3128	note_cmpxchg_failure("slab_free", s, tid);
				3129	goto redo;
				3130	}
				3131	#if defined(CONFIG_CPU_ASR18XX) \|\| defined(CONFIG_CPU_ASR1901)
				3132	if (unlikely((((u32)head) < PAGE_OFFSET) && (head != NULL))) {
				3133	pr_err("wrong freelist: 0x%x ccache: 0x%x head: %x, freelist: 0x%x\n",
				3134	(u32)c->freelist, (u32)c, (u32)head, (u32)freelist);
				3135	BUG();
				3136	}
				3137	#endif
				3138	stat(s, FREE_FASTPATH);
				3139	} else
				3140	__slab_free(s, page, head, tail_obj, cnt, addr);
				3141
				3142	}
				3143
				3144	static __always_inline void slab_free(struct kmem_cache s, struct page page,
				3145	void head, void tail, int cnt,
				3146	unsigned long addr)
				3147	{
				3148	/*
				3149	* With KASAN enabled slab_free_freelist_hook modifies the freelist
				3150	* to remove objects, whose reuse must be delayed.
				3151	*/
				3152	if (slab_free_freelist_hook(s, &head, &tail, &cnt))
				3153	do_slab_free(s, page, head, tail, cnt, addr);
				3154	}
				3155
				#ifdef CONFIG_KASAN_GENERIC
				/*
				 * Free the single object @x of @cache through do_slab_free() directly,
				 * i.e. without running slab_free_freelist_hook() first. @addr is the
				 * caller address passed along to the free path.
				 */
				void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
				{
					do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
				}
				#endif
				3162
				3163	void kmem_cache_free(struct kmem_cache s, void x)
				3164	{
				3165	s = cache_from_obj(s, x);
				3166	if (!s)
				3167	return;
				3168	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
				3169	trace_kmem_cache_free(_RET_IP_, x);
				3170	}
				3171	EXPORT_SYMBOL(kmem_cache_free);
				3172
				3173	struct detached_freelist {	/* batch of objects from one page, built by build_detached_freelist() */
				3174	struct page *page;	/* page shared by all objects in the list; NULL when no list was built */
				3175	void *tail;	/* last object of the list (its free pointer is set to NULL) */
				3176	void *freelist;	/* current head of the list */
				3177	int cnt;	/* number of objects linked into the list */
				3178	struct kmem_cache *s;	/* cache the objects are freed to */
				3179	};
				3180
				3181	/*
				3182	* This function progressively scans the array with free objects (with
				3183	* a limited look ahead) and extract objects belonging to the same
				3184	* page. It builds a detached freelist directly within the given
				3185	* page/objects. This can happen without any need for
				3186	* synchronization, because the objects are owned by running process.
				3187	* The freelist is build up as a single linked list in the objects.
				3188	* The idea is, that this detached freelist can then be bulk
				3189	* transferred to the real freelist(s), but only requiring a single
				3190	* synchronization primitive. Look ahead in the array is limited due
				3191	* to performance reasons.
				3192	*/
				3193	static inline
				3194	int build_detached_freelist(struct kmem_cache *s, size_t size,
				3195	void *p, struct detached_freelist df)
				3196	{
				3197	size_t first_skipped_index = 0;
				3198	int lookahead = 3;
				3199	void *object;
				3200	struct page *page;
				3201
				3202	/* Always re-init detached_freelist */
				3203	df->page = NULL;
				3204
				3205	do {
				3206	object = p[--size];
				3207	/* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
				3208	} while (!object && size);
				3209
				3210	if (!object)
				3211	return 0;
				3212
				3213	page = virt_to_head_page(object);
				3214	if (!s) {
				3215	/* Handle kalloc'ed objects */
				3216	if (unlikely(!PageSlab(page))) {
				3217	BUG_ON(!PageCompound(page));
				3218	kfree_hook(object);
				3219	__free_pages(page, compound_order(page));
				3220	p[size] = NULL; /* mark object processed */
				3221	return size;
				3222	}
				3223	/* Derive kmem_cache from object */
				3224	df->s = page->slab_cache;
				3225	} else {
				3226	df->s = cache_from_obj(s, object); /* Support for memcg */
				3227	}
				3228
				3229	if (is_kfence_address(object)) {
				3230	slab_free_hook(df->s, object);
				3231	__kfence_free(object);
				3232	p[size] = NULL; /* mark object processed */
				3233	return size;
				3234	}
				3235
				3236	/* Start new detached freelist */
				3237	df->page = page;
				3238	set_freepointer(df->s, object, NULL);
				3239	df->tail = object;
				3240	df->freelist = object;
				3241	p[size] = NULL; /* mark object processed */
				3242	df->cnt = 1;
				3243
				3244	while (size) {
				3245	object = p[--size];
				3246	if (!object)
				3247	continue; /* Skip processed objects */
				3248
				3249	/* df->page is always set at this point */
				3250	if (df->page == virt_to_head_page(object)) {
				3251	/* Opportunity build freelist */
				3252	set_freepointer(df->s, object, df->freelist);
				3253	df->freelist = object;
				3254	df->cnt++;
				3255	p[size] = NULL; /* mark object processed */
				3256
				3257	continue;
				3258	}
				3259
				3260	/* Limit look ahead search */
				3261	if (!--lookahead)
				3262	break;
				3263
				3264	if (!first_skipped_index)
				3265	first_skipped_index = size + 1;
				3266	}
				3267
				3268	return first_skipped_index;
				3269	}
				3270
				3271	/* Note that interrupts must be enabled when calling this function. */
				3272	void kmem_cache_free_bulk(struct kmem_cache s, size_t size, void *p)
				3273	{
				3274	if (WARN_ON(!size))
				3275	return;
				3276
				3277	do {
				3278	struct detached_freelist df;
				3279
				3280	size = build_detached_freelist(s, size, p, &df);
				3281	if (!df.page)
				3282	continue;
				3283
				3284	slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
				3285	} while (likely(size));
				3286	}
				3287	EXPORT_SYMBOL(kmem_cache_free_bulk);
				3288
				3289	/* Note that interrupts must be enabled when calling this function. */
				3290	int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
				3291	void **p)
				3292	{
				3293	struct kmem_cache_cpu *c;
				3294	int i;
				3295	struct kmem_cache *root_s = s;
				3296
				3297	/* memcg and kmem_cache debug support */
				3298	s = slab_pre_alloc_hook(s, flags);
				3299	if (unlikely(!s))
				3300	return false;
				3301	/*
				3302	* Drain objects in the per cpu slab, while disabling local
				3303	* IRQs, which protects against PREEMPT and interrupts
				3304	* handlers invoking normal fastpath.
				3305	*/
				3306	local_irq_disable();
				3307	c = this_cpu_ptr(s->cpu_slab);
				3308
				3309	for (i = 0; i < size; i++) {
				3310	/*
				3311	* 5.4 note: passing in original cachep to avoid problems with memcg
				3312	* accounting. Making KFENCE properly work with memcgs on older kernels
				3313	* is not worth the effort.
				3314	*/
				3315	void *object = kfence_alloc(root_s, s->object_size, flags);
				3316
				3317	if (unlikely(object)) {
				3318	p[i] = object;
				3319	continue;
				3320	}
				3321
				3322	object = c->freelist;
				3323	if (unlikely(!object)) {
				3324	/*
				3325	* We may have removed an object from c->freelist using
				3326	* the fastpath in the previous iteration; in that case,
				3327	* c->tid has not been bumped yet.
				3328	* Since ___slab_alloc() may reenable interrupts while
				3329	* allocating memory, we should bump c->tid now.
				3330	*/
				3331	c->tid = next_tid(c->tid);
				3332
				3333	/*
				3334	* Invoking slow path likely have side-effect
				3335	* of re-populating per CPU c->freelist
				3336	*/
				3337	p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
				3338	_RET_IP_, c);
				3339	if (unlikely(!p[i]))
				3340	goto error;
				3341
				3342	c = this_cpu_ptr(s->cpu_slab);
				3343	maybe_wipe_obj_freeptr(s, p[i]);
				3344
				3345	continue; /* goto for-loop */
				3346	}
				3347	c->freelist = get_freepointer(s, object);
				3348	#if defined(CONFIG_CPU_ASR18XX) \|\| defined(CONFIG_CPU_ASR1901)
				3349	if (unlikely((((u32)c->freelist) < PAGE_OFFSET) && (c->freelist != NULL))) {
				3350	pr_err("wrong freelist: 0x%x c: 0x%x object: %x, freeptr: 0x%x\n",
				3351	(u32)c->freelist, (u32)c, (u32)object, (u32)get_freepointer(s, object));
				3352	BUG();
				3353	}
				3354	if (unlikely((s->size & 0x3) == 0 && ((ulong)c->freelist & 0x3))) {
				3355	pr_err("wrong freelist: 0x%x c: 0x%x object: %x, freeptr: 0x%x\n",
				3356	(u32)c->freelist, (u32)c, (u32)object, (u32)get_freepointer(s, object));
				3357	WARN_ON(1);
				3358	}
				3359	#endif
				3360	p[i] = object;
				3361	maybe_wipe_obj_freeptr(s, p[i]);
				3362	}
				3363	c->tid = next_tid(c->tid);
				3364	local_irq_enable();
				3365
				3366	/* Clear memory outside IRQ disabled fastpath loop */
				3367	if (unlikely(slab_want_init_on_alloc(flags, s))) {
				3368	int j;
				3369
				3370	for (j = 0; j < i; j++)
				3371	memset(p[j], 0, s->object_size);
				3372	}
				3373
				3374	/* memcg and kmem_cache debug support */
				3375	slab_post_alloc_hook(s, flags, size, p);
				3376	return i;
				3377	error:
				3378	local_irq_enable();
				3379	slab_post_alloc_hook(s, flags, i, p);
				3380	__kmem_cache_free_bulk(s, i, p);
				3381	return 0;
				3382	}
				3383	EXPORT_SYMBOL(kmem_cache_alloc_bulk);
				3384
				3385
				3386	/*
				3387	* Object placement in a slab is made very easy because we always start at
				3388	* offset 0. If we tune the size of the object to the alignment then we can
				3389	* get the required alignment by putting one properly sized object after
				3390	* another.
				3391	*
				3392	* Notice that the allocation order determines the sizes of the per cpu
				3393	* caches. Each processor has always one slab available for allocations.
				3394	* Increasing the allocation order reduces the number of times that slabs
				3395	* must be moved on and off the partial lists and is therefore a factor in
				3396	* locking overhead.
				3397	*/
				3398
				3399	/*
				3400	* Minimum / Maximum order of slab pages. This influences locking overhead
				3401	* and slab fragmentation. A higher order reduces the number of partial slabs
				3402	* and increases the number of allocations possible without having to
				3403	* take the list_lock.
				3404	*/
				3405	static unsigned int slub_min_order;	/* lowest order slab_order() starts its search from */
				3406	static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;	/* upper bound calculate_order() tries first */
				3407	static unsigned int slub_min_objects;	/* wanted objects per slab; 0 lets calculate_order() derive it from nr_cpu_ids */
				3408
				3409	/*
				3410	* Calculate the order of allocation given an slab object size.
				3411	*
				3412	* The order of allocation has significant impact on performance and other
				3413	* system components. Generally order 0 allocations should be preferred since
				3414	* order 0 does not cause fragmentation in the page allocator. Larger objects
				3415	* be problematic to put into order 0 slabs because there may be too much
				3416	* unused space left. We go to a higher order if more than 1/16th of the slab
				3417	* would be wasted.
				3418	*
				3419	* In order to reach satisfactory performance we must ensure that a minimum
				3420	* number of objects is in one slab. Otherwise we may generate too much
				3421	* activity on the partial lists which requires taking the list_lock. This is
				3422	* less a concern for large slabs though which are rarely used.
				3423	*
				3424	* slub_max_order specifies the order where we begin to stop considering the
				3425	* number of objects in a slab as critical. If we reach slub_max_order then
				3426	* we try to keep the page order as low as possible. So we accept more waste
				3427	* of space in favor of a small page order.
				3428	*
				3429	* Higher order allocations also allow the placement of more objects in a
				3430	* slab and thereby reduce object handling overhead. If the user has
				3431	* requested a higher mininum order then we start with that one instead of
				3432	* the smallest order which will fit the object.
				3433	*/
				3434	static inline unsigned int slab_order(unsigned int size,
				3435	unsigned int min_objects, unsigned int max_order,
				3436	unsigned int fract_leftover)
				3437	{
				3438	unsigned int min_order = slub_min_order;
				3439	unsigned int order;
				3440
				3441	if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
				3442	return get_order(size * MAX_OBJS_PER_PAGE) - 1;
				3443
				3444	for (order = max(min_order, (unsigned int)get_order(min_objects * size));
				3445	order <= max_order; order++) {
				3446
				3447	unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
				3448	unsigned int rem;
				3449
				3450	rem = slab_size % size;
				3451
				3452	if (rem <= slab_size / fract_leftover)
				3453	break;
				3454	}
				3455
				3456	return order;
				3457	}
				3458
				3459	static inline int calculate_order(unsigned int size)
				3460	{
				3461	unsigned int order;
				3462	unsigned int min_objects;
				3463	unsigned int max_objects;
				3464
				3465	/*
				3466	* Attempt to find best configuration for a slab. This
				3467	* works by first attempting to generate a layout with
				3468	* the best configuration and backing off gradually.
				3469	*
				3470	* First we increase the acceptable waste in a slab. Then
				3471	* we reduce the minimum objects required in a slab.
				3472	*/
				3473	min_objects = slub_min_objects;
				3474	if (!min_objects)
				3475	min_objects = 4 * (fls(nr_cpu_ids) + 1);
				3476	max_objects = order_objects(slub_max_order, size);
				3477	min_objects = min(min_objects, max_objects);
				3478
				3479	while (min_objects > 1) {
				3480	unsigned int fraction;
				3481
				3482	fraction = 16;
				3483	while (fraction >= 4) {
				3484	order = slab_order(size, min_objects,
				3485	slub_max_order, fraction);
				3486	if (order <= slub_max_order)
				3487	return order;
				3488	fraction /= 2;
				3489	}
				3490	min_objects--;
				3491	}
				3492
				3493	/*
				3494	* We were unable to place multiple objects in a slab. Now
				3495	* lets see if we can place a single object there.
				3496	*/
				3497	order = slab_order(size, 1, slub_max_order, 1);
				3498	if (order <= slub_max_order)
				3499	return order;
				3500
				3501	/*
				3502	* Doh this slab cannot be placed using slub_max_order.
				3503	*/
				3504	order = slab_order(size, 1, MAX_ORDER, 1);
				3505	if (order < MAX_ORDER)
				3506	return order;
				3507	return -ENOSYS;
				3508	}
				3509
				3510	static void
				3511	init_kmem_cache_node(struct kmem_cache_node *n)
				3512	{
				3513	n->nr_partial = 0;
				3514	spin_lock_init(&n->list_lock);
				3515	INIT_LIST_HEAD(&n->partial);
				3516	#ifdef CONFIG_SLUB_DEBUG
				3517	atomic_long_set(&n->nr_slabs, 0);
				3518	atomic_long_set(&n->total_objects, 0);
				3519	INIT_LIST_HEAD(&n->full);
				3520	#endif
				3521	}
				3522
				3523	static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
				3524	{
				3525	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
				3526	KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
				3527
				3528	/*
				3529	* Must align to double word boundary for the double cmpxchg
				3530	* instructions to work; see __pcpu_double_call_return_bool().
				3531	*/
				3532	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
				3533	2 * sizeof(void *));
				3534
				3535	if (!s->cpu_slab)
				3536	return 0;
				3537
				3538	init_kmem_cache_cpus(s);
				3539
				3540	return 1;
				3541	}
				3542
				3543	static struct kmem_cache *kmem_cache_node;	/* cache that struct kmem_cache_node objects are allocated from and freed to */
				3544
				3545	/*
				3546	* No kmalloc_node yet so do it by hand. We know that this is the first
				3547	* slab on the node for this slabcache. There are no concurrent accesses
				3548	* possible.
				3549	*
				3550	* Note that this function only works on the kmem_cache_node
				3551	* when allocating for the kmem_cache_node. This is used for bootstrapping
				3552	* memory on a fresh node that has no slab structures yet.
				3553	*/
				3554	static void early_kmem_cache_node_alloc(int node)
				3555	{
				3556	struct page *page;
				3557	struct kmem_cache_node *n;
				3558
				3559	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
				3560
				3561	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
				3562
				3563	BUG_ON(!page);
				3564	if (page_to_nid(page) != node) {
				3565	pr_err("SLUB: Unable to allocate memory from node %d\n", node);
				3566	pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
				3567	}
				3568
				3569	n = page->freelist;
				3570	BUG_ON(!n);
				3571	#ifdef CONFIG_SLUB_DEBUG
				3572	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
				3573	init_tracking(kmem_cache_node, n);
				3574	#endif
				3575	n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
				3576	GFP_KERNEL);
				3577	page->freelist = get_freepointer(kmem_cache_node, n);
				3578	page->inuse = 1;
				3579	page->frozen = 0;
				3580	kmem_cache_node->node[node] = n;
				3581	init_kmem_cache_node(n);
				3582	inc_slabs_node(kmem_cache_node, node, page->objects);
				3583
				3584	/*
				3585	* No locks need to be taken here as it has just been
				3586	* initialized and there is no concurrent access.
				3587	*/
				3588	__add_partial(n, page, DEACTIVATE_TO_HEAD);
				3589	}
				3590
				3591	static void free_kmem_cache_nodes(struct kmem_cache *s)
				3592	{
				3593	int node;
				3594	struct kmem_cache_node *n;
				3595
				3596	for_each_kmem_cache_node(s, node, n) {
				3597	s->node[node] = NULL;
				3598	kmem_cache_free(kmem_cache_node, n);
				3599	}
				3600	}
				3601
				3602	void __kmem_cache_release(struct kmem_cache *s)
				3603	{
				3604	cache_random_seq_destroy(s);
				3605	free_percpu(s->cpu_slab);
				3606	free_kmem_cache_nodes(s);
				3607	}
				3608
				3609	static int init_kmem_cache_nodes(struct kmem_cache *s)
				3610	{
				3611	int node;
				3612
				3613	for_each_node_state(node, N_NORMAL_MEMORY) {
				3614	struct kmem_cache_node *n;
				3615
				3616	if (slab_state == DOWN) {
				3617	early_kmem_cache_node_alloc(node);
				3618	continue;
				3619	}
				3620	n = kmem_cache_alloc_node(kmem_cache_node,
				3621	GFP_KERNEL, node);
				3622
				3623	if (!n) {
				3624	free_kmem_cache_nodes(s);
				3625	return 0;
				3626	}
				3627
				3628	init_kmem_cache_node(n);
				3629	s->node[node] = n;
				3630	}
				3631	return 1;
				3632	}
				3633
				3634	static void set_min_partial(struct kmem_cache *s, unsigned long min)
				3635	{
				3636	if (min < MIN_PARTIAL)
				3637	min = MIN_PARTIAL;
				3638	else if (min > MAX_PARTIAL)
				3639	min = MAX_PARTIAL;
				3640	s->min_partial = min;
				3641	}
				3642
				/*
				 * Choose s->cpu_partial from the cache's slot size. Does nothing unless
				 * CONFIG_SLUB_CPU_PARTIAL is set.
				 */
				static void set_cpu_partial(struct kmem_cache *s)
				{
				#ifdef CONFIG_SLUB_CPU_PARTIAL
					/*
					 * cpu_partial determines the maximum number of objects kept in the
					 * per cpu partial lists of a processor.
					 *
					 * Per cpu partial lists mainly contain slabs that just have one
					 * object freed. If they are used for allocation then they can be
					 * filled up again with minimal effort. The slab will never hit the
					 * per node partial lists and therefore no locking will be required.
					 *
					 * This setting also determines
					 *
					 * A) The number of objects from per cpu partial slabs dumped to the
					 *    per node list when we reach the limit.
					 * B) The number of objects in cpu partial slabs to extract from the
					 *    per node list when we run out of per cpu objects. We only fetch
					 *    50% to keep some capacity around for frees.
					 */
					if (!kmem_cache_has_cpu_partial(s)) {
						s->cpu_partial = 0;
						return;
					}

					/* The larger the slot, the fewer objects are kept around. */
					if (s->size >= PAGE_SIZE) {
						s->cpu_partial = 2;
						return;
					}
					if (s->size >= 1024) {
						s->cpu_partial = 6;
						return;
					}
					if (s->size >= 256) {
						s->cpu_partial = 13;
						return;
					}

					s->cpu_partial = 30;
				#endif
				}
				3675
				3676	/*
				3677	* calculate_sizes() determines the order and the distribution of data within
				3678	* a slab object.
				3679	*/
				3680	static int calculate_sizes(struct kmem_cache *s, int forced_order)
				3681	{
				3682	slab_flags_t flags = s->flags;
				3683	unsigned int size = s->object_size;
				3684	unsigned int order;
				3685
				3686	/*
				3687	* Round up object size to the next word boundary. We can only
				3688	* place the free pointer at word boundaries and this determines
				3689	* the possible location of the free pointer.
				3690	*/
				3691	size = ALIGN(size, sizeof(void *));
				3692
				3693	#ifdef CONFIG_SLUB_DEBUG
				3694	/*
				3695	* Determine if we can poison the object itself. If the user of
				3696	* the slab may touch the object after free or before allocation
				3697	* then we should never poison the object itself.
				3698	*/
				3699	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
				3700	!s->ctor)
				3701	s->flags \|= __OBJECT_POISON;
				3702	else
				3703	s->flags &= ~__OBJECT_POISON;
				3704
				3705
				3706	/*
				3707	* If we are Redzoning then check if there is some space between the
				3708	* end of the object and the free pointer. If not then add an
				3709	* additional word to have some bytes to store Redzone information.
				3710	*/
				3711	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
				3712	size += sizeof(void *);
				3713	#endif
				3714
				3715	/*
				3716	* With that we have determined the number of bytes in actual use
				3717	* by the object. This is the potential offset to the free pointer.
				3718	*/
				3719	s->inuse = size;
				3720
				3721	if ((flags & (SLAB_TYPESAFE_BY_RCU \| SLAB_POISON)) \|\|
				3722	((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) \|\|
				3723	s->ctor) {
				3724	/*
				3725	* Relocate free pointer after the object if it is not
				3726	* permitted to overwrite the first word of the object on
				3727	* kmem_cache_free.
				3728	*
				3729	* This is the case if we do RCU, have a constructor or
				3730	* destructor, are poisoning the objects, or are
				3731	* redzoning an object smaller than sizeof(void *).
				3732	*
				3733	* The assumption that s->offset >= s->inuse means free
				3734	* pointer is outside of the object is used in the
				3735	* freeptr_outside_object() function. If that is no
				3736	* longer true, the function needs to be modified.
				3737	*/
				3738	s->offset = size;
				3739	size += sizeof(void *);
				3740	}
				3741
				3742	#ifdef CONFIG_SLUB_DEBUG
				3743	if (flags & SLAB_STORE_USER)
				3744	/*
				3745	* Need to store information about allocs and frees after
				3746	* the object.
				3747	*/
				3748	size += 2 * sizeof(struct track);
				3749	#endif
				3750
				3751	kasan_cache_create(s, &size, &s->flags);
				3752	#ifdef CONFIG_SLUB_DEBUG
				3753	if (flags & SLAB_RED_ZONE) {
				3754	/*
				3755	* Add some empty padding so that we can catch
				3756	* overwrites from earlier objects rather than let
				3757	* tracking information or the free pointer be
				3758	* corrupted if a user writes before the start
				3759	* of the object.
				3760	*/
				3761	size += sizeof(void *);
				3762
				3763	s->red_left_pad = sizeof(void *);
				3764	s->red_left_pad = ALIGN(s->red_left_pad, s->align);
				3765	size += s->red_left_pad;
				3766	}
				3767	#endif
				3768
				3769	/*
				3770	* SLUB stores one object immediately after another beginning from
				3771	* offset 0. In order to align the objects we have to simply size
				3772	* each object to conform to the alignment.
				3773	*/
				3774	size = ALIGN(size, s->align);
				3775	s->size = size;
				3776	if (forced_order >= 0)
				3777	order = forced_order;
				3778	else
				3779	order = calculate_order(size);
				3780
				3781	if ((int)order < 0)
				3782	return 0;
				3783
				3784	s->allocflags = 0;
				3785	if (order)
				3786	s->allocflags \|= __GFP_COMP;
				3787
				3788	if (s->flags & SLAB_CACHE_DMA)
				3789	s->allocflags \|= GFP_DMA;
				3790
				3791	if (s->flags & SLAB_CACHE_DMA32)
				3792	s->allocflags \|= GFP_DMA32;
				3793
				3794	if (s->flags & SLAB_RECLAIM_ACCOUNT)
				3795	s->allocflags \|= __GFP_RECLAIMABLE;
				3796
				3797	/*
				3798	* Determine the number of objects per slab
				3799	*/
				3800	s->oo = oo_make(order, size);
				3801	s->min = oo_make(get_order(size), size);
				3802	if (oo_objects(s->oo) > oo_objects(s->max))
				3803	s->max = s->oo;
				3804
				3805	return !!oo_objects(s->oo);
				3806	}
				3807
				3808	static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
				3809	{
				3810	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
				3811	#ifdef CONFIG_SLAB_FREELIST_HARDENED
				3812	s->random = get_random_long();
				3813	#endif
				3814
				3815	if (!calculate_sizes(s, -1))
				3816	goto error;
				3817	if (disable_higher_order_debug) {
				3818	/*
				3819	* Disable debugging flags that store metadata if the min slab
				3820	* order increased.
				3821	*/
				3822	if (get_order(s->size) > get_order(s->object_size)) {
				3823	s->flags &= ~DEBUG_METADATA_FLAGS;
				3824	s->offset = 0;
				3825	if (!calculate_sizes(s, -1))
				3826	goto error;
				3827	}
				3828	}
				3829
				3830	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				3831	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				3832	if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
				3833	/* Enable fast mode */
				3834	s->flags \|= __CMPXCHG_DOUBLE;
				3835	#endif
				3836
				3837	/*
				3838	* The larger the object size is, the more pages we want on the partial
				3839	* list to avoid pounding the page allocator excessively.
				3840	*/
				3841	set_min_partial(s, ilog2(s->size) / 2);
				3842
				3843	set_cpu_partial(s);
				3844
				3845	#ifdef CONFIG_NUMA
				3846	s->remote_node_defrag_ratio = 1000;
				3847	#endif
				3848
				3849	/* Initialize the pre-computed randomized freelist if slab is up */
				3850	if (slab_state >= UP) {
				3851	if (init_cache_random_seq(s))
				3852	goto error;
				3853	}
				3854
				3855	if (!init_kmem_cache_nodes(s))
				3856	goto error;
				3857
				3858	if (alloc_kmem_cache_cpus(s))
				3859	return 0;
				3860
				3861	error:
				3862	__kmem_cache_release(s);
				3863	return -EINVAL;
				3864	}
				3865
				3866	static void list_slab_objects(struct kmem_cache s, struct page page,
				3867	const char *text)
				3868	{
				3869	#ifdef CONFIG_SLUB_DEBUG
				3870	void *addr = page_address(page);
				3871	void *p;
				3872	unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
				3873	if (!map)
				3874	return;
				3875	slab_err(s, page, text, s->name);
				3876	slab_lock(page);
				3877
				3878	get_map(s, page, map);
				3879	for_each_object(p, s, addr, page->objects) {
				3880
				3881	if (!test_bit(slab_index(p, s, addr), map)) {
				3882	pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
				3883	print_tracking(s, p);
				3884	}
				3885	}
				3886	slab_unlock(page);
				3887	bitmap_free(map);
				3888	#endif
				3889	}
				3890
				3891	/*
				3892	* Attempt to free all partial slabs on a node.
				3893	* This is called from __kmem_cache_shutdown(). We must take list_lock
				3894	* because sysfs file might still access partial list after the shutdowning.
				3895	*/
				3896	static void free_partial(struct kmem_cache s, struct kmem_cache_node n)
				3897	{
				3898	LIST_HEAD(discard);
				3899	struct page page, h;
				3900
				3901	BUG_ON(irqs_disabled());
				3902	spin_lock_irq(&n->list_lock);
				3903	list_for_each_entry_safe(page, h, &n->partial, slab_list) {
				3904	if (!page->inuse) {
				3905	remove_partial(n, page);
				3906	list_add(&page->slab_list, &discard);
				3907	} else {
				3908	list_slab_objects(s, page,
				3909	"Objects remaining in %s on __kmem_cache_shutdown()");
				3910	}
				3911	}
				3912	spin_unlock_irq(&n->list_lock);
				3913
				3914	list_for_each_entry_safe(page, h, &discard, slab_list)
				3915	discard_slab(s, page);
				3916	}
				3917
				3918	bool __kmem_cache_empty(struct kmem_cache *s)
				3919	{
				3920	int node;
				3921	struct kmem_cache_node *n;
				3922
				3923	for_each_kmem_cache_node(s, node, n)
				3924	if (n->nr_partial \|\| slabs_node(s, node))
				3925	return false;
				3926	return true;
				3927	}
				3928
				3929	/*
				3930	* Release all resources used by a slab cache.
				3931	*/
				3932	int __kmem_cache_shutdown(struct kmem_cache *s)
				3933	{
				3934	int node;
				3935	struct kmem_cache_node *n;
				3936
				3937	flush_all(s);
				3938	/* Attempt to free all objects */
				3939	for_each_kmem_cache_node(s, node, n) {
				3940	free_partial(s, n);
				3941	if (n->nr_partial \|\| slabs_node(s, node))
				3942	return 1;
				3943	}
				3944	sysfs_slab_remove(s);
				3945	return 0;
				3946	}
				3947
				3948	/********************************************************************
				3949	* Kmalloc subsystem
				3950	*******************************************************************/
				3951
				3952	static int __init setup_slub_min_order(char *str)
				3953	{
				3954	get_option(&str, (int *)&slub_min_order);
				3955
				3956	return 1;
				3957	}
				3958
				3959	__setup("slub_min_order=", setup_slub_min_order);
				3960
				3961	static int __init setup_slub_max_order(char *str)
				3962	{
				3963	get_option(&str, (int *)&slub_max_order);
				3964	slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
				3965
				3966	return 1;
				3967	}
				3968
				3969	__setup("slub_max_order=", setup_slub_max_order);
				3970
				3971	static int __init setup_slub_min_objects(char *str)
				3972	{
				3973	get_option(&str, (int *)&slub_min_objects);
				3974
				3975	return 1;
				3976	}
				3977
				3978	__setup("slub_min_objects=", setup_slub_min_objects);
				3979
				3980	void *__kmalloc(size_t size, gfp_t flags)
				3981	{
				3982	struct kmem_cache *s;
				3983	void *ret;
				3984
				3985	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
				3986	return kmalloc_large(size, flags);
				3987
				3988	s = kmalloc_slab(size, flags);
				3989
				3990	if (unlikely(ZERO_OR_NULL_PTR(s)))
				3991	return s;
				3992
				3993	ret = slab_alloc(s, flags, _RET_IP_, size);
				3994
				3995	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
				3996
				3997	ret = kasan_kmalloc(s, ret, size, flags);
				3998
				3999	return ret;
				4000	}
				4001	EXPORT_SYMBOL(__kmalloc);
				4002
				4003	#ifdef CONFIG_NUMA
				4004	static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
				4005	{
				4006	struct page *page;
				4007	void *ptr = NULL;
				4008	unsigned int order = get_order(size);
				4009
				4010	flags \|= __GFP_COMP;
				4011	page = alloc_pages_node(node, flags, order);
				4012	if (page) {
				4013	ptr = page_address(page);
				4014	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
				4015	1 << order);
				4016	}
				4017
				4018	return kmalloc_large_node_hook(ptr, size, flags);
				4019	}
				4020
				4021	void *__kmalloc_node(size_t size, gfp_t flags, int node)
				4022	{
				4023	struct kmem_cache *s;
				4024	void *ret;
				4025
				4026	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
				4027	ret = kmalloc_large_node(size, flags, node);
				4028
				4029	trace_kmalloc_node(_RET_IP_, ret,
				4030	size, PAGE_SIZE << get_order(size),
				4031	flags, node);
				4032
				4033	return ret;
				4034	}
				4035
				4036	s = kmalloc_slab(size, flags);
				4037
				4038	if (unlikely(ZERO_OR_NULL_PTR(s)))
				4039	return s;
				4040
				4041	ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
				4042
				4043	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
				4044
				4045	ret = kasan_kmalloc(s, ret, size, flags);
				4046
				4047	return ret;
				4048	}
				4049	EXPORT_SYMBOL(__kmalloc_node);
				4050	#endif /* CONFIG_NUMA */
				4051
				4052	#ifdef CONFIG_HARDENED_USERCOPY
				4053	/*
				4054	* Rejects incorrectly sized objects and objects that are to be copied
				4055	* to/from userspace but do not fall entirely within the containing slab
				4056	* cache's usercopy region.
				4057	*
				4058	* Returns NULL if check passes, otherwise const char * to name of cache
				4059	* to indicate an error.
				4060	*/
				4061	void __check_heap_object(const void ptr, unsigned long n, struct page page,
				4062	bool to_user)
				4063	{
				4064	struct kmem_cache *s;
				4065	unsigned int offset;
				4066	size_t object_size;
				4067	bool is_kfence = is_kfence_address(ptr);
				4068
				4069	ptr = kasan_reset_tag(ptr);
				4070
				4071	/* Find object and usable object size. */
				4072	s = page->slab_cache;
				4073
				4074	/* Reject impossible pointers. */
				4075	if (ptr < page_address(page))
				4076	usercopy_abort("SLUB object not in SLUB page?!", NULL,
				4077	to_user, 0, n);
				4078
				4079	/* Find offset within object. */
				4080	if (is_kfence)
				4081	offset = ptr - kfence_object_start(ptr);
				4082	else
				4083	offset = (ptr - page_address(page)) % s->size;
				4084
				4085	/* Adjust for redzone and reject if within the redzone. */
				4086	if (!is_kfence && kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
				4087	if (offset < s->red_left_pad)
				4088	usercopy_abort("SLUB object in left red zone",
				4089	s->name, to_user, offset, n);
				4090	offset -= s->red_left_pad;
				4091	}
				4092
				4093	/* Allow address range falling entirely within usercopy region. */
				4094	if (offset >= s->useroffset &&
				4095	offset - s->useroffset <= s->usersize &&
				4096	n <= s->useroffset - offset + s->usersize)
				4097	return;
				4098
				4099	/*
				4100	* If the copy is still within the allocated object, produce
				4101	* a warning instead of rejecting the copy. This is intended
				4102	* to be a temporary method to find any missing usercopy
				4103	* whitelists.
				4104	*/
				4105	object_size = slab_ksize(s);
				4106	if (usercopy_fallback &&
				4107	offset <= object_size && n <= object_size - offset) {
				4108	usercopy_warn("SLUB object", s->name, to_user, offset, n);
				4109	return;
				4110	}
				4111
				4112	usercopy_abort("SLUB object", s->name, to_user, offset, n);
				4113	}
				4114	#endif /* CONFIG_HARDENED_USERCOPY */
				4115
				4116	size_t __ksize(const void *object)
				4117	{
				4118	struct page *page;
				4119
				4120	if (unlikely(object == ZERO_SIZE_PTR))
				4121	return 0;
				4122
				4123	page = virt_to_head_page(object);
				4124
				4125	if (unlikely(!PageSlab(page))) {
				4126	WARN_ON(!PageCompound(page));
				4127	return page_size(page);
				4128	}
				4129
				4130	return slab_ksize(page->slab_cache);
				4131	}
				4132	EXPORT_SYMBOL(__ksize);
				4133
				4134	void kfree(const void *x)
				4135	{
				4136	struct page *page;
				4137	void object = (void )x;
				4138
				4139	trace_kfree(_RET_IP_, x);
				4140
				4141	if (unlikely(ZERO_OR_NULL_PTR(x)))
				4142	return;
				4143
				4144	page = virt_to_head_page(x);
				4145	if (unlikely(!PageSlab(page))) {
				4146	unsigned int order = compound_order(page);
				4147
				4148	BUG_ON(!PageCompound(page));
				4149	kfree_hook(object);
				4150	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
				4151	-(1 << order));
				4152	__free_pages(page, order);
				4153	return;
				4154	}
				4155	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
				4156	}
				4157	EXPORT_SYMBOL(kfree);
				4158
				4159	#define SHRINK_PROMOTE_MAX 32
				4160
				4161	/*
				4162	* kmem_cache_shrink discards empty slabs and promotes the slabs filled
				4163	* up most to the head of the partial lists. New allocations will then
				4164	* fill those up and thus they can be removed from the partial lists.
				4165	*
				4166	* The slabs with the least items are placed last. This results in them
				4167	* being allocated from last increasing the chance that the last objects
				4168	* are freed in them.
				4169	*/
				4170	int __kmem_cache_shrink(struct kmem_cache *s)
				4171	{
				4172	int node;
				4173	int i;
				4174	struct kmem_cache_node *n;
				4175	struct page *page;
				4176	struct page *t;
				4177	struct list_head discard;
				4178	struct list_head promote[SHRINK_PROMOTE_MAX];
				4179	unsigned long flags;
				4180	int ret = 0;
				4181
				4182	flush_all(s);
				4183	for_each_kmem_cache_node(s, node, n) {
				4184	INIT_LIST_HEAD(&discard);
				4185	for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
				4186	INIT_LIST_HEAD(promote + i);
				4187
				4188	spin_lock_irqsave(&n->list_lock, flags);
				4189
				4190	/*
				4191	* Build lists of slabs to discard or promote.
				4192	*
				4193	* Note that concurrent frees may occur while we hold the
				4194	* list_lock. page->inuse here is the upper limit.
				4195	*/
				4196	list_for_each_entry_safe(page, t, &n->partial, slab_list) {
				4197	int free = page->objects - page->inuse;
				4198
				4199	/* Do not reread page->inuse */
				4200	barrier();
				4201
				4202	/* We do not keep full slabs on the list */
				4203	BUG_ON(free <= 0);
				4204
				4205	if (free == page->objects) {
				4206	list_move(&page->slab_list, &discard);
				4207	n->nr_partial--;
				4208	} else if (free <= SHRINK_PROMOTE_MAX)
				4209	list_move(&page->slab_list, promote + free - 1);
				4210	}
				4211
				4212	/*
				4213	* Promote the slabs filled up most to the head of the
				4214	* partial list.
				4215	*/
				4216	for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
				4217	list_splice(promote + i, &n->partial);
				4218
				4219	spin_unlock_irqrestore(&n->list_lock, flags);
				4220
				4221	/* Release empty slabs */
				4222	list_for_each_entry_safe(page, t, &discard, slab_list)
				4223	discard_slab(s, page);
				4224
				4225	if (slabs_node(s, node))
				4226	ret = 1;
				4227	}
				4228
				4229	return ret;
				4230	}
				4231
				4232	#ifdef CONFIG_MEMCG
				4233	void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
				4234	{
				4235	/*
				4236	* Called with all the locks held after a sched RCU grace period.
				4237	* Even if @s becomes empty after shrinking, we can't know that @s
				4238	* doesn't have allocations already in-flight and thus can't
				4239	* destroy @s until the associated memcg is released.
				4240	*
				4241	* However, let's remove the sysfs files for empty caches here.
				4242	* Each cache has a lot of interface files which aren't
				4243	* particularly useful for empty draining caches; otherwise, we can
				4244	* easily end up with millions of unnecessary sysfs files on
				4245	* systems which have a lot of memory and transient cgroups.
				4246	*/
				4247	if (!__kmem_cache_shrink(s))
				4248	sysfs_slab_remove(s);
				4249	}
				4250
				4251	void __kmemcg_cache_deactivate(struct kmem_cache *s)
				4252	{
				4253	/*
				4254	* Disable empty slabs caching. Used to avoid pinning offline
				4255	* memory cgroups by kmem pages that can be freed.
				4256	*/
				4257	slub_set_cpu_partial(s, 0);
				4258	s->min_partial = 0;
				4259	}
				4260	#endif /* CONFIG_MEMCG */
				4261
				4262	static int slab_mem_going_offline_callback(void *arg)
				4263	{
				4264	struct kmem_cache *s;
				4265
				4266	mutex_lock(&slab_mutex);
				4267	list_for_each_entry(s, &slab_caches, list)
				4268	__kmem_cache_shrink(s);
				4269	mutex_unlock(&slab_mutex);
				4270
				4271	return 0;
				4272	}
				4273
				4274	static void slab_mem_offline_callback(void *arg)
				4275	{
				4276	struct kmem_cache_node *n;
				4277	struct kmem_cache *s;
				4278	struct memory_notify *marg = arg;
				4279	int offline_node;
				4280
				4281	offline_node = marg->status_change_nid_normal;
				4282
				4283	/*
				4284	* If the node still has available memory. we need kmem_cache_node
				4285	* for it yet.
				4286	*/
				4287	if (offline_node < 0)
				4288	return;
				4289
				4290	mutex_lock(&slab_mutex);
				4291	list_for_each_entry(s, &slab_caches, list) {
				4292	n = get_node(s, offline_node);
				4293	if (n) {
				4294	/*
				4295	* if n->nr_slabs > 0, slabs still exist on the node
				4296	* that is going down. We were unable to free them,
				4297	* and offline_pages() function shouldn't call this
				4298	* callback. So, we must fail.
				4299	*/
				4300	BUG_ON(slabs_node(s, offline_node));
				4301
				4302	s->node[offline_node] = NULL;
				4303	kmem_cache_free(kmem_cache_node, n);
				4304	}
				4305	}
				4306	mutex_unlock(&slab_mutex);
				4307	}
				4308
				4309	static int slab_mem_going_online_callback(void *arg)
				4310	{
				4311	struct kmem_cache_node *n;
				4312	struct kmem_cache *s;
				4313	struct memory_notify *marg = arg;
				4314	int nid = marg->status_change_nid_normal;
				4315	int ret = 0;
				4316
				4317	/*
				4318	* If the node's memory is already available, then kmem_cache_node is
				4319	* already created. Nothing to do.
				4320	*/
				4321	if (nid < 0)
				4322	return 0;
				4323
				4324	/*
				4325	* We are bringing a node online. No memory is available yet. We must
				4326	* allocate a kmem_cache_node structure in order to bring the node
				4327	* online.
				4328	*/
				4329	mutex_lock(&slab_mutex);
				4330	list_for_each_entry(s, &slab_caches, list) {
				4331	/*
				4332	* XXX: kmem_cache_alloc_node will fallback to other nodes
				4333	* since memory is not yet available from the node that
				4334	* is brought up.
				4335	*/
				4336	n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
				4337	if (!n) {
				4338	ret = -ENOMEM;
				4339	goto out;
				4340	}
				4341	init_kmem_cache_node(n);
				4342	s->node[nid] = n;
				4343	}
				4344	out:
				4345	mutex_unlock(&slab_mutex);
				4346	return ret;
				4347	}
				4348
				4349	static int slab_memory_callback(struct notifier_block *self,
				4350	unsigned long action, void *arg)
				4351	{
				4352	int ret = 0;
				4353
				4354	switch (action) {
				4355	case MEM_GOING_ONLINE:
				4356	ret = slab_mem_going_online_callback(arg);
				4357	break;
				4358	case MEM_GOING_OFFLINE:
				4359	ret = slab_mem_going_offline_callback(arg);
				4360	break;
				4361	case MEM_OFFLINE:
				4362	case MEM_CANCEL_ONLINE:
				4363	slab_mem_offline_callback(arg);
				4364	break;
				4365	case MEM_ONLINE:
				4366	case MEM_CANCEL_OFFLINE:
				4367	break;
				4368	}
				4369	if (ret)
				4370	ret = notifier_from_errno(ret);
				4371	else
				4372	ret = NOTIFY_OK;
				4373	return ret;
				4374	}
				4375
				4376	static struct notifier_block slab_memory_callback_nb = {
				4377	.notifier_call = slab_memory_callback,
				4378	.priority = SLAB_CALLBACK_PRI,
				4379	};
				4380
				4381	/********************************************************************
				4382	* Basic setup of slabs
				4383	*******************************************************************/
				4384
				4385	/*
				4386	* Used for early kmem_cache structures that were allocated using
				4387	* the page allocator. Allocate them properly then fix up the pointers
				4388	* that may be pointing to the wrong kmem_cache structure.
				4389	*/
				4390
				4391	static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
				4392	{
				4393	int node;
				4394	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
				4395	struct kmem_cache_node *n;
				4396
				4397	memcpy(s, static_cache, kmem_cache->object_size);
				4398
				4399	/*
				4400	* This runs very early, and only the boot processor is supposed to be
				4401	* up. Even if it weren't true, IRQs are not up so we couldn't fire
				4402	* IPIs around.
				4403	*/
				4404	__flush_cpu_slab(s, smp_processor_id());
				4405	for_each_kmem_cache_node(s, node, n) {
				4406	struct page *p;
				4407
				4408	list_for_each_entry(p, &n->partial, slab_list)
				4409	p->slab_cache = s;
				4410
				4411	#ifdef CONFIG_SLUB_DEBUG
				4412	list_for_each_entry(p, &n->full, slab_list)
				4413	p->slab_cache = s;
				4414	#endif
				4415	}
				4416	slab_init_memcg_params(s);
				4417	list_add(&s->list, &slab_caches);
				4418	memcg_link_cache(s, NULL);
				4419	return s;
				4420	}
				4421
				4422	void __init kmem_cache_init(void)
				4423	{
				4424	static __initdata struct kmem_cache boot_kmem_cache,
				4425	boot_kmem_cache_node;
				4426
				4427	if (debug_guardpage_minorder())
				4428	slub_max_order = 0;
				4429
				4430	kmem_cache_node = &boot_kmem_cache_node;
				4431	kmem_cache = &boot_kmem_cache;
				4432
				4433	create_boot_cache(kmem_cache_node, "kmem_cache_node",
				4434	sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
				4435
				4436	register_hotmemory_notifier(&slab_memory_callback_nb);
				4437
				4438	/* Able to allocate the per node structures */
				4439	slab_state = PARTIAL;
				4440
				4441	create_boot_cache(kmem_cache, "kmem_cache",
				4442	offsetof(struct kmem_cache, node) +
				4443	nr_node_ids * sizeof(struct kmem_cache_node *),
				4444	SLAB_HWCACHE_ALIGN, 0, 0);
				4445
				4446	kmem_cache = bootstrap(&boot_kmem_cache);
				4447	kmem_cache_node = bootstrap(&boot_kmem_cache_node);
				4448
				4449	/* Now we can use the kmem_cache to allocate kmalloc slabs */
				4450	setup_kmalloc_cache_index_table();
				4451	create_kmalloc_caches(0);
				4452
				4453	/* Setup random freelists for each cache */
				4454	init_freelist_randomization();
				4455
				4456	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
				4457	slub_cpu_dead);
				4458
				4459	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
				4460	cache_line_size(),
				4461	slub_min_order, slub_max_order, slub_min_objects,
				4462	nr_cpu_ids, nr_node_ids);
				4463	}
				4464
				4465	void __init kmem_cache_init_late(void)
				4466	{
				4467	}
				4468
				4469	struct kmem_cache *
				4470	__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
				4471	slab_flags_t flags, void (ctor)(void ))
				4472	{
				4473	struct kmem_cache s, c;
				4474
				4475	s = find_mergeable(size, align, flags, name, ctor);
				4476	if (s) {
				4477	s->refcount++;
				4478
				4479	/*
				4480	* Adjust the object sizes so that we clear
				4481	* the complete object on kzalloc.
				4482	*/
				4483	s->object_size = max(s->object_size, size);
				4484	s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
				4485
				4486	for_each_memcg_cache(c, s) {
				4487	c->object_size = s->object_size;
				4488	c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
				4489	}
				4490
				4491	if (sysfs_slab_alias(s, name)) {
				4492	s->refcount--;
				4493	s = NULL;
				4494	}
				4495	}
				4496
				4497	return s;
				4498	}
				4499
				4500	int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
				4501	{
				4502	int err;
				4503
				4504	err = kmem_cache_open(s, flags);
				4505	if (err)
				4506	return err;
				4507
				4508	/* Mutex is not taken during early boot */
				4509	if (slab_state <= UP)
				4510	return 0;
				4511
				4512	memcg_propagate_slab_attrs(s);
				4513	err = sysfs_slab_add(s);
				4514	if (err)
				4515	__kmem_cache_release(s);
				4516
				4517	return err;
				4518	}
				4519
				4520	void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
				4521	{
				4522	struct kmem_cache *s;
				4523	void *ret;
				4524
				4525	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
				4526	return kmalloc_large(size, gfpflags);
				4527
				4528	s = kmalloc_slab(size, gfpflags);
				4529
				4530	if (unlikely(ZERO_OR_NULL_PTR(s)))
				4531	return s;
				4532
				4533	ret = slab_alloc(s, gfpflags, caller, size);
				4534
				4535	/* Honor the call site pointer we received. */
				4536	trace_kmalloc(caller, ret, size, s->size, gfpflags);
				4537
				4538	return ret;
				4539	}
				4540	EXPORT_SYMBOL(__kmalloc_track_caller);
				4541
				4542	#ifdef CONFIG_NUMA
				4543	void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
				4544	int node, unsigned long caller)
				4545	{
				4546	struct kmem_cache *s;
				4547	void *ret;
				4548
				4549	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
				4550	ret = kmalloc_large_node(size, gfpflags, node);
				4551
				4552	trace_kmalloc_node(caller, ret,
				4553	size, PAGE_SIZE << get_order(size),
				4554	gfpflags, node);
				4555
				4556	return ret;
				4557	}
				4558
				4559	s = kmalloc_slab(size, gfpflags);
				4560
				4561	if (unlikely(ZERO_OR_NULL_PTR(s)))
				4562	return s;
				4563
				4564	ret = slab_alloc_node(s, gfpflags, node, caller, size);
				4565
				4566	/* Honor the call site pointer we received. */
				4567	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
				4568
				4569	return ret;
				4570	}
				4571	EXPORT_SYMBOL(__kmalloc_node_track_caller);
				4572	#endif
				4573
				4574	#ifdef CONFIG_SYSFS
				4575	static int count_inuse(struct page *page)
				4576	{
				4577	return page->inuse;
				4578	}
				4579
				4580	static int count_total(struct page *page)
				4581	{
				4582	return page->objects;
				4583	}
				4584	#endif
				4585
				4586	#ifdef CONFIG_SLUB_DEBUG
				4587	static int validate_slab(struct kmem_cache s, struct page page,
				4588	unsigned long *map)
				4589	{
				4590	void *p;
				4591	void *addr = page_address(page);
				4592
				4593	if (!check_slab(s, page) \|\|
				4594	!on_freelist(s, page, NULL))
				4595	return 0;
				4596
				4597	/* Now we know that a valid freelist exists */
				4598	bitmap_zero(map, page->objects);
				4599
				4600	get_map(s, page, map);
				4601	for_each_object(p, s, addr, page->objects) {
				4602	if (test_bit(slab_index(p, s, addr), map))
				4603	if (!check_object(s, page, p, SLUB_RED_INACTIVE))
				4604	return 0;
				4605	}
				4606
				4607	for_each_object(p, s, addr, page->objects)
				4608	if (!test_bit(slab_index(p, s, addr), map))
				4609	if (!check_object(s, page, p, SLUB_RED_ACTIVE))
				4610	return 0;
				4611	return 1;
				4612	}
				4613
				4614	static void validate_slab_slab(struct kmem_cache s, struct page page,
				4615	unsigned long *map)
				4616	{
				4617	slab_lock(page);
				4618	validate_slab(s, page, map);
				4619	slab_unlock(page);
				4620	}
				4621
				4622	static int validate_slab_node(struct kmem_cache *s,
				4623	struct kmem_cache_node n, unsigned long map)
				4624	{
				4625	unsigned long count = 0;
				4626	struct page *page;
				4627	unsigned long flags;
				4628
				4629	spin_lock_irqsave(&n->list_lock, flags);
				4630
				4631	list_for_each_entry(page, &n->partial, slab_list) {
				4632	validate_slab_slab(s, page, map);
				4633	count++;
				4634	}
				4635	if (count != n->nr_partial)
				4636	pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
				4637	s->name, count, n->nr_partial);
				4638
				4639	if (!(s->flags & SLAB_STORE_USER))
				4640	goto out;
				4641
				4642	list_for_each_entry(page, &n->full, slab_list) {
				4643	validate_slab_slab(s, page, map);
				4644	count++;
				4645	}
				4646	if (count != atomic_long_read(&n->nr_slabs))
				4647	pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
				4648	s->name, count, atomic_long_read(&n->nr_slabs));
				4649
				4650	out:
				4651	spin_unlock_irqrestore(&n->list_lock, flags);
				4652	return count;
				4653	}
				4654
				4655	static long validate_slab_cache(struct kmem_cache *s)
				4656	{
				4657	int node;
				4658	unsigned long count = 0;
				4659	struct kmem_cache_node *n;
				4660	unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
				4661
				4662	if (!map)
				4663	return -ENOMEM;
				4664
				4665	flush_all(s);
				4666	for_each_kmem_cache_node(s, node, n)
				4667	count += validate_slab_node(s, n, map);
				4668	bitmap_free(map);
				4669	return count;
				4670	}
				4671	/*
				4672	* Generate lists of code addresses where slabcache objects are allocated
				4673	* and freed.
				4674	*/
				4675
				4676	struct location {
				4677	unsigned long count;
				4678	unsigned long addr;
				4679	long long sum_time;
				4680	long min_time;
				4681	long max_time;
				4682	long min_pid;
				4683	long max_pid;
				4684	DECLARE_BITMAP(cpus, NR_CPUS);
				4685	nodemask_t nodes;
				4686	};
				4687
				4688	struct loc_track {
				4689	unsigned long max;
				4690	unsigned long count;
				4691	struct location *loc;
				4692	};
				4693
				4694	static void free_loc_track(struct loc_track *t)
				4695	{
				4696	if (t->max)
				4697	free_pages((unsigned long)t->loc,
				4698	get_order(sizeof(struct location) * t->max));
				4699	}
				4700
				4701	static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
				4702	{
				4703	struct location *l;
				4704	int order;
				4705
				4706	order = get_order(sizeof(struct location) * max);
				4707
				4708	l = (void *)__get_free_pages(flags, order);
				4709	if (!l)
				4710	return 0;
				4711
				4712	if (t->count) {
				4713	memcpy(l, t->loc, sizeof(struct location) * t->count);
				4714	free_loc_track(t);
				4715	}
				4716	t->max = max;
				4717	t->loc = l;
				4718	return 1;
				4719	}
				4720
				4721	static int add_location(struct loc_track t, struct kmem_cache s,
				4722	const struct track *track)
				4723	{
				4724	long start, end, pos;
				4725	struct location *l;
				4726	unsigned long caddr;
				4727	unsigned long age = jiffies - track->when;
				4728
				4729	start = -1;
				4730	end = t->count;
				4731
				4732	for ( ; ; ) {
				4733	pos = start + (end - start + 1) / 2;
				4734
				4735	/*
				4736	* There is nothing at "end". If we end up there
				4737	* we need to add something to before end.
				4738	*/
				4739	if (pos == end)
				4740	break;
				4741
				4742	caddr = t->loc[pos].addr;
				4743	if (track->addr == caddr) {
				4744
				4745	l = &t->loc[pos];
				4746	l->count++;
				4747	if (track->when) {
				4748	l->sum_time += age;
				4749	if (age < l->min_time)
				4750	l->min_time = age;
				4751	if (age > l->max_time)
				4752	l->max_time = age;
				4753
				4754	if (track->pid < l->min_pid)
				4755	l->min_pid = track->pid;
				4756	if (track->pid > l->max_pid)
				4757	l->max_pid = track->pid;
				4758
				4759	cpumask_set_cpu(track->cpu,
				4760	to_cpumask(l->cpus));
				4761	}
				4762	node_set(page_to_nid(virt_to_page(track)), l->nodes);
				4763	return 1;
				4764	}
				4765
				4766	if (track->addr < caddr)
				4767	end = pos;
				4768	else
				4769	start = pos;
				4770	}
				4771
				4772	/*
				4773	* Not found. Insert new tracking element.
				4774	*/
				4775	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
				4776	return 0;
				4777
				4778	l = t->loc + pos;
				4779	if (pos < t->count)
				4780	memmove(l + 1, l,
				4781	(t->count - pos) * sizeof(struct location));
				4782	t->count++;
				4783	l->count = 1;
				4784	l->addr = track->addr;
				4785	l->sum_time = age;
				4786	l->min_time = age;
				4787	l->max_time = age;
				4788	l->min_pid = track->pid;
				4789	l->max_pid = track->pid;
				4790	cpumask_clear(to_cpumask(l->cpus));
				4791	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
				4792	nodes_clear(l->nodes);
				4793	node_set(page_to_nid(virt_to_page(track)), l->nodes);
				4794	return 1;
				4795	}
				4796
				4797	static void process_slab(struct loc_track t, struct kmem_cache s,
				4798	struct page *page, enum track_item alloc,
				4799	unsigned long *map)
				4800	{
				4801	void *addr = page_address(page);
				4802	void *p;
				4803
				4804	bitmap_zero(map, page->objects);
				4805	get_map(s, page, map);
				4806
				4807	for_each_object(p, s, addr, page->objects)
				4808	if (!test_bit(slab_index(p, s, addr), map))
				4809	add_location(t, s, get_track(s, p, alloc));
				4810	}
				4811
				4812	static int list_locations(struct kmem_cache s, char buf,
				4813	enum track_item alloc)
				4814	{
				4815	int len = 0;
				4816	unsigned long i;
				4817	struct loc_track t = { 0, 0, NULL };
				4818	int node;
				4819	struct kmem_cache_node *n;
				4820	unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
				4821
				4822	if (!map \|\| !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
				4823	GFP_KERNEL)) {
				4824	bitmap_free(map);
				4825	return sprintf(buf, "Out of memory\n");
				4826	}
				4827	/* Push back cpu slabs */
				4828	flush_all(s);
				4829
				4830	for_each_kmem_cache_node(s, node, n) {
				4831	unsigned long flags;
				4832	struct page *page;
				4833
				4834	if (!atomic_long_read(&n->nr_slabs))
				4835	continue;
				4836
				4837	spin_lock_irqsave(&n->list_lock, flags);
				4838	list_for_each_entry(page, &n->partial, slab_list)
				4839	process_slab(&t, s, page, alloc, map);
				4840	list_for_each_entry(page, &n->full, slab_list)
				4841	process_slab(&t, s, page, alloc, map);
				4842	spin_unlock_irqrestore(&n->list_lock, flags);
				4843	}
				4844
				4845	for (i = 0; i < t.count; i++) {
				4846	struct location *l = &t.loc[i];
				4847
				4848	if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
				4849	break;
				4850	len += sprintf(buf + len, "%7ld ", l->count);
				4851
				4852	if (l->addr)
				4853	len += sprintf(buf + len, "%pS", (void *)l->addr);
				4854	else
				4855	len += sprintf(buf + len, "<not-available>");
				4856
				4857	if (l->sum_time != l->min_time) {
				4858	len += sprintf(buf + len, " age=%ld/%ld/%ld",
				4859	l->min_time,
				4860	(long)div_u64(l->sum_time, l->count),
				4861	l->max_time);
				4862	} else
				4863	len += sprintf(buf + len, " age=%ld",
				4864	l->min_time);
				4865
				4866	if (l->min_pid != l->max_pid)
				4867	len += sprintf(buf + len, " pid=%ld-%ld",
				4868	l->min_pid, l->max_pid);
				4869	else
				4870	len += sprintf(buf + len, " pid=%ld",
				4871	l->min_pid);
				4872
				4873	if (num_online_cpus() > 1 &&
				4874	!cpumask_empty(to_cpumask(l->cpus)) &&
				4875	len < PAGE_SIZE - 60)
				4876	len += scnprintf(buf + len, PAGE_SIZE - len - 50,
				4877	" cpus=%*pbl",
				4878	cpumask_pr_args(to_cpumask(l->cpus)));
				4879
				4880	if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
				4881	len < PAGE_SIZE - 60)
				4882	len += scnprintf(buf + len, PAGE_SIZE - len - 50,
				4883	" nodes=%*pbl",
				4884	nodemask_pr_args(&l->nodes));
				4885
				4886	len += sprintf(buf + len, "\n");
				4887	}
				4888
				4889	free_loc_track(&t);
				4890	bitmap_free(map);
				4891	if (!t.count)
				4892	len += sprintf(buf, "No data\n");
				4893	return len;
				4894	}
				4895	#endif /* CONFIG_SLUB_DEBUG */
				4896
				4897	#ifdef SLUB_RESILIENCY_TEST
				4898	static void __init resiliency_test(void)
				4899	{
				4900	u8 *p;
				4901	int type = KMALLOC_NORMAL;
				4902
				4903	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 \|\| KMALLOC_SHIFT_HIGH < 10);
				4904
				4905	pr_err("SLUB resiliency testing\n");
				4906	pr_err("-----------------------\n");
				4907	pr_err("A. Corruption after allocation\n");
				4908
				4909	p = kzalloc(16, GFP_KERNEL);
				4910	p[16] = 0x12;
				4911	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
				4912	p + 16);
				4913
				4914	validate_slab_cache(kmalloc_caches[type][4]);
				4915
				4916	/* Hmmm... The next two are dangerous */
				4917	p = kzalloc(32, GFP_KERNEL);
				4918	p[32 + sizeof(void *)] = 0x34;
				4919	pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
				4920	p);
				4921	pr_err("If allocated object is overwritten then not detectable\n\n");
				4922
				4923	validate_slab_cache(kmalloc_caches[type][5]);
				4924	p = kzalloc(64, GFP_KERNEL);
				4925	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
				4926	*p = 0x56;
				4927	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
				4928	p);
				4929	pr_err("If allocated object is overwritten then not detectable\n\n");
				4930	validate_slab_cache(kmalloc_caches[type][6]);
				4931
				4932	pr_err("\nB. Corruption after free\n");
				4933	p = kzalloc(128, GFP_KERNEL);
				4934	kfree(p);
				4935	*p = 0x78;
				4936	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
				4937	validate_slab_cache(kmalloc_caches[type][7]);
				4938
				4939	p = kzalloc(256, GFP_KERNEL);
				4940	kfree(p);
				4941	p[50] = 0x9a;
				4942	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
				4943	validate_slab_cache(kmalloc_caches[type][8]);
				4944
				4945	p = kzalloc(512, GFP_KERNEL);
				4946	kfree(p);
				4947	p[512] = 0xab;
				4948	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
				4949	validate_slab_cache(kmalloc_caches[type][9]);
				4950	}
				4951	#else
				4952	#ifdef CONFIG_SYSFS
				/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined. */
				static void resiliency_test(void) {}
				4954	#endif
				4955	#endif /* SLUB_RESILIENCY_TEST */
				4956
				4957	#ifdef CONFIG_SYSFS
				/*
				 * Bit positions used to build the flags argument of
				 * show_slab_objects(); each SO_* mask below is 1 shifted by the
				 * matching SL_* value.
				 */
				enum slab_stat_type {
					SL_ALL = 0,	/* All slabs */
					SL_PARTIAL = 1,	/* Only partially allocated slabs */
					SL_CPU = 2,	/* Only slabs used for cpu caches */
					SL_OBJECTS = 3,	/* Determine allocated objects not slabs */
					SL_TOTAL = 4	/* Determine object capacity not slabs */
				};

				#define SO_ALL		(1 << SL_ALL)
				#define SO_PARTIAL	(1 << SL_PARTIAL)
				#define SO_CPU		(1 << SL_CPU)
				#define SO_OBJECTS	(1 << SL_OBJECTS)
				#define SO_TOTAL	(1 << SL_TOTAL)
				4971
				4972	#ifdef CONFIG_MEMCG
				4973	static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
				4974
				4975	static int __init setup_slub_memcg_sysfs(char *str)
				4976	{
				4977	int v;
				4978
				4979	if (get_option(&str, &v) > 0)
				4980	memcg_sysfs_enabled = v;
				4981
				4982	return 1;
				4983	}
				4984
				4985	__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
				4986	#endif
				4987
				4988	static ssize_t show_slab_objects(struct kmem_cache *s,
				4989	char *buf, unsigned long flags)
				4990	{
				4991	unsigned long total = 0;
				4992	int node;
				4993	int x;
				4994	unsigned long *nodes;
				4995
				4996	nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
				4997	if (!nodes)
				4998	return -ENOMEM;
				4999
				5000	if (flags & SO_CPU) {
				5001	int cpu;
				5002
				5003	for_each_possible_cpu(cpu) {
				5004	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
				5005	cpu);
				5006	int node;
				5007	struct page *page;
				5008
				5009	page = READ_ONCE(c->page);
				5010	if (!page)
				5011	continue;
				5012
				5013	node = page_to_nid(page);
				5014	if (flags & SO_TOTAL)
				5015	x = page->objects;
				5016	else if (flags & SO_OBJECTS)
				5017	x = page->inuse;
				5018	else
				5019	x = 1;
				5020
				5021	total += x;
				5022	nodes[node] += x;
				5023
				5024	page = slub_percpu_partial_read_once(c);
				5025	if (page) {
				5026	node = page_to_nid(page);
				5027	if (flags & SO_TOTAL)
				5028	WARN_ON_ONCE(1);
				5029	else if (flags & SO_OBJECTS)
				5030	WARN_ON_ONCE(1);
				5031	else
				5032	x = page->pages;
				5033	total += x;
				5034	nodes[node] += x;
				5035	}
				5036	}
				5037	}
				5038
				5039	/*
				5040	* It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
				5041	* already held which will conflict with an existing lock order:
				5042	*
				5043	* mem_hotplug_lock->slab_mutex->kernfs_mutex
				5044	*
				5045	* We don't really need mem_hotplug_lock (to hold off
				5046	* slab_mem_going_offline_callback) here because slab's memory hot
				5047	* unplug code doesn't destroy the kmem_cache->node[] data.
				5048	*/
				5049
				5050	#ifdef CONFIG_SLUB_DEBUG
				5051	if (flags & SO_ALL) {
				5052	struct kmem_cache_node *n;
				5053
				5054	for_each_kmem_cache_node(s, node, n) {
				5055
				5056	if (flags & SO_TOTAL)
				5057	x = atomic_long_read(&n->total_objects);
				5058	else if (flags & SO_OBJECTS)
				5059	x = atomic_long_read(&n->total_objects) -
				5060	count_partial(n, count_free);
				5061	else
				5062	x = atomic_long_read(&n->nr_slabs);
				5063	total += x;
				5064	nodes[node] += x;
				5065	}
				5066
				5067	} else
				5068	#endif
				5069	if (flags & SO_PARTIAL) {
				5070	struct kmem_cache_node *n;
				5071
				5072	for_each_kmem_cache_node(s, node, n) {
				5073	if (flags & SO_TOTAL)
				5074	x = count_partial(n, count_total);
				5075	else if (flags & SO_OBJECTS)
				5076	x = count_partial(n, count_inuse);
				5077	else
				5078	x = n->nr_partial;
				5079	total += x;
				5080	nodes[node] += x;
				5081	}
				5082	}
				5083	x = sprintf(buf, "%lu", total);
				5084	#ifdef CONFIG_NUMA
				5085	for (node = 0; node < nr_node_ids; node++)
				5086	if (nodes[node])
				5087	x += sprintf(buf + x, " N%d=%lu",
				5088	node, nodes[node]);
				5089	#endif
				5090	kfree(nodes);
				5091	return x + sprintf(buf + x, "\n");
				5092	}
				5093
				5094	#ifdef CONFIG_SLUB_DEBUG
				5095	static int any_slab_objects(struct kmem_cache *s)
				5096	{
				5097	int node;
				5098	struct kmem_cache_node *n;
				5099
				5100	for_each_kmem_cache_node(s, node, n)
				5101	if (atomic_long_read(&n->total_objects))
				5102	return 1;
				5103
				5104	return 0;
				5105	}
				5106	#endif
				5107
				/* Map an embedded struct attribute back to its slab_attribute. */
				#define to_slab_attr(ptr) container_of(ptr, struct slab_attribute, attr)
				/* Map an embedded kobject back to its kmem_cache. */
				#define to_slab(ptr) container_of(ptr, struct kmem_cache, kobj)
				5110
				5111	struct slab_attribute {
				5112	struct attribute attr;
				5113	ssize_t (show)(struct kmem_cache s, char *buf);
				5114	ssize_t (store)(struct kmem_cache s, const char *x, size_t count);
				5115	};
				5116
				/* Define <_name>_attr as a read-only (0400) attribute using <_name>_show. */
				#define SLAB_ATTR_RO(_name)					\
					static struct slab_attribute _name##_attr =		\
					__ATTR(_name, 0400, _name##_show, NULL)

				/* Define <_name>_attr as a 0600 attribute using <_name>_show/_store. */
				#define SLAB_ATTR(_name)					\
					static struct slab_attribute _name##_attr =		\
					__ATTR(_name, 0600, _name##_show, _name##_store)
				5124
				5125	static ssize_t slab_size_show(struct kmem_cache s, char buf)
				5126	{
				5127	return sprintf(buf, "%u\n", s->size);
				5128	}
				5129	SLAB_ATTR_RO(slab_size);
				5130
				5131	static ssize_t align_show(struct kmem_cache s, char buf)
				5132	{
				5133	return sprintf(buf, "%u\n", s->align);
				5134	}
				5135	SLAB_ATTR_RO(align);
				5136
				5137	static ssize_t object_size_show(struct kmem_cache s, char buf)
				5138	{
				5139	return sprintf(buf, "%u\n", s->object_size);
				5140	}
				5141	SLAB_ATTR_RO(object_size);
				5142
				5143	static ssize_t objs_per_slab_show(struct kmem_cache s, char buf)
				5144	{
				5145	return sprintf(buf, "%u\n", oo_objects(s->oo));
				5146	}
				5147	SLAB_ATTR_RO(objs_per_slab);
				5148
				5149	static ssize_t order_store(struct kmem_cache *s,
				5150	const char *buf, size_t length)
				5151	{
				5152	unsigned int order;
				5153	int err;
				5154
				5155	err = kstrtouint(buf, 10, &order);
				5156	if (err)
				5157	return err;
				5158
				5159	if (order > slub_max_order \|\| order < slub_min_order)
				5160	return -EINVAL;
				5161
				5162	calculate_sizes(s, order);
				5163	return length;
				5164	}
				5165
				5166	static ssize_t order_show(struct kmem_cache s, char buf)
				5167	{
				5168	return sprintf(buf, "%u\n", oo_order(s->oo));
				5169	}
				5170	SLAB_ATTR(order);
				5171
				5172	static ssize_t min_partial_show(struct kmem_cache s, char buf)
				5173	{
				5174	return sprintf(buf, "%lu\n", s->min_partial);
				5175	}
				5176
				5177	static ssize_t min_partial_store(struct kmem_cache s, const char buf,
				5178	size_t length)
				5179	{
				5180	unsigned long min;
				5181	int err;
				5182
				5183	err = kstrtoul(buf, 10, &min);
				5184	if (err)
				5185	return err;
				5186
				5187	set_min_partial(s, min);
				5188	return length;
				5189	}
				5190	SLAB_ATTR(min_partial);
				5191
				5192	static ssize_t cpu_partial_show(struct kmem_cache s, char buf)
				5193	{
				5194	return sprintf(buf, "%u\n", slub_cpu_partial(s));
				5195	}
				5196
				5197	static ssize_t cpu_partial_store(struct kmem_cache s, const char buf,
				5198	size_t length)
				5199	{
				5200	unsigned int objects;
				5201	int err;
				5202
				5203	err = kstrtouint(buf, 10, &objects);
				5204	if (err)
				5205	return err;
				5206	if (objects && !kmem_cache_has_cpu_partial(s))
				5207	return -EINVAL;
				5208
				5209	slub_set_cpu_partial(s, objects);
				5210	flush_all(s);
				5211	return length;
				5212	}
				5213	SLAB_ATTR(cpu_partial);
				5214
				5215	static ssize_t ctor_show(struct kmem_cache s, char buf)
				5216	{
				5217	if (!s->ctor)
				5218	return 0;
				5219	return sprintf(buf, "%pS\n", s->ctor);
				5220	}
				5221	SLAB_ATTR_RO(ctor);
				5222
				5223	static ssize_t aliases_show(struct kmem_cache s, char buf)
				5224	{
				5225	return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
				5226	}
				5227	SLAB_ATTR_RO(aliases);
				5228
				5229	static ssize_t partial_show(struct kmem_cache s, char buf)
				5230	{
				5231	return show_slab_objects(s, buf, SO_PARTIAL);
				5232	}
				5233	SLAB_ATTR_RO(partial);
				5234
				5235	static ssize_t cpu_slabs_show(struct kmem_cache s, char buf)
				5236	{
				5237	return show_slab_objects(s, buf, SO_CPU);
				5238	}
				5239	SLAB_ATTR_RO(cpu_slabs);
				5240
				5241	static ssize_t objects_show(struct kmem_cache s, char buf)
				5242	{
				5243	return show_slab_objects(s, buf, SO_ALL\|SO_OBJECTS);
				5244	}
				5245	SLAB_ATTR_RO(objects);
				5246
				5247	static ssize_t objects_partial_show(struct kmem_cache s, char buf)
				5248	{
				5249	return show_slab_objects(s, buf, SO_PARTIAL\|SO_OBJECTS);
				5250	}
				5251	SLAB_ATTR_RO(objects_partial);
				5252
				5253	static ssize_t slabs_cpu_partial_show(struct kmem_cache s, char buf)
				5254	{
				5255	int objects = 0;
				5256	int pages = 0;
				5257	int cpu;
				5258	int len;
				5259
				5260	for_each_online_cpu(cpu) {
				5261	struct page *page;
				5262
				5263	page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
				5264
				5265	if (page) {
				5266	pages += page->pages;
				5267	objects += page->pobjects;
				5268	}
				5269	}
				5270
				5271	len = sprintf(buf, "%d(%d)", objects, pages);
				5272
				5273	#ifdef CONFIG_SMP
				5274	for_each_online_cpu(cpu) {
				5275	struct page *page;
				5276
				5277	page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
				5278
				5279	if (page && len < PAGE_SIZE - 20)
				5280	len += sprintf(buf + len, " C%d=%d(%d)", cpu,
				5281	page->pobjects, page->pages);
				5282	}
				5283	#endif
				5284	return len + sprintf(buf + len, "\n");
				5285	}
				5286	SLAB_ATTR_RO(slabs_cpu_partial);
				5287
				5288	static ssize_t reclaim_account_show(struct kmem_cache s, char buf)
				5289	{
				5290	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
				5291	}
				5292
				5293	static ssize_t reclaim_account_store(struct kmem_cache *s,
				5294	const char *buf, size_t length)
				5295	{
				5296	s->flags &= ~SLAB_RECLAIM_ACCOUNT;
				5297	if (buf[0] == '1')
				5298	s->flags \|= SLAB_RECLAIM_ACCOUNT;
				5299	return length;
				5300	}
				5301	SLAB_ATTR(reclaim_account);
				5302
				5303	static ssize_t hwcache_align_show(struct kmem_cache s, char buf)
				5304	{
				5305	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
				5306	}
				5307	SLAB_ATTR_RO(hwcache_align);
				5308
				5309	#ifdef CONFIG_ZONE_DMA
				5310	static ssize_t cache_dma_show(struct kmem_cache s, char buf)
				5311	{
				5312	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
				5313	}
				5314	SLAB_ATTR_RO(cache_dma);
				5315	#endif
				5316
				5317	static ssize_t usersize_show(struct kmem_cache s, char buf)
				5318	{
				5319	return sprintf(buf, "%u\n", s->usersize);
				5320	}
				5321	SLAB_ATTR_RO(usersize);
				5322
				5323	static ssize_t destroy_by_rcu_show(struct kmem_cache s, char buf)
				5324	{
				5325	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
				5326	}
				5327	SLAB_ATTR_RO(destroy_by_rcu);
				5328
				5329	#ifdef CONFIG_SLUB_DEBUG
				5330	static ssize_t slabs_show(struct kmem_cache s, char buf)
				5331	{
				5332	return show_slab_objects(s, buf, SO_ALL);
				5333	}
				5334	SLAB_ATTR_RO(slabs);
				5335
				5336	static ssize_t total_objects_show(struct kmem_cache s, char buf)
				5337	{
				5338	return show_slab_objects(s, buf, SO_ALL\|SO_TOTAL);
				5339	}
				5340	SLAB_ATTR_RO(total_objects);
				5341
				5342	static ssize_t sanity_checks_show(struct kmem_cache s, char buf)
				5343	{
				5344	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
				5345	}
				5346
				5347	static ssize_t sanity_checks_store(struct kmem_cache *s,
				5348	const char *buf, size_t length)
				5349	{
				5350	s->flags &= ~SLAB_CONSISTENCY_CHECKS;
				5351	if (buf[0] == '1') {
				5352	s->flags &= ~__CMPXCHG_DOUBLE;
				5353	s->flags \|= SLAB_CONSISTENCY_CHECKS;
				5354	}
				5355	return length;
				5356	}
				5357	SLAB_ATTR(sanity_checks);
				5358
				5359	static ssize_t trace_show(struct kmem_cache s, char buf)
				5360	{
				5361	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
				5362	}
				5363
				5364	static ssize_t trace_store(struct kmem_cache s, const char buf,
				5365	size_t length)
				5366	{
				5367	/*
				5368	* Tracing a merged cache is going to give confusing results
				5369	* as well as cause other issues like converting a mergeable
				5370	* cache into an umergeable one.
				5371	*/
				5372	if (s->refcount > 1)
				5373	return -EINVAL;
				5374
				5375	s->flags &= ~SLAB_TRACE;
				5376	if (buf[0] == '1') {
				5377	s->flags &= ~__CMPXCHG_DOUBLE;
				5378	s->flags \|= SLAB_TRACE;
				5379	}
				5380	return length;
				5381	}
				5382	SLAB_ATTR(trace);
				5383
				5384	static ssize_t red_zone_show(struct kmem_cache s, char buf)
				5385	{
				5386	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
				5387	}
				5388
				5389	static ssize_t red_zone_store(struct kmem_cache *s,
				5390	const char *buf, size_t length)
				5391	{
				5392	if (any_slab_objects(s))
				5393	return -EBUSY;
				5394
				5395	s->flags &= ~SLAB_RED_ZONE;
				5396	if (buf[0] == '1') {
				5397	s->flags \|= SLAB_RED_ZONE;
				5398	}
				5399	calculate_sizes(s, -1);
				5400	return length;
				5401	}
				5402	SLAB_ATTR(red_zone);
				5403
				5404	static ssize_t poison_show(struct kmem_cache s, char buf)
				5405	{
				5406	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
				5407	}
				5408
				5409	static ssize_t poison_store(struct kmem_cache *s,
				5410	const char *buf, size_t length)
				5411	{
				5412	if (any_slab_objects(s))
				5413	return -EBUSY;
				5414
				5415	s->flags &= ~SLAB_POISON;
				5416	if (buf[0] == '1') {
				5417	s->flags \|= SLAB_POISON;
				5418	}
				5419	calculate_sizes(s, -1);
				5420	return length;
				5421	}
				5422	SLAB_ATTR(poison);
				5423
				5424	static ssize_t store_user_show(struct kmem_cache s, char buf)
				5425	{
				5426	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
				5427	}
				5428
				5429	static ssize_t store_user_store(struct kmem_cache *s,
				5430	const char *buf, size_t length)
				5431	{
				5432	if (any_slab_objects(s))
				5433	return -EBUSY;
				5434
				5435	s->flags &= ~SLAB_STORE_USER;
				5436	if (buf[0] == '1') {
				5437	s->flags &= ~__CMPXCHG_DOUBLE;
				5438	s->flags \|= SLAB_STORE_USER;
				5439	}
				5440	calculate_sizes(s, -1);
				5441	return length;
				5442	}
				5443	SLAB_ATTR(store_user);
				5444
				5445	static ssize_t validate_show(struct kmem_cache s, char buf)
				5446	{
				5447	return 0;
				5448	}
				5449
				5450	static ssize_t validate_store(struct kmem_cache *s,
				5451	const char *buf, size_t length)
				5452	{
				5453	int ret = -EINVAL;
				5454
				5455	if (buf[0] == '1') {
				5456	ret = validate_slab_cache(s);
				5457	if (ret >= 0)
				5458	ret = length;
				5459	}
				5460	return ret;
				5461	}
				5462	SLAB_ATTR(validate);
				5463
				5464	static ssize_t alloc_calls_show(struct kmem_cache s, char buf)
				5465	{
				5466	if (!(s->flags & SLAB_STORE_USER))
				5467	return -ENOSYS;
				5468	return list_locations(s, buf, TRACK_ALLOC);
				5469	}
				5470	SLAB_ATTR_RO(alloc_calls);
				5471
				5472	static ssize_t free_calls_show(struct kmem_cache s, char buf)
				5473	{
				5474	if (!(s->flags & SLAB_STORE_USER))
				5475	return -ENOSYS;
				5476	return list_locations(s, buf, TRACK_FREE);
				5477	}
				5478	SLAB_ATTR_RO(free_calls);
				5479	#endif /* CONFIG_SLUB_DEBUG */
				5480
				5481	#ifdef CONFIG_FAILSLAB
				5482	static ssize_t failslab_show(struct kmem_cache s, char buf)
				5483	{
				5484	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
				5485	}
				5486
				5487	static ssize_t failslab_store(struct kmem_cache s, const char buf,
				5488	size_t length)
				5489	{
				5490	if (s->refcount > 1)
				5491	return -EINVAL;
				5492
				5493	s->flags &= ~SLAB_FAILSLAB;
				5494	if (buf[0] == '1')
				5495	s->flags \|= SLAB_FAILSLAB;
				5496	return length;
				5497	}
				5498	SLAB_ATTR(failslab);
				5499	#endif
				5500
				5501	static ssize_t shrink_show(struct kmem_cache s, char buf)
				5502	{
				5503	return 0;
				5504	}
				5505
				5506	static ssize_t shrink_store(struct kmem_cache *s,
				5507	const char *buf, size_t length)
				5508	{
				5509	if (buf[0] == '1')
				5510	kmem_cache_shrink_all(s);
				5511	else
				5512	return -EINVAL;
				5513	return length;
				5514	}
				5515	SLAB_ATTR(shrink);
				5516
				5517	#ifdef CONFIG_NUMA
				5518	static ssize_t remote_node_defrag_ratio_show(struct kmem_cache s, char buf)
				5519	{
				5520	return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
				5521	}
				5522
				5523	static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				5524	const char *buf, size_t length)
				5525	{
				5526	unsigned int ratio;
				5527	int err;
				5528
				5529	err = kstrtouint(buf, 10, &ratio);
				5530	if (err)
				5531	return err;
				5532	if (ratio > 100)
				5533	return -ERANGE;
				5534
				5535	s->remote_node_defrag_ratio = ratio * 10;
				5536
				5537	return length;
				5538	}
				5539	SLAB_ATTR(remote_node_defrag_ratio);
				5540	#endif
				5541
				5542	#ifdef CONFIG_SLUB_STATS
				5543	static int show_stat(struct kmem_cache s, char buf, enum stat_item si)
				5544	{
				5545	unsigned long sum = 0;
				5546	int cpu;
				5547	int len;
				5548	int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
				5549
				5550	if (!data)
				5551	return -ENOMEM;
				5552
				5553	for_each_online_cpu(cpu) {
				5554	unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
				5555
				5556	data[cpu] = x;
				5557	sum += x;
				5558	}
				5559
				5560	len = sprintf(buf, "%lu", sum);
				5561
				5562	#ifdef CONFIG_SMP
				5563	for_each_online_cpu(cpu) {
				5564	if (data[cpu] && len < PAGE_SIZE - 20)
				5565	len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
				5566	}
				5567	#endif
				5568	kfree(data);
				5569	return len + sprintf(buf + len, "\n");
				5570	}
				5571
				5572	static void clear_stat(struct kmem_cache *s, enum stat_item si)
				5573	{
				5574	int cpu;
				5575
				5576	for_each_online_cpu(cpu)
				5577	per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
				5578	}
				5579
				5580	#define STAT_ATTR(si, text) \
				5581	static ssize_t text##_show(struct kmem_cache s, char buf) \
				5582	{ \
				5583	return show_stat(s, buf, si); \
				5584	} \
				5585	static ssize_t text##_store(struct kmem_cache *s, \
				5586	const char *buf, size_t length) \
				5587	{ \
				5588	if (buf[0] != '0') \
				5589	return -EINVAL; \
				5590	clear_stat(s, si); \
				5591	return length; \
				5592	} \
				5593	SLAB_ATTR(text); \
				5594
				5595	STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
				5596	STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
				5597	STAT_ATTR(FREE_FASTPATH, free_fastpath);
				5598	STAT_ATTR(FREE_SLOWPATH, free_slowpath);
				5599	STAT_ATTR(FREE_FROZEN, free_frozen);
				5600	STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
				5601	STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
				5602	STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
				5603	STAT_ATTR(ALLOC_SLAB, alloc_slab);
				5604	STAT_ATTR(ALLOC_REFILL, alloc_refill);
				5605	STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
				5606	STAT_ATTR(FREE_SLAB, free_slab);
				5607	STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
				5608	STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
				5609	STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
				5610	STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
				5611	STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
				5612	STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
				5613	STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
				5614	STAT_ATTR(ORDER_FALLBACK, order_fallback);
				5615	STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
				5616	STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
				5617	STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
				5618	STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
				5619	STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
				5620	STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
				5621	#endif /* CONFIG_SLUB_STATS */
				5622
				5623	static struct attribute *slab_attrs[] = {
				5624	&slab_size_attr.attr,
				5625	&object_size_attr.attr,
				5626	&objs_per_slab_attr.attr,
				5627	&order_attr.attr,
				5628	&min_partial_attr.attr,
				5629	&cpu_partial_attr.attr,
				5630	&objects_attr.attr,
				5631	&objects_partial_attr.attr,
				5632	&partial_attr.attr,
				5633	&cpu_slabs_attr.attr,
				5634	&ctor_attr.attr,
				5635	&aliases_attr.attr,
				5636	&align_attr.attr,
				5637	&hwcache_align_attr.attr,
				5638	&reclaim_account_attr.attr,
				5639	&destroy_by_rcu_attr.attr,
				5640	&shrink_attr.attr,
				5641	&slabs_cpu_partial_attr.attr,
				5642	#ifdef CONFIG_SLUB_DEBUG
				5643	&total_objects_attr.attr,
				5644	&slabs_attr.attr,
				5645	&sanity_checks_attr.attr,
				5646	&trace_attr.attr,
				5647	&red_zone_attr.attr,
				5648	&poison_attr.attr,
				5649	&store_user_attr.attr,
				5650	&validate_attr.attr,
				5651	&alloc_calls_attr.attr,
				5652	&free_calls_attr.attr,
				5653	#endif
				5654	#ifdef CONFIG_ZONE_DMA
				5655	&cache_dma_attr.attr,
				5656	#endif
				5657	#ifdef CONFIG_NUMA
				5658	&remote_node_defrag_ratio_attr.attr,
				5659	#endif
				5660	#ifdef CONFIG_SLUB_STATS
				5661	&alloc_fastpath_attr.attr,
				5662	&alloc_slowpath_attr.attr,
				5663	&free_fastpath_attr.attr,
				5664	&free_slowpath_attr.attr,
				5665	&free_frozen_attr.attr,
				5666	&free_add_partial_attr.attr,
				5667	&free_remove_partial_attr.attr,
				5668	&alloc_from_partial_attr.attr,
				5669	&alloc_slab_attr.attr,
				5670	&alloc_refill_attr.attr,
				5671	&alloc_node_mismatch_attr.attr,
				5672	&free_slab_attr.attr,
				5673	&cpuslab_flush_attr.attr,
				5674	&deactivate_full_attr.attr,
				5675	&deactivate_empty_attr.attr,
				5676	&deactivate_to_head_attr.attr,
				5677	&deactivate_to_tail_attr.attr,
				5678	&deactivate_remote_frees_attr.attr,
				5679	&deactivate_bypass_attr.attr,
				5680	&order_fallback_attr.attr,
				5681	&cmpxchg_double_fail_attr.attr,
				5682	&cmpxchg_double_cpu_fail_attr.attr,
				5683	&cpu_partial_alloc_attr.attr,
				5684	&cpu_partial_free_attr.attr,
				5685	&cpu_partial_node_attr.attr,
				5686	&cpu_partial_drain_attr.attr,
				5687	#endif
				5688	#ifdef CONFIG_FAILSLAB
				5689	&failslab_attr.attr,
				5690	#endif
				5691	&usersize_attr.attr,
				5692
				5693	NULL
				5694	};
				5695
				5696	static const struct attribute_group slab_attr_group = {
				5697	.attrs = slab_attrs,
				5698	};
				5699
				5700	static ssize_t slab_attr_show(struct kobject *kobj,
				5701	struct attribute *attr,
				5702	char *buf)
				5703	{
				5704	struct slab_attribute *attribute;
				5705	struct kmem_cache *s;
				5706	int err;
				5707
				5708	attribute = to_slab_attr(attr);
				5709	s = to_slab(kobj);
				5710
				5711	if (!attribute->show)
				5712	return -EIO;
				5713
				5714	err = attribute->show(s, buf);
				5715
				5716	return err;
				5717	}
				5718
				5719	static ssize_t slab_attr_store(struct kobject *kobj,
				5720	struct attribute *attr,
				5721	const char *buf, size_t len)
				5722	{
				5723	struct slab_attribute *attribute;
				5724	struct kmem_cache *s;
				5725	int err;
				5726
				5727	attribute = to_slab_attr(attr);
				5728	s = to_slab(kobj);
				5729
				5730	if (!attribute->store)
				5731	return -EIO;
				5732
				5733	err = attribute->store(s, buf, len);
				5734	#ifdef CONFIG_MEMCG
				5735	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
				5736	struct kmem_cache *c;
				5737
				5738	mutex_lock(&slab_mutex);
				5739	if (s->max_attr_size < len)
				5740	s->max_attr_size = len;
				5741
				5742	/*
				5743	* This is a best effort propagation, so this function's return
				5744	* value will be determined by the parent cache only. This is
				5745	* basically because not all attributes will have a well
				5746	* defined semantics for rollbacks - most of the actions will
				5747	* have permanent effects.
				5748	*
				5749	* Returning the error value of any of the children that fail
				5750	* is not 100 % defined, in the sense that users seeing the
				5751	* error code won't be able to know anything about the state of
				5752	* the cache.
				5753	*
				5754	* Only returning the error code for the parent cache at least
				5755	* has well defined semantics. The cache being written to
				5756	* directly either failed or succeeded, in which case we loop
				5757	* through the descendants with best-effort propagation.
				5758	*/
				5759	for_each_memcg_cache(c, s)
				5760	attribute->store(c, buf, len);
				5761	mutex_unlock(&slab_mutex);
				5762	}
				5763	#endif
				5764	return err;
				5765	}
				5766
				/*
				 * Copy the current attribute values of the root cache to memcg cache
				 * @s by reading each attribute that has both show and store callbacks
				 * from the root and writing the result to @s.
				 *
				 * Does nothing without CONFIG_MEMCG, for root caches, or when no
				 * attribute has ever been written on the root cache.
				 */
				static void memcg_propagate_slab_attrs(struct kmem_cache *s)
				{
				#ifdef CONFIG_MEMCG
					int i;
					char *buffer = NULL;
					struct kmem_cache *root_cache;

					if (is_root_cache(s))
						return;

					root_cache = s->memcg_params.root_cache;

					/*
					 * This mean this cache had no attribute written. Therefore, no point
					 * in copying default values around
					 */
					if (!root_cache->max_attr_size)
						return;

					/*
					 * The loop also visits the NULL terminator of slab_attrs; the !attr
					 * test below skips it.
					 */
					for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
						char mbuf[64];
						char *buf;
						struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
						ssize_t len;

						if (!attr || !attr->store || !attr->show)
							continue;

						/*
						 * It is really bad that we have to allocate here, so we will
						 * do it only as a fallback. If we actually allocate, though,
						 * we can just use the allocated buffer until the end.
						 *
						 * Most of the slub attributes will tend to be very small in
						 * size, but sysfs allows buffers up to a page, so they can
						 * theoretically happen.
						 */
						if (buffer)
							buf = buffer;
						else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) &&
							 !IS_ENABLED(CONFIG_SLUB_STATS))
							buf = mbuf;
						else {
							buffer = (char *) get_zeroed_page(GFP_KERNEL);
							if (WARN_ON(!buffer))
								continue;
							buf = buffer;
						}

						len = attr->show(root_cache, buf);
						if (len > 0)
							attr->store(s, buf, len);
					}

					if (buffer)
						free_page((unsigned long)buffer);
				#endif	/* CONFIG_MEMCG */
				}
				5825
				/* kobject release callback: hand the owning cache to slab_kmem_cache_release(). */
				static void kmem_cache_release(struct kobject *k)
				{
					struct kmem_cache *s = to_slab(k);

					slab_kmem_cache_release(s);
				}
				5830
				5831	static const struct sysfs_ops slab_sysfs_ops = {
				5832	.show = slab_attr_show,
				5833	.store = slab_attr_store,
				5834	};
				5835
				5836	static struct kobj_type slab_ktype = {
				5837	.sysfs_ops = &slab_sysfs_ops,
				5838	.release = kmem_cache_release,
				5839	};
				5840
				5841	static int uevent_filter(struct kset kset, struct kobject kobj)
				5842	{
				5843	struct kobj_type *ktype = get_ktype(kobj);
				5844
				5845	if (ktype == &slab_ktype)
				5846	return 1;
				5847	return 0;
				5848	}
				5849
				5850	static const struct kset_uevent_ops slab_uevent_ops = {
				5851	.filter = uevent_filter,
				5852	};
				5853
				5854	static struct kset *slab_kset;
				5855
				5856	static inline struct kset cache_kset(struct kmem_cache s)
				5857	{
				5858	#ifdef CONFIG_MEMCG
				5859	if (!is_root_cache(s))
				5860	return s->memcg_params.root_cache->memcg_kset;
				5861	#endif
				5862	return slab_kset;
				5863	}
				5864
				5865	#define ID_STR_LENGTH 64
				5866
				5867	/* Create a unique string id for a slab cache:
				5868	*
				5869	* Format :[flags-]size
				5870	*/
				5871	static char create_unique_id(struct kmem_cache s)
				5872	{
				5873	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
				5874	char *p = name;
				5875
				5876	if (!name)
				5877	return ERR_PTR(-ENOMEM);
				5878
				5879	*p++ = ':';
				5880	/*
				5881	* First flags affecting slabcache operations. We will only
				5882	* get here for aliasable slabs so we do not need to support
				5883	* too many flags. The flags here must cover all flags that
				5884	* are matched during merging to guarantee that the id is
				5885	* unique.
				5886	*/
				5887	if (s->flags & SLAB_CACHE_DMA)
				5888	*p++ = 'd';
				5889	if (s->flags & SLAB_CACHE_DMA32)
				5890	*p++ = 'D';
				5891	if (s->flags & SLAB_RECLAIM_ACCOUNT)
				5892	*p++ = 'a';
				5893	if (s->flags & SLAB_CONSISTENCY_CHECKS)
				5894	*p++ = 'F';
				5895	if (s->flags & SLAB_ACCOUNT)
				5896	*p++ = 'A';
				5897	if (p != name + 1)
				5898	*p++ = '-';
				5899	p += sprintf(p, "%07u", s->size);
				5900
				5901	BUG_ON(p > name + ID_STR_LENGTH - 1);
				5902	return name;
				5903	}
				5904
				5905	static void sysfs_slab_remove_workfn(struct work_struct *work)
				5906	{
				5907	struct kmem_cache *s =
				5908	container_of(work, struct kmem_cache, kobj_remove_work);
				5909
				5910	if (!s->kobj.state_in_sysfs)
				5911	/*
				5912	* For a memcg cache, this may be called during
				5913	* deactivation and again on shutdown. Remove only once.
				5914	* A cache is never shut down before deactivation is
				5915	* complete, so no need to worry about synchronization.
				5916	*/
				5917	goto out;
				5918
				5919	#ifdef CONFIG_MEMCG
				5920	kset_unregister(s->memcg_kset);
				5921	#endif
				5922	kobject_uevent(&s->kobj, KOBJ_REMOVE);
				5923	out:
				5924	kobject_put(&s->kobj);
				5925	}
				5926
				5927	static int sysfs_slab_add(struct kmem_cache *s)
				5928	{
				5929	int err;
				5930	const char *name;
				5931	struct kset *kset = cache_kset(s);
				5932	int unmergeable = slab_unmergeable(s);
				5933
				5934	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
				5935
				5936	if (!kset) {
				5937	kobject_init(&s->kobj, &slab_ktype);
				5938	return 0;
				5939	}
				5940
				5941	if (!unmergeable && disable_higher_order_debug &&
				5942	(slub_debug & DEBUG_METADATA_FLAGS))
				5943	unmergeable = 1;
				5944
				5945	if (unmergeable) {
				5946	/*
				5947	* Slabcache can never be merged so we can use the name proper.
				5948	* This is typically the case for debug situations. In that
				5949	* case we can catch duplicate names easily.
				5950	*/
				5951	sysfs_remove_link(&slab_kset->kobj, s->name);
				5952	name = s->name;
				5953	} else {
				5954	/*
				5955	* Create a unique name for the slab as a target
				5956	* for the symlinks.
				5957	*/
				5958	name = create_unique_id(s);
				5959	if (IS_ERR(name))
				5960	return PTR_ERR(name);
				5961	}
				5962
				5963	s->kobj.kset = kset;
				5964	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
				5965	if (err)
				5966	goto out;
				5967
				5968	err = sysfs_create_group(&s->kobj, &slab_attr_group);
				5969	if (err)
				5970	goto out_del_kobj;
				5971
				5972	#ifdef CONFIG_MEMCG
				5973	if (is_root_cache(s) && memcg_sysfs_enabled) {
				5974	s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
				5975	if (!s->memcg_kset) {
				5976	err = -ENOMEM;
				5977	goto out_del_kobj;
				5978	}
				5979	}
				5980	#endif
				5981
				5982	kobject_uevent(&s->kobj, KOBJ_ADD);
				5983	if (!unmergeable) {
				5984	/* Setup first alias */
				5985	sysfs_slab_alias(s, s->name);
				5986	}
				5987	out:
				5988	if (!unmergeable)
				5989	kfree(name);
				5990	return err;
				5991	out_del_kobj:
				5992	kobject_del(&s->kobj);
				5993	goto out;
				5994	}
				5995
				5996	static void sysfs_slab_remove(struct kmem_cache *s)
				5997	{
				5998	if (slab_state < FULL)
				5999	/*
				6000	* Sysfs has not been setup yet so no need to remove the
				6001	* cache from sysfs.
				6002	*/
				6003	return;
				6004
				6005	kobject_get(&s->kobj);
				6006	schedule_work(&s->kobj_remove_work);
				6007	}
				6008
				6009	void sysfs_slab_unlink(struct kmem_cache *s)
				6010	{
				6011	if (slab_state >= FULL)
				6012	kobject_del(&s->kobj);
				6013	}
				6014
				6015	void sysfs_slab_release(struct kmem_cache *s)
				6016	{
				6017	if (slab_state >= FULL)
				6018	kobject_put(&s->kobj);
				6019	}
				6020
				/*
				 * Aliases requested during bootup are buffered here until sysfs becomes
				 * available, so that the information is not lost.
				 */
				struct saved_alias {
					struct kmem_cache *s;		/* cache the alias refers to */
					const char *name;		/* alias name (pointer is stored, not copied) */
					struct saved_alias *next;	/* next buffered alias, or NULL */
				};

				/* Head of the singly linked list of buffered aliases. */
				static struct saved_alias *alias_list;
				6032
				6033	static int sysfs_slab_alias(struct kmem_cache s, const char name)
				6034	{
				6035	struct saved_alias *al;
				6036
				6037	if (slab_state == FULL) {
				6038	/*
				6039	* If we have a leftover link then remove it.
				6040	*/
				6041	sysfs_remove_link(&slab_kset->kobj, name);
				6042	return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
				6043	}
				6044
				6045	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
				6046	if (!al)
				6047	return -ENOMEM;
				6048
				6049	al->s = s;
				6050	al->name = name;
				6051	al->next = alias_list;
				6052	alias_list = al;
				6053	return 0;
				6054	}
				6055
				6056	static int __init slab_sysfs_init(void)
				6057	{
				6058	struct kmem_cache *s;
				6059	int err;
				6060
				6061	mutex_lock(&slab_mutex);
				6062
				6063	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
				6064	if (!slab_kset) {
				6065	mutex_unlock(&slab_mutex);
				6066	pr_err("Cannot register slab subsystem.\n");
				6067	return -ENOSYS;
				6068	}
				6069
				6070	slab_state = FULL;
				6071
				6072	list_for_each_entry(s, &slab_caches, list) {
				6073	err = sysfs_slab_add(s);
				6074	if (err)
				6075	pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
				6076	s->name);
				6077	}
				6078
				6079	while (alias_list) {
				6080	struct saved_alias *al = alias_list;
				6081
				6082	alias_list = alias_list->next;
				6083	err = sysfs_slab_alias(al->s, al->name);
				6084	if (err)
				6085	pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
				6086	al->name);
				6087	kfree(al);
				6088	}
				6089
				6090	mutex_unlock(&slab_mutex);
				6091	resiliency_test();
				6092	return 0;
				6093	}
				6094
				6095	__initcall(slab_sysfs_init);
				6096	#endif /* CONFIG_SYSFS */
				6097
				6098	/*
				6099	* The /proc/slabinfo ABI
				6100	*/
				6101	#ifdef CONFIG_SLUB_DEBUG
				6102	void get_slabinfo(struct kmem_cache s, struct slabinfo sinfo)
				6103	{
				6104	unsigned long nr_slabs = 0;
				6105	unsigned long nr_objs = 0;
				6106	unsigned long nr_free = 0;
				6107	int node;
				6108	struct kmem_cache_node *n;
				6109
				6110	for_each_kmem_cache_node(s, node, n) {
				6111	nr_slabs += node_nr_slabs(n);
				6112	nr_objs += node_nr_objs(n);
				6113	nr_free += count_partial(n, count_free);
				6114	}
				6115
				6116	sinfo->active_objs = nr_objs - nr_free;
				6117	sinfo->num_objs = nr_objs;
				6118	sinfo->active_slabs = nr_slabs;
				6119	sinfo->num_slabs = nr_slabs;
				6120	sinfo->objects_per_slab = oo_objects(s->oo);
				6121	sinfo->cache_order = oo_order(s->oo);
				6122	}
				6123
				/* Intentionally empty: this implementation emits no extra per-cache stats. */
				void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
				{
				}
				6127
				6128	ssize_t slabinfo_write(struct file file, const char __user buffer,
				6129	size_t count, loff_t *ppos)
				6130	{
				6131	return -EIO;
				6132	}
				6133	#endif /* CONFIG_SLUB_DEBUG */