Blame - src/kernel/linux/v4.19/mm/slub.c - T800

blob: b6f57c88f25cf214c2f20e150c27fcf68bd91462 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* SLUB: A slab allocator that limits cache line use instead of queuing
				4	* objects in per cpu and per node lists.
				5	*
				6	* The allocator synchronizes using per slab locks or atomic operations
				7	* and only uses a centralized lock to manage a pool of partial slabs.
				8	*
				9	* (C) 2007 SGI, Christoph Lameter
				10	* (C) 2011 Linux Foundation, Christoph Lameter
				11	*/
				12
				13	#include <linux/mm.h>
				14	#include <linux/swap.h> /* struct reclaim_state */
				15	#include <linux/module.h>
				16	#include <linux/bit_spinlock.h>
				17	#include <linux/interrupt.h>
				18	#include <linux/bitops.h>
				19	#include <linux/slab.h>
				20	#include "slab.h"
				21	#include <linux/proc_fs.h>
				22	#include <linux/seq_file.h>
				23	#include <linux/kasan.h>
				24	#include <linux/cpu.h>
				25	#include <linux/cpuset.h>
				26	#include <linux/mempolicy.h>
				27	#include <linux/ctype.h>
				28	#include <linux/debugobjects.h>
				29	#include <linux/kallsyms.h>
				30	#include <linux/memory.h>
				31	#include <linux/math64.h>
				32	#include <linux/fault-inject.h>
				33	#include <linux/stacktrace.h>
				34	#include <linux/prefetch.h>
				35	#include <linux/memcontrol.h>
				36	#include <linux/random.h>
				37
				38	#include <trace/events/kmem.h>
				39
				40	#include "internal.h"
				41
				42	/*
				43	* Lock order:
				44	* 1. slab_mutex (Global Mutex)
				45	* 2. node->list_lock
				46	* 3. slab_lock(page) (Only on some arches and for debugging)
				47	*
				48	* slab_mutex
				49	*
				50	* The role of the slab_mutex is to protect the list of all the slabs
				51	* and to synchronize major metadata changes to slab cache structures.
				52	*
				53	* The slab_lock is only used for debugging and on arches that do not
				54	* have the ability to do a cmpxchg_double. It only protects:
				55	* A. page->freelist -> List of object free in a page
				56	* B. page->inuse -> Number of objects in use
				57	* C. page->objects -> Number of objects in page
				58	* D. page->frozen -> frozen state
				59	*
				60	* If a slab is frozen then it is exempt from list management. It is not
				61	* on any list. The processor that froze the slab is the one who can
				62	* perform list operations on the page. Other processors may put objects
				63	* onto the freelist but the processor that froze the slab is the only
				64	* one that can retrieve the objects from the page's freelist.
				65	*
				66	* The list_lock protects the partial and full list on each node and
				67	* the partial slab counter. If taken then no new slabs may be added or
				68	* removed from the lists nor make the number of partial slabs be modified.
				69	* (Note that the total number of slabs is an atomic value that may be
				70	* modified without taking the list lock).
				71	*
				72	* The list_lock is a centralized lock and thus we avoid taking it as
				73	* much as possible. As long as SLUB does not have to handle partial
				74	* slabs, operations can continue without any centralized lock. F.e.
				75	* allocating a long series of objects that fill up slabs does not require
				76	* the list lock.
				77	* Interrupts are disabled during allocation and deallocation in order to
				78	* make the slab allocator safe to use in the context of an irq. In addition
				79	* interrupts are disabled to ensure that the processor does not change
				80	* while handling per_cpu slabs, due to kernel preemption.
				81	*
				82	* SLUB assigns one slab for allocation to each processor.
				83	* Allocations only occur from these slabs called cpu slabs.
				84	*
				85	* Slabs with free elements are kept on a partial list and during regular
				86	* operations no list for full slabs is used. If an object in a full slab is
				87	* freed then the slab will show up again on the partial lists.
				88	* We track full slabs for debugging purposes though because otherwise we
				89	* cannot scan all objects.
				90	*
				91	* Slabs are freed when they become empty. Teardown and setup is
				92	* minimal so we rely on the page allocators per cpu caches for
				93	* fast frees and allocs.
				94	*
				95	* Overloading of page flags that are otherwise used for LRU management.
				96	*
				97	* PageActive The slab is frozen and exempt from list processing.
				98	* This means that the slab is dedicated to a purpose
				99	* such as satisfying allocations for a specific
				100	* processor. Objects may be freed in the slab while
				101	* it is frozen but slab_free will then skip the usual
				102	* list operations. It is up to the processor holding
				103	* the slab to integrate the slab into the slab lists
				104	* when the slab is no longer needed.
				105	*
				106	* One use of this flag is to mark slabs that are
				107	* used for allocations. Then such a slab becomes a cpu
				108	* slab. The cpu slab may be equipped with an additional
				109	* freelist that allows lockless access to
				110	* free objects in addition to the regular freelist
				111	* that requires the slab lock.
				112	*
				113	* PageError Slab requires special handling due to debug
				114	* options set. This moves slab handling out of
				115	* the fast path and disables lockless freelists.
				116	*/
				117
				/*
				 * Tell whether cache @s has any of the SLAB_DEBUG_FLAGS set.
				 * Evaluates to 0 unconditionally when CONFIG_SLUB_DEBUG is not built in.
				 */
				static inline int kmem_cache_debug(struct kmem_cache *s)
				{
				#ifndef CONFIG_SLUB_DEBUG
					return 0;
				#else
					return unlikely(s->flags & SLAB_DEBUG_FLAGS);
				#endif
				}
				126
				127	void fixup_red_left(struct kmem_cache s, void *p)
				128	{
				129	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
				130	p += s->red_left_pad;
				131
				132	return p;
				133	}
				134
				135	static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
				136	{
				137	#ifdef CONFIG_SLUB_CPU_PARTIAL
				138	return !kmem_cache_debug(s);
				139	#else
				140	return false;
				141	#endif
				142	}
				143
				144	/*
				145	* Issues still to be resolved:
				146	*
				147	* - Support PAGE_ALLOC_DEBUG. Should be easy to do.
				148	*
				149	* - Variable sizing of the per node arrays
				150	*/
				151
				152	/* Enable to test recovery from slab corruption on boot */
				153	#undef SLUB_RESILIENCY_TEST
				154
				155	/* Enable to log cmpxchg failures */
				156	#undef SLUB_DEBUG_CMPXCHG
				157
				158	/*
				159	* Minimum number of partial slabs. These will be left on the partial
				160	* lists even if they are empty. kmem_cache_shrink may reclaim them.
				161	*/
				162	#define MIN_PARTIAL 5
				163
				164	/*
				165	* Maximum number of desirable partial slabs.
				166	* The existence of more partial slabs makes kmem_cache_shrink
				167	* sort the partial list by the number of objects in use.
				168	*/
				169	#define MAX_PARTIAL 10
				170
				/* Debug flags used as the initial slub_debug value with CONFIG_SLUB_DEBUG_ON */
				#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
								SLAB_POISON | SLAB_STORE_USER)
				173
				174	/*
				175	* These debug flags cannot use CMPXCHG because there might be consistency
				176	* issues when checking or reading debug information
				177	*/
				#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
								SLAB_TRACE)
				180
				181
				182	/*
				183	* Debugging flags that require metadata to be stored in the slab. These get
				184	* disabled when slub_debug=O is used and a cache's min order increases with
				185	* metadata.
				186	*/
				#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
				188
				/*
				 * Encoding of struct kmem_cache_order_objects: the page order lives in the
				 * bits above OO_SHIFT, the object count in the bits covered by OO_MASK.
				 */
				#define OO_SHIFT	16
				#define OO_MASK		((1 << OO_SHIFT) - 1)
				#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */
				192
				193	/* Internal SLUB flags */
				194	/* Poison object */
				195	#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
				196	/* Use cmpxchg_double */
				197	#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
				198
				199	/*
				200	* Tracking user of a slab.
				201	*/
				/* Number of stack trace entries kept in each track record */
				#define TRACK_ADDRS_COUNT 16
				struct track {
					unsigned long addr;	/* Called from address */
				#ifdef CONFIG_STACKTRACE
					unsigned long addrs[TRACK_ADDRS_COUNT];	/* Saved stack trace entries */
				#endif
					int cpu;		/* Was running on cpu */
					int pid;		/* Pid context */
					unsigned long when;	/* When did the operation occur */
				};
				212
				/* Index of a track record within an object's tracking area (see get_track()) */
				enum track_item {
					TRACK_ALLOC,
					TRACK_FREE
				};
				214
				/*
				 * sysfs hooks for slab caches. Without CONFIG_SYSFS they collapse into
				 * empty inline stubs that report success.
				 */
				#ifdef CONFIG_SYSFS
				static int sysfs_slab_add(struct kmem_cache *);
				static int sysfs_slab_alias(struct kmem_cache *, const char *);
				static void memcg_propagate_slab_attrs(struct kmem_cache *s);
				static void sysfs_slab_remove(struct kmem_cache *s);
				#else
				static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
				static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
										{ return 0; }
				static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
				static inline void sysfs_slab_remove(struct kmem_cache *s) { }
				#endif
				227
				228	static inline void stat(const struct kmem_cache *s, enum stat_item si)
				229	{
				230	#ifdef CONFIG_SLUB_STATS
				231	/*
				232	* The rmw is racy on a preemptible kernel but this is acceptable, so
				233	* avoid this_cpu_add()'s irq-disable overhead.
				234	*/
				235	raw_cpu_inc(s->cpu_slab->stat[si]);
				236	#endif
				237	}
				238
				239	/********************************************************************
				240	* Core slab cache functions
				241	*******************************************************************/
				242
				243	/*
				244	* Returns freelist pointer (ptr). With hardening, this is obfuscated
				245	* with an XOR of the address where the pointer is held and a per-cache
				246	* random number.
				247	*/
				/*
				 * Encode or decode a freelist pointer.
				 *
				 * @s:        cache the pointer belongs to
				 * @ptr:      pointer value to transform
				 * @ptr_addr: address of the location where the pointer is stored
				 *
				 * With CONFIG_SLAB_FREELIST_HARDENED the value is XORed with s->random and
				 * with the (tag-stripped) storage address; since XOR is its own inverse the
				 * same function both obfuscates and restores. Without hardening @ptr is
				 * returned unchanged.
				 */
				static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
								 unsigned long ptr_addr)
				{
				#ifdef CONFIG_SLAB_FREELIST_HARDENED
					/*
					 * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
					 * Normally, this doesn't cause any issues, as both set_freepointer()
					 * and get_freepointer() are called with a pointer with the same tag.
					 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
					 * example, when __free_slub() iterates over objects in a cache, it
					 * passes untagged pointers to check_object(). check_object() in turn
					 * calls get_freepointer() with an untagged pointer, which causes the
					 * freepointer to be restored incorrectly.
					 */
					return (void *)((unsigned long)ptr ^ s->random ^
							(unsigned long)kasan_reset_tag((void *)ptr_addr));
				#else
					return ptr;
				#endif
				}
				268
				269	/* Returns the freelist pointer recorded at location ptr_addr. */
				/*
				 * Load the word stored at @ptr_addr and decode it with freelist_ptr(),
				 * using @ptr_addr itself as the storage address.
				 */
				static inline void *freelist_dereference(const struct kmem_cache *s,
									 void *ptr_addr)
				{
					return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
							    (unsigned long)ptr_addr);
				}
				276
				277	static inline void get_freepointer(struct kmem_cache s, void *object)
				278	{
				279	return freelist_dereference(s, object + s->offset);
				280	}
				281
				282	static void prefetch_freepointer(const struct kmem_cache s, void object)
				283	{
				284	prefetch(object + s->offset);
				285	}
				286
				287	static inline void get_freepointer_safe(struct kmem_cache s, void *object)
				288	{
				289	unsigned long freepointer_addr;
				290	void *p;
				291
				292	if (!debug_pagealloc_enabled())
				293	return get_freepointer(s, object);
				294
				295	freepointer_addr = (unsigned long)object + s->offset;
				296	probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
				297	return freelist_ptr(s, p, freepointer_addr);
				298	}
				299
				300	static inline void set_freepointer(struct kmem_cache s, void object, void *fp)
				301	{
				302	unsigned long freeptr_addr = (unsigned long)object + s->offset;
				303
				304	#ifdef CONFIG_SLAB_FREELIST_HARDENED
				305	BUG_ON(object == fp); /* naive detection of double free or corruption */
				306	#endif
				307
				308	(void *)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
				309	}
				310
				311	/* Loop over all objects in a slab */
				/*
				 * Walk @__p over the @__objects object slots of the slab starting at
				 * @__addr, stepping by (__s)->size. The first position is adjusted by
				 * fixup_red_left().
				 */
				#define for_each_object(__p, __s, __addr, __objects)		\
					for (__p = fixup_red_left(__s, __addr);			\
					     __p < (__addr) + (__objects) * (__s)->size;	\
					     __p += (__s)->size)
				316
				317	/* Determine object index from a given position */
				318	static inline unsigned int slab_index(void p, struct kmem_cache s, void *addr)
				319	{
				320	return (kasan_reset_tag(p) - addr) / s->size;
				321	}
				322
				323	static inline unsigned int order_objects(unsigned int order, unsigned int size)
				324	{
				325	return ((unsigned int)PAGE_SIZE << order) / size;
				326	}
				327
				328	static inline struct kmem_cache_order_objects oo_make(unsigned int order,
				329	unsigned int size)
				330	{
				331	struct kmem_cache_order_objects x = {
				332	(order << OO_SHIFT) + order_objects(order, size)
				333	};
				334
				335	return x;
				336	}
				337
				338	static inline unsigned int oo_order(struct kmem_cache_order_objects x)
				339	{
				340	return x.x >> OO_SHIFT;
				341	}
				342
				343	static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
				344	{
				345	return x.x & OO_MASK;
				346	}
				347
				348	/*
				349	* Per slab locking using the pagelock
				350	*/
				351	static __always_inline void slab_lock(struct page *page)
				352	{
				353	VM_BUG_ON_PAGE(PageTail(page), page);
				354	bit_spin_lock(PG_locked, &page->flags);
				355	}
				356
				357	static __always_inline void slab_unlock(struct page *page)
				358	{
				359	VM_BUG_ON_PAGE(PageTail(page), page);
				360	__bit_spin_unlock(PG_locked, &page->flags);
				361	}
				362
				363	/* Interrupts must be disabled (for the fallback code to work right) */
				364	static inline bool __cmpxchg_double_slab(struct kmem_cache s, struct page page,
				365	void *freelist_old, unsigned long counters_old,
				366	void *freelist_new, unsigned long counters_new,
				367	const char *n)
				368	{
				369	VM_BUG_ON(!irqs_disabled());
				370	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				371	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				372	if (s->flags & __CMPXCHG_DOUBLE) {
				373	if (cmpxchg_double(&page->freelist, &page->counters,
				374	freelist_old, counters_old,
				375	freelist_new, counters_new))
				376	return true;
				377	} else
				378	#endif
				379	{
				380	slab_lock(page);
				381	if (page->freelist == freelist_old &&
				382	page->counters == counters_old) {
				383	page->freelist = freelist_new;
				384	page->counters = counters_new;
				385	slab_unlock(page);
				386	return true;
				387	}
				388	slab_unlock(page);
				389	}
				390
				391	cpu_relax();
				392	stat(s, CMPXCHG_DOUBLE_FAIL);
				393
				394	#ifdef SLUB_DEBUG_CMPXCHG
				395	pr_info("%s %s: cmpxchg double redo ", n, s->name);
				396	#endif
				397
				398	return false;
				399	}
				400
				401	static inline bool cmpxchg_double_slab(struct kmem_cache s, struct page page,
				402	void *freelist_old, unsigned long counters_old,
				403	void *freelist_new, unsigned long counters_new,
				404	const char *n)
				405	{
				406	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				407	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				408	if (s->flags & __CMPXCHG_DOUBLE) {
				409	if (cmpxchg_double(&page->freelist, &page->counters,
				410	freelist_old, counters_old,
				411	freelist_new, counters_new))
				412	return true;
				413	} else
				414	#endif
				415	{
				416	unsigned long flags;
				417
				418	local_irq_save(flags);
				419	slab_lock(page);
				420	if (page->freelist == freelist_old &&
				421	page->counters == counters_old) {
				422	page->freelist = freelist_new;
				423	page->counters = counters_new;
				424	slab_unlock(page);
				425	local_irq_restore(flags);
				426	return true;
				427	}
				428	slab_unlock(page);
				429	local_irq_restore(flags);
				430	}
				431
				432	cpu_relax();
				433	stat(s, CMPXCHG_DOUBLE_FAIL);
				434
				435	#ifdef SLUB_DEBUG_CMPXCHG
				436	pr_info("%s %s: cmpxchg double redo ", n, s->name);
				437	#endif
				438
				439	return false;
				440	}
				441
				442	#ifdef CONFIG_SLUB_DEBUG
				443	/*
				444	* Determine a map of object in use on a page.
				445	*
				446	* Node listlock must be held to guarantee that the page does
				447	* not vanish from under us.
				448	*/
				449	static void get_map(struct kmem_cache s, struct page page, unsigned long *map)
				450	{
				451	void *p;
				452	void *addr = page_address(page);
				453
				454	for (p = page->freelist; p; p = get_freepointer(s, p))
				455	set_bit(slab_index(p, s, addr), map);
				456	}
				457
				458	static inline unsigned int size_from_object(struct kmem_cache *s)
				459	{
				460	if (s->flags & SLAB_RED_ZONE)
				461	return s->size - s->red_left_pad;
				462
				463	return s->size;
				464	}
				465
				466	static inline void restore_red_left(struct kmem_cache s, void *p)
				467	{
				468	if (s->flags & SLAB_RED_ZONE)
				469	p -= s->red_left_pad;
				470
				471	return p;
				472	}
				473
				474	/*
				475	* Debug settings:
				476	*/
				477	#if defined(CONFIG_SLUB_DEBUG_ON)
				478	static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
				479	#else
				480	static slab_flags_t slub_debug;
				481	#endif
				482
				483	static char *slub_debug_slabs;
				484	static int disable_higher_order_debug;
				485
				486	/*
				487	* slub is about to manipulate internal object metadata. This memory lies
				488	* outside the range of the allocated object, so accessing it would normally
				489	* be reported by kasan as a bounds error. metadata_access_enable() is used
				490	* to tell kasan that these accesses are OK.
				491	*/
				/* Open a metadata access window by calling kasan_disable_current(). */
				static inline void metadata_access_enable(void)
				{
					kasan_disable_current();
				}
				496
				/* Close the metadata access window by calling kasan_enable_current(). */
				static inline void metadata_access_disable(void)
				{
					kasan_enable_current();
				}
				501
				502	/*
				503	* Object debugging
				504	*/
				505
				506	/* Verify that a pointer has an address that is valid within a slab page */
				507	static inline int check_valid_pointer(struct kmem_cache *s,
				508	struct page page, void object)
				509	{
				510	void *base;
				511
				512	if (!object)
				513	return 1;
				514
				515	base = page_address(page);
				516	object = kasan_reset_tag(object);
				517	object = restore_red_left(s, object);
				518	if (object < base \|\| object >= base + page->objects * s->size \|\|
				519	(object - base) % s->size) {
				520	return 0;
				521	}
				522
				523	return 1;
				524	}
				525
				526	static void print_section(char level, char text, u8 *addr,
				527	unsigned int length)
				528	{
				529	metadata_access_enable();
				530	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
				531	length, 1);
				532	metadata_access_disable();
				533	}
				534
				535	static struct track get_track(struct kmem_cache s, void *object,
				536	enum track_item alloc)
				537	{
				538	struct track *p;
				539
				540	if (s->offset)
				541	p = object + s->offset + sizeof(void *);
				542	else
				543	p = object + s->inuse;
				544
				545	return p + alloc;
				546	}
				547
				548	static void set_track(struct kmem_cache s, void object,
				549	enum track_item alloc, unsigned long addr)
				550	{
				551	struct track *p = get_track(s, object, alloc);
				552
				553	if (addr) {
				554	#ifdef CONFIG_STACKTRACE
				555	struct stack_trace trace;
				556	int i;
				557
				558	trace.nr_entries = 0;
				559	trace.max_entries = TRACK_ADDRS_COUNT;
				560	trace.entries = p->addrs;
				561	trace.skip = 3;
				562	metadata_access_enable();
				563	save_stack_trace(&trace);
				564	metadata_access_disable();
				565
				566	/* See rant in lockdep.c */
				567	if (trace.nr_entries != 0 &&
				568	trace.entries[trace.nr_entries - 1] == ULONG_MAX)
				569	trace.nr_entries--;
				570
				571	for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
				572	p->addrs[i] = 0;
				573	#endif
				574	p->addr = addr;
				575	p->cpu = smp_processor_id();
				576	p->pid = current->pid;
				577	p->when = jiffies;
				578	} else
				579	memset(p, 0, sizeof(struct track));
				580	}
				581
				582	static void init_tracking(struct kmem_cache s, void object)
				583	{
				584	if (!(s->flags & SLAB_STORE_USER))
				585	return;
				586
				587	set_track(s, object, TRACK_FREE, 0UL);
				588	set_track(s, object, TRACK_ALLOC, 0UL);
				589	}
				590
				591	static void print_track(const char s, struct track t, unsigned long pr_time)
				592	{
				593	if (!t->addr)
				594	return;
				595
				596	pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
				597	s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
				598	#ifdef CONFIG_STACKTRACE
				599	{
				600	int i;
				601	for (i = 0; i < TRACK_ADDRS_COUNT; i++)
				602	if (t->addrs[i])
				603	pr_err("\t%pS\n", (void *)t->addrs[i]);
				604	else
				605	break;
				606	}
				607	#endif
				608	}
				609
				610	static void print_tracking(struct kmem_cache s, void object)
				611	{
				612	unsigned long pr_time = jiffies;
				613	if (!(s->flags & SLAB_STORE_USER))
				614	return;
				615
				616	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
				617	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
				618	}
				619
				620	static void print_page_info(struct page *page)
				621	{
				622	pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
				623	page, page->objects, page->inuse, page->freelist, page->flags);
				624
				625	}
				626
				627	static void slab_bug(struct kmem_cache s, char fmt, ...)
				628	{
				629	struct va_format vaf;
				630	va_list args;
				631
				632	va_start(args, fmt);
				633	vaf.fmt = fmt;
				634	vaf.va = &args;
				635	pr_err("=============================================================================\n");
				636	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
				637	pr_err("-----------------------------------------------------------------------------\n\n");
				638
				639	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
				640	va_end(args);
				641	}
				642
				643	static void slab_fix(struct kmem_cache s, char fmt, ...)
				644	{
				645	struct va_format vaf;
				646	va_list args;
				647
				648	va_start(args, fmt);
				649	vaf.fmt = fmt;
				650	vaf.va = &args;
				651	pr_err("FIX %s: %pV\n", s->name, &vaf);
				652	va_end(args);
				653	}
				654
				655	static void print_trailer(struct kmem_cache s, struct page page, u8 *p)
				656	{
				657	unsigned int off; /* Offset of last byte */
				658	u8 *addr = page_address(page);
				659
				660	print_tracking(s, p);
				661
				662	print_page_info(page);
				663
				664	pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
				665	p, p - addr, get_freepointer(s, p));
				666
				667	if (s->flags & SLAB_RED_ZONE)
				668	print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
				669	s->red_left_pad);
				670	else if (p > addr + 16)
				671	print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
				672
				673	print_section(KERN_ERR, "Object ", p,
				674	min_t(unsigned int, s->object_size, PAGE_SIZE));
				675	if (s->flags & SLAB_RED_ZONE)
				676	print_section(KERN_ERR, "Redzone ", p + s->object_size,
				677	s->inuse - s->object_size);
				678
				679	if (s->offset)
				680	off = s->offset + sizeof(void *);
				681	else
				682	off = s->inuse;
				683
				684	if (s->flags & SLAB_STORE_USER)
				685	off += 2 * sizeof(struct track);
				686
				687	off += kasan_metadata_size(s);
				688
				689	if (off != size_from_object(s))
				690	/* Beginning of the filler is the free pointer */
				691	print_section(KERN_ERR, "Padding ", p + off,
				692	size_from_object(s) - off);
				693
				694	WARN_ON(1);
				695	}
				696
				697	void object_err(struct kmem_cache s, struct page page,
				698	u8 object, char reason)
				699	{
				700	slab_bug(s, "%s", reason);
				701	print_trailer(s, page, object);
				702	}
				703
				704	static __printf(3, 4) void slab_err(struct kmem_cache s, struct page page,
				705	const char *fmt, ...)
				706	{
				707	va_list args;
				708	char buf[100];
				709
				710	va_start(args, fmt);
				711	vsnprintf(buf, sizeof(buf), fmt, args);
				712	va_end(args);
				713	slab_bug(s, "%s", buf);
				714	print_page_info(page);
				715	WARN_ON(1);
				716	}
				717
				718	static void init_object(struct kmem_cache s, void object, u8 val)
				719	{
				720	u8 *p = object;
				721
				722	if (s->flags & SLAB_RED_ZONE)
				723	memset(p - s->red_left_pad, val, s->red_left_pad);
				724
				725	if (s->flags & __OBJECT_POISON) {
				726	memset(p, POISON_FREE, s->object_size - 1);
				727	p[s->object_size - 1] = POISON_END;
				728	}
				729
				730	if (s->flags & SLAB_RED_ZONE)
				731	memset(p + s->object_size, val, s->inuse - s->object_size);
				732	}
				733
				734	static void restore_bytes(struct kmem_cache s, char message, u8 data,
				735	void from, void to)
				736	{
				737	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
				738	memset(from, data, to - from);
				739	}
				740
				741	static int check_bytes_and_report(struct kmem_cache s, struct page page,
				742	u8 object, char what,
				743	u8 *start, unsigned int value, unsigned int bytes)
				744	{
				745	u8 *fault;
				746	u8 *end;
				747
				748	metadata_access_enable();
				749	fault = memchr_inv(start, value, bytes);
				750	metadata_access_disable();
				751	if (!fault)
				752	return 1;
				753
				754	end = start + bytes;
				755	while (end > fault && end[-1] == value)
				756	end--;
				757
				758	slab_bug(s, "%s overwritten", what);
				759	pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
				760	fault, end - 1, fault[0], value);
				761	print_trailer(s, page, object);
				762
				763	restore_bytes(s, what, value, fault, end);
				764	return 0;
				765	}
				766
				767	/*
				768	* Object layout:
				769	*
				770	* object address
				771	* Bytes of the object to be managed.
				772	* If the freepointer may overlay the object then the free
				773	* pointer is the first word of the object.
				774	*
				775	* Poisoning uses 0x6b (POISON_FREE) and the last byte is
				776	* 0xa5 (POISON_END)
				777	*
				778	* object + s->object_size
				779	* Padding to reach word boundary. This is also used for Redzoning.
				780	* Padding is extended by another word if Redzoning is enabled and
				781	* object_size == inuse.
				782	*
				783	* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
				784	* 0xcc (RED_ACTIVE) for objects in use.
				785	*
				786	* object + s->inuse
				787	* Meta data starts here.
				788	*
				789	* A. Free pointer (if we cannot overwrite object on free)
				790	* B. Tracking data for SLAB_STORE_USER
				791	* C. Padding to reach required alignment boundary or at minimum
				792	* one word if debugging is on to be able to detect writes
				793	* before the word boundary.
				794	*
				795	* Padding is done using 0x5a (POISON_INUSE)
				796	*
				797	* object + s->size
				798	* Nothing is used beyond s->size.
				799	*
				800	* If slabcaches are merged then the object_size and inuse boundaries are mostly
				801	* ignored. And therefore no slab options that rely on these boundaries
				802	* may be used with merged slabcaches.
				803	*/
				804
				805	static int check_pad_bytes(struct kmem_cache s, struct page page, u8 *p)
				806	{
				807	unsigned long off = s->inuse; /* The end of info */
				808
				809	if (s->offset)
				810	/* Freepointer is placed after the object. */
				811	off += sizeof(void *);
				812
				813	if (s->flags & SLAB_STORE_USER)
				814	/* We also have user information there */
				815	off += 2 * sizeof(struct track);
				816
				817	off += kasan_metadata_size(s);
				818
				819	if (size_from_object(s) == off)
				820	return 1;
				821
				822	return check_bytes_and_report(s, page, p, "Object padding",
				823	p + off, POISON_INUSE, size_from_object(s) - off);
				824	}
				825
				826	/* Check the pad bytes at the end of a slab page */
				827	static int slab_pad_check(struct kmem_cache s, struct page page)
				828	{
				829	u8 *start;
				830	u8 *fault;
				831	u8 *end;
				832	u8 *pad;
				833	int length;
				834	int remainder;
				835
				836	if (!(s->flags & SLAB_POISON))
				837	return 1;
				838
				839	start = page_address(page);
				840	length = PAGE_SIZE << compound_order(page);
				841	end = start + length;
				842	remainder = length % s->size;
				843	if (!remainder)
				844	return 1;
				845
				846	pad = end - remainder;
				847	metadata_access_enable();
				848	fault = memchr_inv(pad, POISON_INUSE, remainder);
				849	metadata_access_disable();
				850	if (!fault)
				851	return 1;
				852	while (end > fault && end[-1] == POISON_INUSE)
				853	end--;
				854
				855	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
				856	print_section(KERN_ERR, "Padding ", pad, remainder);
				857
				858	restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
				859	return 0;
				860	}
				861
				862	static int check_object(struct kmem_cache s, struct page page,
				863	void *object, u8 val)
				864	{
				865	u8 *p = object;
				866	u8 *endobject = object + s->object_size;
				867
				868	if (s->flags & SLAB_RED_ZONE) {
				869	if (!check_bytes_and_report(s, page, object, "Redzone",
				870	object - s->red_left_pad, val, s->red_left_pad))
				871	return 0;
				872
				873	if (!check_bytes_and_report(s, page, object, "Redzone",
				874	endobject, val, s->inuse - s->object_size))
				875	return 0;
				876	} else {
				877	if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
				878	check_bytes_and_report(s, page, p, "Alignment padding",
				879	endobject, POISON_INUSE,
				880	s->inuse - s->object_size);
				881	}
				882	}
				883
				884	if (s->flags & SLAB_POISON) {
				885	if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
				886	(!check_bytes_and_report(s, page, p, "Poison", p,
				887	POISON_FREE, s->object_size - 1) \|\|
				888	!check_bytes_and_report(s, page, p, "Poison",
				889	p + s->object_size - 1, POISON_END, 1)))
				890	return 0;
				891	/*
				892	* check_pad_bytes cleans up on its own.
				893	*/
				894	check_pad_bytes(s, page, p);
				895	}
				896
				897	if (!s->offset && val == SLUB_RED_ACTIVE)
				898	/*
				899	* Object and freepointer overlap. Cannot check
				900	* freepointer while object is allocated.
				901	*/
				902	return 1;
				903
				904	/* Check free pointer validity */
				905	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
				906	object_err(s, page, p, "Freepointer corrupt");
				907	/*
				908	* No choice but to zap it and thus lose the remainder
				909	* of the free objects in this slab. May cause
				910	* another error because the object count is now wrong.
				911	*/
				912	set_freepointer(s, p, NULL);
				913	return 0;
				914	}
				915	return 1;
				916	}
				917
				918	static int check_slab(struct kmem_cache s, struct page page)
				919	{
				920	int maxobj;
				921
				922	VM_BUG_ON(!irqs_disabled());
				923
				924	if (!PageSlab(page)) {
				925	slab_err(s, page, "Not a valid slab page");
				926	return 0;
				927	}
				928
				929	maxobj = order_objects(compound_order(page), s->size);
				930	if (page->objects > maxobj) {
				931	slab_err(s, page, "objects %u > max %u",
				932	page->objects, maxobj);
				933	return 0;
				934	}
				935	if (page->inuse > page->objects) {
				936	slab_err(s, page, "inuse %u > max %u",
				937	page->inuse, page->objects);
				938	return 0;
				939	}
				940	/* Slab_pad_check fixes things up after itself */
				941	slab_pad_check(s, page);
				942	return 1;
				943	}
				944
				945	/*
				946	* Determine if a certain object on a page is on the freelist. Must hold the
				947	* slab lock to guarantee that the chains are in a consistent state.
				948	*/
				949	static int on_freelist(struct kmem_cache s, struct page page, void *search)
				950	{
				951	int nr = 0;
				952	void *fp;
				953	void *object = NULL;
				954	int max_objects;
				955
				956	fp = page->freelist;
				957	while (fp && nr <= page->objects) {
				958	if (fp == search)
				959	return 1;
				960	if (!check_valid_pointer(s, page, fp)) {
				961	if (object) {
				962	object_err(s, page, object,
				963	"Freechain corrupt");
				964	set_freepointer(s, object, NULL);
				965	} else {
				966	slab_err(s, page, "Freepointer corrupt");
				967	page->freelist = NULL;
				968	page->inuse = page->objects;
				969	slab_fix(s, "Freelist cleared");
				970	return 0;
				971	}
				972	break;
				973	}
				974	object = fp;
				975	fp = get_freepointer(s, object);
				976	nr++;
				977	}
				978
				979	max_objects = order_objects(compound_order(page), s->size);
				980	if (max_objects > MAX_OBJS_PER_PAGE)
				981	max_objects = MAX_OBJS_PER_PAGE;
				982
				983	if (page->objects != max_objects) {
				984	slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
				985	page->objects, max_objects);
				986	page->objects = max_objects;
				987	slab_fix(s, "Number of objects adjusted.");
				988	}
				989	if (page->inuse != page->objects - nr) {
				990	slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
				991	page->inuse, page->objects - nr);
				992	page->inuse = page->objects - nr;
				993	slab_fix(s, "Object count adjusted.");
				994	}
				995	return search == NULL;
				996	}
				997
				998	static void trace(struct kmem_cache s, struct page page, void *object,
				999	int alloc)
				1000	{
				1001	if (s->flags & SLAB_TRACE) {
				1002	pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
				1003	s->name,
				1004	alloc ? "alloc" : "free",
				1005	object, page->inuse,
				1006	page->freelist);
				1007
				1008	if (!alloc)
				1009	print_section(KERN_INFO, "Object ", (void *)object,
				1010	s->object_size);
				1011
				1012	WARN_ON(1);
				1013	}
				1014	}
				1015
				1016	/*
				1017	* Tracking of fully allocated slabs for debugging purposes.
				1018	*/
				1019	static void add_full(struct kmem_cache *s,
				1020	struct kmem_cache_node n, struct page page)
				1021	{
				1022	if (!(s->flags & SLAB_STORE_USER))
				1023	return;
				1024
				1025	lockdep_assert_held(&n->list_lock);
				1026	list_add(&page->lru, &n->full);
				1027	}
				1028
				1029	static void remove_full(struct kmem_cache s, struct kmem_cache_node n, struct page *page)
				1030	{
				1031	if (!(s->flags & SLAB_STORE_USER))
				1032	return;
				1033
				1034	lockdep_assert_held(&n->list_lock);
				1035	list_del(&page->lru);
				1036	}
				1037
				1038	/* Tracking of the number of slabs for debugging purposes */
				1039	static inline unsigned long slabs_node(struct kmem_cache *s, int node)
				1040	{
				1041	struct kmem_cache_node *n = get_node(s, node);
				1042
				1043	return atomic_long_read(&n->nr_slabs);
				1044	}
				1045
				1046	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
				1047	{
				1048	return atomic_long_read(&n->nr_slabs);
				1049	}
				1050
				1051	static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
				1052	{
				1053	struct kmem_cache_node *n = get_node(s, node);
				1054
				1055	/*
				1056	* May be called early in order to allocate a slab for the
				1057	* kmem_cache_node structure. Solve the chicken-egg
				1058	* dilemma by deferring the increment of the count during
				1059	* bootstrap (see early_kmem_cache_node_alloc).
				1060	*/
				1061	if (likely(n)) {
				1062	atomic_long_inc(&n->nr_slabs);
				1063	atomic_long_add(objects, &n->total_objects);
				1064	}
				1065	}
				1066	static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
				1067	{
				1068	struct kmem_cache_node *n = get_node(s, node);
				1069
				1070	atomic_long_dec(&n->nr_slabs);
				1071	atomic_long_sub(objects, &n->total_objects);
				1072	}
				1073
				1074	/* Object debug checks for alloc/free paths */
				1075	static void setup_object_debug(struct kmem_cache s, struct page page,
				1076	void *object)
				1077	{
				1078	if (!(s->flags & (SLAB_STORE_USER\|SLAB_RED_ZONE\|__OBJECT_POISON)))
				1079	return;
				1080
				1081	init_object(s, object, SLUB_RED_INACTIVE);
				1082	init_tracking(s, object);
				1083	}
				1084
				1085	static void setup_page_debug(struct kmem_cache s, void addr, int order)
				1086	{
				1087	if (!(s->flags & SLAB_POISON))
				1088	return;
				1089
				1090	metadata_access_enable();
				1091	memset(addr, POISON_INUSE, PAGE_SIZE << order);
				1092	metadata_access_disable();
				1093	}
				1094
				1095	static inline int alloc_consistency_checks(struct kmem_cache *s,
				1096	struct page *page,
				1097	void *object, unsigned long addr)
				1098	{
				1099	if (!check_slab(s, page))
				1100	return 0;
				1101
				1102	if (!check_valid_pointer(s, page, object)) {
				1103	object_err(s, page, object, "Freelist Pointer check fails");
				1104	return 0;
				1105	}
				1106
				1107	if (!check_object(s, page, object, SLUB_RED_INACTIVE))
				1108	return 0;
				1109
				1110	return 1;
				1111	}
				1112
				1113	static noinline int alloc_debug_processing(struct kmem_cache *s,
				1114	struct page *page,
				1115	void *object, unsigned long addr)
				1116	{
				1117	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1118	if (!alloc_consistency_checks(s, page, object, addr))
				1119	goto bad;
				1120	}
				1121
				1122	/* Success perform special debug activities for allocs */
				1123	if (s->flags & SLAB_STORE_USER)
				1124	set_track(s, object, TRACK_ALLOC, addr);
				1125	trace(s, page, object, 1);
				1126	init_object(s, object, SLUB_RED_ACTIVE);
				1127	return 1;
				1128
				1129	bad:
				1130	if (PageSlab(page)) {
				1131	/*
				1132	* If this is a slab page then lets do the best we can
				1133	* to avoid issues in the future. Marking all objects
				1134	* as used avoids touching the remaining objects.
				1135	*/
				1136	slab_fix(s, "Marking all objects used");
				1137	page->inuse = page->objects;
				1138	page->freelist = NULL;
				1139	}
				1140	return 0;
				1141	}
				1142
				1143	static inline int free_consistency_checks(struct kmem_cache *s,
				1144	struct page page, void object, unsigned long addr)
				1145	{
				1146	if (!check_valid_pointer(s, page, object)) {
				1147	slab_err(s, page, "Invalid object pointer 0x%p", object);
				1148	return 0;
				1149	}
				1150
				1151	if (on_freelist(s, page, object)) {
				1152	object_err(s, page, object, "Object already free");
				1153	return 0;
				1154	}
				1155
				1156	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
				1157	return 0;
				1158
				1159	if (unlikely(s != page->slab_cache)) {
				1160	if (!PageSlab(page)) {
				1161	slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
				1162	object);
				1163	} else if (!page->slab_cache) {
				1164	pr_err("SLUB <none>: no slab for object 0x%p.\n",
				1165	object);
				1166	dump_stack();
				1167	} else
				1168	object_err(s, page, object,
				1169	"page slab pointer corrupt.");
				1170	return 0;
				1171	}
				1172	return 1;
				1173	}
				1174
				1175	/* Supports checking bulk free of a constructed freelist */
				1176	static noinline int free_debug_processing(
				1177	struct kmem_cache s, struct page page,
				1178	void head, void tail, int bulk_cnt,
				1179	unsigned long addr)
				1180	{
				1181	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				1182	void *object = head;
				1183	int cnt = 0;
				1184	unsigned long uninitialized_var(flags);
				1185	int ret = 0;
				1186
				1187	spin_lock_irqsave(&n->list_lock, flags);
				1188	slab_lock(page);
				1189
				1190	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1191	if (!check_slab(s, page))
				1192	goto out;
				1193	}
				1194
				1195	next_object:
				1196	cnt++;
				1197
				1198	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1199	if (!free_consistency_checks(s, page, object, addr))
				1200	goto out;
				1201	}
				1202
				1203	if (s->flags & SLAB_STORE_USER)
				1204	set_track(s, object, TRACK_FREE, addr);
				1205	trace(s, page, object, 0);
				1206	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
				1207	init_object(s, object, SLUB_RED_INACTIVE);
				1208
				1209	/* Reached end of constructed freelist yet? */
				1210	if (object != tail) {
				1211	object = get_freepointer(s, object);
				1212	goto next_object;
				1213	}
				1214	ret = 1;
				1215
				1216	out:
				1217	if (cnt != bulk_cnt)
				1218	slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
				1219	bulk_cnt, cnt);
				1220
				1221	slab_unlock(page);
				1222	spin_unlock_irqrestore(&n->list_lock, flags);
				1223	if (!ret)
				1224	slab_fix(s, "Object at 0x%p not freed", object);
				1225	return ret;
				1226	}
				1227
				1228	static int __init setup_slub_debug(char *str)
				1229	{
				1230	slub_debug = DEBUG_DEFAULT_FLAGS;
				1231	if (str++ != '=' \|\| !str)
				1232	/*
				1233	* No options specified. Switch on full debugging.
				1234	*/
				1235	goto out;
				1236
				1237	if (*str == ',')
				1238	/*
				1239	* No options but restriction on slabs. This means full
				1240	* debugging for slabs matching a pattern.
				1241	*/
				1242	goto check_slabs;
				1243
				1244	slub_debug = 0;
				1245	if (*str == '-')
				1246	/*
				1247	* Switch off all debugging measures.
				1248	*/
				1249	goto out;
				1250
				1251	/*
				1252	* Determine which debug features should be switched on
				1253	*/
				1254	for (; str && str != ','; str++) {
				1255	switch (tolower(*str)) {
				1256	case 'f':
				1257	slub_debug \|= SLAB_CONSISTENCY_CHECKS;
				1258	break;
				1259	case 'z':
				1260	slub_debug \|= SLAB_RED_ZONE;
				1261	break;
				1262	case 'p':
				1263	slub_debug \|= SLAB_POISON;
				1264	break;
				1265	case 'u':
				1266	slub_debug \|= SLAB_STORE_USER;
				1267	break;
				1268	case 't':
				1269	slub_debug \|= SLAB_TRACE;
				1270	break;
				1271	case 'a':
				1272	slub_debug \|= SLAB_FAILSLAB;
				1273	break;
				1274	case 'o':
				1275	/*
				1276	* Avoid enabling debugging on caches if its minimum
				1277	* order would increase as a result.
				1278	*/
				1279	disable_higher_order_debug = 1;
				1280	break;
				1281	default:
				1282	pr_err("slub_debug option '%c' unknown. skipped\n",
				1283	*str);
				1284	}
				1285	}
				1286
				1287	check_slabs:
				1288	if (*str == ',')
				1289	slub_debug_slabs = str + 1;
				1290	out:
				1291	if ((static_branch_unlikely(&init_on_alloc) \|\|
				1292	static_branch_unlikely(&init_on_free)) &&
				1293	(slub_debug & SLAB_POISON))
				1294	pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
				1295	return 1;
				1296	}
				1297
				1298	__setup("slub_debug", setup_slub_debug);
				1299
				1300	slab_flags_t kmem_cache_flags(unsigned int object_size,
				1301	slab_flags_t flags, const char *name,
				1302	void (ctor)(void ))
				1303	{
				1304	/*
				1305	* Enable debugging if selected on the kernel commandline.
				1306	*/
				1307	if (slub_debug && (!slub_debug_slabs \|\| (name &&
				1308	!strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
				1309	flags \|= slub_debug;
				1310
				1311	return flags;
				1312	}
				1313	#else /* !CONFIG_SLUB_DEBUG */
				1314	static inline void setup_object_debug(struct kmem_cache *s,
				1315	struct page page, void object) {}
				1316	static inline void setup_page_debug(struct kmem_cache *s,
				1317	void *addr, int order) {}
				1318
				1319	static inline int alloc_debug_processing(struct kmem_cache *s,
				1320	struct page page, void object, unsigned long addr) { return 0; }
				1321
				1322	static inline int free_debug_processing(
				1323	struct kmem_cache s, struct page page,
				1324	void head, void tail, int bulk_cnt,
				1325	unsigned long addr) { return 0; }
				1326
				1327	static inline int slab_pad_check(struct kmem_cache s, struct page page)
				1328	{ return 1; }
				1329	static inline int check_object(struct kmem_cache s, struct page page,
				1330	void *object, u8 val) { return 1; }
				1331	static inline void add_full(struct kmem_cache s, struct kmem_cache_node n,
				1332	struct page *page) {}
				1333	static inline void remove_full(struct kmem_cache s, struct kmem_cache_node n,
				1334	struct page *page) {}
				1335	slab_flags_t kmem_cache_flags(unsigned int object_size,
				1336	slab_flags_t flags, const char *name,
				1337	void (ctor)(void ))
				1338	{
				1339	return flags;
				1340	}
				1341	#define slub_debug 0
				1342
				1343	#define disable_higher_order_debug 0
				1344
				1345	static inline unsigned long slabs_node(struct kmem_cache *s, int node)
				1346	{ return 0; }
				1347	static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
				1348	{ return 0; }
				1349	static inline void inc_slabs_node(struct kmem_cache *s, int node,
				1350	int objects) {}
				1351	static inline void dec_slabs_node(struct kmem_cache *s, int node,
				1352	int objects) {}
				1353
				1354	#endif /* CONFIG_SLUB_DEBUG */
				1355
				1356	/*
				1357	* Hooks for other subsystems that check memory allocations. In a typical
				1358	* production configuration these hooks all should produce no code at all.
				1359	*/
				1360	static inline void kmalloc_large_node_hook(void ptr, size_t size, gfp_t flags)
				1361	{
				1362	ptr = kasan_kmalloc_large(ptr, size, flags);
				1363	kmemleak_alloc(ptr, size, 1, flags);
				1364	return ptr;
				1365	}
				1366
				1367	static __always_inline void kfree_hook(void *x)
				1368	{
				1369	kmemleak_free(x);
				1370	kasan_kfree_large(x, _RET_IP_);
				1371	}
				1372
				1373	static __always_inline bool slab_free_hook(struct kmem_cache s, void x)
				1374	{
				1375	kmemleak_free_recursive(x, s->flags);
				1376
				1377	/*
				1378	* Trouble is that we may no longer disable interrupts in the fast path
				1379	* So in order to make the debug calls that expect irqs to be
				1380	* disabled we need to disable interrupts temporarily.
				1381	*/
				1382	#ifdef CONFIG_LOCKDEP
				1383	{
				1384	unsigned long flags;
				1385
				1386	local_irq_save(flags);
				1387	debug_check_no_locks_freed(x, s->object_size);
				1388	local_irq_restore(flags);
				1389	}
				1390	#endif
				1391	if (!(s->flags & SLAB_DEBUG_OBJECTS))
				1392	debug_check_no_obj_freed(x, s->object_size);
				1393
				1394	/* KASAN might put x into memory quarantine, delaying its reuse */
				1395	return kasan_slab_free(s, x, _RET_IP_);
				1396	}
				1397
				1398	static inline bool slab_free_freelist_hook(struct kmem_cache *s,
				1399	void head, void tail)
				1400	{
				1401
				1402	void *object;
				1403	void next = head;
				1404	void old_tail = tail ? tail : head;
				1405	int rsize;
				1406
				1407	/* Head and tail of the reconstructed freelist */
				1408	*head = NULL;
				1409	*tail = NULL;
				1410
				1411	do {
				1412	object = next;
				1413	next = get_freepointer(s, object);
				1414
				1415	if (slab_want_init_on_free(s)) {
				1416	/*
				1417	* Clear the object and the metadata, but don't touch
				1418	* the redzone.
				1419	*/
				1420	memset(object, 0, s->object_size);
				1421	rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
				1422	: 0;
				1423	memset((char *)object + s->inuse, 0,
				1424	s->size - s->inuse - rsize);
				1425
				1426	}
				1427	/* If object's reuse doesn't have to be delayed */
				1428	if (!slab_free_hook(s, object)) {
				1429	/* Move object to the new freelist */
				1430	set_freepointer(s, object, *head);
				1431	*head = object;
				1432	if (!*tail)
				1433	*tail = object;
				1434	}
				1435	} while (object != old_tail);
				1436
				1437	if (head == tail)
				1438	*tail = NULL;
				1439
				1440	return *head != NULL;
				1441	}
				1442
				1443	static void setup_object(struct kmem_cache s, struct page *page,
				1444	void *object)
				1445	{
				1446	setup_object_debug(s, page, object);
				1447	object = kasan_init_slab_obj(s, object);
				1448	if (unlikely(s->ctor)) {
				1449	kasan_unpoison_object_data(s, object);
				1450	s->ctor(object);
				1451	kasan_poison_object_data(s, object);
				1452	}
				1453	return object;
				1454	}
				1455
				1456	/*
				1457	* Slab allocation and freeing
				1458	*/
				1459	static inline struct page alloc_slab_page(struct kmem_cache s,
				1460	gfp_t flags, int node, struct kmem_cache_order_objects oo)
				1461	{
				1462	struct page *page;
				1463	unsigned int order = oo_order(oo);
				1464
				1465	if (node == NUMA_NO_NODE)
				1466	page = alloc_pages(flags, order);
				1467	else
				1468	page = __alloc_pages_node(node, flags, order);
				1469
				1470	if (page && memcg_charge_slab(page, flags, order, s)) {
				1471	__free_pages(page, order);
				1472	page = NULL;
				1473	}
				1474
				1475	return page;
				1476	}
				1477
				1478	#ifdef CONFIG_SLAB_FREELIST_RANDOM
				1479	/* Pre-initialize the random sequence cache */
				1480	static int init_cache_random_seq(struct kmem_cache *s)
				1481	{
				1482	unsigned int count = oo_objects(s->oo);
				1483	int err;
				1484
				1485	/* Bailout if already initialised */
				1486	if (s->random_seq)
				1487	return 0;
				1488
				1489	err = cache_random_seq_create(s, count, GFP_KERNEL);
				1490	if (err) {
				1491	pr_err("SLUB: Unable to initialize free list for %s\n",
				1492	s->name);
				1493	return err;
				1494	}
				1495
				1496	/* Transform to an offset on the set of pages */
				1497	if (s->random_seq) {
				1498	unsigned int i;
				1499
				1500	for (i = 0; i < count; i++)
				1501	s->random_seq[i] *= s->size;
				1502	}
				1503	return 0;
				1504	}
				1505
				1506	/* Initialize each random sequence freelist per cache */
				1507	static void __init init_freelist_randomization(void)
				1508	{
				1509	struct kmem_cache *s;
				1510
				1511	mutex_lock(&slab_mutex);
				1512
				1513	list_for_each_entry(s, &slab_caches, list)
				1514	init_cache_random_seq(s);
				1515
				1516	mutex_unlock(&slab_mutex);
				1517	}
				1518
				1519	/* Get the next entry on the pre-computed freelist randomized */
				1520	static void next_freelist_entry(struct kmem_cache s, struct page *page,
				1521	unsigned long pos, void start,
				1522	unsigned long page_limit,
				1523	unsigned long freelist_count)
				1524	{
				1525	unsigned int idx;
				1526
				1527	/*
				1528	* If the target page allocation failed, the number of objects on the
				1529	* page might be smaller than the usual size defined by the cache.
				1530	*/
				1531	do {
				1532	idx = s->random_seq[*pos];
				1533	*pos += 1;
				1534	if (*pos >= freelist_count)
				1535	*pos = 0;
				1536	} while (unlikely(idx >= page_limit));
				1537
				1538	return (char *)start + idx;
				1539	}
				1540
				1541	/* Shuffle the single linked freelist based on a random pre-computed sequence */
				1542	static bool shuffle_freelist(struct kmem_cache s, struct page page)
				1543	{
				1544	void *start;
				1545	void *cur;
				1546	void *next;
				1547	unsigned long idx, pos, page_limit, freelist_count;
				1548
				1549	if (page->objects < 2 \|\| !s->random_seq)
				1550	return false;
				1551
				1552	freelist_count = oo_objects(s->oo);
				1553	pos = get_random_int() % freelist_count;
				1554
				1555	page_limit = page->objects * s->size;
				1556	start = fixup_red_left(s, page_address(page));
				1557
				1558	/* First entry is used as the base of the freelist */
				1559	cur = next_freelist_entry(s, page, &pos, start, page_limit,
				1560	freelist_count);
				1561	cur = setup_object(s, page, cur);
				1562	page->freelist = cur;
				1563
				1564	for (idx = 1; idx < page->objects; idx++) {
				1565	next = next_freelist_entry(s, page, &pos, start, page_limit,
				1566	freelist_count);
				1567	next = setup_object(s, page, next);
				1568	set_freepointer(s, cur, next);
				1569	cur = next;
				1570	}
				1571	set_freepointer(s, cur, NULL);
				1572
				1573	return true;
				1574	}
				1575	#else
				1576	static inline int init_cache_random_seq(struct kmem_cache *s)
				1577	{
				1578	return 0;
				1579	}
				1580	static inline void init_freelist_randomization(void) { }
				1581	static inline bool shuffle_freelist(struct kmem_cache s, struct page page)
				1582	{
				1583	return false;
				1584	}
				1585	#endif /* CONFIG_SLAB_FREELIST_RANDOM */
				1586
				1587	static struct page allocate_slab(struct kmem_cache s, gfp_t flags, int node)
				1588	{
				1589	struct page *page;
				1590	struct kmem_cache_order_objects oo = s->oo;
				1591	gfp_t alloc_gfp;
				1592	void start, p, *next;
				1593	int idx, order;
				1594	bool shuffle;
				1595
				1596	flags &= gfp_allowed_mask;
				1597
				1598	if (gfpflags_allow_blocking(flags))
				1599	local_irq_enable();
				1600
				1601	flags \|= s->allocflags;
				1602
				1603	/*
				1604	* Let the initial higher-order allocation fail under memory pressure
				1605	* so we fall-back to the minimum order allocation.
				1606	*/
				1607	alloc_gfp = (flags \| __GFP_NOWARN \| __GFP_NORETRY) & ~__GFP_NOFAIL;
				1608	if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
				1609	alloc_gfp = (alloc_gfp \| __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM\|__GFP_NOFAIL);
				1610
				1611	page = alloc_slab_page(s, alloc_gfp, node, oo);
				1612	if (unlikely(!page)) {
				1613	oo = s->min;
				1614	alloc_gfp = flags;
				1615	/*
				1616	* Allocation may have failed due to fragmentation.
				1617	* Try a lower order alloc if possible
				1618	*/
				1619	page = alloc_slab_page(s, alloc_gfp, node, oo);
				1620	if (unlikely(!page))
				1621	goto out;
				1622	stat(s, ORDER_FALLBACK);
				1623	}
				1624
				1625	page->objects = oo_objects(oo);
				1626
				1627	order = compound_order(page);
				1628	page->slab_cache = s;
				1629	__SetPageSlab(page);
				1630	if (page_is_pfmemalloc(page))
				1631	SetPageSlabPfmemalloc(page);
				1632
				1633	kasan_poison_slab(page);
				1634
				1635	start = page_address(page);
				1636
				1637	setup_page_debug(s, start, order);
				1638
				1639	shuffle = shuffle_freelist(s, page);
				1640
				1641	if (!shuffle) {
				1642	start = fixup_red_left(s, start);
				1643	start = setup_object(s, page, start);
				1644	page->freelist = start;
				1645	for (idx = 0, p = start; idx < page->objects - 1; idx++) {
				1646	next = p + s->size;
				1647	next = setup_object(s, page, next);
				1648	set_freepointer(s, p, next);
				1649	p = next;
				1650	}
				1651	set_freepointer(s, p, NULL);
				1652	}
				1653
				1654	page->inuse = page->objects;
				1655	page->frozen = 1;
				1656
				1657	out:
				1658	if (gfpflags_allow_blocking(flags))
				1659	local_irq_disable();
				1660	if (!page)
				1661	return NULL;
				1662
				1663	mod_lruvec_page_state(page,
				1664	(s->flags & SLAB_RECLAIM_ACCOUNT) ?
				1665	NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
				1666	1 << oo_order(oo));
				1667
				1668	inc_slabs_node(s, page_to_nid(page), page->objects);
				1669
				1670	return page;
				1671	}
				1672
				1673	static struct page new_slab(struct kmem_cache s, gfp_t flags, int node)
				1674	{
				1675	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
				1676	gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
				1677	flags &= ~GFP_SLAB_BUG_MASK;
				1678	pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
				1679	invalid_mask, &invalid_mask, flags, &flags);
				1680	dump_stack();
				1681	}
				1682
				1683	return allocate_slab(s,
				1684	flags & (GFP_RECLAIM_MASK \| GFP_CONSTRAINT_MASK), node);
				1685	}
				1686
				1687	static void __free_slab(struct kmem_cache s, struct page page)
				1688	{
				1689	int order = compound_order(page);
				1690	int pages = 1 << order;
				1691
				1692	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
				1693	void *p;
				1694
				1695	slab_pad_check(s, page);
				1696	for_each_object(p, s, page_address(page),
				1697	page->objects)
				1698	check_object(s, page, p, SLUB_RED_INACTIVE);
				1699	}
				1700
				1701	mod_lruvec_page_state(page,
				1702	(s->flags & SLAB_RECLAIM_ACCOUNT) ?
				1703	NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
				1704	-pages);
				1705
				1706	__ClearPageSlabPfmemalloc(page);
				1707	__ClearPageSlab(page);
				1708
				1709	page->mapping = NULL;
				1710	if (current->reclaim_state)
				1711	current->reclaim_state->reclaimed_slab += pages;
				1712	memcg_uncharge_slab(page, order, s);
				1713	__free_pages(page, order);
				1714	}
				1715
				1716	static void rcu_free_slab(struct rcu_head *h)
				1717	{
				1718	struct page *page = container_of(h, struct page, rcu_head);
				1719
				1720	__free_slab(page->slab_cache, page);
				1721	}
				1722
				1723	static void free_slab(struct kmem_cache s, struct page page)
				1724	{
				1725	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
				1726	call_rcu(&page->rcu_head, rcu_free_slab);
				1727	} else
				1728	__free_slab(s, page);
				1729	}
				1730
				1731	static void discard_slab(struct kmem_cache s, struct page page)
				1732	{
				1733	dec_slabs_node(s, page_to_nid(page), page->objects);
				1734	free_slab(s, page);
				1735	}
				1736
				1737	/*
				1738	* Management of partially allocated slabs.
				1739	*/
				1740	static inline void
				1741	__add_partial(struct kmem_cache_node n, struct page page, int tail)
				1742	{
				1743	n->nr_partial++;
				1744	if (tail == DEACTIVATE_TO_TAIL)
				1745	list_add_tail(&page->lru, &n->partial);
				1746	else
				1747	list_add(&page->lru, &n->partial);
				1748	}
				1749
				1750	static inline void add_partial(struct kmem_cache_node *n,
				1751	struct page *page, int tail)
				1752	{
				1753	lockdep_assert_held(&n->list_lock);
				1754	__add_partial(n, page, tail);
				1755	}
				1756
				1757	static inline void remove_partial(struct kmem_cache_node *n,
				1758	struct page *page)
				1759	{
				1760	lockdep_assert_held(&n->list_lock);
				1761	list_del(&page->lru);
				1762	n->nr_partial--;
				1763	}
				1764
				1765	/*
				1766	* Remove slab from the partial list, freeze it and
				1767	* return the pointer to the freelist.
				1768	*
				1769	* Returns a list of objects or NULL if it fails.
				1770	*/
				1771	static inline void acquire_slab(struct kmem_cache s,
				1772	struct kmem_cache_node n, struct page page,
				1773	int mode, int *objects)
				1774	{
				1775	void *freelist;
				1776	unsigned long counters;
				1777	struct page new;
				1778
				1779	lockdep_assert_held(&n->list_lock);
				1780
				1781	/*
				1782	* Zap the freelist and set the frozen bit.
				1783	* The old freelist is the list of objects for the
				1784	* per cpu allocation list.
				1785	*/
				1786	freelist = page->freelist;
				1787	counters = page->counters;
				1788	new.counters = counters;
				1789	*objects = new.objects - new.inuse;
				1790	if (mode) {
				1791	new.inuse = page->objects;
				1792	new.freelist = NULL;
				1793	} else {
				1794	new.freelist = freelist;
				1795	}
				1796
				1797	VM_BUG_ON(new.frozen);
				1798	new.frozen = 1;
				1799
				1800	if (!__cmpxchg_double_slab(s, page,
				1801	freelist, counters,
				1802	new.freelist, new.counters,
				1803	"acquire_slab"))
				1804	return NULL;
				1805
				1806	remove_partial(n, page);
				1807	WARN_ON(!freelist);
				1808	return freelist;
				1809	}
				1810
				1811	static void put_cpu_partial(struct kmem_cache s, struct page page, int drain);
				1812	static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
				1813
				1814	/*
				1815	* Try to allocate a partial slab from a specific node.
				1816	*/
				1817	static void get_partial_node(struct kmem_cache s, struct kmem_cache_node *n,
				1818	struct kmem_cache_cpu *c, gfp_t flags)
				1819	{
				1820	struct page page, page2;
				1821	void *object = NULL;
				1822	unsigned int available = 0;
				1823	int objects;
				1824
				1825	/*
				1826	* Racy check. If we mistakenly see no partial slabs then we
				1827	* just allocate an empty slab. If we mistakenly try to get a
				1828	* partial slab and there is none available then get_partials()
				1829	* will return NULL.
				1830	*/
				1831	if (!n \|\| !n->nr_partial)
				1832	return NULL;
				1833
				1834	spin_lock(&n->list_lock);
				1835	list_for_each_entry_safe(page, page2, &n->partial, lru) {
				1836	void *t;
				1837
				1838	if (!pfmemalloc_match(page, flags))
				1839	continue;
				1840
				1841	t = acquire_slab(s, n, page, object == NULL, &objects);
				1842	if (!t)
				1843	break;
				1844
				1845	available += objects;
				1846	if (!object) {
				1847	c->page = page;
				1848	stat(s, ALLOC_FROM_PARTIAL);
				1849	object = t;
				1850	} else {
				1851	put_cpu_partial(s, page, 0);
				1852	stat(s, CPU_PARTIAL_NODE);
				1853	}
				1854	if (!kmem_cache_has_cpu_partial(s)
				1855	\|\| available > slub_cpu_partial(s) / 2)
				1856	break;
				1857
				1858	}
				1859	spin_unlock(&n->list_lock);
				1860	return object;
				1861	}
				1862
				1863	/*
				1864	* Get a page from somewhere. Search in increasing NUMA distances.
				1865	*/
				1866	static void get_any_partial(struct kmem_cache s, gfp_t flags,
				1867	struct kmem_cache_cpu *c)
				1868	{
				1869	#ifdef CONFIG_NUMA
				1870	struct zonelist *zonelist;
				1871	struct zoneref *z;
				1872	struct zone *zone;
				1873	enum zone_type high_zoneidx = gfp_zone(flags);
				1874	void *object;
				1875	unsigned int cpuset_mems_cookie;
				1876
				1877	/*
				1878	* The defrag ratio allows a configuration of the tradeoffs between
				1879	* inter node defragmentation and node local allocations. A lower
				1880	* defrag_ratio increases the tendency to do local allocations
				1881	* instead of attempting to obtain partial slabs from other nodes.
				1882	*
				1883	* If the defrag_ratio is set to 0 then kmalloc() always
				1884	* returns node local objects. If the ratio is higher then kmalloc()
				1885	* may return off node objects because partial slabs are obtained
				1886	* from other nodes and filled up.
				1887	*
				1888	* If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
				1889	* (which makes defrag_ratio = 1000) then every (well almost)
				1890	* allocation will first attempt to defrag slab caches on other nodes.
				1891	* This means scanning over all nodes to look for partial slabs which
				1892	* may be expensive if we do it every time we are trying to find a slab
				1893	* with available objects.
				1894	*/
				1895	if (!s->remote_node_defrag_ratio \|\|
				1896	get_cycles() % 1024 > s->remote_node_defrag_ratio)
				1897	return NULL;
				1898
				1899	do {
				1900	cpuset_mems_cookie = read_mems_allowed_begin();
				1901	zonelist = node_zonelist(mempolicy_slab_node(), flags);
				1902	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
				1903	struct kmem_cache_node *n;
				1904
				1905	n = get_node(s, zone_to_nid(zone));
				1906
				1907	if (n && cpuset_zone_allowed(zone, flags) &&
				1908	n->nr_partial > s->min_partial) {
				1909	object = get_partial_node(s, n, c, flags);
				1910	if (object) {
				1911	/*
				1912	* Don't check read_mems_allowed_retry()
				1913	* here - if mems_allowed was updated in
				1914	* parallel, that was a harmless race
				1915	* between allocation and the cpuset
				1916	* update
				1917	*/
				1918	return object;
				1919	}
				1920	}
				1921	}
				1922	} while (read_mems_allowed_retry(cpuset_mems_cookie));
				1923	#endif
				1924	return NULL;
				1925	}
				1926
				1927	/*
				1928	* Get a partial page, lock it and return it.
				1929	*/
				1930	static void get_partial(struct kmem_cache s, gfp_t flags, int node,
				1931	struct kmem_cache_cpu *c)
				1932	{
				1933	void *object;
				1934	int searchnode = node;
				1935
				1936	if (node == NUMA_NO_NODE)
				1937	searchnode = numa_mem_id();
				1938	else if (!node_present_pages(node))
				1939	searchnode = node_to_mem_node(node);
				1940
				1941	object = get_partial_node(s, get_node(s, searchnode), c, flags);
				1942	if (object \|\| node != NUMA_NO_NODE)
				1943	return object;
				1944
				1945	return get_any_partial(s, flags, c);
				1946	}
				1947
				#ifndef CONFIG_PREEMPT
				/*
				 * No preemption supported therefore also no need to check for
				 * different cpus.
				 */
				#define TID_STEP 1
				#else
				/*
				 * Calculate the next globally unique transaction for disambiguation
				 * during cmpxchg. The transactions start with the cpu number and are then
				 * incremented by CONFIG_NR_CPUS.
				 */
				#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
				#endif

				/* Advance a transaction id by one step. */
				static inline unsigned long next_tid(unsigned long tid)
				{
					tid += TID_STEP;
					return tid;
				}

				/* Extract the cpu part of a transaction id (tid modulo TID_STEP). */
				static inline unsigned int tid_to_cpu(unsigned long tid)
				{
					unsigned long cpu_part = tid % TID_STEP;

					return cpu_part;
				}

				/* Extract the event counter part of a transaction id (tid / TID_STEP). */
				static inline unsigned long tid_to_event(unsigned long tid)
				{
					unsigned long event = tid / TID_STEP;

					return event;
				}

				/* Initial transaction id of a cpu: the cpu number itself. */
				static inline unsigned int init_tid(int cpu)
				{
					unsigned int first_tid = cpu;

					return first_tid;
				}
				1982
				1983	static inline void note_cmpxchg_failure(const char *n,
				1984	const struct kmem_cache *s, unsigned long tid)
				1985	{
				1986	#ifdef SLUB_DEBUG_CMPXCHG
				1987	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
				1988
				1989	pr_info("%s %s: cmpxchg redo ", n, s->name);
				1990
				1991	#ifdef CONFIG_PREEMPT
				1992	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
				1993	pr_warn("due to cpu change %d -> %d\n",
				1994	tid_to_cpu(tid), tid_to_cpu(actual_tid));
				1995	else
				1996	#endif
				1997	if (tid_to_event(tid) != tid_to_event(actual_tid))
				1998	pr_warn("due to cpu running other code. Event %ld->%ld\n",
				1999	tid_to_event(tid), tid_to_event(actual_tid));
				2000	else
				2001	pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
				2002	actual_tid, tid, next_tid(tid));
				2003	#endif
				2004	stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
				2005	}
				2006
				2007	static void init_kmem_cache_cpus(struct kmem_cache *s)
				2008	{
				2009	int cpu;
				2010
				2011	for_each_possible_cpu(cpu) {
				2012	#ifdef CONFIG_MTK_MM_DEBUG
				2013	pr_info("s=%s, pcpuptr=%p\n", s->name,
				2014	per_cpu_ptr(s->cpu_slab, cpu));
				2015	#endif
				2016	per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
				2017	}
				2018	}
				2019
				2020	/*
				2021	* Remove the cpu slab
				2022	*/
				2023	static void deactivate_slab(struct kmem_cache s, struct page page,
				2024	void freelist, struct kmem_cache_cpu c)
				2025	{
				2026	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
				2027	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
				2028	int lock = 0;
				2029	enum slab_modes l = M_NONE, m = M_NONE;
				2030	void *nextfree;
				2031	int tail = DEACTIVATE_TO_HEAD;
				2032	struct page new;
				2033	struct page old;
				2034
				2035	if (page->freelist) {
				2036	stat(s, DEACTIVATE_REMOTE_FREES);
				2037	tail = DEACTIVATE_TO_TAIL;
				2038	}
				2039
				2040	/*
				2041	* Stage one: Free all available per cpu objects back
				2042	* to the page freelist while it is still frozen. Leave the
				2043	* last one.
				2044	*
				2045	* There is no need to take the list->lock because the page
				2046	* is still frozen.
				2047	*/
				2048	while (freelist && (nextfree = get_freepointer(s, freelist))) {
				2049	void *prior;
				2050	unsigned long counters;
				2051
				2052	do {
				2053	prior = page->freelist;
				2054	counters = page->counters;
				2055	set_freepointer(s, freelist, prior);
				2056	new.counters = counters;
				2057	new.inuse--;
				2058	VM_BUG_ON(!new.frozen);
				2059
				2060	} while (!__cmpxchg_double_slab(s, page,
				2061	prior, counters,
				2062	freelist, new.counters,
				2063	"drain percpu freelist"));
				2064
				2065	freelist = nextfree;
				2066	}
				2067
				2068	/*
				2069	* Stage two: Ensure that the page is unfrozen while the
				2070	* list presence reflects the actual number of objects
				2071	* during unfreeze.
				2072	*
				2073	* We setup the list membership and then perform a cmpxchg
				2074	* with the count. If there is a mismatch then the page
				2075	* is not unfrozen but the page is on the wrong list.
				2076	*
				2077	* Then we restart the process which may have to remove
				2078	* the page from the list that we just put it on again
				2079	* because the number of objects in the slab may have
				2080	* changed.
				2081	*/
				2082	redo:
				2083
				2084	old.freelist = page->freelist;
				2085	old.counters = page->counters;
				2086	VM_BUG_ON(!old.frozen);
				2087
				2088	/* Determine target state of the slab */
				2089	new.counters = old.counters;
				2090	if (freelist) {
				2091	new.inuse--;
				2092	set_freepointer(s, freelist, old.freelist);
				2093	new.freelist = freelist;
				2094	} else
				2095	new.freelist = old.freelist;
				2096
				2097	new.frozen = 0;
				2098
				2099	if (!new.inuse && n->nr_partial >= s->min_partial)
				2100	m = M_FREE;
				2101	else if (new.freelist) {
				2102	m = M_PARTIAL;
				2103	if (!lock) {
				2104	lock = 1;
				2105	/*
				2106	* Taking the spinlock removes the possiblity
				2107	* that acquire_slab() will see a slab page that
				2108	* is frozen
				2109	*/
				2110	spin_lock(&n->list_lock);
				2111	}
				2112	} else {
				2113	m = M_FULL;
				2114	if (kmem_cache_debug(s) && !lock) {
				2115	lock = 1;
				2116	/*
				2117	* This also ensures that the scanning of full
				2118	* slabs from diagnostic functions will not see
				2119	* any frozen slabs.
				2120	*/
				2121	spin_lock(&n->list_lock);
				2122	}
				2123	}
				2124
				2125	if (l != m) {
				2126
				2127	if (l == M_PARTIAL)
				2128
				2129	remove_partial(n, page);
				2130
				2131	else if (l == M_FULL)
				2132
				2133	remove_full(s, n, page);
				2134
				2135	if (m == M_PARTIAL) {
				2136
				2137	add_partial(n, page, tail);
				2138	stat(s, tail);
				2139
				2140	} else if (m == M_FULL) {
				2141
				2142	stat(s, DEACTIVATE_FULL);
				2143	add_full(s, n, page);
				2144
				2145	}
				2146	}
				2147
				2148	l = m;
				2149	if (!__cmpxchg_double_slab(s, page,
				2150	old.freelist, old.counters,
				2151	new.freelist, new.counters,
				2152	"unfreezing slab"))
				2153	goto redo;
				2154
				2155	if (lock)
				2156	spin_unlock(&n->list_lock);
				2157
				2158	if (m == M_FREE) {
				2159	stat(s, DEACTIVATE_EMPTY);
				2160	discard_slab(s, page);
				2161	stat(s, FREE_SLAB);
				2162	}
				2163
				2164	c->page = NULL;
				2165	c->freelist = NULL;
				2166	}
				2167
				/*
				 * Unfreeze all the cpu partial slabs.
				 *
				 * Each page on c->partial is unfrozen and either added to the tail of its
				 * node's partial list or, when it has no objects in use and the node
				 * already holds at least s->min_partial partial slabs, queued and
				 * discarded after the list lock has been dropped.
				 *
				 * This function must be called with interrupts disabled
				 * for the cpu using c (or some other guarantee must be there
				 * to guarantee no concurrent accesses).
				 *
				 * The body is compiled out unless CONFIG_SLUB_CPU_PARTIAL is set.
				 */
				static void unfreeze_partials(struct kmem_cache *s,
						struct kmem_cache_cpu *c)
				{
				#ifdef CONFIG_SLUB_CPU_PARTIAL
					struct kmem_cache_node *n = NULL, *n2 = NULL;
					struct page *page, *discard_page = NULL;

					while ((page = c->partial)) {
						struct page new;
						struct page old;

						c->partial = page->next;

						/* Only swap list locks when the page lives on another node. */
						n2 = get_node(s, page_to_nid(page));
						if (n != n2) {
							if (n)
								spin_unlock(&n->list_lock);

							n = n2;
							spin_lock(&n->list_lock);
						}

						do {

							old.freelist = page->freelist;
							old.counters = page->counters;
							VM_BUG_ON(!old.frozen);

							new.counters = old.counters;
							new.freelist = old.freelist;

							new.frozen = 0;

						} while (!__cmpxchg_double_slab(s, page,
								old.freelist, old.counters,
								new.freelist, new.counters,
								"unfreezing slab"));

						if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
							page->next = discard_page;
							discard_page = page;
						} else {
							add_partial(n, page, DEACTIVATE_TO_TAIL);
							stat(s, FREE_ADD_PARTIAL);
						}
					}

					if (n)
						spin_unlock(&n->list_lock);

					/* Discard the queued empty slabs without holding any list lock. */
					while (discard_page) {
						page = discard_page;
						discard_page = discard_page->next;

						stat(s, DEACTIVATE_EMPTY);
						discard_slab(s, page);
						stat(s, FREE_SLAB);
					}
				#endif
				}
				2235
				/*
				 * Put a page that was just frozen (in __slab_free) into a partial page
				 * slot if available.
				 *
				 * If we did not find a slot then simply move all the partials to the
				 * per node partial list.
				 *
				 * @s:     cache the page belongs to
				 * @page:  page to push onto this cpu's partial list
				 * @drain: when non-zero, an over-full per cpu partial list (more than
				 *         s->cpu_partial free objects) is first moved to the node lists
				 *
				 * The body is compiled out unless CONFIG_SLUB_CPU_PARTIAL is set.
				 */
				static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
				{
				#ifdef CONFIG_SLUB_CPU_PARTIAL
					struct page *oldpage;
					int pages;
					int pobjects;

					preempt_disable();
					do {
						pages = 0;
						pobjects = 0;
						oldpage = this_cpu_read(s->cpu_slab->partial);

						if (oldpage) {
							pobjects = oldpage->pobjects;
							pages = oldpage->pages;
							if (drain && pobjects > s->cpu_partial) {
								unsigned long flags;
								/*
								 * partial array is full. Move the existing
								 * set to the per node partial list.
								 */
								local_irq_save(flags);
								unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
								local_irq_restore(flags);
								oldpage = NULL;
								pobjects = 0;
								pages = 0;
								stat(s, CPU_PARTIAL_DRAIN);
							}
						}

						/* The new head carries the running totals for the whole list. */
						pages++;
						pobjects += page->objects - page->inuse;

						page->pages = pages;
						page->pobjects = pobjects;
						page->next = oldpage;

					} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
										!= oldpage);
					/* No per cpu partial budget: flush what was just queued. */
					if (unlikely(!s->cpu_partial)) {
						unsigned long flags;

						local_irq_save(flags);
						unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
						local_irq_restore(flags);
					}
					preempt_enable();
				#endif
				}
				2294
				2295	static inline void flush_slab(struct kmem_cache s, struct kmem_cache_cpu c)
				2296	{
				2297	stat(s, CPUSLAB_FLUSH);
				2298	deactivate_slab(s, c->page, c->freelist, c);
				2299
				2300	c->tid = next_tid(c->tid);
				2301	}
				2302
				2303	/*
				2304	* Flush cpu slab.
				2305	*
				2306	* Called from IPI handler with interrupts disabled.
				2307	*/
				2308	static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
				2309	{
				2310	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
				2311
				2312	if (likely(c)) {
				2313	if (c->page)
				2314	flush_slab(s, c);
				2315
				2316	unfreeze_partials(s, c);
				2317	}
				2318	}
				2319
				/*
				 * Callback form of __flush_cpu_slab(): @d is the kmem_cache to flush on
				 * the cpu this function is running on.
				 */
				static void flush_cpu_slab(void *d)
				{
					struct kmem_cache *cache = d;
					int this_cpu = smp_processor_id();

					__flush_cpu_slab(cache, this_cpu);
				}
				2326
				2327	static bool has_cpu_slab(int cpu, void *info)
				2328	{
				2329	struct kmem_cache *s = info;
				2330	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
				2331
				2332	return c->page \|\| slub_percpu_partial(c);
				2333	}
				2334
				2335	static void flush_all(struct kmem_cache *s)
				2336	{
				2337	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
				2338	}
				2339
				2340	/*
				2341	* Use the cpu notifier to insure that the cpu slabs are flushed when
				2342	* necessary.
				2343	*/
				2344	static int slub_cpu_dead(unsigned int cpu)
				2345	{
				2346	struct kmem_cache *s;
				2347	unsigned long flags;
				2348
				2349	mutex_lock(&slab_mutex);
				2350	list_for_each_entry(s, &slab_caches, list) {
				2351	local_irq_save(flags);
				2352	__flush_cpu_slab(s, cpu);
				2353	local_irq_restore(flags);
				2354	}
				2355	mutex_unlock(&slab_mutex);
				2356	return 0;
				2357	}
				2358
				/*
				 * Check if the objects in a per cpu structure fit numa
				 * locality expectations.
				 *
				 * With CONFIG_NUMA this returns 0 when @page is NULL or when a specific
				 * @node was requested and @page lives on a different one; otherwise, and
				 * always without CONFIG_NUMA, it returns 1.
				 */
				static inline int node_match(struct page *page, int node)
				{
				#ifdef CONFIG_NUMA
					if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
						return 0;
				#endif
					return 1;
				}
				2371
				2372	#ifdef CONFIG_SLUB_DEBUG
				2373	static int count_free(struct page *page)
				2374	{
				2375	return page->objects - page->inuse;
				2376	}
				2377
				2378	static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
				2379	{
				2380	return atomic_long_read(&n->total_objects);
				2381	}
				2382	#endif /* CONFIG_SLUB_DEBUG */
				2383
				#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
				/*
				 * Sum @get_count(page) over every page on the node's partial list.
				 *
				 * The list is walked under n->list_lock with interrupts saved and
				 * disabled, so @get_count runs with that lock held.
				 */
				static unsigned long count_partial(struct kmem_cache_node *n,
								int (*get_count)(struct page *))
				{
					unsigned long flags;
					unsigned long x = 0;
					struct page *page;

					spin_lock_irqsave(&n->list_lock, flags);
					list_for_each_entry(page, &n->partial, lru)
						x += get_count(page);
					spin_unlock_irqrestore(&n->list_lock, flags);
					return x;
				}
				#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
				2399
				2400	static noinline void
				2401	slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
				2402	{
				2403	#ifdef CONFIG_SLUB_DEBUG
				2404	static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				2405	DEFAULT_RATELIMIT_BURST);
				2406	int node;
				2407	struct kmem_cache_node *n;
				2408
				2409	if ((gfpflags & __GFP_NOWARN) \|\| !__ratelimit(&slub_oom_rs))
				2410	return;
				2411
				2412	pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
				2413	nid, gfpflags, &gfpflags);
				2414	pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
				2415	s->name, s->object_size, s->size, oo_order(s->oo),
				2416	oo_order(s->min));
				2417
				2418	if (oo_order(s->min) > get_order(s->object_size))
				2419	pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
				2420	s->name);
				2421
				2422	for_each_kmem_cache_node(s, node, n) {
				2423	unsigned long nr_slabs;
				2424	unsigned long nr_objs;
				2425	unsigned long nr_free;
				2426
				2427	nr_free = count_partial(n, count_free);
				2428	nr_slabs = node_nr_slabs(n);
				2429	nr_objs = node_nr_objs(n);
				2430
				2431	pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
				2432	node, nr_slabs, nr_objs, nr_free);
				2433	}
				2434	#endif
				2435	}
				2436
				2437	static inline void new_slab_objects(struct kmem_cache s, gfp_t flags,
				2438	int node, struct kmem_cache_cpu **pc)
				2439	{
				2440	void *freelist;
				2441	struct kmem_cache_cpu c = pc;
				2442	struct page *page;
				2443
				2444	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
				2445
				2446	freelist = get_partial(s, flags, node, c);
				2447
				2448	if (freelist)
				2449	return freelist;
				2450
				2451	page = new_slab(s, flags, node);
				2452	if (page) {
				2453	c = raw_cpu_ptr(s->cpu_slab);
				2454	if (c->page)
				2455	flush_slab(s, c);
				2456
				2457	/*
				2458	* No other reference to the page yet so we can
				2459	* muck around with it freely without cmpxchg
				2460	*/
				2461	freelist = page->freelist;
				2462	page->freelist = NULL;
				2463
				2464	stat(s, ALLOC_SLAB);
				2465	c->page = page;
				2466	*pc = c;
				2467	} else
				2468	freelist = NULL;
				2469
				2470	return freelist;
				2471	}
				2472
				2473	static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
				2474	{
				2475	if (unlikely(PageSlabPfmemalloc(page)))
				2476	return gfp_pfmemalloc_allowed(gfpflags);
				2477
				2478	return true;
				2479	}
				2480
				2481	/*
				2482	* Check the page->freelist of a page and either transfer the freelist to the
				2483	* per cpu freelist or deactivate the page.
				2484	*
				2485	* The page is still frozen if the return value is not NULL.
				2486	*
				2487	* If this function returns NULL then the page has been unfrozen.
				2488	*
				2489	* This function must be called with interrupt disabled.
				2490	*/
				2491	static inline void get_freelist(struct kmem_cache s, struct page *page)
				2492	{
				2493	struct page new;
				2494	unsigned long counters;
				2495	void *freelist;
				2496
				2497	do {
				2498	freelist = page->freelist;
				2499	counters = page->counters;
				2500
				2501	new.counters = counters;
				2502	VM_BUG_ON(!new.frozen);
				2503
				2504	new.inuse = page->objects;
				2505	new.frozen = freelist != NULL;
				2506
				2507	} while (!__cmpxchg_double_slab(s, page,
				2508	freelist, counters,
				2509	NULL, new.counters,
				2510	"get_freelist"));
				2511
				2512	return freelist;
				2513	}
				2514
				2515	/*
				2516	* Slow path. The lockless freelist is empty or we need to perform
				2517	* debugging duties.
				2518	*
				2519	* Processing is still very fast if new objects have been freed to the
				2520	* regular freelist. In that case we simply take over the regular freelist
				2521	* as the lockless freelist and zap the regular freelist.
				2522	*
				2523	* If that is not working then we fall back to the partial lists. We take the
				2524	* first element of the freelist as the object to allocate now and move the
				2525	* rest of the freelist to the lockless freelist.
				2526	*
				2527	* And if we were unable to get a new slab from the partial slab lists then
				2528	* we need to allocate a new slab. This is the slowest path since it involves
				2529	* a call to the page allocator and the setup of a new slab.
				2530	*
				2531	* Version of __slab_alloc to use when we know that interrupts are
				2532	* already disabled (which is the case for bulk allocation).
				2533	*/
				2534	static void ___slab_alloc(struct kmem_cache s, gfp_t gfpflags, int node,
				2535	unsigned long addr, struct kmem_cache_cpu *c)
				2536	{
				2537	void *freelist;
				2538	struct page *page;
				2539
				2540	page = c->page;
				2541	if (!page)
				2542	goto new_slab;
				2543	redo:
				2544
				2545	if (unlikely(!node_match(page, node))) {
				2546	int searchnode = node;
				2547
				2548	if (node != NUMA_NO_NODE && !node_present_pages(node))
				2549	searchnode = node_to_mem_node(node);
				2550
				2551	if (unlikely(!node_match(page, searchnode))) {
				2552	stat(s, ALLOC_NODE_MISMATCH);
				2553	deactivate_slab(s, page, c->freelist, c);
				2554	goto new_slab;
				2555	}
				2556	}
				2557
				2558	/*
				2559	* By rights, we should be searching for a slab page that was
				2560	* PFMEMALLOC but right now, we are losing the pfmemalloc
				2561	* information when the page leaves the per-cpu allocator
				2562	*/
				2563	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
				2564	deactivate_slab(s, page, c->freelist, c);
				2565	goto new_slab;
				2566	}
				2567
				2568	/* must check again c->freelist in case of cpu migration or IRQ */
				2569	freelist = c->freelist;
				2570	if (freelist)
				2571	goto load_freelist;
				2572
				2573	freelist = get_freelist(s, page);
				2574
				2575	if (!freelist) {
				2576	c->page = NULL;
				2577	stat(s, DEACTIVATE_BYPASS);
				2578	goto new_slab;
				2579	}
				2580
				2581	stat(s, ALLOC_REFILL);
				2582
				2583	load_freelist:
				2584	/*
				2585	* freelist is pointing to the list of objects to be used.
				2586	* page is pointing to the page from which the objects are obtained.
				2587	* That page must be frozen for per cpu allocations to work.
				2588	*/
				2589	VM_BUG_ON(!c->page->frozen);
				2590	c->freelist = get_freepointer(s, freelist);
				2591	c->tid = next_tid(c->tid);
				2592	return freelist;
				2593
				2594	new_slab:
				2595
				2596	if (slub_percpu_partial(c)) {
				2597	page = c->page = slub_percpu_partial(c);
				2598	slub_set_percpu_partial(c, page);
				2599	stat(s, CPU_PARTIAL_ALLOC);
				2600	goto redo;
				2601	}
				2602
				2603	freelist = new_slab_objects(s, gfpflags, node, &c);
				2604
				2605	if (unlikely(!freelist)) {
				2606	slab_out_of_memory(s, gfpflags, node);
				2607	return NULL;
				2608	}
				2609
				2610	page = c->page;
				2611	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
				2612	goto load_freelist;
				2613
				2614	/* Only entered in the debug case */
				2615	if (kmem_cache_debug(s) &&
				2616	!alloc_debug_processing(s, page, freelist, addr))
				2617	goto new_slab; /* Slab failed checks. Next slab needed */
				2618
				2619	deactivate_slab(s, page, get_freepointer(s, freelist), c);
				2620	return freelist;
				2621	}
				2622
				2623	/*
				2624	* Another one that disabled interrupt and compensates for possible
				2625	* cpu changes by refetching the per cpu area pointer.
				2626	*/
				2627	static void __slab_alloc(struct kmem_cache s, gfp_t gfpflags, int node,
				2628	unsigned long addr, struct kmem_cache_cpu *c)
				2629	{
				2630	void *p;
				2631	unsigned long flags;
				2632
				2633	local_irq_save(flags);
				2634	#ifdef CONFIG_PREEMPT
				2635	/*
				2636	* We may have been preempted and rescheduled on a different
				2637	* cpu before disabling interrupts. Need to reload cpu area
				2638	* pointer.
				2639	*/
				2640	c = this_cpu_ptr(s->cpu_slab);
				2641	#endif
				2642
				2643	p = ___slab_alloc(s, gfpflags, node, addr, c);
				2644	local_irq_restore(flags);
				2645	return p;
				2646	}
				2647
				2648	/*
				2649	* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
				2650	* have the fastpath folded into their functions. So no function call
				2651	* overhead for requests that can be satisfied on the fastpath.
				2652	*
				2653	* The fastpath works by first checking if the lockless freelist can be used.
				2654	* If not then __slab_alloc is called for slow processing.
				2655	*
				2656	* Otherwise we can simply pick the next object from the lockless free list.
				2657	*/
				2658	static __always_inline void slab_alloc_node(struct kmem_cache s,
				2659	gfp_t gfpflags, int node, unsigned long addr)
				2660	{
				2661	void *object;
				2662	struct kmem_cache_cpu *c;
				2663	struct page *page;
				2664	unsigned long tid;
				2665
				2666	s = slab_pre_alloc_hook(s, gfpflags);
				2667	if (!s)
				2668	return NULL;
				2669	redo:
				2670	/*
				2671	* Must read kmem_cache cpu data via this cpu ptr. Preemption is
				2672	* enabled. We may switch back and forth between cpus while
				2673	* reading from one cpu area. That does not matter as long
				2674	* as we end up on the original cpu again when doing the cmpxchg.
				2675	*
				2676	* We should guarantee that tid and kmem_cache are retrieved on
				2677	* the same cpu. It could be different if CONFIG_PREEMPT so we need
				2678	* to check if it is matched or not.
				2679	*/
				2680	do {
				2681	tid = this_cpu_read(s->cpu_slab->tid);
				2682	c = raw_cpu_ptr(s->cpu_slab);
				2683	} while (IS_ENABLED(CONFIG_PREEMPT) &&
				2684	unlikely(tid != READ_ONCE(c->tid)));
				2685
				2686	/*
				2687	* Irqless object alloc/free algorithm used here depends on sequence
				2688	* of fetching cpu_slab's data. tid should be fetched before anything
				2689	* on c to guarantee that object and page associated with previous tid
				2690	* won't be used with current tid. If we fetch tid first, object and
				2691	* page could be one associated with next tid and our alloc/free
				2692	* request will be failed. In this case, we will retry. So, no problem.
				2693	*/
				2694	barrier();
				2695
				2696	/*
				2697	* The transaction ids are globally unique per cpu and per operation on
				2698	* a per cpu queue. Thus they can be guarantee that the cmpxchg_double
				2699	* occurs on the right processor and that there was no operation on the
				2700	* linked list in between.
				2701	*/
				2702
				2703	object = c->freelist;
				2704	page = c->page;
				2705	if (unlikely(!object \|\| !node_match(page, node))) {
				2706	object = __slab_alloc(s, gfpflags, node, addr, c);
				2707	stat(s, ALLOC_SLOWPATH);
				2708	} else {
				2709	void *next_object = get_freepointer_safe(s, object);
				2710
				2711	/*
				2712	* The cmpxchg will only match if there was no additional
				2713	* operation and if we are on the right processor.
				2714	*
				2715	* The cmpxchg does the following atomically (without lock
				2716	* semantics!)
				2717	* 1. Relocate first pointer to the current per cpu area.
				2718	* 2. Verify that tid and freelist have not been changed
				2719	* 3. If they were not changed replace tid and freelist
				2720	*
				2721	* Since this is without lock semantics the protection is only
				2722	* against code executing on this cpu not from access by
				2723	* other cpus.
				2724	*/
				2725	if (unlikely(!this_cpu_cmpxchg_double(
				2726	s->cpu_slab->freelist, s->cpu_slab->tid,
				2727	object, tid,
				2728	next_object, next_tid(tid)))) {
				2729
				2730	note_cmpxchg_failure("slab_alloc", s, tid);
				2731	goto redo;
				2732	}
				2733	prefetch_freepointer(s, next_object);
				2734	stat(s, ALLOC_FASTPATH);
				2735	}
				2736	/*
				2737	* If the object has been wiped upon free, make sure it's fully
				2738	* initialized by zeroing out freelist pointer.
				2739	*/
				2740	if (unlikely(slab_want_init_on_free(s)) && object)
				2741	memset(object + s->offset, 0, sizeof(void *));
				2742
				2743	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
				2744	memset(object, 0, s->object_size);
				2745
				2746	slab_post_alloc_hook(s, gfpflags, 1, &object);
				2747
				2748	return object;
				2749	}
				2750
				2751	static __always_inline void slab_alloc(struct kmem_cache s,
				2752	gfp_t gfpflags, unsigned long addr)
				2753	{
				2754	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
				2755	}
				2756
				2757	void kmem_cache_alloc(struct kmem_cache s, gfp_t gfpflags)
				2758	{
				2759	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
				2760
				2761	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
				2762	s->size, gfpflags);
				2763
				2764	return ret;
				2765	}
				2766	EXPORT_SYMBOL(kmem_cache_alloc);
				2767
				2768	#ifdef CONFIG_TRACING
				2769	void kmem_cache_alloc_trace(struct kmem_cache s, gfp_t gfpflags, size_t size)
				2770	{
				2771	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
				2772	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
				2773	ret = kasan_kmalloc(s, ret, size, gfpflags);
				2774	return ret;
				2775	}
				2776	EXPORT_SYMBOL(kmem_cache_alloc_trace);
				2777	#endif
				2778
				2779	#ifdef CONFIG_NUMA
				2780	void kmem_cache_alloc_node(struct kmem_cache s, gfp_t gfpflags, int node)
				2781	{
				2782	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
				2783
				2784	trace_kmem_cache_alloc_node(_RET_IP_, ret,
				2785	s->object_size, s->size, gfpflags, node);
				2786
				2787	return ret;
				2788	}
				2789	EXPORT_SYMBOL(kmem_cache_alloc_node);
				2790
				2791	#ifdef CONFIG_TRACING
				2792	void kmem_cache_alloc_node_trace(struct kmem_cache s,
				2793	gfp_t gfpflags,
				2794	int node, size_t size)
				2795	{
				2796	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
				2797
				2798	trace_kmalloc_node(_RET_IP_, ret,
				2799	size, s->size, gfpflags, node);
				2800
				2801	ret = kasan_kmalloc(s, ret, size, gfpflags);
				2802	return ret;
				2803	}
				2804	EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
				2805	#endif
				2806	#endif
				2807
				2808	/*
				2809	* Slow path handling. This may still be called frequently since objects
				2810	* have a longer lifetime than the cpu slabs in most processing loads.
				2811	*
				2812	* So we still attempt to reduce cache line usage. Just take the slab
				2813	* lock and free the item. If there is no additional partial page
				2814	* handling required then we can return immediately.
				2815	*/
				2816	static void __slab_free(struct kmem_cache s, struct page page,
				2817	void head, void tail, int cnt,
				2818	unsigned long addr)
				2819
				2820	{
				2821	void *prior;
				2822	int was_frozen;
				2823	struct page new;
				2824	unsigned long counters;
				2825	struct kmem_cache_node *n = NULL;
				2826	unsigned long uninitialized_var(flags);
				2827
				2828	stat(s, FREE_SLOWPATH);
				2829
				2830	if (kmem_cache_debug(s) &&
				2831	!free_debug_processing(s, page, head, tail, cnt, addr))
				2832	return;
				2833
				2834	do {
				2835	if (unlikely(n)) {
				2836	spin_unlock_irqrestore(&n->list_lock, flags);
				2837	n = NULL;
				2838	}
				2839	prior = page->freelist;
				2840	counters = page->counters;
				2841	set_freepointer(s, tail, prior);
				2842	new.counters = counters;
				2843	was_frozen = new.frozen;
				2844	new.inuse -= cnt;
				2845	if ((!new.inuse \|\| !prior) && !was_frozen) {
				2846
				2847	if (kmem_cache_has_cpu_partial(s) && !prior) {
				2848
				2849	/*
				2850	* Slab was on no list before and will be
				2851	* partially empty
				2852	* We can defer the list move and instead
				2853	* freeze it.
				2854	*/
				2855	new.frozen = 1;
				2856
				2857	} else { /* Needs to be taken off a list */
				2858
				2859	n = get_node(s, page_to_nid(page));
				2860	/*
				2861	* Speculatively acquire the list_lock.
				2862	* If the cmpxchg does not succeed then we may
				2863	* drop the list_lock without any processing.
				2864	*
				2865	* Otherwise the list_lock will synchronize with
				2866	* other processors updating the list of slabs.
				2867	*/
				2868	spin_lock_irqsave(&n->list_lock, flags);
				2869
				2870	}
				2871	}
				2872
				2873	} while (!cmpxchg_double_slab(s, page,
				2874	prior, counters,
				2875	head, new.counters,
				2876	"__slab_free"));
				2877
				2878	if (likely(!n)) {
				2879
				2880	/*
				2881	* If we just froze the page then put it onto the
				2882	* per cpu partial list.
				2883	*/
				2884	if (new.frozen && !was_frozen) {
				2885	put_cpu_partial(s, page, 1);
				2886	stat(s, CPU_PARTIAL_FREE);
				2887	}
				2888	/*
				2889	* The list lock was not taken therefore no list
				2890	* activity can be necessary.
				2891	*/
				2892	if (was_frozen)
				2893	stat(s, FREE_FROZEN);
				2894	return;
				2895	}
				2896
				2897	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
				2898	goto slab_empty;
				2899
				2900	/*
				2901	* Objects left in the slab. If it was not on the partial list before
				2902	* then add it.
				2903	*/
				2904	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
				2905	if (kmem_cache_debug(s))
				2906	remove_full(s, n, page);
				2907	add_partial(n, page, DEACTIVATE_TO_TAIL);
				2908	stat(s, FREE_ADD_PARTIAL);
				2909	}
				2910	spin_unlock_irqrestore(&n->list_lock, flags);
				2911	return;
				2912
				2913	slab_empty:
				2914	if (prior) {
				2915	/*
				2916	* Slab on the partial list.
				2917	*/
				2918	remove_partial(n, page);
				2919	stat(s, FREE_REMOVE_PARTIAL);
				2920	} else {
				2921	/* Slab must be on the full list */
				2922	remove_full(s, n, page);
				2923	}
				2924
				2925	spin_unlock_irqrestore(&n->list_lock, flags);
				2926	stat(s, FREE_SLAB);
				2927	discard_slab(s, page);
				2928	}
				2929
				2930	/*
				2931	* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
				2932	* can perform fastpath freeing without additional function calls.
				2933	*
				2934	* The fastpath is only possible if we are freeing to the current cpu slab
				2935	* of this processor. This typically the case if we have just allocated
				2936	* the item before.
				2937	*
				2938	* If fastpath is not possible then fall back to __slab_free where we deal
				2939	* with all sorts of special processing.
				2940	*
				2941	* Bulk free of a freelist with several objects (all pointing to the
				2942	* same page) possible by specifying head and tail ptr, plus objects
				2943	* count (cnt). Bulk free indicated by tail pointer being set.
				2944	*/
				2945	static __always_inline void do_slab_free(struct kmem_cache *s,
				2946	struct page page, void head, void *tail,
				2947	int cnt, unsigned long addr)
				2948	{
				2949	void *tail_obj = tail ? : head;
				2950	struct kmem_cache_cpu *c;
				2951	unsigned long tid;
				2952	redo:
				2953	/*
				2954	* Determine the currently cpus per cpu slab.
				2955	* The cpu may change afterward. However that does not matter since
				2956	* data is retrieved via this pointer. If we are on the same cpu
				2957	* during the cmpxchg then the free will succeed.
				2958	*/
				2959	do {
				2960	tid = this_cpu_read(s->cpu_slab->tid);
				2961	c = raw_cpu_ptr(s->cpu_slab);
				2962	} while (IS_ENABLED(CONFIG_PREEMPT) &&
				2963	unlikely(tid != READ_ONCE(c->tid)));
				2964
				2965	/* Same with comment on barrier() in slab_alloc_node() */
				2966	barrier();
				2967
				2968	if (likely(page == c->page)) {
				2969	set_freepointer(s, tail_obj, c->freelist);
				2970
				2971	if (unlikely(!this_cpu_cmpxchg_double(
				2972	s->cpu_slab->freelist, s->cpu_slab->tid,
				2973	c->freelist, tid,
				2974	head, next_tid(tid)))) {
				2975
				2976	note_cmpxchg_failure("slab_free", s, tid);
				2977	goto redo;
				2978	}
				2979	stat(s, FREE_FASTPATH);
				2980	} else
				2981	__slab_free(s, page, head, tail_obj, cnt, addr);
				2982
				2983	}
				2984
				2985	static __always_inline void slab_free(struct kmem_cache s, struct page page,
				2986	void head, void tail, int cnt,
				2987	unsigned long addr)
				2988	{
				2989	/*
				2990	* With KASAN enabled slab_free_freelist_hook modifies the freelist
				2991	* to remove objects, whose reuse must be delayed.
				2992	*/
				2993	if (slab_free_freelist_hook(s, &head, &tail))
				2994	do_slab_free(s, page, head, tail, cnt, addr);
				2995	}
				2996
				#ifdef CONFIG_KASAN_GENERIC
				/*
				 * ___cache_free - free a single object @x of @cache without running the
				 * free hooks: it calls do_slab_free() directly instead of slab_free().
				 * Only built for generic KASAN.
				 *
				 * @addr is the caller address passed on for tracking.
				 */
				void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
				{
					do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
				}
				#endif
				3003
				3004	void kmem_cache_free(struct kmem_cache s, void x)
				3005	{
				3006	s = cache_from_obj(s, x);
				3007	if (!s)
				3008	return;
				3009	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
				3010	trace_kmem_cache_free(_RET_IP_, x);
				3011	}
				3012	EXPORT_SYMBOL(kmem_cache_free);
				3013
				/*
				 * A private chain of objects from one slab page, assembled by
				 * build_detached_freelist() and handed to slab_free() in one go.
				 */
				struct detached_freelist {
					struct page *page;	/* page all chained objects live on; NULL if none */
					void *tail;		/* last object of the chain (free pointer is NULL) */
					void *freelist;		/* first object of the chain */
					int cnt;		/* number of objects on the chain */
					struct kmem_cache *s;	/* cache the chained objects are freed to */
				};
				3021
				3022	/*
				3023	* This function progressively scans the array with free objects (with
				3024	* a limited look ahead) and extract objects belonging to the same
				3025	* page. It builds a detached freelist directly within the given
				3026	* page/objects. This can happen without any need for
				3027	* synchronization, because the objects are owned by running process.
				3028	* The freelist is build up as a single linked list in the objects.
				3029	* The idea is, that this detached freelist can then be bulk
				3030	* transferred to the real freelist(s), but only requiring a single
				3031	* synchronization primitive. Look ahead in the array is limited due
				3032	* to performance reasons.
				3033	*/
				3034	static inline
				3035	int build_detached_freelist(struct kmem_cache *s, size_t size,
				3036	void *p, struct detached_freelist df)
				3037	{
				3038	size_t first_skipped_index = 0;
				3039	int lookahead = 3;
				3040	void *object;
				3041	struct page *page;
				3042
				3043	/* Always re-init detached_freelist */
				3044	df->page = NULL;
				3045
				3046	do {
				3047	object = p[--size];
				3048	/* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
				3049	} while (!object && size);
				3050
				3051	if (!object)
				3052	return 0;
				3053
				3054	page = virt_to_head_page(object);
				3055	if (!s) {
				3056	/* Handle kalloc'ed objects */
				3057	if (unlikely(!PageSlab(page))) {
				3058	BUG_ON(!PageCompound(page));
				3059	kfree_hook(object);
				3060	__free_pages(page, compound_order(page));
				3061	p[size] = NULL; /* mark object processed */
				3062	return size;
				3063	}
				3064	/* Derive kmem_cache from object */
				3065	df->s = page->slab_cache;
				3066	} else {
				3067	df->s = cache_from_obj(s, object); /* Support for memcg */
				3068	}
				3069
				3070	/* Start new detached freelist */
				3071	df->page = page;
				3072	set_freepointer(df->s, object, NULL);
				3073	df->tail = object;
				3074	df->freelist = object;
				3075	p[size] = NULL; /* mark object processed */
				3076	df->cnt = 1;
				3077
				3078	while (size) {
				3079	object = p[--size];
				3080	if (!object)
				3081	continue; /* Skip processed objects */
				3082
				3083	/* df->page is always set at this point */
				3084	if (df->page == virt_to_head_page(object)) {
				3085	/* Opportunity build freelist */
				3086	set_freepointer(df->s, object, df->freelist);
				3087	df->freelist = object;
				3088	df->cnt++;
				3089	p[size] = NULL; /* mark object processed */
				3090
				3091	continue;
				3092	}
				3093
				3094	/* Limit look ahead search */
				3095	if (!--lookahead)
				3096	break;
				3097
				3098	if (!first_skipped_index)
				3099	first_skipped_index = size + 1;
				3100	}
				3101
				3102	return first_skipped_index;
				3103	}
				3104
				3105	/* Note that interrupts must be enabled when calling this function. */
				3106	void kmem_cache_free_bulk(struct kmem_cache s, size_t size, void *p)
				3107	{
				3108	if (WARN_ON(!size))
				3109	return;
				3110
				3111	do {
				3112	struct detached_freelist df;
				3113
				3114	size = build_detached_freelist(s, size, p, &df);
				3115	if (!df.page)
				3116	continue;
				3117
				3118	slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
				3119	} while (likely(size));
				3120	}
				3121	EXPORT_SYMBOL(kmem_cache_free_bulk);
				3122
				3123	/* Note that interrupts must be enabled when calling this function. */
				3124	int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
				3125	void **p)
				3126	{
				3127	struct kmem_cache_cpu *c;
				3128	int i;
				3129
				3130	/* memcg and kmem_cache debug support */
				3131	s = slab_pre_alloc_hook(s, flags);
				3132	if (unlikely(!s))
				3133	return false;
				3134	/*
				3135	* Drain objects in the per cpu slab, while disabling local
				3136	* IRQs, which protects against PREEMPT and interrupts
				3137	* handlers invoking normal fastpath.
				3138	*/
				3139	local_irq_disable();
				3140	c = this_cpu_ptr(s->cpu_slab);
				3141
				3142	for (i = 0; i < size; i++) {
				3143	void *object = c->freelist;
				3144
				3145	if (unlikely(!object)) {
				3146	/*
				3147	* Invoking slow path likely have side-effect
				3148	* of re-populating per CPU c->freelist
				3149	*/
				3150	p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
				3151	_RET_IP_, c);
				3152	if (unlikely(!p[i]))
				3153	goto error;
				3154
				3155	c = this_cpu_ptr(s->cpu_slab);
				3156	continue; /* goto for-loop */
				3157	}
				3158	c->freelist = get_freepointer(s, object);
				3159	p[i] = object;
				3160	}
				3161	c->tid = next_tid(c->tid);
				3162	local_irq_enable();
				3163
				3164	/* Clear memory outside IRQ disabled fastpath loop */
				3165	if (unlikely(slab_want_init_on_alloc(flags, s))) {
				3166	int j;
				3167
				3168	for (j = 0; j < i; j++)
				3169	memset(p[j], 0, s->object_size);
				3170	}
				3171
				3172	/* memcg and kmem_cache debug support */
				3173	slab_post_alloc_hook(s, flags, size, p);
				3174	return i;
				3175	error:
				3176	local_irq_enable();
				3177	slab_post_alloc_hook(s, flags, i, p);
				3178	__kmem_cache_free_bulk(s, i, p);
				3179	return 0;
				3180	}
				3181	EXPORT_SYMBOL(kmem_cache_alloc_bulk);
				3182
				3183
				3184	/*
				3185	* Object placement in a slab is made very easy because we always start at
				3186	* offset 0. If we tune the size of the object to the alignment then we can
				3187	* get the required alignment by putting one properly sized object after
				3188	* another.
				3189	*
				3190	* Notice that the allocation order determines the sizes of the per cpu
				3191	* caches. Each processor has always one slab available for allocations.
				3192	* Increasing the allocation order reduces the number of times that slabs
				3193	* must be moved on and off the partial lists and is therefore a factor in
				3194	* locking overhead.
				3195	*/
				3196
				3197	/*
				3198	* Mininum / Maximum order of slab pages. This influences locking overhead
				3199	* and slab fragmentation. A higher order reduces the number of partial slabs
				3200	* and increases the number of allocations possible without having to
				3201	* take the list_lock.
				3202	*/
				3203	static unsigned int slub_min_order;
				3204	static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
				3205	static unsigned int slub_min_objects;
				3206
				3207	/*
				3208	* Calculate the order of allocation given an slab object size.
				3209	*
				3210	* The order of allocation has significant impact on performance and other
				3211	* system components. Generally order 0 allocations should be preferred since
				3212	* order 0 does not cause fragmentation in the page allocator. Larger objects
				3213	* be problematic to put into order 0 slabs because there may be too much
				3214	* unused space left. We go to a higher order if more than 1/16th of the slab
				3215	* would be wasted.
				3216	*
				3217	* In order to reach satisfactory performance we must ensure that a minimum
				3218	* number of objects is in one slab. Otherwise we may generate too much
				3219	* activity on the partial lists which requires taking the list_lock. This is
				3220	* less a concern for large slabs though which are rarely used.
				3221	*
				3222	* slub_max_order specifies the order where we begin to stop considering the
				3223	* number of objects in a slab as critical. If we reach slub_max_order then
				3224	* we try to keep the page order as low as possible. So we accept more waste
				3225	* of space in favor of a small page order.
				3226	*
				3227	* Higher order allocations also allow the placement of more objects in a
				3228	* slab and thereby reduce object handling overhead. If the user has
				3229	* requested a higher mininum order then we start with that one instead of
				3230	* the smallest order which will fit the object.
				3231	*/
				3232	static inline unsigned int slab_order(unsigned int size,
				3233	unsigned int min_objects, unsigned int max_order,
				3234	unsigned int fract_leftover)
				3235	{
				3236	unsigned int min_order = slub_min_order;
				3237	unsigned int order;
				3238
				3239	if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
				3240	return get_order(size * MAX_OBJS_PER_PAGE) - 1;
				3241
				3242	for (order = max(min_order, (unsigned int)get_order(min_objects * size));
				3243	order <= max_order; order++) {
				3244
				3245	unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
				3246	unsigned int rem;
				3247
				3248	rem = slab_size % size;
				3249
				3250	if (rem <= slab_size / fract_leftover)
				3251	break;
				3252	}
				3253
				3254	return order;
				3255	}
				3256
				3257	static inline int calculate_order(unsigned int size)
				3258	{
				3259	unsigned int order;
				3260	unsigned int min_objects;
				3261	unsigned int max_objects;
				3262
				3263	/*
				3264	* Attempt to find best configuration for a slab. This
				3265	* works by first attempting to generate a layout with
				3266	* the best configuration and backing off gradually.
				3267	*
				3268	* First we increase the acceptable waste in a slab. Then
				3269	* we reduce the minimum objects required in a slab.
				3270	*/
				3271	min_objects = slub_min_objects;
				3272	if (!min_objects)
				3273	min_objects = 4 * (fls(nr_cpu_ids) + 1);
				3274	max_objects = order_objects(slub_max_order, size);
				3275	min_objects = min(min_objects, max_objects);
				3276
				3277	while (min_objects > 1) {
				3278	unsigned int fraction;
				3279
				3280	fraction = 16;
				3281	while (fraction >= 4) {
				3282	order = slab_order(size, min_objects,
				3283	slub_max_order, fraction);
				3284	if (order <= slub_max_order)
				3285	return order;
				3286	fraction /= 2;
				3287	}
				3288	min_objects--;
				3289	}
				3290
				3291	/*
				3292	* We were unable to place multiple objects in a slab. Now
				3293	* lets see if we can place a single object there.
				3294	*/
				3295	order = slab_order(size, 1, slub_max_order, 1);
				3296	if (order <= slub_max_order)
				3297	return order;
				3298
				3299	/*
				3300	* Doh this slab cannot be placed using slub_max_order.
				3301	*/
				3302	order = slab_order(size, 1, MAX_ORDER, 1);
				3303	if (order < MAX_ORDER)
				3304	return order;
				3305	return -ENOSYS;
				3306	}
				3307
				3308	static void
				3309	init_kmem_cache_node(struct kmem_cache_node *n)
				3310	{
				3311	n->nr_partial = 0;
				3312	spin_lock_init(&n->list_lock);
				3313	INIT_LIST_HEAD(&n->partial);
				3314	#ifdef CONFIG_SLUB_DEBUG
				3315	atomic_long_set(&n->nr_slabs, 0);
				3316	atomic_long_set(&n->total_objects, 0);
				3317	INIT_LIST_HEAD(&n->full);
				3318	#endif
				3319	}
				3320
				#ifdef CONFIG_MTK_MM_DEBUG
				/*
				 * Extra cache descriptor used by the MTK debug code; its per cpu area is
				 * set up in alloc_kmem_cache_cpus() when the "kmalloc-256" cache is created.
				 */
				struct kmem_cache debug_kmem_cache = {
					.name = "debug_kmem_cache",
				};
				#endif
				3326	static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
				3327	{
				3328	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
				3329	KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
				3330
				3331	/*
				3332	* Must align to double word boundary for the double cmpxchg
				3333	* instructions to work; see __pcpu_double_call_return_bool().
				3334	*/
				3335	#ifdef CONFIG_MTK_MM_DEBUG
				3336	if (!strcmp(s->name, "kmalloc-256")) {
				3337	debug_kmem_cache.cpu_slab = __alloc_percpu(
				3338	sizeof(struct kmem_cache_cpu),
				3339	2 * sizeof(void *));
				3340	init_kmem_cache_cpus(&debug_kmem_cache);
				3341	}
				3342	#endif
				3343	s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
				3344	2 * sizeof(void *));
				3345
				3346	if (!s->cpu_slab)
				3347	return 0;
				3348
				3349	init_kmem_cache_cpus(s);
				3350
				3351	return 1;
				3352	}
				3353
				3354	static struct kmem_cache *kmem_cache_node;
				3355
				3356	/*
				3357	* No kmalloc_node yet so do it by hand. We know that this is the first
				3358	* slab on the node for this slabcache. There are no concurrent accesses
				3359	* possible.
				3360	*
				3361	* Note that this function only works on the kmem_cache_node
				3362	* when allocating for the kmem_cache_node. This is used for bootstrapping
				3363	* memory on a fresh node that has no slab structures yet.
				3364	*/
				3365	static void early_kmem_cache_node_alloc(int node)
				3366	{
				3367	struct page *page;
				3368	struct kmem_cache_node *n;
				3369
				3370	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
				3371
				3372	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
				3373
				3374	BUG_ON(!page);
				3375	if (page_to_nid(page) != node) {
				3376	pr_err("SLUB: Unable to allocate memory from node %d\n", node);
				3377	pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
				3378	}
				3379
				3380	n = page->freelist;
				3381	BUG_ON(!n);
				3382	#ifdef CONFIG_SLUB_DEBUG
				3383	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
				3384	init_tracking(kmem_cache_node, n);
				3385	#endif
				3386	n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
				3387	GFP_KERNEL);
				3388	page->freelist = get_freepointer(kmem_cache_node, n);
				3389	page->inuse = 1;
				3390	page->frozen = 0;
				3391	kmem_cache_node->node[node] = n;
				3392	init_kmem_cache_node(n);
				3393	inc_slabs_node(kmem_cache_node, node, page->objects);
				3394
				3395	/*
				3396	* No locks need to be taken here as it has just been
				3397	* initialized and there is no concurrent access.
				3398	*/
				3399	__add_partial(n, page, DEACTIVATE_TO_HEAD);
				3400	}
				3401
				3402	static void free_kmem_cache_nodes(struct kmem_cache *s)
				3403	{
				3404	int node;
				3405	struct kmem_cache_node *n;
				3406
				3407	for_each_kmem_cache_node(s, node, n) {
				3408	s->node[node] = NULL;
				3409	kmem_cache_free(kmem_cache_node, n);
				3410	}
				3411	}
				3412
				3413	void __kmem_cache_release(struct kmem_cache *s)
				3414	{
				3415	cache_random_seq_destroy(s);
				3416	free_percpu(s->cpu_slab);
				3417	free_kmem_cache_nodes(s);
				3418	}
				3419
				3420	static int init_kmem_cache_nodes(struct kmem_cache *s)
				3421	{
				3422	int node;
				3423
				3424	for_each_node_state(node, N_NORMAL_MEMORY) {
				3425	struct kmem_cache_node *n;
				3426
				3427	if (slab_state == DOWN) {
				3428	early_kmem_cache_node_alloc(node);
				3429	continue;
				3430	}
				3431	n = kmem_cache_alloc_node(kmem_cache_node,
				3432	GFP_KERNEL, node);
				3433
				3434	if (!n) {
				3435	free_kmem_cache_nodes(s);
				3436	return 0;
				3437	}
				3438
				3439	init_kmem_cache_node(n);
				3440	s->node[node] = n;
				3441	}
				3442	return 1;
				3443	}
				3444
				3445	static void set_min_partial(struct kmem_cache *s, unsigned long min)
				3446	{
				3447	if (min < MIN_PARTIAL)
				3448	min = MIN_PARTIAL;
				3449	else if (min > MAX_PARTIAL)
				3450	min = MAX_PARTIAL;
				3451	s->min_partial = min;
				3452	}
				3453
				/*
				 * set_cpu_partial - pick s->cpu_partial from the object size of @s.
				 * Does nothing unless CONFIG_SLUB_CPU_PARTIAL is set.
				 */
				static void set_cpu_partial(struct kmem_cache *s)
				{
				#ifdef CONFIG_SLUB_CPU_PARTIAL
					unsigned int limit;

					/*
					 * cpu_partial determined the maximum number of objects kept in the
					 * per cpu partial lists of a processor.
					 *
					 * Per cpu partial lists mainly contain slabs that just have one
					 * object freed. If they are used for allocation then they can be
					 * filled up again with minimal effort. The slab will never hit the
					 * per node partial lists and therefore no locking will be required.
					 *
					 * This setting also determines
					 *
					 * A) The number of objects from per cpu partial slabs dumped to the
					 *    per node list when we reach the limit.
					 * B) The number of objects in cpu partial slabs to extract from the
					 *    per node list when we run out of per cpu objects. We only fetch
					 *    50% to keep some capacity around for frees.
					 */
					if (!kmem_cache_has_cpu_partial(s))
						limit = 0;
					else if (s->size >= PAGE_SIZE)
						limit = 2;
					else if (s->size >= 1024)
						limit = 6;
					else if (s->size >= 256)
						limit = 13;
					else
						limit = 30;

					s->cpu_partial = limit;
				#endif
				}
				3486
				3487	/*
				3488	* calculate_sizes() determines the order and the distribution of data within
				3489	* a slab object.
				3490	*/
				3491	static int calculate_sizes(struct kmem_cache *s, int forced_order)
				3492	{
				3493	slab_flags_t flags = s->flags;
				3494	unsigned int size = s->object_size;
				3495	unsigned int order;
				3496
				3497	/*
				3498	* Round up object size to the next word boundary. We can only
				3499	* place the free pointer at word boundaries and this determines
				3500	* the possible location of the free pointer.
				3501	*/
				3502	size = ALIGN(size, sizeof(void *));
				3503
				3504	#ifdef CONFIG_SLUB_DEBUG
				3505	/*
				3506	* Determine if we can poison the object itself. If the user of
				3507	* the slab may touch the object after free or before allocation
				3508	* then we should never poison the object itself.
				3509	*/
				3510	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
				3511	!s->ctor)
				3512	s->flags \|= __OBJECT_POISON;
				3513	else
				3514	s->flags &= ~__OBJECT_POISON;
				3515
				3516
				3517	/*
				3518	* If we are Redzoning then check if there is some space between the
				3519	* end of the object and the free pointer. If not then add an
				3520	* additional word to have some bytes to store Redzone information.
				3521	*/
				3522	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
				3523	size += sizeof(void *);
				3524	#endif
				3525
				3526	/*
				3527	* With that we have determined the number of bytes in actual use
				3528	* by the object. This is the potential offset to the free pointer.
				3529	*/
				3530	s->inuse = size;
				3531
				3532	if (((flags & (SLAB_TYPESAFE_BY_RCU \| SLAB_POISON)) \|\|
				3533	s->ctor)) {
				3534	/*
				3535	* Relocate free pointer after the object if it is not
				3536	* permitted to overwrite the first word of the object on
				3537	* kmem_cache_free.
				3538	*
				3539	* This is the case if we do RCU, have a constructor or
				3540	* destructor or are poisoning the objects.
				3541	*/
				3542	s->offset = size;
				3543	size += sizeof(void *);
				3544	}
				3545
				3546	#ifdef CONFIG_SLUB_DEBUG
				3547	if (flags & SLAB_STORE_USER)
				3548	/*
				3549	* Need to store information about allocs and frees after
				3550	* the object.
				3551	*/
				3552	size += 2 * sizeof(struct track);
				3553	#endif
				3554
				3555	kasan_cache_create(s, &size, &s->flags);
				3556	#ifdef CONFIG_SLUB_DEBUG
				3557	if (flags & SLAB_RED_ZONE) {
				3558	/*
				3559	* Add some empty padding so that we can catch
				3560	* overwrites from earlier objects rather than let
				3561	* tracking information or the free pointer be
				3562	* corrupted if a user writes before the start
				3563	* of the object.
				3564	*/
				3565	size += sizeof(void *);
				3566
				3567	s->red_left_pad = sizeof(void *);
				3568	s->red_left_pad = ALIGN(s->red_left_pad, s->align);
				3569	size += s->red_left_pad;
				3570	}
				3571	#endif
				3572
				3573	/*
				3574	* SLUB stores one object immediately after another beginning from
				3575	* offset 0. In order to align the objects we have to simply size
				3576	* each object to conform to the alignment.
				3577	*/
				3578	size = ALIGN(size, s->align);
				3579	s->size = size;
				3580	if (forced_order >= 0)
				3581	order = forced_order;
				3582	else
				3583	order = calculate_order(size);
				3584
				3585	if ((int)order < 0)
				3586	return 0;
				3587
				3588	s->allocflags = 0;
				3589	if (order)
				3590	s->allocflags \|= __GFP_COMP;
				3591
				3592	if (s->flags & SLAB_CACHE_DMA)
				3593	s->allocflags \|= GFP_DMA;
				3594
				3595	if (s->flags & SLAB_CACHE_DMA32)
				3596	s->allocflags \|= GFP_DMA32;
				3597
				3598	if (s->flags & SLAB_RECLAIM_ACCOUNT)
				3599	s->allocflags \|= __GFP_RECLAIMABLE;
				3600
				3601	/*
				3602	* Determine the number of objects per slab
				3603	*/
				3604	s->oo = oo_make(order, size);
				3605	s->min = oo_make(get_order(size), size);
				3606	if (oo_objects(s->oo) > oo_objects(s->max))
				3607	s->max = s->oo;
				3608
				3609	return !!oo_objects(s->oo);
				3610	}
				3611
				3612	static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
				3613	{
				3614	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
				3615	#ifdef CONFIG_SLAB_FREELIST_HARDENED
				3616	s->random = get_random_long();
				3617	#endif
				3618
				3619	if (!calculate_sizes(s, -1))
				3620	goto error;
				3621	if (disable_higher_order_debug) {
				3622	/*
				3623	* Disable debugging flags that store metadata if the min slab
				3624	* order increased.
				3625	*/
				3626	if (get_order(s->size) > get_order(s->object_size)) {
				3627	s->flags &= ~DEBUG_METADATA_FLAGS;
				3628	s->offset = 0;
				3629	if (!calculate_sizes(s, -1))
				3630	goto error;
				3631	}
				3632	}
				3633
				3634	#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
				3635	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
				3636	if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
				3637	/* Enable fast mode */
				3638	s->flags \|= __CMPXCHG_DOUBLE;
				3639	#endif
				3640
				3641	/*
				3642	* The larger the object size is, the more pages we want on the partial
				3643	* list to avoid pounding the page allocator excessively.
				3644	*/
				3645	set_min_partial(s, ilog2(s->size) / 2);
				3646
				3647	set_cpu_partial(s);
				3648
				3649	#ifdef CONFIG_NUMA
				3650	s->remote_node_defrag_ratio = 1000;
				3651	#endif
				3652
				3653	/* Initialize the pre-computed randomized freelist if slab is up */
				3654	if (slab_state >= UP) {
				3655	if (init_cache_random_seq(s))
				3656	goto error;
				3657	}
				3658
				3659	if (!init_kmem_cache_nodes(s))
				3660	goto error;
				3661
				3662	if (alloc_kmem_cache_cpus(s))
				3663	return 0;
				3664
				3665	free_kmem_cache_nodes(s);
				3666	error:
				3667	if (flags & SLAB_PANIC)
				3668	panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
				3669	s->name, s->size, s->size,
				3670	oo_order(s->oo), s->offset, (unsigned long)flags);
				3671	return -EINVAL;
				3672	}
				3673
				/*
				 * list_slab_objects - report the objects of @page whose bit is not set in
				 * the map filled by get_map().
				 *
				 * @s:    cache the page belongs to
				 * @page: slab page to inspect
				 * @text: format string for slab_err(); it is given s->name as argument
				 *
				 * Compiled to an empty function without CONFIG_SLUB_DEBUG. If the
				 * temporary bitmap cannot be allocated, nothing is reported.
				 */
				static void list_slab_objects(struct kmem_cache *s, struct page *page,
						const char *text)
				{
				#ifdef CONFIG_SLUB_DEBUG
					void *addr = page_address(page);
					void *p;
					unsigned long *map = kcalloc(BITS_TO_LONGS(page->objects),
								     sizeof(long),
								     GFP_ATOMIC);
					if (!map)
						return;
					slab_err(s, page, text, s->name);
					slab_lock(page);

					get_map(s, page, map);
					for_each_object(p, s, addr, page->objects) {

						if (!test_bit(slab_index(p, s, addr), map)) {
							pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
							print_tracking(s, p);
						}
					}
					slab_unlock(page);
					kfree(map);
				#endif
				}
				3700
				3701	/*
				3702	* Attempt to free all partial slabs on a node.
				3703	* This is called from __kmem_cache_shutdown(). We must take list_lock
				3704	* because sysfs file might still access partial list after the shutdowning.
				3705	*/
				3706	static void free_partial(struct kmem_cache s, struct kmem_cache_node n)
				3707	{
				3708	LIST_HEAD(discard);
				3709	struct page page, h;
				3710
				3711	BUG_ON(irqs_disabled());
				3712	spin_lock_irq(&n->list_lock);
				3713	list_for_each_entry_safe(page, h, &n->partial, lru) {
				3714	if (!page->inuse) {
				3715	remove_partial(n, page);
				3716	list_add(&page->lru, &discard);
				3717	} else {
				3718	list_slab_objects(s, page,
				3719	"Objects remaining in %s on __kmem_cache_shutdown()");
				3720	}
				3721	}
				3722	spin_unlock_irq(&n->list_lock);
				3723
				3724	list_for_each_entry_safe(page, h, &discard, lru)
				3725	discard_slab(s, page);
				3726	}
				3727
				3728	bool __kmem_cache_empty(struct kmem_cache *s)
				3729	{
				3730	int node;
				3731	struct kmem_cache_node *n;
				3732
				3733	for_each_kmem_cache_node(s, node, n)
				3734	if (n->nr_partial \|\| slabs_node(s, node))
				3735	return false;
				3736	return true;
				3737	}
				3738
				3739	/*
				3740	* Release all resources used by a slab cache.
				3741	*/
				3742	int __kmem_cache_shutdown(struct kmem_cache *s)
				3743	{
				3744	int node;
				3745	struct kmem_cache_node *n;
				3746
				3747	flush_all(s);
				3748	/* Attempt to free all objects */
				3749	for_each_kmem_cache_node(s, node, n) {
				3750	free_partial(s, n);
				3751	if (n->nr_partial \|\| slabs_node(s, node))
				3752	return 1;
				3753	}
				3754	sysfs_slab_remove(s);
				3755	return 0;
				3756	}
				3757
				3758	/********************************************************************
				3759	* Kmalloc subsystem
				3760	*******************************************************************/
				3761
				3762	static int __init setup_slub_min_order(char *str)
				3763	{
				3764	get_option(&str, (int *)&slub_min_order);
				3765
				3766	return 1;
				3767	}
				3768
				3769	__setup("slub_min_order=", setup_slub_min_order);
				3770
				3771	static int __init setup_slub_max_order(char *str)
				3772	{
				3773	get_option(&str, (int *)&slub_max_order);
				3774	slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
				3775
				3776	return 1;
				3777	}
				3778
				3779	__setup("slub_max_order=", setup_slub_max_order);
				3780
				3781	static int __init setup_slub_min_objects(char *str)
				3782	{
				3783	get_option(&str, (int *)&slub_min_objects);
				3784
				3785	return 1;
				3786	}
				3787
				3788	__setup("slub_min_objects=", setup_slub_min_objects);
				3789
				3790	void *__kmalloc(size_t size, gfp_t flags)
				3791	{
				3792	struct kmem_cache *s;
				3793	void *ret;
				3794
				3795	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
				3796	return kmalloc_large(size, flags);
				3797
				3798	s = kmalloc_slab(size, flags);
				3799
				3800	if (unlikely(ZERO_OR_NULL_PTR(s)))
				3801	return s;
				3802
				3803	ret = slab_alloc(s, flags, _RET_IP_);
				3804
				3805	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
				3806
				3807	ret = kasan_kmalloc(s, ret, size, flags);
				3808
				3809	return ret;
				3810	}
				3811	EXPORT_SYMBOL(__kmalloc);
				3812
				3813	#ifdef CONFIG_NUMA
				3814	static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
				3815	{
				3816	struct page *page;
				3817	void *ptr = NULL;
				3818
				3819	flags \|= __GFP_COMP;
				3820	page = alloc_pages_node(node, flags, get_order(size));
				3821	if (page)
				3822	ptr = page_address(page);
				3823
				3824	return kmalloc_large_node_hook(ptr, size, flags);
				3825	}
				3826
				3827	void *__kmalloc_node(size_t size, gfp_t flags, int node)
				3828	{
				3829	struct kmem_cache *s;
				3830	void *ret;
				3831
				3832	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
				3833	ret = kmalloc_large_node(size, flags, node);
				3834
				3835	trace_kmalloc_node(_RET_IP_, ret,
				3836	size, PAGE_SIZE << get_order(size),
				3837	flags, node);
				3838
				3839	return ret;
				3840	}
				3841
				3842	s = kmalloc_slab(size, flags);
				3843
				3844	if (unlikely(ZERO_OR_NULL_PTR(s)))
				3845	return s;
				3846
				3847	ret = slab_alloc_node(s, flags, node, _RET_IP_);
				3848
				3849	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
				3850
				3851	ret = kasan_kmalloc(s, ret, size, flags);
				3852
				3853	return ret;
				3854	}
				3855	EXPORT_SYMBOL(__kmalloc_node);
				3856	#endif
				3857
				#ifdef CONFIG_HARDENED_USERCOPY
				/*
				 * __check_heap_object - validate a user copy that touches a slab object.
				 *
				 * @ptr:     start of the kernel memory being copied
				 * @n:       number of bytes being copied
				 * @page:    slab page containing @ptr
				 * @to_user: direction of the copy, passed on to the usercopy reporters
				 *
				 * Rejects incorrectly sized objects and objects that are to be copied
				 * to/from userspace but do not fall entirely within the containing slab
				 * cache's usercopy region.
				 *
				 * Returns nothing: a passing check simply returns, a failing one is
				 * reported through usercopy_abort() (or usercopy_warn() in fallback mode).
				 */
				void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
						bool to_user)
				{
					struct kmem_cache *s;
					unsigned int offset;
					size_t object_size;

					ptr = kasan_reset_tag(ptr);

					/* Find object and usable object size. */
					s = page->slab_cache;

					/* Reject impossible pointers. */
					if (ptr < page_address(page))
						usercopy_abort("SLUB object not in SLUB page?!", NULL,
							       to_user, 0, n);

					/* Find offset within object. */
					offset = (ptr - page_address(page)) % s->size;

					/* Adjust for redzone and reject if within the redzone. */
					if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
						if (offset < s->red_left_pad)
							usercopy_abort("SLUB object in left red zone",
								       s->name, to_user, offset, n);
						offset -= s->red_left_pad;
					}

					/* Allow address range falling entirely within usercopy region. */
					if (offset >= s->useroffset &&
					    offset - s->useroffset <= s->usersize &&
					    n <= s->useroffset - offset + s->usersize)
						return;

					/*
					 * If the copy is still within the allocated object, produce
					 * a warning instead of rejecting the copy. This is intended
					 * to be a temporary method to find any missing usercopy
					 * whitelists.
					 */
					object_size = slab_ksize(s);
					if (usercopy_fallback &&
					    offset <= object_size && n <= object_size - offset) {
						usercopy_warn("SLUB object", s->name, to_user, offset, n);
						return;
					}

					usercopy_abort("SLUB object", s->name, to_user, offset, n);
				}
				#endif /* CONFIG_HARDENED_USERCOPY */
				3917
				3918	static size_t __ksize(const void *object)
				3919	{
				3920	struct page *page;
				3921
				3922	if (unlikely(object == ZERO_SIZE_PTR))
				3923	return 0;
				3924
				3925	page = virt_to_head_page(object);
				3926
				3927	if (unlikely(!PageSlab(page))) {
				3928	WARN_ON(!PageCompound(page));
				3929	return PAGE_SIZE << compound_order(page);
				3930	}
				3931
				3932	return slab_ksize(page->slab_cache);
				3933	}
				3934
				3935	size_t ksize(const void *object)
				3936	{
				3937	size_t size = __ksize(object);
				3938	/* We assume that ksize callers could use whole allocated area,
				3939	* so we need to unpoison this area.
				3940	*/
				3941	kasan_unpoison_shadow(object, size);
				3942	return size;
				3943	}
				3944	EXPORT_SYMBOL(ksize);
				3945
				3946	void kfree(const void *x)
				3947	{
				3948	struct page *page;
				3949	void object = (void )x;
				3950
				3951	trace_kfree(_RET_IP_, x);
				3952
				3953	if (unlikely(ZERO_OR_NULL_PTR(x)))
				3954	return;
				3955
				3956	page = virt_to_head_page(x);
				3957	if (unlikely(!PageSlab(page))) {
				3958	BUG_ON(!PageCompound(page));
				3959	kfree_hook(object);
				3960	__free_pages(page, compound_order(page));
				3961	return;
				3962	}
				3963	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
				3964	}
				3965	EXPORT_SYMBOL(kfree);
				3966
					/*
					 * Number of promote buckets used by __kmem_cache_shrink(): partial slabs
					 * with 1..SHRINK_PROMOTE_MAX free objects are re-sorted, others stay put.
					 */
				3967	#define SHRINK_PROMOTE_MAX 32
				3968
				3969	/*
				3970	* kmem_cache_shrink discards empty slabs and promotes the slabs filled
				3971	* up most to the head of the partial lists. New allocations will then
				3972	* fill those up and thus they can be removed from the partial lists.
				3973	*
				3974	* The slabs with the least items are placed last. This results in them
				3975	* being allocated from last increasing the chance that the last objects
				3976	* are freed in them.
					*
					* Returns 1 if any node still reports slabs (slabs_node()) after the
					* shrink, 0 otherwise.
				3977	*/
				3978	int __kmem_cache_shrink(struct kmem_cache *s)
				3979	{
				3980	int node;
				3981	int i;
				3982	struct kmem_cache_node *n;
				3983	struct page *page;
				3984	struct page *t;
				3985	struct list_head discard;
				3986	struct list_head promote[SHRINK_PROMOTE_MAX];
				3987	unsigned long flags;
				3988	int ret = 0;
				3989
				3990	flush_all(s);
				3991	for_each_kmem_cache_node(s, node, n) {
					/* The scratch lists are re-initialised for every node. */
				3992	INIT_LIST_HEAD(&discard);
				3993	for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
				3994	INIT_LIST_HEAD(promote + i);
				3995
				3996	spin_lock_irqsave(&n->list_lock, flags);
				3997
				3998	/*
				3999	* Build lists of slabs to discard or promote.
				4000	*
				4001	* Note that concurrent frees may occur while we hold the
				4002	* list_lock. page->inuse here is the upper limit.
				4003	*/
				4004	list_for_each_entry_safe(page, t, &n->partial, lru) {
				4005	int free = page->objects - page->inuse;
				4006
				4007	/* Do not reread page->inuse */
				4008	barrier();
				4009
				4010	/* We do not keep full slabs on the list */
				4011	BUG_ON(free <= 0);
				4012
					/* promote[k] collects the slabs that have k + 1 free objects. */
				4013	if (free == page->objects) {
				4014	list_move(&page->lru, &discard);
				4015	n->nr_partial--;
				4016	} else if (free <= SHRINK_PROMOTE_MAX)
				4017	list_move(&page->lru, promote + free - 1);
				4018	}
				4019
				4020	/*
				4021	* Promote the slabs filled up most to the head of the
				4022	* partial list.
				4023	*/
					/* Buckets are spliced from the highest index down; promote[0] goes last. */
				4024	for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
				4025	list_splice(promote + i, &n->partial);
				4026
				4027	spin_unlock_irqrestore(&n->list_lock, flags);
				4028
				4029	/* Release empty slabs */
					/* Done after list_lock has been dropped. */
				4030	list_for_each_entry_safe(page, t, &discard, lru)
				4031	discard_slab(s, page);
				4032
				4033	if (slabs_node(s, node))
				4034	ret = 1;
				4035	}
				4036
				4037	return ret;
				4038	}
				4039
				4040	#ifdef CONFIG_MEMCG
				4041	static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
				4042	{
				4043	/*
				4044	* Called with all the locks held after a sched RCU grace period.
				4045	* Even if @s becomes empty after shrinking, we can't know that @s
				4046	* doesn't have allocations already in-flight and thus can't
				4047	* destroy @s until the associated memcg is released.
				4048	*
				4049	* However, let's remove the sysfs files for empty caches here.
				4050	* Each cache has a lot of interface files which aren't
				4051	* particularly useful for empty draining caches; otherwise, we can
				4052	* easily end up with millions of unnecessary sysfs files on
				4053	* systems which have a lot of memory and transient cgroups.
				4054	*/
				4055	if (!__kmem_cache_shrink(s))
				4056	sysfs_slab_remove(s);
				4057	}
				4058
					/*
					 * Deactivate memcg cache @s: set its cpu-partial limit and min_partial
					 * to zero, then schedule kmemcg_cache_deact_after_rcu() through
					 * slab_deactivate_memcg_cache_rcu_sched().
					 */
				4059	void __kmemcg_cache_deactivate(struct kmem_cache *s)
				4060	{
				4061	/*
				4062	* Disable empty slabs caching. Used to avoid pinning offline
				4063	* memory cgroups by kmem pages that can be freed.
				4064	*/
				4065	slub_set_cpu_partial(s, 0);
				4066	s->min_partial = 0;
				4067
				4068	/*
				4069	* s->cpu_partial is checked locklessly (see put_cpu_partial), so
				4070	* we have to make sure the change is visible before shrinking.
				4071	*/
				4072	slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
				4073	}
				4074	#endif
				4075
				4076	static int slab_mem_going_offline_callback(void *arg)
				4077	{
				4078	struct kmem_cache *s;
				4079
				4080	mutex_lock(&slab_mutex);
				4081	list_for_each_entry(s, &slab_caches, list)
				4082	__kmem_cache_shrink(s);
				4083	mutex_unlock(&slab_mutex);
				4084
				4085	return 0;
				4086	}
				4087
				4088	static void slab_mem_offline_callback(void *arg)
				4089	{
				4090	struct kmem_cache_node *n;
				4091	struct kmem_cache *s;
				4092	struct memory_notify *marg = arg;
				4093	int offline_node;
				4094
				4095	offline_node = marg->status_change_nid_normal;
				4096
				4097	/*
				4098	* If the node still has available memory. we need kmem_cache_node
				4099	* for it yet.
				4100	*/
				4101	if (offline_node < 0)
				4102	return;
				4103
				4104	mutex_lock(&slab_mutex);
				4105	list_for_each_entry(s, &slab_caches, list) {
				4106	n = get_node(s, offline_node);
				4107	if (n) {
				4108	/*
				4109	* if n->nr_slabs > 0, slabs still exist on the node
				4110	* that is going down. We were unable to free them,
				4111	* and offline_pages() function shouldn't call this
				4112	* callback. So, we must fail.
				4113	*/
				4114	BUG_ON(slabs_node(s, offline_node));
				4115
				4116	s->node[offline_node] = NULL;
				4117	kmem_cache_free(kmem_cache_node, n);
				4118	}
				4119	}
				4120	mutex_unlock(&slab_mutex);
				4121	}
				4122
				4123	static int slab_mem_going_online_callback(void *arg)
				4124	{
				4125	struct kmem_cache_node *n;
				4126	struct kmem_cache *s;
				4127	struct memory_notify *marg = arg;
				4128	int nid = marg->status_change_nid_normal;
				4129	int ret = 0;
				4130
				4131	/*
				4132	* If the node's memory is already available, then kmem_cache_node is
				4133	* already created. Nothing to do.
				4134	*/
				4135	if (nid < 0)
				4136	return 0;
				4137
				4138	/*
				4139	* We are bringing a node online. No memory is available yet. We must
				4140	* allocate a kmem_cache_node structure in order to bring the node
				4141	* online.
				4142	*/
				4143	mutex_lock(&slab_mutex);
				4144	list_for_each_entry(s, &slab_caches, list) {
				4145	/*
				4146	* XXX: kmem_cache_alloc_node will fallback to other nodes
				4147	* since memory is not yet available from the node that
				4148	* is brought up.
				4149	*/
				4150	n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
				4151	if (!n) {
				4152	ret = -ENOMEM;
				4153	goto out;
				4154	}
				4155	init_kmem_cache_node(n);
				4156	s->node[nid] = n;
				4157	}
				4158	out:
				4159	mutex_unlock(&slab_mutex);
				4160	return ret;
				4161	}
				4162
				4163	static int slab_memory_callback(struct notifier_block *self,
				4164	unsigned long action, void *arg)
				4165	{
				4166	int ret = 0;
				4167
				4168	switch (action) {
				4169	case MEM_GOING_ONLINE:
				4170	ret = slab_mem_going_online_callback(arg);
				4171	break;
				4172	case MEM_GOING_OFFLINE:
				4173	ret = slab_mem_going_offline_callback(arg);
				4174	break;
				4175	case MEM_OFFLINE:
				4176	case MEM_CANCEL_ONLINE:
				4177	slab_mem_offline_callback(arg);
				4178	break;
				4179	case MEM_ONLINE:
				4180	case MEM_CANCEL_OFFLINE:
				4181	break;
				4182	}
				4183	if (ret)
				4184	ret = notifier_from_errno(ret);
				4185	else
				4186	ret = NOTIFY_OK;
				4187	return ret;
				4188	}
				4189
					/*
					 * Notifier block that routes memory hotplug events to
					 * slab_memory_callback(); registered from kmem_cache_init().
					 */
				4190	static struct notifier_block slab_memory_callback_nb = {
				4191	.notifier_call = slab_memory_callback,
				4192	.priority = SLAB_CALLBACK_PRI,
				4193	};
				4194
				4195	/********************************************************************
				4196	* Basic setup of slabs
				4197	*******************************************************************/
				4198
				4199	/*
				4200	* Used for early kmem_cache structures that were allocated using
				4201	* the page allocator. Allocate them properly then fix up the pointers
				4202	* that may be pointing to the wrong kmem_cache structure.
				4203	*/
				4204
				4205	static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
				4206	{
				4207	int node;
				4208	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
				4209	struct kmem_cache_node *n;
				4210
				4211	memcpy(s, static_cache, kmem_cache->object_size);
				4212
				4213	/*
				4214	* This runs very early, and only the boot processor is supposed to be
				4215	* up. Even if it weren't true, IRQs are not up so we couldn't fire
				4216	* IPIs around.
				4217	*/
				4218	__flush_cpu_slab(s, smp_processor_id());
				4219	for_each_kmem_cache_node(s, node, n) {
				4220	struct page *p;
				4221
				4222	list_for_each_entry(p, &n->partial, lru)
				4223	p->slab_cache = s;
				4224
				4225	#ifdef CONFIG_SLUB_DEBUG
				4226	list_for_each_entry(p, &n->full, lru)
				4227	p->slab_cache = s;
				4228	#endif
				4229	}
				4230	slab_init_memcg_params(s);
				4231	list_add(&s->list, &slab_caches);
				4232	memcg_link_cache(s);
				4233	return s;
				4234	}
				4235
					/*
					 * Boot-time setup of the allocator: creates the kmem_cache_node and
					 * kmem_cache caches from static __initdata instances, replaces those
					 * instances via bootstrap(), then creates the kmalloc caches.
					 */
				4236	void __init kmem_cache_init(void)
				4237	{
				4238	static __initdata struct kmem_cache boot_kmem_cache,
				4239	boot_kmem_cache_node;
				4240
				4241	if (debug_guardpage_minorder())
				4242	slub_max_order = 0;
				4243
					/* Use the static instances until bootstrap() below replaces them. */
				4244	kmem_cache_node = &boot_kmem_cache_node;
				4245	kmem_cache = &boot_kmem_cache;
				4246
				4247	create_boot_cache(kmem_cache_node, "kmem_cache_node",
				4248	sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
				4249
				4250	register_hotmemory_notifier(&slab_memory_callback_nb);
				4251
				4252	/* Able to allocate the per node structures */
				4253	slab_state = PARTIAL;
				4254
					/* Object size covers the fields up to node[] plus nr_node_ids pointers. */
				4255	create_boot_cache(kmem_cache, "kmem_cache",
				4256	offsetof(struct kmem_cache, node) +
				4257	nr_node_ids * sizeof(struct kmem_cache_node *),
				4258	SLAB_HWCACHE_ALIGN, 0, 0);
				4259
				4260	kmem_cache = bootstrap(&boot_kmem_cache);
				4261	kmem_cache_node = bootstrap(&boot_kmem_cache_node);
				4262
				4263	/* Now we can use the kmem_cache to allocate kmalloc slabs */
				4264	setup_kmalloc_cache_index_table();
				4265	create_kmalloc_caches(0);
				4266
				4267	/* Setup random freelists for each cache */
				4268	init_freelist_randomization();
				4269
					/* Only a teardown callback (slub_cpu_dead) is registered; startup is NULL. */
				4270	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
				4271	slub_cpu_dead);
				4272
				4273	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n",
				4274	cache_line_size(),
				4275	slub_min_order, slub_max_order, slub_min_objects,
				4276	nr_cpu_ids, nr_node_ids);
				4277	}
				4278
					/* Late init hook; this implementation has nothing to do. */
				4279	void __init kmem_cache_init_late(void)
				4280	{
				4281	}
				4282
				4283	struct kmem_cache *
				4284	__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
				4285	slab_flags_t flags, void (ctor)(void ))
				4286	{
				4287	struct kmem_cache s, c;
				4288
				4289	s = find_mergeable(size, align, flags, name, ctor);
				4290	if (s) {
				4291	s->refcount++;
				4292
				4293	/*
				4294	* Adjust the object sizes so that we clear
				4295	* the complete object on kzalloc.
				4296	*/
				4297	s->object_size = max(s->object_size, size);
				4298	s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
				4299
				4300	for_each_memcg_cache(c, s) {
				4301	c->object_size = s->object_size;
				4302	c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
				4303	}
				4304
				4305	if (sysfs_slab_alias(s, name)) {
				4306	s->refcount--;
				4307	s = NULL;
				4308	}
				4309	}
				4310
				4311	return s;
				4312	}
				4313
				4314	int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
				4315	{
				4316	int err;
				4317
				4318	err = kmem_cache_open(s, flags);
				4319	if (err)
				4320	return err;
				4321
				4322	/* Mutex is not taken during early boot */
				4323	if (slab_state <= UP)
				4324	return 0;
				4325
				4326	memcg_propagate_slab_attrs(s);
				4327	err = sysfs_slab_add(s);
				4328	if (err)
				4329	__kmem_cache_release(s);
				4330
				4331	return err;
				4332	}
				4333
				4334	void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
				4335	{
				4336	struct kmem_cache *s;
				4337	void *ret;
				4338
				4339	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
				4340	return kmalloc_large(size, gfpflags);
				4341
				4342	s = kmalloc_slab(size, gfpflags);
				4343
				4344	if (unlikely(ZERO_OR_NULL_PTR(s)))
				4345	return s;
				4346
				4347	ret = slab_alloc(s, gfpflags, caller);
				4348
				4349	/* Honor the call site pointer we received. */
				4350	trace_kmalloc(caller, ret, size, s->size, gfpflags);
				4351
				4352	return ret;
				4353	}
				4354
				4355	#ifdef CONFIG_NUMA
				4356	void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
				4357	int node, unsigned long caller)
				4358	{
				4359	struct kmem_cache *s;
				4360	void *ret;
				4361
				4362	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
				4363	ret = kmalloc_large_node(size, gfpflags, node);
				4364
				4365	trace_kmalloc_node(caller, ret,
				4366	size, PAGE_SIZE << get_order(size),
				4367	gfpflags, node);
				4368
				4369	return ret;
				4370	}
				4371
				4372	s = kmalloc_slab(size, gfpflags);
				4373
				4374	if (unlikely(ZERO_OR_NULL_PTR(s)))
				4375	return s;
				4376
				4377	ret = slab_alloc_node(s, gfpflags, node, caller);
				4378
				4379	/* Honor the call site pointer we received. */
				4380	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
				4381
				4382	return ret;
				4383	}
				4384	#endif
				4385
				4386	#ifdef CONFIG_SYSFS
					/* count_partial() callback: the inuse counter of @page. */
				4387	static int count_inuse(struct page *page)
				4388	{
				4389	return page->inuse;
				4390	}
				4391
					/* count_partial() callback: the objects counter of @page. */
				4392	static int count_total(struct page *page)
				4393	{
				4394	return page->objects;
				4395	}
				4396	#endif
				4397
				4398	#ifdef CONFIG_SLUB_DEBUG
				4399	static int validate_slab(struct kmem_cache s, struct page page,
				4400	unsigned long *map)
				4401	{
				4402	void *p;
				4403	void *addr = page_address(page);
				4404
				4405	if (!check_slab(s, page) \|\|
				4406	!on_freelist(s, page, NULL))
				4407	return 0;
				4408
				4409	/* Now we know that a valid freelist exists */
				4410	bitmap_zero(map, page->objects);
				4411
				4412	get_map(s, page, map);
				4413	for_each_object(p, s, addr, page->objects) {
				4414	if (test_bit(slab_index(p, s, addr), map))
				4415	if (!check_object(s, page, p, SLUB_RED_INACTIVE))
				4416	return 0;
				4417	}
				4418
				4419	for_each_object(p, s, addr, page->objects)
				4420	if (!test_bit(slab_index(p, s, addr), map))
				4421	if (!check_object(s, page, p, SLUB_RED_ACTIVE))
				4422	return 0;
				4423	return 1;
				4424	}
				4425
				4426	static void validate_slab_slab(struct kmem_cache s, struct page page,
				4427	unsigned long *map)
				4428	{
				4429	slab_lock(page);
				4430	validate_slab(s, page, map);
				4431	slab_unlock(page);
				4432	}
				4433
				4434	static int validate_slab_node(struct kmem_cache *s,
				4435	struct kmem_cache_node n, unsigned long map)
				4436	{
				4437	unsigned long count = 0;
				4438	struct page *page;
				4439	unsigned long flags;
				4440
				4441	spin_lock_irqsave(&n->list_lock, flags);
				4442
				4443	list_for_each_entry(page, &n->partial, lru) {
				4444	validate_slab_slab(s, page, map);
				4445	count++;
				4446	}
				4447	if (count != n->nr_partial)
				4448	pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
				4449	s->name, count, n->nr_partial);
				4450
				4451	if (!(s->flags & SLAB_STORE_USER))
				4452	goto out;
				4453
				4454	list_for_each_entry(page, &n->full, lru) {
				4455	validate_slab_slab(s, page, map);
				4456	count++;
				4457	}
				4458	if (count != atomic_long_read(&n->nr_slabs))
				4459	pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
				4460	s->name, count, atomic_long_read(&n->nr_slabs));
				4461
				4462	out:
				4463	spin_unlock_irqrestore(&n->list_lock, flags);
				4464	return count;
				4465	}
				4466
				4467	static long validate_slab_cache(struct kmem_cache *s)
				4468	{
				4469	int node;
				4470	unsigned long count = 0;
				4471	unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
				4472	sizeof(unsigned long),
				4473	GFP_KERNEL);
				4474	struct kmem_cache_node *n;
				4475
				4476	if (!map)
				4477	return -ENOMEM;
				4478
				4479	flush_all(s);
				4480	for_each_kmem_cache_node(s, node, n)
				4481	count += validate_slab_node(s, n, map);
				4482	kfree(map);
				4483	return count;
				4484	}
				4485	/*
				4486	* Generate lists of code addresses where slabcache objects are allocated
				4487	* and freed.
				4488	*/
				4489
					/*
					 * One entry of a loc_track table: statistics for all tracked objects
					 * that share the same code address.  Filled in by add_location().
					 */
				4490	struct location {
					/* number of track records merged into this entry */
				4491	unsigned long count;
					/* code address taken from track->addr */
				4492	unsigned long addr;
					/* sum / minimum / maximum of (jiffies - track->when) */
				4493	long long sum_time;
				4494	long min_time;
				4495	long max_time;
					/* lowest and highest track->pid seen */
				4496	long min_pid;
				4497	long max_pid;
					/* cpus (track->cpu) and nodes recorded for this address */
				4498	DECLARE_BITMAP(cpus, NR_CPUS);
				4499	nodemask_t nodes;
				4500	};
				4501
					/* Growable array of struct location, managed by alloc_loc_track(). */
				4502	struct loc_track {
					/* capacity of loc[] in entries */
				4503	unsigned long max;
					/* entries of loc[] currently in use */
				4504	unsigned long count;
				4505	struct location *loc;
				4506	};
				4507
				4508	static void free_loc_track(struct loc_track *t)
				4509	{
				4510	if (t->max)
				4511	free_pages((unsigned long)t->loc,
				4512	get_order(sizeof(struct location) * t->max));
				4513	}
				4514
				4515	static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
				4516	{
				4517	struct location *l;
				4518	int order;
				4519
				4520	order = get_order(sizeof(struct location) * max);
				4521
				4522	l = (void *)__get_free_pages(flags, order);
				4523	if (!l)
				4524	return 0;
				4525
				4526	if (t->count) {
				4527	memcpy(l, t->loc, sizeof(struct location) * t->count);
				4528	free_loc_track(t);
				4529	}
				4530	t->max = max;
				4531	t->loc = l;
				4532	return 1;
				4533	}
				4534
				4535	static int add_location(struct loc_track t, struct kmem_cache s,
				4536	const struct track *track)
				4537	{
				4538	long start, end, pos;
				4539	struct location *l;
				4540	unsigned long caddr;
				4541	unsigned long age = jiffies - track->when;
				4542
				4543	start = -1;
				4544	end = t->count;
				4545
				4546	for ( ; ; ) {
				4547	pos = start + (end - start + 1) / 2;
				4548
				4549	/*
				4550	* There is nothing at "end". If we end up there
				4551	* we need to add something to before end.
				4552	*/
				4553	if (pos == end)
				4554	break;
				4555
				4556	caddr = t->loc[pos].addr;
				4557	if (track->addr == caddr) {
				4558
				4559	l = &t->loc[pos];
				4560	l->count++;
				4561	if (track->when) {
				4562	l->sum_time += age;
				4563	if (age < l->min_time)
				4564	l->min_time = age;
				4565	if (age > l->max_time)
				4566	l->max_time = age;
				4567
				4568	if (track->pid < l->min_pid)
				4569	l->min_pid = track->pid;
				4570	if (track->pid > l->max_pid)
				4571	l->max_pid = track->pid;
				4572
				4573	cpumask_set_cpu(track->cpu,
				4574	to_cpumask(l->cpus));
				4575	}
				4576	node_set(page_to_nid(virt_to_page(track)), l->nodes);
				4577	return 1;
				4578	}
				4579
				4580	if (track->addr < caddr)
				4581	end = pos;
				4582	else
				4583	start = pos;
				4584	}
				4585
				4586	/*
				4587	* Not found. Insert new tracking element.
				4588	*/
				4589	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
				4590	return 0;
				4591
				4592	l = t->loc + pos;
				4593	if (pos < t->count)
				4594	memmove(l + 1, l,
				4595	(t->count - pos) * sizeof(struct location));
				4596	t->count++;
				4597	l->count = 1;
				4598	l->addr = track->addr;
				4599	l->sum_time = age;
				4600	l->min_time = age;
				4601	l->max_time = age;
				4602	l->min_pid = track->pid;
				4603	l->max_pid = track->pid;
				4604	cpumask_clear(to_cpumask(l->cpus));
				4605	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
				4606	nodes_clear(l->nodes);
				4607	node_set(page_to_nid(virt_to_page(track)), l->nodes);
				4608	return 1;
				4609	}
				4610
				4611	static void process_slab(struct loc_track t, struct kmem_cache s,
				4612	struct page *page, enum track_item alloc,
				4613	unsigned long *map)
				4614	{
				4615	void *addr = page_address(page);
				4616	void *p;
				4617
				4618	bitmap_zero(map, page->objects);
				4619	get_map(s, page, map);
				4620
				4621	for_each_object(p, s, addr, page->objects)
				4622	if (!test_bit(slab_index(p, s, addr), map))
				4623	add_location(t, s, get_track(s, p, alloc));
				4624	}
				4625
				4626	static int list_locations(struct kmem_cache s, char buf,
				4627	enum track_item alloc)
				4628	{
				4629	int len = 0;
				4630	unsigned long i;
				4631	struct loc_track t = { 0, 0, NULL };
				4632	int node;
				4633	unsigned long *map = kmalloc_array(BITS_TO_LONGS(oo_objects(s->max)),
				4634	sizeof(unsigned long),
				4635	GFP_KERNEL);
				4636	struct kmem_cache_node *n;
				4637
				4638	if (!map \|\| !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
				4639	GFP_KERNEL)) {
				4640	kfree(map);
				4641	return sprintf(buf, "Out of memory\n");
				4642	}
				4643	/* Push back cpu slabs */
				4644	flush_all(s);
				4645
				4646	for_each_kmem_cache_node(s, node, n) {
				4647	unsigned long flags;
				4648	struct page *page;
				4649
				4650	if (!atomic_long_read(&n->nr_slabs))
				4651	continue;
				4652
				4653	spin_lock_irqsave(&n->list_lock, flags);
				4654	list_for_each_entry(page, &n->partial, lru)
				4655	process_slab(&t, s, page, alloc, map);
				4656	list_for_each_entry(page, &n->full, lru)
				4657	process_slab(&t, s, page, alloc, map);
				4658	spin_unlock_irqrestore(&n->list_lock, flags);
				4659	}
				4660
				4661	for (i = 0; i < t.count; i++) {
				4662	struct location *l = &t.loc[i];
				4663
				4664	if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
				4665	break;
				4666	len += sprintf(buf + len, "%7ld ", l->count);
				4667
				4668	if (l->addr)
				4669	len += sprintf(buf + len, "%pS", (void *)l->addr);
				4670	else
				4671	len += sprintf(buf + len, "<not-available>");
				4672
				4673	if (l->sum_time != l->min_time) {
				4674	len += sprintf(buf + len, " age=%ld/%ld/%ld",
				4675	l->min_time,
				4676	(long)div_u64(l->sum_time, l->count),
				4677	l->max_time);
				4678	} else
				4679	len += sprintf(buf + len, " age=%ld",
				4680	l->min_time);
				4681
				4682	if (l->min_pid != l->max_pid)
				4683	len += sprintf(buf + len, " pid=%ld-%ld",
				4684	l->min_pid, l->max_pid);
				4685	else
				4686	len += sprintf(buf + len, " pid=%ld",
				4687	l->min_pid);
				4688
				4689	if (num_online_cpus() > 1 &&
				4690	!cpumask_empty(to_cpumask(l->cpus)) &&
				4691	len < PAGE_SIZE - 60)
				4692	len += scnprintf(buf + len, PAGE_SIZE - len - 50,
				4693	" cpus=%*pbl",
				4694	cpumask_pr_args(to_cpumask(l->cpus)));
				4695
				4696	if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
				4697	len < PAGE_SIZE - 60)
				4698	len += scnprintf(buf + len, PAGE_SIZE - len - 50,
				4699	" nodes=%*pbl",
				4700	nodemask_pr_args(&l->nodes));
				4701
				4702	len += sprintf(buf + len, "\n");
				4703	}
				4704
				4705	free_loc_track(&t);
				4706	kfree(map);
				4707	if (!t.count)
				4708	len += sprintf(buf, "No data\n");
				4709	return len;
				4710	}
				4711	#endif
				4712
				4713	#ifdef SLUB_RESILIENCY_TEST
				4714	static void __init resiliency_test(void)
				4715	{
				4716	u8 *p;
				4717	int type = KMALLOC_NORMAL;
				4718
				4719	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 \|\| KMALLOC_SHIFT_HIGH < 10);
				4720
				4721	pr_err("SLUB resiliency testing\n");
				4722	pr_err("-----------------------\n");
				4723	pr_err("A. Corruption after allocation\n");
				4724
				4725	p = kzalloc(16, GFP_KERNEL);
				4726	p[16] = 0x12;
				4727	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
				4728	p + 16);
				4729
				4730	validate_slab_cache(kmalloc_caches[type][4]);
				4731
				4732	/* Hmmm... The next two are dangerous */
				4733	p = kzalloc(32, GFP_KERNEL);
				4734	p[32 + sizeof(void *)] = 0x34;
				4735	pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
				4736	p);
				4737	pr_err("If allocated object is overwritten then not detectable\n\n");
				4738
				4739	validate_slab_cache(kmalloc_caches[type][5]);
				4740	p = kzalloc(64, GFP_KERNEL);
				4741	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
				4742	*p = 0x56;
				4743	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
				4744	p);
				4745	pr_err("If allocated object is overwritten then not detectable\n\n");
				4746	validate_slab_cache(kmalloc_caches[type][6]);
				4747
				4748	pr_err("\nB. Corruption after free\n");
				4749	p = kzalloc(128, GFP_KERNEL);
				4750	kfree(p);
				4751	*p = 0x78;
				4752	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
				4753	validate_slab_cache(kmalloc_caches[type][7]);
				4754
				4755	p = kzalloc(256, GFP_KERNEL);
				4756	kfree(p);
				4757	p[50] = 0x9a;
				4758	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
				4759	validate_slab_cache(kmalloc_caches[type][8]);
				4760
				4761	p = kzalloc(512, GFP_KERNEL);
				4762	kfree(p);
				4763	p[512] = 0xab;
				4764	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
				4765	validate_slab_cache(kmalloc_caches[type][9]);
				4766	}
				4767	#else
				4768	#ifdef CONFIG_SYSFS
					/* Empty stand-in for CONFIG_SYSFS builds without SLUB_RESILIENCY_TEST. */
				4769	static void resiliency_test(void) {};
				4770	#endif
				4771	#endif
				4772
				4773	#ifdef CONFIG_SYSFS
					/* Selectors for show_slab_objects(); each becomes a bit in the SO_* masks. */
				4774	enum slab_stat_type {
				4775	SL_ALL, /* All slabs */
				4776	SL_PARTIAL, /* Only partially allocated slabs */
				4777	SL_CPU, /* Only slabs used for cpu caches */
				4778	SL_OBJECTS, /* Determine allocated objects not slabs */
				4779	SL_TOTAL /* Determine object capacity not slabs */
				4780	};
				4781
					/* Bit masks tested against the flags argument of show_slab_objects(). */
				4782	#define SO_ALL (1 << SL_ALL)
				4783	#define SO_PARTIAL (1 << SL_PARTIAL)
				4784	#define SO_CPU (1 << SL_CPU)
				4785	#define SO_OBJECTS (1 << SL_OBJECTS)
				4786	#define SO_TOTAL (1 << SL_TOTAL)
				4787
				4788	#ifdef CONFIG_MEMCG
				4789	static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
				4790
				4791	static int __init setup_slub_memcg_sysfs(char *str)
				4792	{
				4793	int v;
				4794
				4795	if (get_option(&str, &v) > 0)
				4796	memcg_sysfs_enabled = v;
				4797
				4798	return 1;
				4799	}
				4800
				4801	__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
				4802	#endif
				4803
					/*
					 * Write slab or object counts of cache @s into @buf as "<total>",
					 * followed under CONFIG_NUMA by " N<node>=<count>" for every node with
					 * a non-zero count, and a newline.  @flags is a mask of SO_* bits that
					 * selects what is counted.
					 *
					 * Returns the number of bytes written, or -ENOMEM if the per-node
					 * scratch array cannot be allocated.
					 */
				4804	static ssize_t show_slab_objects(struct kmem_cache *s,
				4805	char *buf, unsigned long flags)
				4806	{
				4807	unsigned long total = 0;
				4808	int node;
				4809	int x;
				4810	unsigned long *nodes;
				4811
					/* Per-node counters, indexed by node id and zeroed by kcalloc(). */
				4812	nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
				4813	if (!nodes)
				4814	return -ENOMEM;
				4815
				4816	if (flags & SO_CPU) {
				4817	int cpu;
				4818
				4819	for_each_possible_cpu(cpu) {
				4820	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
				4821	cpu);
				4822	int node;
				4823	struct page *page;
				4824
				4825	page = READ_ONCE(c->page);
				4826	if (!page)
				4827	continue;
				4828
				4829	node = page_to_nid(page);
				4830	if (flags & SO_TOTAL)
				4831	x = page->objects;
				4832	else if (flags & SO_OBJECTS)
				4833	x = page->inuse;
				4834	else
				4835	x = 1;
				4836
				4837	total += x;
				4838	nodes[node] += x;
				4839
				4840	page = slub_percpu_partial_read_once(c);
				4841	if (page) {
				4842	node = page_to_nid(page);
					/*
					 * NOTE(review): for SO_TOTAL and SO_OBJECTS only a warning is
					 * raised here; x keeps the value computed for c->page above and
					 * is added a second time below.
					 */
				4843	if (flags & SO_TOTAL)
				4844	WARN_ON_ONCE(1);
				4845	else if (flags & SO_OBJECTS)
				4846	WARN_ON_ONCE(1);
				4847	else
				4848	x = page->pages;
				4849	total += x;
				4850	nodes[node] += x;
				4851	}
				4852	}
				4853	}
				4854
				4855	/*
				4856	* It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
				4857	* already held which will conflict with an existing lock order:
				4858	*
				4859	* mem_hotplug_lock->slab_mutex->kernfs_mutex
				4860	*
				4861	* We don't really need mem_hotplug_lock (to hold off
				4862	* slab_mem_going_offline_callback) here because slab's memory hot
				4863	* unplug code doesn't destroy the kmem_cache->node[] data.
				4864	*/
				4865
					/* With CONFIG_SLUB_DEBUG, SO_ALL takes precedence over SO_PARTIAL. */
				4866	#ifdef CONFIG_SLUB_DEBUG
				4867	if (flags & SO_ALL) {
				4868	struct kmem_cache_node *n;
				4869
				4870	for_each_kmem_cache_node(s, node, n) {
				4871
				4872	if (flags & SO_TOTAL)
				4873	x = atomic_long_read(&n->total_objects);
				4874	else if (flags & SO_OBJECTS)
				4875	x = atomic_long_read(&n->total_objects) -
				4876	count_partial(n, count_free);
				4877	else
				4878	x = atomic_long_read(&n->nr_slabs);
				4879	total += x;
				4880	nodes[node] += x;
				4881	}
				4882
				4883	} else
				4884	#endif
				4885	if (flags & SO_PARTIAL) {
				4886	struct kmem_cache_node *n;
				4887
				4888	for_each_kmem_cache_node(s, node, n) {
				4889	if (flags & SO_TOTAL)
				4890	x = count_partial(n, count_total);
				4891	else if (flags & SO_OBJECTS)
				4892	x = count_partial(n, count_inuse);
				4893	else
				4894	x = n->nr_partial;
				4895	total += x;
				4896	nodes[node] += x;
				4897	}
				4898	}
					/* From here on x is reused as the output length. */
				4899	x = sprintf(buf, "%lu", total);
				4900	#ifdef CONFIG_NUMA
				4901	for (node = 0; node < nr_node_ids; node++)
				4902	if (nodes[node])
				4903	x += sprintf(buf + x, " N%d=%lu",
				4904	node, nodes[node]);
				4905	#endif
				4906	kfree(nodes);
				4907	return x + sprintf(buf + x, "\n");
				4908	}
				4909
				4910	#ifdef CONFIG_SLUB_DEBUG
				4911	static int any_slab_objects(struct kmem_cache *s)
				4912	{
				4913	int node;
				4914	struct kmem_cache_node *n;
				4915
				4916	for_each_kmem_cache_node(s, node, n)
				4917	if (atomic_long_read(&n->total_objects))
				4918	return 1;
				4919
				4920	return 0;
				4921	}
				4922	#endif
				4923
					/* Map an embedded attr / kobj member back to its containing structure. */
				4924	#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
				4925	#define to_slab(n) container_of(n, struct kmem_cache, kobj)
				4926
				4927	struct slab_attribute {
				4928	struct attribute attr;
				4929	ssize_t (show)(struct kmem_cache s, char *buf);
				4930	ssize_t (store)(struct kmem_cache s, const char *x, size_t count);
				4931	};
				4932
				/* Define a mode-0400 attribute object wired to <name>_show only. */
				#define SLAB_ATTR_RO(_nm) \
					static struct slab_attribute _nm##_attr = \
						__ATTR(_nm, 0400, _nm##_show, NULL)

				/* Define a mode-0600 attribute object wired to <name>_show/<name>_store. */
				#define SLAB_ATTR(_nm) \
					static struct slab_attribute _nm##_attr = \
						__ATTR(_nm, 0600, _nm##_show, _nm##_store)
				4940
				4941	static ssize_t slab_size_show(struct kmem_cache s, char buf)
				4942	{
				4943	return sprintf(buf, "%u\n", s->size);
				4944	}
				4945	SLAB_ATTR_RO(slab_size);
				4946
				4947	static ssize_t align_show(struct kmem_cache s, char buf)
				4948	{
				4949	return sprintf(buf, "%u\n", s->align);
				4950	}
				4951	SLAB_ATTR_RO(align);
				4952
				4953	static ssize_t object_size_show(struct kmem_cache s, char buf)
				4954	{
				4955	return sprintf(buf, "%u\n", s->object_size);
				4956	}
				4957	SLAB_ATTR_RO(object_size);
				4958
				4959	static ssize_t objs_per_slab_show(struct kmem_cache s, char buf)
				4960	{
				4961	return sprintf(buf, "%u\n", oo_objects(s->oo));
				4962	}
				4963	SLAB_ATTR_RO(objs_per_slab);
				4964
				4965	static ssize_t order_store(struct kmem_cache *s,
				4966	const char *buf, size_t length)
				4967	{
				4968	unsigned int order;
				4969	int err;
				4970
				4971	err = kstrtouint(buf, 10, &order);
				4972	if (err)
				4973	return err;
				4974
				4975	if (order > slub_max_order \|\| order < slub_min_order)
				4976	return -EINVAL;
				4977
				4978	calculate_sizes(s, order);
				4979	return length;
				4980	}
				4981
				4982	static ssize_t order_show(struct kmem_cache s, char buf)
				4983	{
				4984	return sprintf(buf, "%u\n", oo_order(s->oo));
				4985	}
				4986	SLAB_ATTR(order);
				4987
				4988	static ssize_t min_partial_show(struct kmem_cache s, char buf)
				4989	{
				4990	return sprintf(buf, "%lu\n", s->min_partial);
				4991	}
				4992
				4993	static ssize_t min_partial_store(struct kmem_cache s, const char buf,
				4994	size_t length)
				4995	{
				4996	unsigned long min;
				4997	int err;
				4998
				4999	err = kstrtoul(buf, 10, &min);
				5000	if (err)
				5001	return err;
				5002
				5003	set_min_partial(s, min);
				5004	return length;
				5005	}
				5006	SLAB_ATTR(min_partial);
				5007
				5008	static ssize_t cpu_partial_show(struct kmem_cache s, char buf)
				5009	{
				5010	return sprintf(buf, "%u\n", slub_cpu_partial(s));
				5011	}
				5012
				5013	static ssize_t cpu_partial_store(struct kmem_cache s, const char buf,
				5014	size_t length)
				5015	{
				5016	unsigned int objects;
				5017	int err;
				5018
				5019	err = kstrtouint(buf, 10, &objects);
				5020	if (err)
				5021	return err;
				5022	if (objects && !kmem_cache_has_cpu_partial(s))
				5023	return -EINVAL;
				5024
				5025	slub_set_cpu_partial(s, objects);
				5026	flush_all(s);
				5027	return length;
				5028	}
				5029	SLAB_ATTR(cpu_partial);
				5030
				5031	static ssize_t ctor_show(struct kmem_cache s, char buf)
				5032	{
				5033	if (!s->ctor)
				5034	return 0;
				5035	return sprintf(buf, "%pS\n", s->ctor);
				5036	}
				5037	SLAB_ATTR_RO(ctor);
				5038
				5039	static ssize_t aliases_show(struct kmem_cache s, char buf)
				5040	{
				5041	return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
				5042	}
				5043	SLAB_ATTR_RO(aliases);
				5044
				5045	static ssize_t partial_show(struct kmem_cache s, char buf)
				5046	{
				5047	return show_slab_objects(s, buf, SO_PARTIAL);
				5048	}
				5049	SLAB_ATTR_RO(partial);
				5050
				5051	static ssize_t cpu_slabs_show(struct kmem_cache s, char buf)
				5052	{
				5053	return show_slab_objects(s, buf, SO_CPU);
				5054	}
				5055	SLAB_ATTR_RO(cpu_slabs);
				5056
				5057	static ssize_t objects_show(struct kmem_cache s, char buf)
				5058	{
				5059	return show_slab_objects(s, buf, SO_ALL\|SO_OBJECTS);
				5060	}
				5061	SLAB_ATTR_RO(objects);
				5062
				5063	static ssize_t objects_partial_show(struct kmem_cache s, char buf)
				5064	{
				5065	return show_slab_objects(s, buf, SO_PARTIAL\|SO_OBJECTS);
				5066	}
				5067	SLAB_ATTR_RO(objects_partial);
				5068
				5069	static ssize_t slabs_cpu_partial_show(struct kmem_cache s, char buf)
				5070	{
				5071	int objects = 0;
				5072	int pages = 0;
				5073	int cpu;
				5074	int len;
				5075
				5076	for_each_online_cpu(cpu) {
				5077	struct page *page;
				5078
				5079	page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
				5080
				5081	if (page) {
				5082	pages += page->pages;
				5083	objects += page->pobjects;
				5084	}
				5085	}
				5086
				5087	len = sprintf(buf, "%d(%d)", objects, pages);
				5088
				5089	#ifdef CONFIG_SMP
				5090	for_each_online_cpu(cpu) {
				5091	struct page *page;
				5092
				5093	page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
				5094
				5095	if (page && len < PAGE_SIZE - 20)
				5096	len += sprintf(buf + len, " C%d=%d(%d)", cpu,
				5097	page->pobjects, page->pages);
				5098	}
				5099	#endif
				5100	return len + sprintf(buf + len, "\n");
				5101	}
				5102	SLAB_ATTR_RO(slabs_cpu_partial);
				5103
				5104	static ssize_t reclaim_account_show(struct kmem_cache s, char buf)
				5105	{
				5106	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
				5107	}
				5108
				5109	static ssize_t reclaim_account_store(struct kmem_cache *s,
				5110	const char *buf, size_t length)
				5111	{
				5112	s->flags &= ~SLAB_RECLAIM_ACCOUNT;
				5113	if (buf[0] == '1')
				5114	s->flags \|= SLAB_RECLAIM_ACCOUNT;
				5115	return length;
				5116	}
				5117	SLAB_ATTR(reclaim_account);
				5118
				5119	static ssize_t hwcache_align_show(struct kmem_cache s, char buf)
				5120	{
				5121	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
				5122	}
				5123	SLAB_ATTR_RO(hwcache_align);
				5124
				5125	#ifdef CONFIG_ZONE_DMA
				5126	static ssize_t cache_dma_show(struct kmem_cache s, char buf)
				5127	{
				5128	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
				5129	}
				5130	SLAB_ATTR_RO(cache_dma);
				5131	#endif
				5132
				5133	#ifdef CONFIG_ZONE_DMA32
				5134	static ssize_t cache_dma32_show(struct kmem_cache s, char buf)
				5135	{
				5136	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32));
				5137	}
				5138	SLAB_ATTR_RO(cache_dma32);
				5139	#endif
				5140
				5141	static ssize_t usersize_show(struct kmem_cache s, char buf)
				5142	{
				5143	return sprintf(buf, "%u\n", s->usersize);
				5144	}
				5145	SLAB_ATTR_RO(usersize);
				5146
				5147	static ssize_t destroy_by_rcu_show(struct kmem_cache s, char buf)
				5148	{
				5149	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
				5150	}
				5151	SLAB_ATTR_RO(destroy_by_rcu);
				5152
				5153	#ifdef CONFIG_SLUB_DEBUG
				5154	static ssize_t slabs_show(struct kmem_cache s, char buf)
				5155	{
				5156	return show_slab_objects(s, buf, SO_ALL);
				5157	}
				5158	SLAB_ATTR_RO(slabs);
				5159
				5160	static ssize_t total_objects_show(struct kmem_cache s, char buf)
				5161	{
				5162	return show_slab_objects(s, buf, SO_ALL\|SO_TOTAL);
				5163	}
				5164	SLAB_ATTR_RO(total_objects);
				5165
				5166	static ssize_t sanity_checks_show(struct kmem_cache s, char buf)
				5167	{
				5168	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
				5169	}
				5170
				5171	static ssize_t sanity_checks_store(struct kmem_cache *s,
				5172	const char *buf, size_t length)
				5173	{
				5174	s->flags &= ~SLAB_CONSISTENCY_CHECKS;
				5175	if (buf[0] == '1') {
				5176	s->flags &= ~__CMPXCHG_DOUBLE;
				5177	s->flags \|= SLAB_CONSISTENCY_CHECKS;
				5178	}
				5179	return length;
				5180	}
				5181	SLAB_ATTR(sanity_checks);
				5182
				5183	static ssize_t trace_show(struct kmem_cache s, char buf)
				5184	{
				5185	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
				5186	}
				5187
				5188	static ssize_t trace_store(struct kmem_cache s, const char buf,
				5189	size_t length)
				5190	{
				5191	/*
				5192	* Tracing a merged cache is going to give confusing results
				5193	* as well as cause other issues like converting a mergeable
				5194	* cache into an umergeable one.
				5195	*/
				5196	if (s->refcount > 1)
				5197	return -EINVAL;
				5198
				5199	s->flags &= ~SLAB_TRACE;
				5200	if (buf[0] == '1') {
				5201	s->flags &= ~__CMPXCHG_DOUBLE;
				5202	s->flags \|= SLAB_TRACE;
				5203	}
				5204	return length;
				5205	}
				5206	SLAB_ATTR(trace);
				5207
				5208	static ssize_t red_zone_show(struct kmem_cache s, char buf)
				5209	{
				5210	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
				5211	}
				5212
				5213	static ssize_t red_zone_store(struct kmem_cache *s,
				5214	const char *buf, size_t length)
				5215	{
				5216	if (any_slab_objects(s))
				5217	return -EBUSY;
				5218
				5219	s->flags &= ~SLAB_RED_ZONE;
				5220	if (buf[0] == '1') {
				5221	s->flags \|= SLAB_RED_ZONE;
				5222	}
				5223	calculate_sizes(s, -1);
				5224	return length;
				5225	}
				5226	SLAB_ATTR(red_zone);
				5227
				5228	static ssize_t poison_show(struct kmem_cache s, char buf)
				5229	{
				5230	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
				5231	}
				5232
				5233	static ssize_t poison_store(struct kmem_cache *s,
				5234	const char *buf, size_t length)
				5235	{
				5236	if (any_slab_objects(s))
				5237	return -EBUSY;
				5238
				5239	s->flags &= ~SLAB_POISON;
				5240	if (buf[0] == '1') {
				5241	s->flags \|= SLAB_POISON;
				5242	}
				5243	calculate_sizes(s, -1);
				5244	return length;
				5245	}
				5246	SLAB_ATTR(poison);
				5247
				5248	static ssize_t store_user_show(struct kmem_cache s, char buf)
				5249	{
				5250	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
				5251	}
				5252
				5253	static ssize_t store_user_store(struct kmem_cache *s,
				5254	const char *buf, size_t length)
				5255	{
				5256	if (any_slab_objects(s))
				5257	return -EBUSY;
				5258
				5259	s->flags &= ~SLAB_STORE_USER;
				5260	if (buf[0] == '1') {
				5261	s->flags &= ~__CMPXCHG_DOUBLE;
				5262	s->flags \|= SLAB_STORE_USER;
				5263	}
				5264	calculate_sizes(s, -1);
				5265	return length;
				5266	}
				5267	SLAB_ATTR(store_user);
				5268
				5269	static ssize_t validate_show(struct kmem_cache s, char buf)
				5270	{
				5271	return 0;
				5272	}
				5273
				5274	static ssize_t validate_store(struct kmem_cache *s,
				5275	const char *buf, size_t length)
				5276	{
				5277	int ret = -EINVAL;
				5278
				5279	if (buf[0] == '1') {
				5280	ret = validate_slab_cache(s);
				5281	if (ret >= 0)
				5282	ret = length;
				5283	}
				5284	return ret;
				5285	}
				5286	SLAB_ATTR(validate);
				5287
				5288	static ssize_t alloc_calls_show(struct kmem_cache s, char buf)
				5289	{
				5290	if (!(s->flags & SLAB_STORE_USER))
				5291	return -ENOSYS;
				5292	return list_locations(s, buf, TRACK_ALLOC);
				5293	}
				5294	SLAB_ATTR_RO(alloc_calls);
				5295
				5296	static ssize_t free_calls_show(struct kmem_cache s, char buf)
				5297	{
				5298	if (!(s->flags & SLAB_STORE_USER))
				5299	return -ENOSYS;
				5300	return list_locations(s, buf, TRACK_FREE);
				5301	}
				5302	SLAB_ATTR_RO(free_calls);
				5303	#endif /* CONFIG_SLUB_DEBUG */
				5304
				5305	#ifdef CONFIG_FAILSLAB
				5306	static ssize_t failslab_show(struct kmem_cache s, char buf)
				5307	{
				5308	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
				5309	}
				5310
				5311	static ssize_t failslab_store(struct kmem_cache s, const char buf,
				5312	size_t length)
				5313	{
				5314	if (s->refcount > 1)
				5315	return -EINVAL;
				5316
				5317	s->flags &= ~SLAB_FAILSLAB;
				5318	if (buf[0] == '1')
				5319	s->flags \|= SLAB_FAILSLAB;
				5320	return length;
				5321	}
				5322	SLAB_ATTR(failslab);
				5323	#endif
				5324
				5325	static ssize_t shrink_show(struct kmem_cache s, char buf)
				5326	{
				5327	return 0;
				5328	}
				5329
				5330	static ssize_t shrink_store(struct kmem_cache *s,
				5331	const char *buf, size_t length)
				5332	{
				5333	if (buf[0] == '1')
				5334	kmem_cache_shrink(s);
				5335	else
				5336	return -EINVAL;
				5337	return length;
				5338	}
				5339	SLAB_ATTR(shrink);
				5340
				5341	#ifdef CONFIG_NUMA
				5342	static ssize_t remote_node_defrag_ratio_show(struct kmem_cache s, char buf)
				5343	{
				5344	return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
				5345	}
				5346
				5347	static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				5348	const char *buf, size_t length)
				5349	{
				5350	unsigned int ratio;
				5351	int err;
				5352
				5353	err = kstrtouint(buf, 10, &ratio);
				5354	if (err)
				5355	return err;
				5356	if (ratio > 100)
				5357	return -ERANGE;
				5358
				5359	s->remote_node_defrag_ratio = ratio * 10;
				5360
				5361	return length;
				5362	}
				5363	SLAB_ATTR(remote_node_defrag_ratio);
				5364	#endif
				5365
				5366	#ifdef CONFIG_SLUB_STATS
				5367	static int show_stat(struct kmem_cache s, char buf, enum stat_item si)
				5368	{
				5369	unsigned long sum = 0;
				5370	int cpu;
				5371	int len;
				5372	int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
				5373
				5374	if (!data)
				5375	return -ENOMEM;
				5376
				5377	for_each_online_cpu(cpu) {
				5378	unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
				5379
				5380	data[cpu] = x;
				5381	sum += x;
				5382	}
				5383
				5384	len = sprintf(buf, "%lu", sum);
				5385
				5386	#ifdef CONFIG_SMP
				5387	for_each_online_cpu(cpu) {
				5388	if (data[cpu] && len < PAGE_SIZE - 20)
				5389	len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
				5390	}
				5391	#endif
				5392	kfree(data);
				5393	return len + sprintf(buf + len, "\n");
				5394	}
				5395
				5396	static void clear_stat(struct kmem_cache *s, enum stat_item si)
				5397	{
				5398	int cpu;
				5399
				5400	for_each_online_cpu(cpu)
				5401	per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
				5402	}
				5403
				/*
				 * Generate the sysfs handlers for one statistics counter:
				 *   <text>_show  - forwards to show_stat() for counter @si
				 *   <text>_store - accepts only input starting with '0', which resets
				 *                  the counter through clear_stat(); anything else
				 *                  yields -EINVAL
				 * and the matching read-write attribute object.
				 */
				#define STAT_ATTR(si, text) \
				static ssize_t text##_show(struct kmem_cache *s, char *buf) \
				{ \
					return show_stat(s, buf, si); \
				} \
				static ssize_t text##_store(struct kmem_cache *s, \
								const char *buf, size_t length) \
				{ \
					if (buf[0] != '0') \
						return -EINVAL; \
					clear_stat(s, si); \
					return length; \
				} \
				SLAB_ATTR(text);
				5418
				/* One show/store pair and attribute object per statistics counter. */
				STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
				STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
				STAT_ATTR(FREE_FASTPATH, free_fastpath);
				STAT_ATTR(FREE_SLOWPATH, free_slowpath);
				STAT_ATTR(FREE_FROZEN, free_frozen);
				STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
				STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
				STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
				STAT_ATTR(ALLOC_SLAB, alloc_slab);
				STAT_ATTR(ALLOC_REFILL, alloc_refill);
				STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
				STAT_ATTR(FREE_SLAB, free_slab);
				STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
				STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
				STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
				STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
				STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
				STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
				STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
				STAT_ATTR(ORDER_FALLBACK, order_fallback);
				STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
				STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
				STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
				STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
				STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
				STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
				5445	#endif
				5446
				5447	static struct attribute *slab_attrs[] = {
				5448	&slab_size_attr.attr,
				5449	&object_size_attr.attr,
				5450	&objs_per_slab_attr.attr,
				5451	&order_attr.attr,
				5452	&min_partial_attr.attr,
				5453	&cpu_partial_attr.attr,
				5454	&objects_attr.attr,
				5455	&objects_partial_attr.attr,
				5456	&partial_attr.attr,
				5457	&cpu_slabs_attr.attr,
				5458	&ctor_attr.attr,
				5459	&aliases_attr.attr,
				5460	&align_attr.attr,
				5461	&hwcache_align_attr.attr,
				5462	&reclaim_account_attr.attr,
				5463	&destroy_by_rcu_attr.attr,
				5464	&shrink_attr.attr,
				5465	&slabs_cpu_partial_attr.attr,
				5466	#ifdef CONFIG_SLUB_DEBUG
				5467	&total_objects_attr.attr,
				5468	&slabs_attr.attr,
				5469	&sanity_checks_attr.attr,
				5470	&trace_attr.attr,
				5471	&red_zone_attr.attr,
				5472	&poison_attr.attr,
				5473	&store_user_attr.attr,
				5474	&validate_attr.attr,
				5475	&alloc_calls_attr.attr,
				5476	&free_calls_attr.attr,
				5477	#endif
				5478	#ifdef CONFIG_ZONE_DMA
				5479	&cache_dma_attr.attr,
				5480	#endif
				5481	#ifdef CONFIG_ZONE_DMA32
				5482	&cache_dma32_attr.attr,
				5483	#endif
				5484	#ifdef CONFIG_NUMA
				5485	&remote_node_defrag_ratio_attr.attr,
				5486	#endif
				5487	#ifdef CONFIG_SLUB_STATS
				5488	&alloc_fastpath_attr.attr,
				5489	&alloc_slowpath_attr.attr,
				5490	&free_fastpath_attr.attr,
				5491	&free_slowpath_attr.attr,
				5492	&free_frozen_attr.attr,
				5493	&free_add_partial_attr.attr,
				5494	&free_remove_partial_attr.attr,
				5495	&alloc_from_partial_attr.attr,
				5496	&alloc_slab_attr.attr,
				5497	&alloc_refill_attr.attr,
				5498	&alloc_node_mismatch_attr.attr,
				5499	&free_slab_attr.attr,
				5500	&cpuslab_flush_attr.attr,
				5501	&deactivate_full_attr.attr,
				5502	&deactivate_empty_attr.attr,
				5503	&deactivate_to_head_attr.attr,
				5504	&deactivate_to_tail_attr.attr,
				5505	&deactivate_remote_frees_attr.attr,
				5506	&deactivate_bypass_attr.attr,
				5507	&order_fallback_attr.attr,
				5508	&cmpxchg_double_fail_attr.attr,
				5509	&cmpxchg_double_cpu_fail_attr.attr,
				5510	&cpu_partial_alloc_attr.attr,
				5511	&cpu_partial_free_attr.attr,
				5512	&cpu_partial_node_attr.attr,
				5513	&cpu_partial_drain_attr.attr,
				5514	#endif
				5515	#ifdef CONFIG_FAILSLAB
				5516	&failslab_attr.attr,
				5517	#endif
				5518	&usersize_attr.attr,
				5519
				5520	NULL
				5521	};
				5522
				5523	static const struct attribute_group slab_attr_group = {
				5524	.attrs = slab_attrs,
				5525	};
				5526
				5527	static ssize_t slab_attr_show(struct kobject *kobj,
				5528	struct attribute *attr,
				5529	char *buf)
				5530	{
				5531	struct slab_attribute *attribute;
				5532	struct kmem_cache *s;
				5533	int err;
				5534
				5535	attribute = to_slab_attr(attr);
				5536	s = to_slab(kobj);
				5537
				5538	if (!attribute->show)
				5539	return -EIO;
				5540
				5541	err = attribute->show(s, buf);
				5542
				5543	return err;
				5544	}
				5545
				5546	static ssize_t slab_attr_store(struct kobject *kobj,
				5547	struct attribute *attr,
				5548	const char *buf, size_t len)
				5549	{
				5550	struct slab_attribute *attribute;
				5551	struct kmem_cache *s;
				5552	int err;
				5553
				5554	attribute = to_slab_attr(attr);
				5555	s = to_slab(kobj);
				5556
				5557	if (!attribute->store)
				5558	return -EIO;
				5559
				5560	err = attribute->store(s, buf, len);
				5561	#ifdef CONFIG_MEMCG
				5562	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
				5563	struct kmem_cache *c;
				5564
				5565	mutex_lock(&slab_mutex);
				5566	if (s->max_attr_size < len)
				5567	s->max_attr_size = len;
				5568
				5569	/*
				5570	* This is a best effort propagation, so this function's return
				5571	* value will be determined by the parent cache only. This is
				5572	* basically because not all attributes will have a well
				5573	* defined semantics for rollbacks - most of the actions will
				5574	* have permanent effects.
				5575	*
				5576	* Returning the error value of any of the children that fail
				5577	* is not 100 % defined, in the sense that users seeing the
				5578	* error code won't be able to know anything about the state of
				5579	* the cache.
				5580	*
				5581	* Only returning the error code for the parent cache at least
				5582	* has well defined semantics. The cache being written to
				5583	* directly either failed or succeeded, in which case we loop
				5584	* through the descendants with best-effort propagation.
				5585	*/
				5586	for_each_memcg_cache(c, s)
				5587	attribute->store(c, buf, len);
				5588	mutex_unlock(&slab_mutex);
				5589	}
				5590	#endif
				5591	return err;
				5592	}
				5593
				/*
				 * Copy the attribute values of a memcg cache's root cache onto @s by
				 * reading each attribute from the root with ->show() and writing the
				 * result to @s with ->store(). Does nothing for a root cache, when the
				 * root never had an attribute written (max_attr_size == 0), or without
				 * CONFIG_MEMCG.
				 */
				static void memcg_propagate_slab_attrs(struct kmem_cache *s)
				{
				#ifdef CONFIG_MEMCG
					int i;
					char *buffer = NULL;
					struct kmem_cache *root_cache;

					if (is_root_cache(s))
						return;

					root_cache = s->memcg_params.root_cache;

					/*
					 * This means the root cache had no attribute written. Therefore,
					 * there is no point in copying default values around.
					 */
					if (!root_cache->max_attr_size)
						return;

					for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
						char mbuf[64];
						char *buf;
						struct slab_attribute *attr;
						ssize_t len;

						/*
						 * The table ends with a NULL entry. Test the entry itself
						 * instead of the container_of() result, so the check does
						 * not depend on 'attr' being the first struct member.
						 */
						if (!slab_attrs[i])
							continue;

						attr = to_slab_attr(slab_attrs[i]);
						if (!attr->store || !attr->show)
							continue;

						/*
						 * It is really bad that we have to allocate here, so we will
						 * do it only as a fallback. If we actually allocate, though,
						 * we can just use the allocated buffer until the end.
						 *
						 * Most of the slub attributes will tend to be very small in
						 * size, but sysfs allows buffers up to a page, so they can
						 * theoretically happen.
						 */
						if (buffer)
							buf = buffer;
						else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
							buf = mbuf;
						else {
							buffer = (char *) get_zeroed_page(GFP_KERNEL);
							if (WARN_ON(!buffer))
								continue;
							buf = buffer;
						}

						len = attr->show(root_cache, buf);
						if (len > 0)
							attr->store(s, buf, len);
					}

					if (buffer)
						free_page((unsigned long)buffer);
				#endif
				}
				5651
				/* kobject release hook: hand the owning cache to slab_kmem_cache_release(). */
				static void kmem_cache_release(struct kobject *k)
				{
					struct kmem_cache *cache = to_slab(k);

					slab_kmem_cache_release(cache);
				}
				5656
				5657	static const struct sysfs_ops slab_sysfs_ops = {
				5658	.show = slab_attr_show,
				5659	.store = slab_attr_store,
				5660	};
				5661
				5662	static struct kobj_type slab_ktype = {
				5663	.sysfs_ops = &slab_sysfs_ops,
				5664	.release = kmem_cache_release,
				5665	};
				5666
				5667	static int uevent_filter(struct kset kset, struct kobject kobj)
				5668	{
				5669	struct kobj_type *ktype = get_ktype(kobj);
				5670
				5671	if (ktype == &slab_ktype)
				5672	return 1;
				5673	return 0;
				5674	}
				5675
				5676	static const struct kset_uevent_ops slab_uevent_ops = {
				5677	.filter = uevent_filter,
				5678	};
				5679
				5680	static struct kset *slab_kset;
				5681
				5682	static inline struct kset cache_kset(struct kmem_cache s)
				5683	{
				5684	#ifdef CONFIG_MEMCG
				5685	if (!is_root_cache(s))
				5686	return s->memcg_params.root_cache->memcg_kset;
				5687	#endif
				5688	return slab_kset;
				5689	}
				5690
				5691	#define ID_STR_LENGTH 64
				5692
				5693	/* Create a unique string id for a slab cache:
				5694	*
				5695	* Format :[flags-]size
				5696	*/
				5697	static char create_unique_id(struct kmem_cache s)
				5698	{
				5699	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
				5700	char *p = name;
				5701
				5702	BUG_ON(!name);
				5703
				5704	*p++ = ':';
				5705	/*
				5706	* First flags affecting slabcache operations. We will only
				5707	* get here for aliasable slabs so we do not need to support
				5708	* too many flags. The flags here must cover all flags that
				5709	* are matched during merging to guarantee that the id is
				5710	* unique.
				5711	*/
				5712	if (s->flags & SLAB_CACHE_DMA)
				5713	*p++ = 'd';
				5714	if (s->flags & SLAB_CACHE_DMA32)
				5715	*p++ = 'D';
				5716	if (s->flags & SLAB_RECLAIM_ACCOUNT)
				5717	*p++ = 'a';
				5718	if (s->flags & SLAB_CONSISTENCY_CHECKS)
				5719	*p++ = 'F';
				5720	if (s->flags & SLAB_ACCOUNT)
				5721	*p++ = 'A';
				5722	if (p != name + 1)
				5723	*p++ = '-';
				5724	p += sprintf(p, "%07u", s->size);
				5725
				5726	BUG_ON(p > name + ID_STR_LENGTH - 1);
				5727	return name;
				5728	}
				5729
				5730	static void sysfs_slab_remove_workfn(struct work_struct *work)
				5731	{
				5732	struct kmem_cache *s =
				5733	container_of(work, struct kmem_cache, kobj_remove_work);
				5734
				5735	if (!s->kobj.state_in_sysfs)
				5736	/*
				5737	* For a memcg cache, this may be called during
				5738	* deactivation and again on shutdown. Remove only once.
				5739	* A cache is never shut down before deactivation is
				5740	* complete, so no need to worry about synchronization.
				5741	*/
				5742	goto out;
				5743
				5744	#ifdef CONFIG_MEMCG
				5745	kset_unregister(s->memcg_kset);
				5746	#endif
				5747	kobject_uevent(&s->kobj, KOBJ_REMOVE);
				5748	out:
				5749	kobject_put(&s->kobj);
				5750	}
				5751
				5752	static int sysfs_slab_add(struct kmem_cache *s)
				5753	{
				5754	int err;
				5755	const char *name;
				5756	struct kset *kset = cache_kset(s);
				5757	int unmergeable = slab_unmergeable(s);
				5758
				5759	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
				5760
				5761	if (!kset) {
				5762	kobject_init(&s->kobj, &slab_ktype);
				5763	return 0;
				5764	}
				5765
				5766	if (!unmergeable && disable_higher_order_debug &&
				5767	(slub_debug & DEBUG_METADATA_FLAGS))
				5768	unmergeable = 1;
				5769
				5770	if (unmergeable) {
				5771	/*
				5772	* Slabcache can never be merged so we can use the name proper.
				5773	* This is typically the case for debug situations. In that
				5774	* case we can catch duplicate names easily.
				5775	*/
				5776	sysfs_remove_link(&slab_kset->kobj, s->name);
				5777	name = s->name;
				5778	} else {
				5779	/*
				5780	* Create a unique name for the slab as a target
				5781	* for the symlinks.
				5782	*/
				5783	name = create_unique_id(s);
				5784	}
				5785
				5786	s->kobj.kset = kset;
				5787	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
				5788	if (err)
				5789	goto out;
				5790
				5791	err = sysfs_create_group(&s->kobj, &slab_attr_group);
				5792	if (err)
				5793	goto out_del_kobj;
				5794
				5795	#ifdef CONFIG_MEMCG
				5796	if (is_root_cache(s) && memcg_sysfs_enabled) {
				5797	s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
				5798	if (!s->memcg_kset) {
				5799	err = -ENOMEM;
				5800	goto out_del_kobj;
				5801	}
				5802	}
				5803	#endif
				5804
				5805	kobject_uevent(&s->kobj, KOBJ_ADD);
				5806	if (!unmergeable) {
				5807	/* Setup first alias */
				5808	sysfs_slab_alias(s, s->name);
				5809	}
				5810	out:
				5811	if (!unmergeable)
				5812	kfree(name);
				5813	return err;
				5814	out_del_kobj:
				5815	kobject_del(&s->kobj);
				5816	goto out;
				5817	}
				5818
				5819	static void sysfs_slab_remove(struct kmem_cache *s)
				5820	{
				5821	if (slab_state < FULL)
				5822	/*
				5823	* Sysfs has not been setup yet so no need to remove the
				5824	* cache from sysfs.
				5825	*/
				5826	return;
				5827
				5828	kobject_get(&s->kobj);
				5829	schedule_work(&s->kobj_remove_work);
				5830	}
				5831
				5832	void sysfs_slab_unlink(struct kmem_cache *s)
				5833	{
				5834	if (slab_state >= FULL)
				5835	kobject_del(&s->kobj);
				5836	}
				5837
				5838	void sysfs_slab_release(struct kmem_cache *s)
				5839	{
				5840	if (slab_state >= FULL)
				5841	kobject_put(&s->kobj);
				5842	}
				5843
				/*
				 * Aliases requested during bootup, before sysfs is available, are kept
				 * on this singly linked list so that the information is not lost.
				 */
				struct saved_alias {
					struct kmem_cache *s;		/* cache the alias refers to */
					const char *name;		/* alias name */
					struct saved_alias *next;	/* next pending alias */
				};

				/* Head of the pending alias list. */
				static struct saved_alias *alias_list;
				5855
				5856	static int sysfs_slab_alias(struct kmem_cache s, const char name)
				5857	{
				5858	struct saved_alias *al;
				5859
				5860	if (slab_state == FULL) {
				5861	/*
				5862	* If we have a leftover link then remove it.
				5863	*/
				5864	sysfs_remove_link(&slab_kset->kobj, name);
				5865	return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
				5866	}
				5867
				5868	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
				5869	if (!al)
				5870	return -ENOMEM;
				5871
				5872	al->s = s;
				5873	al->name = name;
				5874	al->next = alias_list;
				5875	alias_list = al;
				5876	return 0;
				5877	}
				5878
				5879	static int __init slab_sysfs_init(void)
				5880	{
				5881	struct kmem_cache *s;
				5882	int err;
				5883
				5884	mutex_lock(&slab_mutex);
				5885
				5886	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
				5887	if (!slab_kset) {
				5888	mutex_unlock(&slab_mutex);
				5889	pr_err("Cannot register slab subsystem.\n");
				5890	return -ENOSYS;
				5891	}
				5892
				5893	slab_state = FULL;
				5894
				5895	list_for_each_entry(s, &slab_caches, list) {
				5896	err = sysfs_slab_add(s);
				5897	if (err)
				5898	pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
				5899	s->name);
				5900	}
				5901
				5902	while (alias_list) {
				5903	struct saved_alias *al = alias_list;
				5904
				5905	alias_list = alias_list->next;
				5906	err = sysfs_slab_alias(al->s, al->name);
				5907	if (err)
				5908	pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
				5909	al->name);
				5910	kfree(al);
				5911	}
				5912
				5913	mutex_unlock(&slab_mutex);
				5914	resiliency_test();
				5915	return 0;
				5916	}
				5917
				5918	__initcall(slab_sysfs_init);
				5919	#endif /* CONFIG_SYSFS */
				5920
				5921	/*
				5922	* The /proc/slabinfo ABI
				5923	*/
				5924	#ifdef CONFIG_SLUB_DEBUG
				5925	void get_slabinfo(struct kmem_cache s, struct slabinfo sinfo)
				5926	{
				5927	unsigned long nr_slabs = 0;
				5928	unsigned long nr_objs = 0;
				5929	unsigned long nr_free = 0;
				5930	int node;
				5931	struct kmem_cache_node *n;
				5932
				5933	for_each_kmem_cache_node(s, node, n) {
				5934	nr_slabs += node_nr_slabs(n);
				5935	nr_objs += node_nr_objs(n);
				5936	nr_free += count_partial(n, count_free);
				5937	}
				5938
				5939	sinfo->active_objs = nr_objs - nr_free;
				5940	sinfo->num_objs = nr_objs;
				5941	sinfo->active_slabs = nr_slabs;
				5942	sinfo->num_slabs = nr_slabs;
				5943	sinfo->objects_per_slab = oo_objects(s->oo);
				5944	sinfo->cache_order = oo_order(s->oo);
				5945	}
				5946
				/* Intentionally empty: this function writes nothing to @m. */
				void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
				{
				}
				5950
				5951	ssize_t slabinfo_write(struct file file, const char __user buffer,
				5952	size_t count, loff_t *ppos)
				5953	{
				5954	return -EIO;
				5955	}
				5956	#endif /* CONFIG_SLUB_DEBUG */