Blame - ap/os/linux/linux-3.4.x/kernel/workqueue.c - T106_DC

blob: 653d7fccb7621d7815d94bed41655983f63c613c [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* kernel/workqueue.c - generic async execution with shared worker pool
				3	*
				4	* Copyright (C) 2002 Ingo Molnar
				5	*
				6	* Derived from the taskqueue/keventd code by:
				7	* David Woodhouse <dwmw2@infradead.org>
				8	* Andrew Morton
				9	* Kai Petzke <wpp@marie.physik.tu-berlin.de>
				10	* Theodore Ts'o <tytso@mit.edu>
				11	*
				12	* Made to use alloc_percpu by Christoph Lameter.
				13	*
				14	* Copyright (C) 2010 SUSE Linux Products GmbH
				15	* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
				16	*
				17	* This is the generic async execution mechanism. Work items are
				18	* executed in process context. The worker pool is shared and
				19	* automatically managed. There is one worker pool for each CPU and
				20	* one extra for works which are better served by workers which are
				21	* not bound to any specific CPU.
				22	*
				23	* Please read Documentation/workqueue.txt for details.
				24	*/
				25
				26	#include <linux/export.h>
				27	#include <linux/kernel.h>
				28	#include <linux/sched.h>
				29	#include <linux/init.h>
				30	#include <linux/signal.h>
				31	#include <linux/completion.h>
				32	#include <linux/workqueue.h>
				33	#include <linux/slab.h>
				34	#include <linux/cpu.h>
				35	#include <linux/notifier.h>
				36	#include <linux/kthread.h>
				37	#include <linux/hardirq.h>
				38	#include <linux/mempolicy.h>
				39	#include <linux/freezer.h>
				40	#include <linux/kallsyms.h>
				41	#include <linux/debug_locks.h>
				42	#include <linux/lockdep.h>
				43	#include <linux/idr.h>
				44
				45	#include "workqueue_sched.h"
				46
				47	enum {
				48	/* global_cwq flags */
				49	GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
				50	GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */
				51	GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
				52	GCWQ_FREEZING = 1 << 3, /* freeze in progress */
				53	GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */
				54
				55	/* worker flags */
				56	WORKER_STARTED = 1 << 0, /* started */
				57	WORKER_DIE = 1 << 1, /* die die die */
				58	WORKER_IDLE = 1 << 2, /* is idle */
				59	WORKER_PREP = 1 << 3, /* preparing to run works */
				60	WORKER_ROGUE = 1 << 4, /* not bound to any cpu */
				61	WORKER_REBIND = 1 << 5, /* mom is home, come back */
				62	WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
				63	WORKER_UNBOUND = 1 << 7, /* worker is unbound */
				64
				65	WORKER_NOT_RUNNING = WORKER_PREP \| WORKER_ROGUE \| WORKER_REBIND \|
				66	WORKER_CPU_INTENSIVE \| WORKER_UNBOUND,
				67
				68	/* gcwq->trustee_state */
				69	TRUSTEE_START = 0, /* start */
				70	TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */
				71	TRUSTEE_BUTCHER = 2, /* butcher workers */
				72	TRUSTEE_RELEASE = 3, /* release workers */
				73	TRUSTEE_DONE = 4, /* trustee is done */
				74
				75	BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
				76	BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
				77	BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
				78
				79	MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
				80	IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
				81
				82	MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
				83	/* call for help after 10ms
				84	(min two ticks) */
				85	MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
				86	CREATE_COOLDOWN = HZ, /* time to breath after fail */
				87	TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */
				88
				89	/*
				90	* Rescue workers are used only on emergencies and shared by
				91	* all cpus. Give -20.
				92	*/
				93	RESCUER_NICE_LEVEL = -20,
				94	};
				95
				96	/*
				97	* Structure fields follow one of the following exclusion rules.
				98	*
				99	* I: Modifiable by initialization/destruction paths and read-only for
				100	* everyone else.
				101	*
				102	* P: Preemption protected. Disabling preemption is enough and should
				103	* only be modified and accessed from the local cpu.
				104	*
				105	* L: gcwq->lock protected. Access with gcwq->lock held.
				106	*
				107	* X: During normal operation, modification requires gcwq->lock and
				108	* should be done only from local cpu. Either disabling preemption
				109	* on local cpu or grabbing gcwq->lock is enough for read access.
				110	* If GCWQ_DISASSOCIATED is set, it's identical to L.
				111	*
				112	* F: wq->flush_mutex protected.
				113	*
				114	* W: workqueue_lock protected.
				115	*/
				116
				117	struct global_cwq;
				118
				119	/*
				120	* The poor guys doing the actual heavy lifting. All on-duty workers
				121	* are either serving the manager role, on idle list or on busy hash.
				122	*/
				123	struct worker {
				124	/* on idle list while idle, on busy hash table while busy */
				125	union {
				126	struct list_head entry; /* L: while idle */
				127	struct hlist_node hentry; /* L: while busy */
				128	};
				129
				130	struct work_struct current_work; / L: work being processed */
				131	work_func_t current_func; /* L: current_work's fn */
				132	struct cpu_workqueue_struct current_cwq; / L: current_work's cwq */
				133	struct list_head scheduled; /* L: scheduled works */
				134	struct task_struct task; / I: worker task */
				135	struct global_cwq gcwq; / I: the associated gcwq */
				136	/* 64 bytes boundary on 64bit, 32 on 32bit */
				137	unsigned long last_active; /* L: last active timestamp */
				138	unsigned int flags; /* X: flags */
				139	int id; /* I: worker id */
				140	struct work_struct rebind_work; /* L: rebind worker to cpu */
				141	int sleeping; /* None */
				142	};
				143
				144	/*
				145	* Global per-cpu workqueue. There's one and only one for each cpu
				146	* and all works are queued and processed here regardless of their
				147	* target workqueues.
				148	*/
				149	struct global_cwq {
				150	spinlock_t lock; /* the gcwq lock */
				151	struct list_head worklist; /* L: list of pending works */
				152	unsigned int cpu; /* I: the associated cpu */
				153	unsigned int flags; /* L: GCWQ_* flags */
				154
				155	int nr_workers; /* L: total number of workers */
				156	int nr_idle; /* L: currently idle ones */
				157
				158	/* workers are chained either in the idle_list or busy_hash */
				159	struct list_head idle_list; /* X: list of idle workers */
				160	struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
				161	/* L: hash of busy workers */
				162
				163	struct timer_list idle_timer; /* L: worker idle timeout */
				164	struct timer_list mayday_timer; /* L: SOS timer for dworkers */
				165
				166	struct ida worker_ida; /* L: for worker IDs */
				167
				168	struct task_struct trustee; / L: for gcwq shutdown */
				169	unsigned int trustee_state; /* L: trustee state */
				170	wait_queue_head_t trustee_wait; /* trustee wait */
				171	struct worker first_idle; / L: first idle worker */
				172	} ____cacheline_aligned_in_smp;
				173
				174	/*
				175	* The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of
				176	* work_struct->data are used for flags and thus cwqs need to be
				177	* aligned at two's power of the number of flag bits.
				178	*/
				179	struct cpu_workqueue_struct {
				180	struct global_cwq gcwq; / I: the associated gcwq */
				181	struct workqueue_struct wq; / I: the owning workqueue */
				182	int work_color; /* L: current color */
				183	int flush_color; /* L: flushing color */
				184	int nr_in_flight[WORK_NR_COLORS];
				185	/* L: nr of in_flight works */
				186	int nr_active; /* L: nr of active works */
				187	int max_active; /* L: max active works */
				188	struct list_head delayed_works; /* L: delayed works */
				189	};
				190
				191	/*
				192	* Structure used to wait for workqueue flush.
				193	*/
				194	struct wq_flusher {
				195	struct list_head list; /* F: list of flushers */
				196	int flush_color; /* F: flush color waiting for */
				197	struct completion done; /* flush completion */
				198	};
				199
				200	/*
				201	* All cpumasks are assumed to be always set on UP and thus can't be
				202	* used to determine whether there's something to be done.
				203	*/
				204	#ifdef CONFIG_SMP
				205	typedef cpumask_var_t mayday_mask_t;
				206	#define mayday_test_and_set_cpu(cpu, mask) \
				207	cpumask_test_and_set_cpu((cpu), (mask))
				208	#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
				209	#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
				210	#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
				211	#define free_mayday_mask(mask) free_cpumask_var((mask))
				212	#else
				213	typedef unsigned long mayday_mask_t;
				214	#define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask))
				215	#define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask))
				216	#define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask))
				217	#define alloc_mayday_mask(maskp, gfp) true
				218	#define free_mayday_mask(mask) do { } while (0)
				219	#endif
				220
				221	/*
				222	* The externally visible workqueue abstraction is an array of
				223	* per-CPU workqueues:
				224	*/
				225	struct workqueue_struct {
				226	unsigned int flags; /* W: WQ_* flags */
				227	union {
				228	struct cpu_workqueue_struct __percpu *pcpu;
				229	struct cpu_workqueue_struct *single;
				230	unsigned long v;
				231	} cpu_wq; /* I: cwq's */
				232	struct list_head list; /* W: list of all workqueues */
				233
				234	struct mutex flush_mutex; /* protects wq flushing */
				235	int work_color; /* F: current work color */
				236	int flush_color; /* F: current flush color */
				237	atomic_t nr_cwqs_to_flush; /* flush in progress */
				238	struct wq_flusher first_flusher; / F: first flusher */
				239	struct list_head flusher_queue; /* F: flush waiters */
				240	struct list_head flusher_overflow; /* F: flush overflow list */
				241
				242	mayday_mask_t mayday_mask; /* cpus requesting rescue */
				243	struct worker rescuer; / I: rescue worker */
				244
				245	int nr_drainers; /* W: drain in progress */
				246	int saved_max_active; /* W: saved cwq max_active */
				247	#ifdef CONFIG_LOCKDEP
				248	struct lockdep_map lockdep_map;
				249	#endif
				250	char name[]; /* I: workqueue name */
				251	};
				252
				253	struct workqueue_struct *system_wq __read_mostly;
				254	struct workqueue_struct *system_long_wq __read_mostly;
				255	struct workqueue_struct *system_nrt_wq __read_mostly;
				256	struct workqueue_struct *system_unbound_wq __read_mostly;
				257	struct workqueue_struct *system_freezable_wq __read_mostly;
				258	struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
				259	EXPORT_SYMBOL_GPL(system_wq);
				260	EXPORT_SYMBOL_GPL(system_long_wq);
				261	EXPORT_SYMBOL_GPL(system_nrt_wq);
				262	EXPORT_SYMBOL_GPL(system_unbound_wq);
				263	EXPORT_SYMBOL_GPL(system_freezable_wq);
				264	EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
				265
				266	#define CREATE_TRACE_POINTS
				267	#include <trace/events/workqueue.h>
				268
				269	#define for_each_busy_worker(worker, i, pos, gcwq) \
				270	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
				271	hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
				272
				273	static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
				274	unsigned int sw)
				275	{
				276	if (cpu < nr_cpu_ids) {
				277	if (sw & 1) {
				278	cpu = cpumask_next(cpu, mask);
				279	if (cpu < nr_cpu_ids)
				280	return cpu;
				281	}
				282	if (sw & 2)
				283	return WORK_CPU_UNBOUND;
				284	}
				285	return WORK_CPU_NONE;
				286	}
				287
				288	static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
				289	struct workqueue_struct *wq)
				290	{
				291	return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
				292	}
				293
				294	/*
				295	* CPU iterators
				296	*
				297	* An extra gcwq is defined for an invalid cpu number
				298	* (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
				299	* specific CPU. The following iterators are similar to
				300	* for_each_*_cpu() iterators but also considers the unbound gcwq.
				301	*
				302	* for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND
				303	* for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND
				304	* for_each_cwq_cpu() : possible CPUs for bound workqueues,
				305	* WORK_CPU_UNBOUND for unbound workqueues
				306	*/
				307	#define for_each_gcwq_cpu(cpu) \
				308	for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \
				309	(cpu) < WORK_CPU_NONE; \
				310	(cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3))
				311
				312	#define for_each_online_gcwq_cpu(cpu) \
				313	for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \
				314	(cpu) < WORK_CPU_NONE; \
				315	(cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3))
				316
				317	#define for_each_cwq_cpu(cpu, wq) \
				318	for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \
				319	(cpu) < WORK_CPU_NONE; \
				320	(cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq)))
				321
				322	#ifdef CONFIG_PREEMPT_RT_BASE
				323	static inline void rt_lock_idle_list(struct global_cwq *gcwq)
				324	{
				325	preempt_disable();
				326	}
				327	static inline void rt_unlock_idle_list(struct global_cwq *gcwq)
				328	{
				329	preempt_enable();
				330	}
				331	static inline void sched_lock_idle_list(struct global_cwq *gcwq) { }
				332	static inline void sched_unlock_idle_list(struct global_cwq *gcwq) { }
				333	#else
				334	static inline void rt_lock_idle_list(struct global_cwq *gcwq) { }
				335	static inline void rt_unlock_idle_list(struct global_cwq *gcwq) { }
				336	static inline void sched_lock_idle_list(struct global_cwq *gcwq)
				337	{
				338	spin_lock_irq(&gcwq->lock);
				339	}
				340	static inline void sched_unlock_idle_list(struct global_cwq *gcwq)
				341	{
				342	spin_unlock_irq(&gcwq->lock);
				343	}
				344	#endif
				345
				346
				347	#ifdef CONFIG_DEBUG_OBJECTS_WORK
				348
				349	static struct debug_obj_descr work_debug_descr;
				350
				351	static void work_debug_hint(void addr)
				352	{
				353	return ((struct work_struct *) addr)->func;
				354	}
				355
				356	/*
				357	* fixup_init is called when:
				358	* - an active object is initialized
				359	*/
				360	static int work_fixup_init(void *addr, enum debug_obj_state state)
				361	{
				362	struct work_struct *work = addr;
				363
				364	switch (state) {
				365	case ODEBUG_STATE_ACTIVE:
				366	cancel_work_sync(work);
				367	debug_object_init(work, &work_debug_descr);
				368	return 1;
				369	default:
				370	return 0;
				371	}
				372	}
				373
				374	/*
				375	* fixup_activate is called when:
				376	* - an active object is activated
				377	* - an unknown object is activated (might be a statically initialized object)
				378	*/
				379	static int work_fixup_activate(void *addr, enum debug_obj_state state)
				380	{
				381	struct work_struct *work = addr;
				382
				383	switch (state) {
				384
				385	case ODEBUG_STATE_NOTAVAILABLE:
				386	/*
				387	* This is not really a fixup. The work struct was
				388	* statically initialized. We just make sure that it
				389	* is tracked in the object tracker.
				390	*/
				391	if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
				392	debug_object_init(work, &work_debug_descr);
				393	debug_object_activate(work, &work_debug_descr);
				394	return 0;
				395	}
				396	WARN_ON_ONCE(1);
				397	return 0;
				398
				399	case ODEBUG_STATE_ACTIVE:
				400	WARN_ON(1);
				401
				402	default:
				403	return 0;
				404	}
				405	}
				406
				407	/*
				408	* fixup_free is called when:
				409	* - an active object is freed
				410	*/
				411	static int work_fixup_free(void *addr, enum debug_obj_state state)
				412	{
				413	struct work_struct *work = addr;
				414
				415	switch (state) {
				416	case ODEBUG_STATE_ACTIVE:
				417	cancel_work_sync(work);
				418	debug_object_free(work, &work_debug_descr);
				419	return 1;
				420	default:
				421	return 0;
				422	}
				423	}
				424
				425	static struct debug_obj_descr work_debug_descr = {
				426	.name = "work_struct",
				427	.debug_hint = work_debug_hint,
				428	.fixup_init = work_fixup_init,
				429	.fixup_activate = work_fixup_activate,
				430	.fixup_free = work_fixup_free,
				431	};
				432
				433	static inline void debug_work_activate(struct work_struct *work)
				434	{
				435	debug_object_activate(work, &work_debug_descr);
				436	}
				437
				438	static inline void debug_work_deactivate(struct work_struct *work)
				439	{
				440	debug_object_deactivate(work, &work_debug_descr);
				441	}
				442
				443	void __init_work(struct work_struct *work, int onstack)
				444	{
				445	if (onstack)
				446	debug_object_init_on_stack(work, &work_debug_descr);
				447	else
				448	debug_object_init(work, &work_debug_descr);
				449	}
				450	EXPORT_SYMBOL_GPL(__init_work);
				451
				452	void destroy_work_on_stack(struct work_struct *work)
				453	{
				454	debug_object_free(work, &work_debug_descr);
				455	}
				456	EXPORT_SYMBOL_GPL(destroy_work_on_stack);
				457
				458	#else
				459	static inline void debug_work_activate(struct work_struct *work) { }
				460	static inline void debug_work_deactivate(struct work_struct *work) { }
				461	#endif
				462
				463	/* Serializes the accesses to the list of workqueues. */
				464	static DEFINE_SPINLOCK(workqueue_lock);
				465	static LIST_HEAD(workqueues);
				466	static bool workqueue_freezing; /* W: have wqs started freezing? */
				467
				468	/*
				469	* The almighty global cpu workqueues. nr_running is the only field
				470	* which is expected to be used frequently by other cpus via
				471	* try_to_wake_up(). Put it in a separate cacheline.
				472	*/
				473	static DEFINE_PER_CPU(struct global_cwq, global_cwq);
				474	static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
				475
				476	/*
				477	* Global cpu workqueue and nr_running counter for unbound gcwq. The
				478	* gcwq is always online, has GCWQ_DISASSOCIATED set, and all its
				479	* workers have WORKER_UNBOUND set.
				480	*/
				481	static struct global_cwq unbound_global_cwq;
				482	static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0); /* always 0 */
				483
				484	static int worker_thread(void *__worker);
				485
				486	static struct global_cwq *get_gcwq(unsigned int cpu)
				487	{
				488	if (cpu != WORK_CPU_UNBOUND)
				489	return &per_cpu(global_cwq, cpu);
				490	else
				491	return &unbound_global_cwq;
				492	}
				493
				494	static atomic_t *get_gcwq_nr_running(unsigned int cpu)
				495	{
				496	if (cpu != WORK_CPU_UNBOUND)
				497	return &per_cpu(gcwq_nr_running, cpu);
				498	else
				499	return &unbound_gcwq_nr_running;
				500	}
				501
				502	static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
				503	struct workqueue_struct *wq)
				504	{
				505	if (!(wq->flags & WQ_UNBOUND)) {
				506	if (likely(cpu < nr_cpu_ids))
				507	return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
				508	} else if (likely(cpu == WORK_CPU_UNBOUND))
				509	return wq->cpu_wq.single;
				510	return NULL;
				511	}
				512
				513	static unsigned int work_color_to_flags(int color)
				514	{
				515	return color << WORK_STRUCT_COLOR_SHIFT;
				516	}
				517
				518	static int get_work_color(struct work_struct *work)
				519	{
				520	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
				521	((1 << WORK_STRUCT_COLOR_BITS) - 1);
				522	}
				523
				524	static int work_next_color(int color)
				525	{
				526	return (color + 1) % WORK_NR_COLORS;
				527	}
				528
				529	/*
				530	* A work's data points to the cwq with WORK_STRUCT_CWQ set while the
				531	* work is on queue. Once execution starts, WORK_STRUCT_CWQ is
				532	* cleared and the work data contains the cpu number it was last on.
				533	*
				534	* set_work_{cwq\|cpu}() and clear_work_data() can be used to set the
				535	* cwq, cpu or clear work->data. These functions should only be
				536	* called while the work is owned - ie. while the PENDING bit is set.
				537	*
				538	* get_work_[g]cwq() can be used to obtain the gcwq or cwq
				539	* corresponding to a work. gcwq is available once the work has been
				540	* queued anywhere after initialization. cwq is available only from
				541	* queueing until execution starts.
				542	*/
				543	static inline void set_work_data(struct work_struct *work, unsigned long data,
				544	unsigned long flags)
				545	{
				546	BUG_ON(!work_pending(work));
				547	atomic_long_set(&work->data, data \| flags \| work_static(work));
				548	}
				549
				550	static void set_work_cwq(struct work_struct *work,
				551	struct cpu_workqueue_struct *cwq,
				552	unsigned long extra_flags)
				553	{
				554	set_work_data(work, (unsigned long)cwq,
				555	WORK_STRUCT_PENDING \| WORK_STRUCT_CWQ \| extra_flags);
				556	}
				557
				558	static void set_work_cpu(struct work_struct *work, unsigned int cpu)
				559	{
				560	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
				561	}
				562
				563	static void clear_work_data(struct work_struct *work)
				564	{
				565	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
				566	}
				567
				568	static struct cpu_workqueue_struct get_work_cwq(struct work_struct work)
				569	{
				570	unsigned long data = atomic_long_read(&work->data);
				571
				572	if (data & WORK_STRUCT_CWQ)
				573	return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
				574	else
				575	return NULL;
				576	}
				577
				578	static struct global_cwq get_work_gcwq(struct work_struct work)
				579	{
				580	unsigned long data = atomic_long_read(&work->data);
				581	unsigned int cpu;
				582
				583	if (data & WORK_STRUCT_CWQ)
				584	return ((struct cpu_workqueue_struct *)
				585	(data & WORK_STRUCT_WQ_DATA_MASK))->gcwq;
				586
				587	cpu = data >> WORK_STRUCT_FLAG_BITS;
				588	if (cpu == WORK_CPU_NONE)
				589	return NULL;
				590
				591	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
				592	return get_gcwq(cpu);
				593	}
				594
				595	/*
				596	* Policy functions. These define the policies on how the global
				597	* worker pool is managed. Unless noted otherwise, these functions
				598	* assume that they're being called with gcwq->lock held.
				599	*/
				600
				601	static bool __need_more_worker(struct global_cwq *gcwq)
				602	{
				603	return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) \|\|
				604	gcwq->flags & GCWQ_HIGHPRI_PENDING;
				605	}
				606
				607	/*
				608	* Need to wake up a worker? Called from anything but currently
				609	* running workers.
				610	*/
				611	static bool need_more_worker(struct global_cwq *gcwq)
				612	{
				613	return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq);
				614	}
				615
				616	/* Can I start working? Called from busy but !running workers. */
				617	static bool may_start_working(struct global_cwq *gcwq)
				618	{
				619	return gcwq->nr_idle;
				620	}
				621
				622	/* Do I need to keep working? Called from currently running workers. */
				623	static bool keep_working(struct global_cwq *gcwq)
				624	{
				625	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
				626
				627	return !list_empty(&gcwq->worklist) &&
				628	(atomic_read(nr_running) <= 1 \|\|
				629	gcwq->flags & GCWQ_HIGHPRI_PENDING);
				630	}
				631
				632	/* Do we need a new worker? Called from manager. */
				633	static bool need_to_create_worker(struct global_cwq *gcwq)
				634	{
				635	return need_more_worker(gcwq) && !may_start_working(gcwq);
				636	}
				637
				638	/* Do I need to be the manager? */
				639	static bool need_to_manage_workers(struct global_cwq *gcwq)
				640	{
				641	return need_to_create_worker(gcwq) \|\| gcwq->flags & GCWQ_MANAGE_WORKERS;
				642	}
				643
				644	/* Do we have too many workers and should some go away? */
				645	static bool too_many_workers(struct global_cwq *gcwq)
				646	{
				647	bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS;
				648	int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */
				649	int nr_busy = gcwq->nr_workers - nr_idle;
				650
				651	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
				652	}
				653
				654	/*
				655	* Wake up functions.
				656	*/
				657
				658	/* Return the first worker. Safe with preemption disabled */
				659	static struct worker first_worker(struct global_cwq gcwq)
				660	{
				661	if (unlikely(list_empty(&gcwq->idle_list)))
				662	return NULL;
				663
				664	return list_first_entry(&gcwq->idle_list, struct worker, entry);
				665	}
				666
				667	/**
				668	* wake_up_worker - wake up an idle worker
				669	* @gcwq: gcwq to wake worker for
				670	*
				671	* Wake up the first idle worker of @gcwq.
				672	*
				673	* CONTEXT:
				674	* spin_lock_irq(gcwq->lock).
				675	*/
				676	static void wake_up_worker(struct global_cwq *gcwq)
				677	{
				678	struct worker *worker;
				679
				680	rt_lock_idle_list(gcwq);
				681
				682	worker = first_worker(gcwq);
				683
				684	if (likely(worker))
				685	wake_up_process(worker->task);
				686
				687	rt_unlock_idle_list(gcwq);
				688	}
				689
				690	/**
				691	* wq_worker_running - a worker is running again
				692	* @task: task returning from sleep
				693	*
				694	* This function is called when a worker returns from schedule()
				695	*/
				696	void wq_worker_running(struct task_struct *task)
				697	{
				698	struct worker *worker = kthread_data(task);
				699
				700	if (!worker->sleeping)
				701	return;
				702	if (!(worker->flags & WORKER_NOT_RUNNING))
				703	atomic_inc(get_gcwq_nr_running(smp_processor_id()));
				704	worker->sleeping = 0;
				705	}
				706
				707	/**
				708	* wq_worker_sleeping - a worker is going to sleep
				709	* @task: task going to sleep
				710	*
				711	* This function is called from schedule() when a busy worker is
				712	* going to sleep.
				713	*/
				714	void wq_worker_sleeping(struct task_struct *task)
				715	{
				716	struct worker *worker = kthread_data(task);
				717	struct global_cwq *gcwq;
				718	int cpu;
				719
				720	if (worker->flags & WORKER_NOT_RUNNING)
				721	return;
				722
				723	if (WARN_ON_ONCE(worker->sleeping))
				724	return;
				725
				726	worker->sleeping = 1;
				727
				728	cpu = smp_processor_id();
				729	gcwq = get_gcwq(cpu);
				730	/*
				731	* The counterpart of the following dec_and_test, implied mb,
				732	* worklist not empty test sequence is in insert_work().
				733	* Please read comment there.
				734	*/
				735	if (atomic_dec_and_test(get_gcwq_nr_running(cpu)) &&
				736	!list_empty(&gcwq->worklist)) {
				737	sched_lock_idle_list(gcwq);
				738	wake_up_worker(gcwq);
				739	sched_unlock_idle_list(gcwq);
				740	}
				741	}
				742
				743	/**
				744	* worker_set_flags - set worker flags and adjust nr_running accordingly
				745	* @worker: self
				746	* @flags: flags to set
				747	* @wakeup: wakeup an idle worker if necessary
				748	*
				749	* Set @flags in @worker->flags and adjust nr_running accordingly. If
				750	* nr_running becomes zero and @wakeup is %true, an idle worker is
				751	* woken up.
				752	*
				753	* CONTEXT:
				754	* spin_lock_irq(gcwq->lock)
				755	*/
				756	static inline void worker_set_flags(struct worker *worker, unsigned int flags,
				757	bool wakeup)
				758	{
				759	struct global_cwq *gcwq = worker->gcwq;
				760
				761	WARN_ON_ONCE(worker->task != current);
				762
				763	/*
				764	* If transitioning into NOT_RUNNING, adjust nr_running and
				765	* wake up an idle worker as necessary if requested by
				766	* @wakeup.
				767	*/
				768	if ((flags & WORKER_NOT_RUNNING) &&
				769	!(worker->flags & WORKER_NOT_RUNNING)) {
				770	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
				771
				772	if (wakeup) {
				773	if (atomic_dec_and_test(nr_running) &&
				774	!list_empty(&gcwq->worklist))
				775	wake_up_worker(gcwq);
				776	} else
				777	atomic_dec(nr_running);
				778	}
				779
				780	worker->flags \|= flags;
				781	}
				782
				783	/**
				784	* worker_clr_flags - clear worker flags and adjust nr_running accordingly
				785	* @worker: self
				786	* @flags: flags to clear
				787	*
				788	* Clear @flags in @worker->flags and adjust nr_running accordingly.
				789	*
				790	* CONTEXT:
				791	* spin_lock_irq(gcwq->lock)
				792	*/
				793	static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
				794	{
				795	struct global_cwq *gcwq = worker->gcwq;
				796	unsigned int oflags = worker->flags;
				797
				798	WARN_ON_ONCE(worker->task != current);
				799
				800	worker->flags &= ~flags;
				801
				802	/*
				803	* If transitioning out of NOT_RUNNING, increment nr_running. Note
				804	* that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask
				805	* of multiple flags, not a single flag.
				806	*/
				807	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
				808	if (!(worker->flags & WORKER_NOT_RUNNING))
				809	atomic_inc(get_gcwq_nr_running(gcwq->cpu));
				810	}
				811
				812	/**
				813	* busy_worker_head - return the busy hash head for a work
				814	* @gcwq: gcwq of interest
				815	* @work: work to be hashed
				816	*
				817	* Return hash head of @gcwq for @work.
				818	*
				819	* CONTEXT:
				820	* spin_lock_irq(gcwq->lock).
				821	*
				822	* RETURNS:
				823	* Pointer to the hash head.
				824	*/
				825	static struct hlist_head busy_worker_head(struct global_cwq gcwq,
				826	struct work_struct *work)
				827	{
				828	const int base_shift = ilog2(sizeof(struct work_struct));
				829	unsigned long v = (unsigned long)work;
				830
				831	/* simple shift and fold hash, do we need something better? */
				832	v >>= base_shift;
				833	v += v >> BUSY_WORKER_HASH_ORDER;
				834	v &= BUSY_WORKER_HASH_MASK;
				835
				836	return &gcwq->busy_hash[v];
				837	}
				838
				839	/**
				840	* __find_worker_executing_work - find worker which is executing a work
				841	* @gcwq: gcwq of interest
				842	* @bwh: hash head as returned by busy_worker_head()
				843	* @work: work to find worker for
				844	*
				845	* Find a worker which is executing @work on @gcwq. @bwh should be
				846	* the hash head obtained by calling busy_worker_head() with the same
				847	* work.
				848	*
				849	* CONTEXT:
				850	* spin_lock_irq(gcwq->lock).
				851	*
				852	* RETURNS:
				853	* Pointer to worker which is executing @work if found, NULL
				854	* otherwise.
				855	*/
				856	static struct worker __find_worker_executing_work(struct global_cwq gcwq,
				857	struct hlist_head *bwh,
				858	struct work_struct *work)
				859	{
				860	struct worker *worker;
				861	struct hlist_node *tmp;
				862
				863	hlist_for_each_entry(worker, tmp, bwh, hentry)
				864	if (worker->current_work == work &&
				865	worker->current_func == work->func)
				866	return worker;
				867	return NULL;
				868	}
				869
				870	/**
				871	* find_worker_executing_work - find worker which is executing a work
				872	* @gcwq: gcwq of interest
				873	* @work: work to find worker for
				874	*
				875	* Find a worker which is executing @work on @gcwq by searching
				876	* @gcwq->busy_hash which is keyed by the address of @work. For a worker
				877	* to match, its current execution should match the address of @work and
				878	* its work function. This is to avoid unwanted dependency between
				879	* unrelated work executions through a work item being recycled while still
				880	* being executed.
				881	*
				882	* This is a bit tricky. A work item may be freed once its execution
				883	* starts and nothing prevents the freed area from being recycled for
				884	* another work item. If the same work item address ends up being reused
				885	* before the original execution finishes, workqueue will identify the
				886	* recycled work item as currently executing and make it wait until the
				887	* current execution finishes, introducing an unwanted dependency.
				888	*
				889	* This function checks the work item address, work function and workqueue
				890	* to avoid false positives. Note that this isn't complete as one may
				891	* construct a work function which can introduce dependency onto itself
				892	* through a recycled work item. Well, if somebody wants to shoot oneself
				893	* in the foot that badly, there's only so much we can do, and if such
				894	* deadlock actually occurs, it should be easy to locate the culprit work
				895	* function.
				896	*
				897	* CONTEXT:
				898	* spin_lock_irq(gcwq->lock).
				899	*
				900	* RETURNS:
				901	* Pointer to worker which is executing @work if found, NULL
				902	* otherwise.
				903	*/
				904	static struct worker find_worker_executing_work(struct global_cwq gcwq,
				905	struct work_struct *work)
				906	{
				907	return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
				908	work);
				909	}
				910
				911	/**
				912	* gcwq_determine_ins_pos - find insertion position
				913	* @gcwq: gcwq of interest
				914	* @cwq: cwq a work is being queued for
				915	*
				916	* A work for @cwq is about to be queued on @gcwq, determine insertion
				917	* position for the work. If @cwq is for HIGHPRI wq, the work is
				918	* queued at the head of the queue but in FIFO order with respect to
				919	* other HIGHPRI works; otherwise, at the end of the queue. This
				920	* function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that
				921	* there are HIGHPRI works pending.
				922	*
				923	* CONTEXT:
				924	* spin_lock_irq(gcwq->lock).
				925	*
				926	* RETURNS:
				927	* Pointer to inserstion position.
				928	*/
				929	static inline struct list_head gcwq_determine_ins_pos(struct global_cwq gcwq,
				930	struct cpu_workqueue_struct *cwq)
				931	{
				932	struct work_struct *twork;
				933
				934	if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
				935	return &gcwq->worklist;
				936
				937	list_for_each_entry(twork, &gcwq->worklist, entry) {
				938	struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
				939
				940	if (!(tcwq->wq->flags & WQ_HIGHPRI))
				941	break;
				942	}
				943
				944	gcwq->flags \|= GCWQ_HIGHPRI_PENDING;
				945	return &twork->entry;
				946	}
				947
				948	/**
				949	* insert_work - insert a work into gcwq
				950	* @cwq: cwq @work belongs to
				951	* @work: work to insert
				952	* @head: insertion point
				953	* @extra_flags: extra WORK_STRUCT_* flags to set
				954	*
				955	* Insert @work which belongs to @cwq into @gcwq after @head.
				956	* @extra_flags is or'd to work_struct flags.
				957	*
				958	* CONTEXT:
				959	* spin_lock_irq(gcwq->lock).
				960	*/
				961	static void insert_work(struct cpu_workqueue_struct *cwq,
				962	struct work_struct work, struct list_head head,
				963	unsigned int extra_flags)
				964	{
				965	struct global_cwq *gcwq = cwq->gcwq;
				966
				967	/* we own @work, set data and link */
				968	set_work_cwq(work, cwq, extra_flags);
				969
				970	/*
				971	* Ensure that we get the right work->data if we see the
				972	* result of list_add() below, see try_to_grab_pending().
				973	*/
				974	smp_wmb();
				975
				976	list_add_tail(&work->entry, head);
				977
				978	/*
				979	* Ensure either worker_sched_deactivated() sees the above
				980	* list_add_tail() or we see zero nr_running to avoid workers
				981	* lying around lazily while there are works to be processed.
				982	*/
				983	smp_mb();
				984
				985	if (__need_more_worker(gcwq))
				986	wake_up_worker(gcwq);
				987	}
				988
				989	/*
				990	* Test whether @work is being queued from another work executing on the
				991	* same workqueue. This is rather expensive and should only be used from
				992	* cold paths.
				993	*/
				994	static bool is_chained_work(struct workqueue_struct *wq)
				995	{
				996	unsigned long flags;
				997	unsigned int cpu;
				998
				999	for_each_gcwq_cpu(cpu) {
				1000	struct global_cwq *gcwq = get_gcwq(cpu);
				1001	struct worker *worker;
				1002	struct hlist_node *pos;
				1003	int i;
				1004
				1005	spin_lock_irqsave(&gcwq->lock, flags);
				1006	for_each_busy_worker(worker, i, pos, gcwq) {
				1007	if (worker->task != current)
				1008	continue;
				1009	spin_unlock_irqrestore(&gcwq->lock, flags);
				1010	/*
				1011	* I'm @worker, no locking necessary. See if @work
				1012	* is headed to the same workqueue.
				1013	*/
				1014	return worker->current_cwq->wq == wq;
				1015	}
				1016	spin_unlock_irqrestore(&gcwq->lock, flags);
				1017	}
				1018	return false;
				1019	}
				1020
				1021	static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
				1022	struct work_struct *work)
				1023	{
				1024	struct global_cwq *gcwq;
				1025	struct cpu_workqueue_struct *cwq;
				1026	struct list_head *worklist;
				1027	unsigned int work_flags;
				1028	unsigned long flags;
				1029
				1030	debug_work_activate(work);
				1031
				1032	/* if dying, only works from the same workqueue are allowed */
				1033	if (unlikely(wq->flags & WQ_DRAINING) &&
				1034	WARN_ON_ONCE(!is_chained_work(wq)))
				1035	return;
				1036
				1037	/* determine gcwq to use */
				1038	if (!(wq->flags & WQ_UNBOUND)) {
				1039	struct global_cwq *last_gcwq;
				1040
				1041	if (unlikely(cpu == WORK_CPU_UNBOUND))
				1042	cpu = raw_smp_processor_id();
				1043
				1044	/*
				1045	* It's multi cpu. If @wq is non-reentrant and @work
				1046	* was previously on a different cpu, it might still
				1047	* be running there, in which case the work needs to
				1048	* be queued on that cpu to guarantee non-reentrance.
				1049	*/
				1050	gcwq = get_gcwq(cpu);
				1051	if (wq->flags & WQ_NON_REENTRANT &&
				1052	(last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
				1053	struct worker *worker;
				1054
				1055	spin_lock_irqsave(&last_gcwq->lock, flags);
				1056
				1057	worker = find_worker_executing_work(last_gcwq, work);
				1058
				1059	if (worker && worker->current_cwq->wq == wq)
				1060	gcwq = last_gcwq;
				1061	else {
				1062	/* meh... not running there, queue here */
				1063	spin_unlock_irqrestore(&last_gcwq->lock, flags);
				1064	spin_lock_irqsave(&gcwq->lock, flags);
				1065	}
				1066	} else
				1067	spin_lock_irqsave(&gcwq->lock, flags);
				1068	} else {
				1069	gcwq = get_gcwq(WORK_CPU_UNBOUND);
				1070	spin_lock_irqsave(&gcwq->lock, flags);
				1071	}
				1072
				1073	/* gcwq determined, get cwq and queue */
				1074	cwq = get_cwq(gcwq->cpu, wq);
				1075	trace_workqueue_queue_work(cpu, cwq, work);
				1076
				1077	BUG_ON(!list_empty(&work->entry));
				1078
				1079	cwq->nr_in_flight[cwq->work_color]++;
				1080	work_flags = work_color_to_flags(cwq->work_color);
				1081
				1082	if (likely(cwq->nr_active < cwq->max_active)) {
				1083	trace_workqueue_activate_work(work);
				1084	cwq->nr_active++;
				1085	worklist = gcwq_determine_ins_pos(gcwq, cwq);
				1086	} else {
				1087	work_flags \|= WORK_STRUCT_DELAYED;
				1088	worklist = &cwq->delayed_works;
				1089	}
				1090
				1091	insert_work(cwq, work, worklist, work_flags);
				1092
				1093	spin_unlock_irqrestore(&gcwq->lock, flags);
				1094	}
				1095
				1096	/**
				1097	* queue_work - queue work on a workqueue
				1098	* @wq: workqueue to use
				1099	* @work: work to queue
				1100	*
				1101	* Returns 0 if @work was already on a queue, non-zero otherwise.
				1102	*
				1103	* We queue the work to the CPU on which it was submitted, but if the CPU dies
				1104	* it can be processed by another CPU.
				1105	*/
				1106	int queue_work(struct workqueue_struct wq, struct work_struct work)
				1107	{
				1108	int ret;
				1109
				1110	ret = queue_work_on(get_cpu_light(), wq, work);
				1111	put_cpu_light();
				1112
				1113	return ret;
				1114	}
				1115	EXPORT_SYMBOL_GPL(queue_work);
				1116
				1117	/**
				1118	* queue_work_on - queue work on specific cpu
				1119	* @cpu: CPU number to execute work on
				1120	* @wq: workqueue to use
				1121	* @work: work to queue
				1122	*
				1123	* Returns 0 if @work was already on a queue, non-zero otherwise.
				1124	*
				1125	* We queue the work to a specific CPU, the caller must ensure it
				1126	* can't go away.
				1127	*/
				1128	int
				1129	queue_work_on(int cpu, struct workqueue_struct wq, struct work_struct work)
				1130	{
				1131	int ret = 0;
				1132
				1133	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
				1134	__queue_work(cpu, wq, work);
				1135	ret = 1;
				1136	}
				1137	return ret;
				1138	}
				1139	EXPORT_SYMBOL_GPL(queue_work_on);
				1140
				1141	static void delayed_work_timer_fn(unsigned long __data)
				1142	{
				1143	struct delayed_work dwork = (struct delayed_work )__data;
				1144	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
				1145
				1146	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
				1147	}
				1148
				1149	/**
				1150	* queue_delayed_work - queue work on a workqueue after delay
				1151	* @wq: workqueue to use
				1152	* @dwork: delayable work to queue
				1153	* @delay: number of jiffies to wait before queueing
				1154	*
				1155	* Returns 0 if @work was already on a queue, non-zero otherwise.
				1156	*/
				1157	int queue_delayed_work(struct workqueue_struct *wq,
				1158	struct delayed_work *dwork, unsigned long delay)
				1159	{
				1160	if (delay == 0)
				1161	return queue_work(wq, &dwork->work);
				1162
				1163	return queue_delayed_work_on(-1, wq, dwork, delay);
				1164	}
				1165	EXPORT_SYMBOL_GPL(queue_delayed_work);
				1166
				1167	/**
				1168	* queue_delayed_work_on - queue work on specific CPU after delay
				1169	* @cpu: CPU number to execute work on
				1170	* @wq: workqueue to use
				1171	* @dwork: work to queue
				1172	* @delay: number of jiffies to wait before queueing
				1173	*
				1174	* Returns 0 if @work was already on a queue, non-zero otherwise.
				1175	*/
				1176	int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
				1177	struct delayed_work *dwork, unsigned long delay)
				1178	{
				1179	int ret = 0;
				1180	struct timer_list *timer = &dwork->timer;
				1181	struct work_struct *work = &dwork->work;
				1182
				1183	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
				1184	unsigned int lcpu;
				1185
				1186	WARN_ON_ONCE(timer_pending(timer));
				1187	WARN_ON_ONCE(!list_empty(&work->entry));
				1188
				1189	timer_stats_timer_set_start_info(&dwork->timer);
				1190
				1191	/*
				1192	* This stores cwq for the moment, for the timer_fn.
				1193	* Note that the work's gcwq is preserved to allow
				1194	* reentrance detection for delayed works.
				1195	*/
				1196	if (!(wq->flags & WQ_UNBOUND)) {
				1197	struct global_cwq *gcwq = get_work_gcwq(work);
				1198
				1199	if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
				1200	lcpu = gcwq->cpu;
				1201	else
				1202	lcpu = raw_smp_processor_id();
				1203	} else
				1204	lcpu = WORK_CPU_UNBOUND;
				1205
				1206	set_work_cwq(work, get_cwq(lcpu, wq), 0);
				1207
				1208	timer->expires = jiffies + delay;
				1209	timer->data = (unsigned long)dwork;
				1210	timer->function = delayed_work_timer_fn;
				1211
				1212	if (unlikely(cpu >= 0))
				1213	add_timer_on(timer, cpu);
				1214	else
				1215	add_timer(timer);
				1216	ret = 1;
				1217	}
				1218	return ret;
				1219	}
				1220	EXPORT_SYMBOL_GPL(queue_delayed_work_on);
				1221
				1222	/**
				1223	* worker_enter_idle - enter idle state
				1224	* @worker: worker which is entering idle state
				1225	*
				1226	* @worker is entering idle state. Update stats and idle timer if
				1227	* necessary.
				1228	*
				1229	* LOCKING:
				1230	* spin_lock_irq(gcwq->lock).
				1231	*/
				1232	static void worker_enter_idle(struct worker *worker)
				1233	{
				1234	struct global_cwq *gcwq = worker->gcwq;
				1235
				1236	BUG_ON(worker->flags & WORKER_IDLE);
				1237	BUG_ON(!list_empty(&worker->entry) &&
				1238	(worker->hentry.next \|\| worker->hentry.pprev));
				1239
				1240	/* can't use worker_set_flags(), also called from start_worker() */
				1241	worker->flags \|= WORKER_IDLE;
				1242	gcwq->nr_idle++;
				1243	worker->last_active = jiffies;
				1244
				1245	/* idle_list is LIFO */
				1246	list_add(&worker->entry, &gcwq->idle_list);
				1247
				1248	if (likely(!(worker->flags & WORKER_ROGUE))) {
				1249	if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
				1250	mod_timer(&gcwq->idle_timer,
				1251	jiffies + IDLE_WORKER_TIMEOUT);
				1252	} else
				1253	wake_up_all(&gcwq->trustee_wait);
				1254
				1255	/* sanity check nr_running */
				1256	WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle &&
				1257	atomic_read(get_gcwq_nr_running(gcwq->cpu)));
				1258	}
				1259
				1260	/**
				1261	* worker_leave_idle - leave idle state
				1262	* @worker: worker which is leaving idle state
				1263	*
				1264	* @worker is leaving idle state. Update stats.
				1265	*
				1266	* LOCKING:
				1267	* spin_lock_irq(gcwq->lock).
				1268	*/
				1269	static void worker_leave_idle(struct worker *worker)
				1270	{
				1271	struct global_cwq *gcwq = worker->gcwq;
				1272
				1273	BUG_ON(!(worker->flags & WORKER_IDLE));
				1274	worker_clr_flags(worker, WORKER_IDLE);
				1275	gcwq->nr_idle--;
				1276	list_del_init(&worker->entry);
				1277	}
				1278
				1279	/**
				1280	* worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
				1281	* @worker: self
				1282	*
				1283	* Works which are scheduled while the cpu is online must at least be
				1284	* scheduled to a worker which is bound to the cpu so that if they are
				1285	* flushed from cpu callbacks while cpu is going down, they are
				1286	* guaranteed to execute on the cpu.
				1287	*
				1288	* This function is to be used by rogue workers and rescuers to bind
				1289	* themselves to the target cpu and may race with cpu going down or
				1290	* coming online. kthread_bind() can't be used because it may put the
				1291	* worker to already dead cpu and set_cpus_allowed_ptr() can't be used
				1292	* verbatim as it's best effort and blocking and gcwq may be
				1293	* [dis]associated in the meantime.
				1294	*
				1295	* This function tries set_cpus_allowed() and locks gcwq and verifies
				1296	* the binding against GCWQ_DISASSOCIATED which is set during
				1297	* CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
				1298	* idle state or fetches works without dropping lock, it can guarantee
				1299	* the scheduling requirement described in the first paragraph.
				1300	*
				1301	* CONTEXT:
				1302	* Might sleep. Called without any lock but returns with gcwq->lock
				1303	* held.
				1304	*
				1305	* RETURNS:
				1306	* %true if the associated gcwq is online (@worker is successfully
				1307	* bound), %false if offline.
				1308	*/
				1309	static bool worker_maybe_bind_and_lock(struct worker *worker)
				1310	__acquires(&gcwq->lock)
				1311	{
				1312	struct global_cwq *gcwq = worker->gcwq;
				1313	struct task_struct *task = worker->task;
				1314
				1315	while (true) {
				1316	/*
				1317	* The following call may fail, succeed or succeed
				1318	* without actually migrating the task to the cpu if
				1319	* it races with cpu hotunplug operation. Verify
				1320	* against GCWQ_DISASSOCIATED.
				1321	*/
				1322	if (!(gcwq->flags & GCWQ_DISASSOCIATED))
				1323	set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
				1324
				1325	spin_lock_irq(&gcwq->lock);
				1326	if (gcwq->flags & GCWQ_DISASSOCIATED)
				1327	return false;
				1328	if (task_cpu(task) == gcwq->cpu &&
				1329	cpumask_equal(&current->cpus_allowed,
				1330	get_cpu_mask(gcwq->cpu)))
				1331	return true;
				1332	spin_unlock_irq(&gcwq->lock);
				1333
				1334	/*
				1335	* We've raced with CPU hot[un]plug. Give it a breather
				1336	* and retry migration. cond_resched() is required here;
				1337	* otherwise, we might deadlock against cpu_stop trying to
				1338	* bring down the CPU on non-preemptive kernel.
				1339	*/
				1340	cpu_relax();
				1341	cond_resched();
				1342	}
				1343	}
				1344
				1345	/*
				1346	* Function for worker->rebind_work used to rebind rogue busy workers
				1347	* to the associated cpu which is coming back online. This is
				1348	* scheduled by cpu up but can race with other cpu hotplug operations
				1349	* and may be executed twice without intervening cpu down.
				1350	*/
				1351	static void worker_rebind_fn(struct work_struct *work)
				1352	{
				1353	struct worker *worker = container_of(work, struct worker, rebind_work);
				1354	struct global_cwq *gcwq = worker->gcwq;
				1355
				1356	if (worker_maybe_bind_and_lock(worker))
				1357	worker_clr_flags(worker, WORKER_REBIND);
				1358
				1359	spin_unlock_irq(&gcwq->lock);
				1360	}
				1361
				1362	static struct worker *alloc_worker(void)
				1363	{
				1364	struct worker *worker;
				1365
				1366	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
				1367	if (worker) {
				1368	INIT_LIST_HEAD(&worker->entry);
				1369	INIT_LIST_HEAD(&worker->scheduled);
				1370	INIT_WORK(&worker->rebind_work, worker_rebind_fn);
				1371	/* on creation a worker is in !idle && prep state */
				1372	worker->flags = WORKER_PREP;
				1373	}
				1374	return worker;
				1375	}
				1376
				1377	/**
				1378	* create_worker - create a new workqueue worker
				1379	* @gcwq: gcwq the new worker will belong to
				1380	* @bind: whether to set affinity to @cpu or not
				1381	*
				1382	* Create a new worker which is bound to @gcwq. The returned worker
				1383	* can be started by calling start_worker() or destroyed using
				1384	* destroy_worker().
				1385	*
				1386	* CONTEXT:
				1387	* Might sleep. Does GFP_KERNEL allocations.
				1388	*
				1389	* RETURNS:
				1390	* Pointer to the newly created worker.
				1391	*/
				1392	static struct worker create_worker(struct global_cwq gcwq, bool bind)
				1393	{
				1394	bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
				1395	struct worker *worker = NULL;
				1396	int id = -1;
				1397
				1398	spin_lock_irq(&gcwq->lock);
				1399	while (ida_get_new(&gcwq->worker_ida, &id)) {
				1400	spin_unlock_irq(&gcwq->lock);
				1401	if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
				1402	goto fail;
				1403	spin_lock_irq(&gcwq->lock);
				1404	}
				1405	spin_unlock_irq(&gcwq->lock);
				1406
				1407	worker = alloc_worker();
				1408	if (!worker)
				1409	goto fail;
				1410
				1411	worker->gcwq = gcwq;
				1412	worker->id = id;
				1413
				1414	if (!on_unbound_cpu)
				1415	worker->task = kthread_create_on_node(worker_thread,
				1416	worker,
				1417	cpu_to_node(gcwq->cpu),
				1418	"kworker/%u:%d", gcwq->cpu, id);
				1419	else
				1420	worker->task = kthread_create(worker_thread, worker,
				1421	"kworker/u:%d", id);
				1422	if (IS_ERR(worker->task))
				1423	goto fail;
				1424
				1425	/*
				1426	* A rogue worker will become a regular one if CPU comes
				1427	* online later on. Make sure every worker has
				1428	* PF_THREAD_BOUND set.
				1429	*/
				1430	if (bind && !on_unbound_cpu)
				1431	kthread_bind(worker->task, gcwq->cpu);
				1432	else {
				1433	worker->task->flags \|= PF_THREAD_BOUND;
				1434	if (on_unbound_cpu)
				1435	worker->flags \|= WORKER_UNBOUND;
				1436	}
				1437
				1438	return worker;
				1439	fail:
				1440	if (id >= 0) {
				1441	spin_lock_irq(&gcwq->lock);
				1442	ida_remove(&gcwq->worker_ida, id);
				1443	spin_unlock_irq(&gcwq->lock);
				1444	}
				1445	kfree(worker);
				1446	return NULL;
				1447	}
				1448
				1449	/**
				1450	* start_worker - start a newly created worker
				1451	* @worker: worker to start
				1452	*
				1453	* Make the gcwq aware of @worker and start it.
				1454	*
				1455	* CONTEXT:
				1456	* spin_lock_irq(gcwq->lock).
				1457	*/
				1458	static void start_worker(struct worker *worker)
				1459	{
				1460	worker->flags \|= WORKER_STARTED;
				1461	worker->gcwq->nr_workers++;
				1462	worker_enter_idle(worker);
				1463	wake_up_process(worker->task);
				1464	}
				1465
				1466	/**
				1467	* destroy_worker - destroy a workqueue worker
				1468	* @worker: worker to be destroyed
				1469	*
				1470	* Destroy @worker and adjust @gcwq stats accordingly.
				1471	*
				1472	* CONTEXT:
				1473	* spin_lock_irq(gcwq->lock) which is released and regrabbed.
				1474	*/
				1475	static void destroy_worker(struct worker *worker)
				1476	{
				1477	struct global_cwq *gcwq = worker->gcwq;
				1478	int id = worker->id;
				1479
				1480	/* sanity check frenzy */
				1481	BUG_ON(worker->current_work);
				1482	BUG_ON(!list_empty(&worker->scheduled));
				1483
				1484	if (worker->flags & WORKER_STARTED)
				1485	gcwq->nr_workers--;
				1486	if (worker->flags & WORKER_IDLE)
				1487	gcwq->nr_idle--;
				1488
				1489	/*
				1490	* Once WORKER_DIE is set, the kworker may destroy itself at any
				1491	* point. Pin to ensure the task stays until we're done with it.
				1492	*/
				1493	get_task_struct(worker->task);
				1494
				1495	list_del_init(&worker->entry);
				1496	worker->flags \|= WORKER_DIE;
				1497
				1498	spin_unlock_irq(&gcwq->lock);
				1499
				1500	kthread_stop(worker->task);
				1501	put_task_struct(worker->task);
				1502	kfree(worker);
				1503
				1504	spin_lock_irq(&gcwq->lock);
				1505	ida_remove(&gcwq->worker_ida, id);
				1506	}
				1507
				1508	static void idle_worker_timeout(unsigned long __gcwq)
				1509	{
				1510	struct global_cwq gcwq = (void )__gcwq;
				1511
				1512	spin_lock_irq(&gcwq->lock);
				1513
				1514	if (too_many_workers(gcwq)) {
				1515	struct worker *worker;
				1516	unsigned long expires;
				1517
				1518	/* idle_list is kept in LIFO order, check the last one */
				1519	worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
				1520	expires = worker->last_active + IDLE_WORKER_TIMEOUT;
				1521
				1522	if (time_before(jiffies, expires))
				1523	mod_timer(&gcwq->idle_timer, expires);
				1524	else {
				1525	/* it's been idle for too long, wake up manager */
				1526	gcwq->flags \|= GCWQ_MANAGE_WORKERS;
				1527	wake_up_worker(gcwq);
				1528	}
				1529	}
				1530
				1531	spin_unlock_irq(&gcwq->lock);
				1532	}
				1533
				1534	static bool send_mayday(struct work_struct *work)
				1535	{
				1536	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
				1537	struct workqueue_struct *wq = cwq->wq;
				1538	unsigned int cpu;
				1539
				1540	if (!(wq->flags & WQ_RESCUER))
				1541	return false;
				1542
				1543	/* mayday mayday mayday */
				1544	cpu = cwq->gcwq->cpu;
				1545	/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
				1546	if (cpu == WORK_CPU_UNBOUND)
				1547	cpu = 0;
				1548	if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
				1549	wake_up_process(wq->rescuer->task);
				1550	return true;
				1551	}
				1552
				1553	static void gcwq_mayday_timeout(unsigned long __gcwq)
				1554	{
				1555	struct global_cwq gcwq = (void )__gcwq;
				1556	struct work_struct *work;
				1557
				1558	spin_lock_irq(&gcwq->lock);
				1559
				1560	if (need_to_create_worker(gcwq)) {
				1561	/*
				1562	* We've been trying to create a new worker but
				1563	* haven't been successful. We might be hitting an
				1564	* allocation deadlock. Send distress signals to
				1565	* rescuers.
				1566	*/
				1567	list_for_each_entry(work, &gcwq->worklist, entry)
				1568	send_mayday(work);
				1569	}
				1570
				1571	spin_unlock_irq(&gcwq->lock);
				1572
				1573	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL);
				1574	}
				1575
				1576	/**
				1577	* maybe_create_worker - create a new worker if necessary
				1578	* @gcwq: gcwq to create a new worker for
				1579	*
				1580	* Create a new worker for @gcwq if necessary. @gcwq is guaranteed to
				1581	* have at least one idle worker on return from this function. If
				1582	* creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
				1583	* sent to all rescuers with works scheduled on @gcwq to resolve
				1584	* possible allocation deadlock.
				1585	*
				1586	* On return, need_to_create_worker() is guaranteed to be false and
				1587	* may_start_working() true.
				1588	*
				1589	* LOCKING:
				1590	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				1591	* multiple times. Does GFP_KERNEL allocations. Called only from
				1592	* manager.
				1593	*
				1594	* RETURNS:
				1595	* false if no action was taken and gcwq->lock stayed locked, true
				1596	* otherwise.
				1597	*/
				1598	static bool maybe_create_worker(struct global_cwq *gcwq)
				1599	__releases(&gcwq->lock)
				1600	__acquires(&gcwq->lock)
				1601	{
				1602	if (!need_to_create_worker(gcwq))
				1603	return false;
				1604	restart:
				1605	spin_unlock_irq(&gcwq->lock);
				1606
				1607	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
				1608	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
				1609
				1610	while (true) {
				1611	struct worker *worker;
				1612
				1613	worker = create_worker(gcwq, true);
				1614	if (worker) {
				1615	del_timer_sync(&gcwq->mayday_timer);
				1616	spin_lock_irq(&gcwq->lock);
				1617	start_worker(worker);
				1618	BUG_ON(need_to_create_worker(gcwq));
				1619	return true;
				1620	}
				1621
				1622	if (!need_to_create_worker(gcwq))
				1623	break;
				1624
				1625	__set_current_state(TASK_INTERRUPTIBLE);
				1626	schedule_timeout(CREATE_COOLDOWN);
				1627
				1628	if (!need_to_create_worker(gcwq))
				1629	break;
				1630	}
				1631
				1632	del_timer_sync(&gcwq->mayday_timer);
				1633	spin_lock_irq(&gcwq->lock);
				1634	if (need_to_create_worker(gcwq))
				1635	goto restart;
				1636	return true;
				1637	}
				1638
				1639	/**
				1640	* maybe_destroy_worker - destroy workers which have been idle for a while
				1641	* @gcwq: gcwq to destroy workers for
				1642	*
				1643	* Destroy @gcwq workers which have been idle for longer than
				1644	* IDLE_WORKER_TIMEOUT.
				1645	*
				1646	* LOCKING:
				1647	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				1648	* multiple times. Called only from manager.
				1649	*
				1650	* RETURNS:
				1651	* false if no action was taken and gcwq->lock stayed locked, true
				1652	* otherwise.
				1653	*/
				1654	static bool maybe_destroy_workers(struct global_cwq *gcwq)
				1655	{
				1656	bool ret = false;
				1657
				1658	while (too_many_workers(gcwq)) {
				1659	struct worker *worker;
				1660	unsigned long expires;
				1661
				1662	worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
				1663	expires = worker->last_active + IDLE_WORKER_TIMEOUT;
				1664
				1665	if (time_before(jiffies, expires)) {
				1666	mod_timer(&gcwq->idle_timer, expires);
				1667	break;
				1668	}
				1669
				1670	destroy_worker(worker);
				1671	ret = true;
				1672	}
				1673
				1674	return ret;
				1675	}
				1676
				1677	/**
				1678	* manage_workers - manage worker pool
				1679	* @worker: self
				1680	*
				1681	* Assume the manager role and manage gcwq worker pool @worker belongs
				1682	* to. At any given time, there can be only zero or one manager per
				1683	* gcwq. The exclusion is handled automatically by this function.
				1684	*
				1685	* The caller can safely start processing works on false return. On
				1686	* true return, it's guaranteed that need_to_create_worker() is false
				1687	* and may_start_working() is true.
				1688	*
				1689	* CONTEXT:
				1690	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				1691	* multiple times. Does GFP_KERNEL allocations.
				1692	*
				1693	* RETURNS:
				1694	* false if no action was taken and gcwq->lock stayed locked, true if
				1695	* some action was taken.
				1696	*/
				1697	static bool manage_workers(struct worker *worker)
				1698	{
				1699	struct global_cwq *gcwq = worker->gcwq;
				1700	bool ret = false;
				1701
				1702	if (gcwq->flags & GCWQ_MANAGING_WORKERS)
				1703	return ret;
				1704
				1705	gcwq->flags &= ~GCWQ_MANAGE_WORKERS;
				1706	gcwq->flags \|= GCWQ_MANAGING_WORKERS;
				1707
				1708	/*
				1709	* Destroy and then create so that may_start_working() is true
				1710	* on return.
				1711	*/
				1712	ret \|= maybe_destroy_workers(gcwq);
				1713	ret \|= maybe_create_worker(gcwq);
				1714
				1715	gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
				1716
				1717	/*
				1718	* The trustee might be waiting to take over the manager
				1719	* position, tell it we're done.
				1720	*/
				1721	if (unlikely(gcwq->trustee))
				1722	wake_up_all(&gcwq->trustee_wait);
				1723
				1724	return ret;
				1725	}
				1726
				1727	/**
				1728	* move_linked_works - move linked works to a list
				1729	* @work: start of series of works to be scheduled
				1730	* @head: target list to append @work to
				1731	* @nextp: out paramter for nested worklist walking
				1732	*
				1733	* Schedule linked works starting from @work to @head. Work series to
				1734	* be scheduled starts at @work and includes any consecutive work with
				1735	* WORK_STRUCT_LINKED set in its predecessor.
				1736	*
				1737	* If @nextp is not NULL, it's updated to point to the next work of
				1738	* the last scheduled work. This allows move_linked_works() to be
				1739	* nested inside outer list_for_each_entry_safe().
				1740	*
				1741	* CONTEXT:
				1742	* spin_lock_irq(gcwq->lock).
				1743	*/
				1744	static void move_linked_works(struct work_struct work, struct list_head head,
				1745	struct work_struct **nextp)
				1746	{
				1747	struct work_struct *n;
				1748
				1749	/*
				1750	* Linked worklist will always end before the end of the list,
				1751	* use NULL for list head.
				1752	*/
				1753	list_for_each_entry_safe_from(work, n, NULL, entry) {
				1754	list_move_tail(&work->entry, head);
				1755	if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
				1756	break;
				1757	}
				1758
				1759	/*
				1760	* If we're already inside safe list traversal and have moved
				1761	* multiple works to the scheduled queue, the next position
				1762	* needs to be updated.
				1763	*/
				1764	if (nextp)
				1765	*nextp = n;
				1766	}
				1767
				1768	static void cwq_activate_delayed_work(struct work_struct *work)
				1769	{
				1770	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
				1771	struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
				1772
				1773	trace_workqueue_activate_work(work);
				1774	move_linked_works(work, pos, NULL);
				1775	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
				1776	cwq->nr_active++;
				1777	}
				1778
				1779	static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
				1780	{
				1781	struct work_struct *work = list_first_entry(&cwq->delayed_works,
				1782	struct work_struct, entry);
				1783
				1784	cwq_activate_delayed_work(work);
				1785	}
				1786
				1787	/**
				1788	* cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
				1789	* @cwq: cwq of interest
				1790	* @color: color of work which left the queue
				1791	* @delayed: for a delayed work
				1792	*
				1793	* A work either has completed or is removed from pending queue,
				1794	* decrement nr_in_flight of its cwq and handle workqueue flushing.
				1795	*
				1796	* CONTEXT:
				1797	* spin_lock_irq(gcwq->lock).
				1798	*/
				1799	static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
				1800	bool delayed)
				1801	{
				1802	/* ignore uncolored works */
				1803	if (color == WORK_NO_COLOR)
				1804	return;
				1805
				1806	cwq->nr_in_flight[color]--;
				1807
				1808	if (!delayed) {
				1809	cwq->nr_active--;
				1810	if (!list_empty(&cwq->delayed_works)) {
				1811	/* one down, submit a delayed one */
				1812	if (cwq->nr_active < cwq->max_active)
				1813	cwq_activate_first_delayed(cwq);
				1814	}
				1815	}
				1816
				1817	/* is flush in progress and are we at the flushing tip? */
				1818	if (likely(cwq->flush_color != color))
				1819	return;
				1820
				1821	/* are there still in-flight works? */
				1822	if (cwq->nr_in_flight[color])
				1823	return;
				1824
				1825	/* this cwq is done, clear flush_color */
				1826	cwq->flush_color = -1;
				1827
				1828	/*
				1829	* If this was the last cwq, wake up the first flusher. It
				1830	* will handle the rest.
				1831	*/
				1832	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
				1833	complete(&cwq->wq->first_flusher->done);
				1834	}
				1835
				1836	/**
				1837	* process_one_work - process single work
				1838	* @worker: self
				1839	* @work: work to process
				1840	*
				1841	* Process @work. This function contains all the logics necessary to
				1842	* process a single work including synchronization against and
				1843	* interaction with other workers on the same cpu, queueing and
				1844	* flushing. As long as context requirement is met, any worker can
				1845	* call this function to process a work.
				1846	*
				1847	* CONTEXT:
				1848	* spin_lock_irq(gcwq->lock) which is released and regrabbed.
				1849	*/
				1850	static void process_one_work(struct worker worker, struct work_struct work)
				1851	__releases(&gcwq->lock)
				1852	__acquires(&gcwq->lock)
				1853	{
				1854	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
				1855	struct global_cwq *gcwq = cwq->gcwq;
				1856	struct hlist_head *bwh = busy_worker_head(gcwq, work);
				1857	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
				1858	int work_color;
				1859	struct worker *collision;
				1860	#ifdef CONFIG_LOCKDEP
				1861	/*
				1862	* It is permissible to free the struct work_struct from
				1863	* inside the function that is called from it, this we need to
				1864	* take into account for lockdep too. To avoid bogus "held
				1865	* lock freed" warnings as well as problems when looking into
				1866	* work->lockdep_map, make a copy and use that here.
				1867	*/
				1868	struct lockdep_map lockdep_map = work->lockdep_map;
				1869	#endif
				1870	/*
				1871	* A single work shouldn't be executed concurrently by
				1872	* multiple workers on a single cpu. Check whether anyone is
				1873	* already processing the work. If so, defer the work to the
				1874	* currently executing one.
				1875	*/
				1876	collision = __find_worker_executing_work(gcwq, bwh, work);
				1877	if (unlikely(collision)) {
				1878	move_linked_works(work, &collision->scheduled, NULL);
				1879	return;
				1880	}
				1881
				1882	/* claim and process */
				1883	debug_work_deactivate(work);
				1884	hlist_add_head(&worker->hentry, bwh);
				1885	worker->current_work = work;
				1886	worker->current_func = work->func;
				1887	worker->current_cwq = cwq;
				1888	work_color = get_work_color(work);
				1889
				1890	/* record the current cpu number in the work data and dequeue */
				1891	set_work_cpu(work, gcwq->cpu);
				1892	list_del_init(&work->entry);
				1893
				1894	/*
				1895	* If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
				1896	* wake up another worker; otherwise, clear HIGHPRI_PENDING.
				1897	*/
				1898	if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) {
				1899	struct work_struct *nwork = list_first_entry(&gcwq->worklist,
				1900	struct work_struct, entry);
				1901
				1902	if (!list_empty(&gcwq->worklist) &&
				1903	get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
				1904	wake_up_worker(gcwq);
				1905	else
				1906	gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
				1907	}
				1908
				1909	/*
				1910	* CPU intensive works don't participate in concurrency
				1911	* management. They're the scheduler's responsibility.
				1912	*/
				1913	if (unlikely(cpu_intensive))
				1914	worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
				1915
				1916	spin_unlock_irq(&gcwq->lock);
				1917
				1918	smp_wmb(); /* paired with test_and_set_bit(PENDING) */
				1919	work_clear_pending(work);
				1920
				1921	lock_map_acquire_read(&cwq->wq->lockdep_map);
				1922	lock_map_acquire(&lockdep_map);
				1923	trace_workqueue_execute_start(work);
				1924	worker->current_func(work);
				1925	/*
				1926	* While we must be careful to not use "work" after this, the trace
				1927	* point will only record its address.
				1928	*/
				1929	trace_workqueue_execute_end(work);
				1930	lock_map_release(&lockdep_map);
				1931	lock_map_release(&cwq->wq->lockdep_map);
				1932
				1933	if (unlikely(in_atomic() \|\| lockdep_depth(current) > 0)) {
				1934	pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
				1935	" last function: %pf\n",
				1936	current->comm, preempt_count(), task_pid_nr(current),
				1937	worker->current_func);
				1938	debug_show_held_locks(current);
				1939	dump_stack();
				1940	}
				1941
				1942	/*
				1943	* The following prevents a kworker from hogging CPU on !PREEMPT
				1944	* kernels, where a requeueing work item waiting for something to
				1945	* happen could deadlock with stop_machine as such work item could
				1946	* indefinitely requeue itself while all other CPUs are trapped in
				1947	* stop_machine.
				1948	*/
				1949	cond_resched();
				1950
				1951	spin_lock_irq(&gcwq->lock);
				1952
				1953	/* clear cpu intensive status */
				1954	if (unlikely(cpu_intensive))
				1955	worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
				1956
				1957	/* we're done with it, release */
				1958	hlist_del_init(&worker->hentry);
				1959	worker->current_work = NULL;
				1960	worker->current_func = NULL;
				1961	worker->current_cwq = NULL;
				1962	cwq_dec_nr_in_flight(cwq, work_color, false);
				1963	}
				1964
				1965	/**
				1966	* process_scheduled_works - process scheduled works
				1967	* @worker: self
				1968	*
				1969	* Process all scheduled works. Please note that the scheduled list
				1970	* may change while processing a work, so this function repeatedly
				1971	* fetches a work from the top and executes it.
				1972	*
				1973	* CONTEXT:
				1974	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				1975	* multiple times.
				1976	*/
				1977	static void process_scheduled_works(struct worker *worker)
				1978	{
				1979	while (!list_empty(&worker->scheduled)) {
				1980	struct work_struct *work = list_first_entry(&worker->scheduled,
				1981	struct work_struct, entry);
				1982	process_one_work(worker, work);
				1983	}
				1984	}
				1985
				1986	/**
				1987	* worker_thread - the worker thread function
				1988	* @__worker: self
				1989	*
				1990	* The gcwq worker thread function. There's a single dynamic pool of
				1991	* these per each cpu. These workers process all works regardless of
				1992	* their specific target workqueue. The only exception is works which
				1993	* belong to workqueues with a rescuer which will be explained in
				1994	* rescuer_thread().
				1995	*/
				1996	static int worker_thread(void *__worker)
				1997	{
				1998	struct worker *worker = __worker;
				1999	struct global_cwq *gcwq = worker->gcwq;
				2000
				2001	/* tell the scheduler that this is a workqueue worker */
				2002	worker->task->flags \|= PF_WQ_WORKER;
				2003	woke_up:
				2004	spin_lock_irq(&gcwq->lock);
				2005
				2006	/* DIE can be set only while we're idle, checking here is enough */
				2007	if (worker->flags & WORKER_DIE) {
				2008	spin_unlock_irq(&gcwq->lock);
				2009	worker->task->flags &= ~PF_WQ_WORKER;
				2010	return 0;
				2011	}
				2012
				2013	worker_leave_idle(worker);
				2014	recheck:
				2015	/* no more worker necessary? */
				2016	if (!need_more_worker(gcwq))
				2017	goto sleep;
				2018
				2019	/* do we need to manage? */
				2020	if (unlikely(!may_start_working(gcwq)) && manage_workers(worker))
				2021	goto recheck;
				2022
				2023	/*
				2024	* ->scheduled list can only be filled while a worker is
				2025	* preparing to process a work or actually processing it.
				2026	* Make sure nobody diddled with it while I was sleeping.
				2027	*/
				2028	BUG_ON(!list_empty(&worker->scheduled));
				2029
				2030	/*
				2031	* When control reaches this point, we're guaranteed to have
				2032	* at least one idle worker or that someone else has already
				2033	* assumed the manager role.
				2034	*/
				2035	worker_clr_flags(worker, WORKER_PREP);
				2036
				2037	do {
				2038	struct work_struct *work =
				2039	list_first_entry(&gcwq->worklist,
				2040	struct work_struct, entry);
				2041
				2042	if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
				2043	/* optimization path, not strictly necessary */
				2044	process_one_work(worker, work);
				2045	if (unlikely(!list_empty(&worker->scheduled)))
				2046	process_scheduled_works(worker);
				2047	} else {
				2048	move_linked_works(work, &worker->scheduled, NULL);
				2049	process_scheduled_works(worker);
				2050	}
				2051	} while (keep_working(gcwq));
				2052
				2053	worker_set_flags(worker, WORKER_PREP, false);
				2054	sleep:
				2055	if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker))
				2056	goto recheck;
				2057
				2058	/*
				2059	* gcwq->lock is held and there's no work to process and no
				2060	* need to manage, sleep. Workers are woken up only while
				2061	* holding gcwq->lock or from local cpu, so setting the
				2062	* current state before releasing gcwq->lock is enough to
				2063	* prevent losing any event.
				2064	*/
				2065	worker_enter_idle(worker);
				2066	__set_current_state(TASK_INTERRUPTIBLE);
				2067	spin_unlock_irq(&gcwq->lock);
				2068	schedule();
				2069	goto woke_up;
				2070	}
				2071
				2072	/**
				2073	* rescuer_thread - the rescuer thread function
				2074	* @__wq: the associated workqueue
				2075	*
				2076	* Workqueue rescuer thread function. There's one rescuer for each
				2077	* workqueue which has WQ_RESCUER set.
				2078	*
				2079	* Regular work processing on a gcwq may block trying to create a new
				2080	* worker which uses GFP_KERNEL allocation which has slight chance of
				2081	* developing into deadlock if some works currently on the same queue
				2082	* need to be processed to satisfy the GFP_KERNEL allocation. This is
				2083	* the problem rescuer solves.
				2084	*
				2085	* When such condition is possible, the gcwq summons rescuers of all
				2086	* workqueues which have works queued on the gcwq and let them process
				2087	* those works so that forward progress can be guaranteed.
				2088	*
				2089	* This should happen rarely.
				2090	*/
				2091	static int rescuer_thread(void *__wq)
				2092	{
				2093	struct workqueue_struct *wq = __wq;
				2094	struct worker *rescuer = wq->rescuer;
				2095	struct list_head *scheduled = &rescuer->scheduled;
				2096	bool is_unbound = wq->flags & WQ_UNBOUND;
				2097	unsigned int cpu;
				2098
				2099	set_user_nice(current, RESCUER_NICE_LEVEL);
				2100	repeat:
				2101	set_current_state(TASK_INTERRUPTIBLE);
				2102
				2103	if (kthread_should_stop()) {
				2104	__set_current_state(TASK_RUNNING);
				2105	return 0;
				2106	}
				2107
				2108	/*
				2109	* See whether any cpu is asking for help. Unbounded
				2110	* workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
				2111	*/
				2112	for_each_mayday_cpu(cpu, wq->mayday_mask) {
				2113	unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
				2114	struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
				2115	struct global_cwq *gcwq = cwq->gcwq;
				2116	struct work_struct work, n;
				2117
				2118	__set_current_state(TASK_RUNNING);
				2119	mayday_clear_cpu(cpu, wq->mayday_mask);
				2120
				2121	/* migrate to the target cpu if possible */
				2122	rescuer->gcwq = gcwq;
				2123	worker_maybe_bind_and_lock(rescuer);
				2124
				2125	/*
				2126	* Slurp in all works issued via this workqueue and
				2127	* process'em.
				2128	*/
				2129	BUG_ON(!list_empty(&rescuer->scheduled));
				2130	list_for_each_entry_safe(work, n, &gcwq->worklist, entry)
				2131	if (get_work_cwq(work) == cwq)
				2132	move_linked_works(work, scheduled, &n);
				2133
				2134	process_scheduled_works(rescuer);
				2135
				2136	/*
				2137	* Leave this gcwq. If keep_working() is %true, notify a
				2138	* regular worker; otherwise, we end up with 0 concurrency
				2139	* and stalling the execution.
				2140	*/
				2141	if (keep_working(gcwq))
				2142	wake_up_worker(gcwq);
				2143
				2144	spin_unlock_irq(&gcwq->lock);
				2145	}
				2146
				2147	schedule();
				2148	goto repeat;
				2149	}
				2150
				2151	struct wq_barrier {
				2152	struct work_struct work;
				2153	struct completion done;
				2154	};
				2155
				2156	static void wq_barrier_func(struct work_struct *work)
				2157	{
				2158	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
				2159	complete(&barr->done);
				2160	}
				2161
				2162	/**
				2163	* insert_wq_barrier - insert a barrier work
				2164	* @cwq: cwq to insert barrier into
				2165	* @barr: wq_barrier to insert
				2166	* @target: target work to attach @barr to
				2167	* @worker: worker currently executing @target, NULL if @target is not executing
				2168	*
				2169	* @barr is linked to @target such that @barr is completed only after
				2170	* @target finishes execution. Please note that the ordering
				2171	* guarantee is observed only with respect to @target and on the local
				2172	* cpu.
				2173	*
				2174	* Currently, a queued barrier can't be canceled. This is because
				2175	* try_to_grab_pending() can't determine whether the work to be
				2176	* grabbed is at the head of the queue and thus can't clear LINKED
				2177	* flag of the previous work while there must be a valid next work
				2178	* after a work with LINKED flag set.
				2179	*
				2180	* Note that when @worker is non-NULL, @target may be modified
				2181	* underneath us, so we can't reliably determine cwq from @target.
				2182	*
				2183	* CONTEXT:
				2184	* spin_lock_irq(gcwq->lock).
				2185	*/
				2186	static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
				2187	struct wq_barrier *barr,
				2188	struct work_struct target, struct worker worker)
				2189	{
				2190	struct list_head *head;
				2191	unsigned int linked = 0;
				2192
				2193	/*
				2194	* debugobject calls are safe here even with gcwq->lock locked
				2195	* as we know for sure that this will not trigger any of the
				2196	* checks and call back into the fixup functions where we
				2197	* might deadlock.
				2198	*/
				2199	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
				2200	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
				2201	init_completion(&barr->done);
				2202
				2203	/*
				2204	* If @target is currently being executed, schedule the
				2205	* barrier to the worker; otherwise, put it after @target.
				2206	*/
				2207	if (worker)
				2208	head = worker->scheduled.next;
				2209	else {
				2210	unsigned long *bits = work_data_bits(target);
				2211
				2212	head = target->entry.next;
				2213	/* there can already be other linked works, inherit and set */
				2214	linked = *bits & WORK_STRUCT_LINKED;
				2215	__set_bit(WORK_STRUCT_LINKED_BIT, bits);
				2216	}
				2217
				2218	debug_work_activate(&barr->work);
				2219	insert_work(cwq, &barr->work, head,
				2220	work_color_to_flags(WORK_NO_COLOR) \| linked);
				2221	}
				2222
				2223	/**
				2224	* flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
				2225	* @wq: workqueue being flushed
				2226	* @flush_color: new flush color, < 0 for no-op
				2227	* @work_color: new work color, < 0 for no-op
				2228	*
				2229	* Prepare cwqs for workqueue flushing.
				2230	*
				2231	* If @flush_color is non-negative, flush_color on all cwqs should be
				2232	* -1. If no cwq has in-flight commands at the specified color, all
				2233	* cwq->flush_color's stay at -1 and %false is returned. If any cwq
				2234	* has in flight commands, its cwq->flush_color is set to
				2235	* @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
				2236	* wakeup logic is armed and %true is returned.
				2237	*
				2238	* The caller should have initialized @wq->first_flusher prior to
				2239	* calling this function with non-negative @flush_color. If
				2240	* @flush_color is negative, no flush color update is done and %false
				2241	* is returned.
				2242	*
				2243	* If @work_color is non-negative, all cwqs should have the same
				2244	* work_color which is previous to @work_color and all will be
				2245	* advanced to @work_color.
				2246	*
				2247	* CONTEXT:
				2248	* mutex_lock(wq->flush_mutex).
				2249	*
				2250	* RETURNS:
				2251	* %true if @flush_color >= 0 and there's something to flush. %false
				2252	* otherwise.
				2253	*/
				2254	static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
				2255	int flush_color, int work_color)
				2256	{
				2257	bool wait = false;
				2258	unsigned int cpu;
				2259
				2260	if (flush_color >= 0) {
				2261	BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
				2262	atomic_set(&wq->nr_cwqs_to_flush, 1);
				2263	}
				2264
				2265	for_each_cwq_cpu(cpu, wq) {
				2266	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				2267	struct global_cwq *gcwq = cwq->gcwq;
				2268
				2269	spin_lock_irq(&gcwq->lock);
				2270
				2271	if (flush_color >= 0) {
				2272	BUG_ON(cwq->flush_color != -1);
				2273
				2274	if (cwq->nr_in_flight[flush_color]) {
				2275	cwq->flush_color = flush_color;
				2276	atomic_inc(&wq->nr_cwqs_to_flush);
				2277	wait = true;
				2278	}
				2279	}
				2280
				2281	if (work_color >= 0) {
				2282	BUG_ON(work_color != work_next_color(cwq->work_color));
				2283	cwq->work_color = work_color;
				2284	}
				2285
				2286	spin_unlock_irq(&gcwq->lock);
				2287	}
				2288
				2289	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
				2290	complete(&wq->first_flusher->done);
				2291
				2292	return wait;
				2293	}
				2294
				2295	/**
				2296	* flush_workqueue - ensure that any scheduled work has run to completion.
				2297	* @wq: workqueue to flush
				2298	*
				2299	* Forces execution of the workqueue and blocks until its completion.
				2300	* This is typically used in driver shutdown handlers.
				2301	*
				2302	* We sleep until all works which were queued on entry have been handled,
				2303	* but we are not livelocked by new incoming ones.
				2304	*/
				2305	void flush_workqueue(struct workqueue_struct *wq)
				2306	{
				2307	struct wq_flusher this_flusher = {
				2308	.list = LIST_HEAD_INIT(this_flusher.list),
				2309	.flush_color = -1,
				2310	.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
				2311	};
				2312	int next_color;
				2313
				2314	lock_map_acquire(&wq->lockdep_map);
				2315	lock_map_release(&wq->lockdep_map);
				2316
				2317	mutex_lock(&wq->flush_mutex);
				2318
				2319	/*
				2320	* Start-to-wait phase
				2321	*/
				2322	next_color = work_next_color(wq->work_color);
				2323
				2324	if (next_color != wq->flush_color) {
				2325	/*
				2326	* Color space is not full. The current work_color
				2327	* becomes our flush_color and work_color is advanced
				2328	* by one.
				2329	*/
				2330	BUG_ON(!list_empty(&wq->flusher_overflow));
				2331	this_flusher.flush_color = wq->work_color;
				2332	wq->work_color = next_color;
				2333
				2334	if (!wq->first_flusher) {
				2335	/* no flush in progress, become the first flusher */
				2336	BUG_ON(wq->flush_color != this_flusher.flush_color);
				2337
				2338	wq->first_flusher = &this_flusher;
				2339
				2340	if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
				2341	wq->work_color)) {
				2342	/* nothing to flush, done */
				2343	wq->flush_color = next_color;
				2344	wq->first_flusher = NULL;
				2345	goto out_unlock;
				2346	}
				2347	} else {
				2348	/* wait in queue */
				2349	BUG_ON(wq->flush_color == this_flusher.flush_color);
				2350	list_add_tail(&this_flusher.list, &wq->flusher_queue);
				2351	flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
				2352	}
				2353	} else {
				2354	/*
				2355	* Oops, color space is full, wait on overflow queue.
				2356	* The next flush completion will assign us
				2357	* flush_color and transfer to flusher_queue.
				2358	*/
				2359	list_add_tail(&this_flusher.list, &wq->flusher_overflow);
				2360	}
				2361
				2362	mutex_unlock(&wq->flush_mutex);
				2363
				2364	wait_for_completion(&this_flusher.done);
				2365
				2366	/*
				2367	* Wake-up-and-cascade phase
				2368	*
				2369	* First flushers are responsible for cascading flushes and
				2370	* handling overflow. Non-first flushers can simply return.
				2371	*/
				2372	if (wq->first_flusher != &this_flusher)
				2373	return;
				2374
				2375	mutex_lock(&wq->flush_mutex);
				2376
				2377	/* we might have raced, check again with mutex held */
				2378	if (wq->first_flusher != &this_flusher)
				2379	goto out_unlock;
				2380
				2381	wq->first_flusher = NULL;
				2382
				2383	BUG_ON(!list_empty(&this_flusher.list));
				2384	BUG_ON(wq->flush_color != this_flusher.flush_color);
				2385
				2386	while (true) {
				2387	struct wq_flusher next, tmp;
				2388
				2389	/* complete all the flushers sharing the current flush color */
				2390	list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
				2391	if (next->flush_color != wq->flush_color)
				2392	break;
				2393	list_del_init(&next->list);
				2394	complete(&next->done);
				2395	}
				2396
				2397	BUG_ON(!list_empty(&wq->flusher_overflow) &&
				2398	wq->flush_color != work_next_color(wq->work_color));
				2399
				2400	/* this flush_color is finished, advance by one */
				2401	wq->flush_color = work_next_color(wq->flush_color);
				2402
				2403	/* one color has been freed, handle overflow queue */
				2404	if (!list_empty(&wq->flusher_overflow)) {
				2405	/*
				2406	* Assign the same color to all overflowed
				2407	* flushers, advance work_color and append to
				2408	* flusher_queue. This is the start-to-wait
				2409	* phase for these overflowed flushers.
				2410	*/
				2411	list_for_each_entry(tmp, &wq->flusher_overflow, list)
				2412	tmp->flush_color = wq->work_color;
				2413
				2414	wq->work_color = work_next_color(wq->work_color);
				2415
				2416	list_splice_tail_init(&wq->flusher_overflow,
				2417	&wq->flusher_queue);
				2418	flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
				2419	}
				2420
				2421	if (list_empty(&wq->flusher_queue)) {
				2422	BUG_ON(wq->flush_color != wq->work_color);
				2423	break;
				2424	}
				2425
				2426	/*
				2427	* Need to flush more colors. Make the next flusher
				2428	* the new first flusher and arm cwqs.
				2429	*/
				2430	BUG_ON(wq->flush_color == wq->work_color);
				2431	BUG_ON(wq->flush_color != next->flush_color);
				2432
				2433	list_del_init(&next->list);
				2434	wq->first_flusher = next;
				2435
				2436	if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
				2437	break;
				2438
				2439	/*
				2440	* Meh... this color is already done, clear first
				2441	* flusher and repeat cascading.
				2442	*/
				2443	wq->first_flusher = NULL;
				2444	}
				2445
				2446	out_unlock:
				2447	mutex_unlock(&wq->flush_mutex);
				2448	}
				2449	EXPORT_SYMBOL_GPL(flush_workqueue);
				2450
				2451	/**
				2452	* drain_workqueue - drain a workqueue
				2453	* @wq: workqueue to drain
				2454	*
				2455	* Wait until the workqueue becomes empty. While draining is in progress,
				2456	* only chain queueing is allowed. IOW, only currently pending or running
				2457	* work items on @wq can queue further work items on it. @wq is flushed
				2458	* repeatedly until it becomes empty. The number of flushing is detemined
				2459	* by the depth of chaining and should be relatively short. Whine if it
				2460	* takes too long.
				2461	*/
				2462	void drain_workqueue(struct workqueue_struct *wq)
				2463	{
				2464	unsigned int flush_cnt = 0;
				2465	unsigned int cpu;
				2466
				2467	/*
				2468	* __queue_work() needs to test whether there are drainers, is much
				2469	* hotter than drain_workqueue() and already looks at @wq->flags.
				2470	* Use WQ_DRAINING so that queue doesn't have to check nr_drainers.
				2471	*/
				2472	spin_lock(&workqueue_lock);
				2473	if (!wq->nr_drainers++)
				2474	wq->flags \|= WQ_DRAINING;
				2475	spin_unlock(&workqueue_lock);
				2476	reflush:
				2477	flush_workqueue(wq);
				2478
				2479	for_each_cwq_cpu(cpu, wq) {
				2480	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				2481	bool drained;
				2482
				2483	spin_lock_irq(&cwq->gcwq->lock);
				2484	drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
				2485	spin_unlock_irq(&cwq->gcwq->lock);
				2486
				2487	if (drained)
				2488	continue;
				2489
				2490	if (++flush_cnt == 10 \|\|
				2491	(flush_cnt % 100 == 0 && flush_cnt <= 1000))
				2492	pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
				2493	wq->name, flush_cnt);
				2494	goto reflush;
				2495	}
				2496
				2497	spin_lock(&workqueue_lock);
				2498	if (!--wq->nr_drainers)
				2499	wq->flags &= ~WQ_DRAINING;
				2500	spin_unlock(&workqueue_lock);
				2501	}
				2502	EXPORT_SYMBOL_GPL(drain_workqueue);
				2503
				2504	static bool start_flush_work(struct work_struct work, struct wq_barrier barr,
				2505	bool wait_executing)
				2506	{
				2507	struct worker *worker = NULL;
				2508	struct global_cwq *gcwq;
				2509	struct cpu_workqueue_struct *cwq;
				2510
				2511	might_sleep();
				2512	gcwq = get_work_gcwq(work);
				2513	if (!gcwq)
				2514	return false;
				2515
				2516	spin_lock_irq(&gcwq->lock);
				2517	if (!list_empty(&work->entry)) {
				2518	/*
				2519	* See the comment near try_to_grab_pending()->smp_rmb().
				2520	* If it was re-queued to a different gcwq under us, we
				2521	* are not going to wait.
				2522	*/
				2523	smp_rmb();
				2524	cwq = get_work_cwq(work);
				2525	if (unlikely(!cwq \|\| gcwq != cwq->gcwq))
				2526	goto already_gone;
				2527	} else if (wait_executing) {
				2528	worker = find_worker_executing_work(gcwq, work);
				2529	if (!worker)
				2530	goto already_gone;
				2531	cwq = worker->current_cwq;
				2532	} else
				2533	goto already_gone;
				2534
				2535	insert_wq_barrier(cwq, barr, work, worker);
				2536	spin_unlock_irq(&gcwq->lock);
				2537
				2538	/*
				2539	* If @max_active is 1 or rescuer is in use, flushing another work
				2540	* item on the same workqueue may lead to deadlock. Make sure the
				2541	* flusher is not running on the same workqueue by verifying write
				2542	* access.
				2543	*/
				2544	if (cwq->wq->saved_max_active == 1 \|\| cwq->wq->flags & WQ_RESCUER)
				2545	lock_map_acquire(&cwq->wq->lockdep_map);
				2546	else
				2547	lock_map_acquire_read(&cwq->wq->lockdep_map);
				2548	lock_map_release(&cwq->wq->lockdep_map);
				2549
				2550	return true;
				2551	already_gone:
				2552	spin_unlock_irq(&gcwq->lock);
				2553	return false;
				2554	}
				2555
				2556	/**
				2557	* flush_work - wait for a work to finish executing the last queueing instance
				2558	* @work: the work to flush
				2559	*
				2560	* Wait until @work has finished execution. This function considers
				2561	* only the last queueing instance of @work. If @work has been
				2562	* enqueued across different CPUs on a non-reentrant workqueue or on
				2563	* multiple workqueues, @work might still be executing on return on
				2564	* some of the CPUs from earlier queueing.
				2565	*
				2566	* If @work was queued only on a non-reentrant, ordered or unbound
				2567	* workqueue, @work is guaranteed to be idle on return if it hasn't
				2568	* been requeued since flush started.
				2569	*
				2570	* RETURNS:
				2571	* %true if flush_work() waited for the work to finish execution,
				2572	* %false if it was already idle.
				2573	*/
				2574	bool flush_work(struct work_struct *work)
				2575	{
				2576	struct wq_barrier barr;
				2577
				2578	if (start_flush_work(work, &barr, true)) {
				2579	wait_for_completion(&barr.done);
				2580	destroy_work_on_stack(&barr.work);
				2581	return true;
				2582	} else
				2583	return false;
				2584	}
				2585	EXPORT_SYMBOL_GPL(flush_work);
				2586
				2587	static bool wait_on_cpu_work(struct global_cwq gcwq, struct work_struct work)
				2588	{
				2589	struct wq_barrier barr;
				2590	struct worker *worker;
				2591
				2592	spin_lock_irq(&gcwq->lock);
				2593
				2594	worker = find_worker_executing_work(gcwq, work);
				2595	if (unlikely(worker))
				2596	insert_wq_barrier(worker->current_cwq, &barr, work, worker);
				2597
				2598	spin_unlock_irq(&gcwq->lock);
				2599
				2600	if (unlikely(worker)) {
				2601	wait_for_completion(&barr.done);
				2602	destroy_work_on_stack(&barr.work);
				2603	return true;
				2604	} else
				2605	return false;
				2606	}
				2607
				2608	static bool wait_on_work(struct work_struct *work)
				2609	{
				2610	bool ret = false;
				2611	int cpu;
				2612
				2613	might_sleep();
				2614
				2615	lock_map_acquire(&work->lockdep_map);
				2616	lock_map_release(&work->lockdep_map);
				2617
				2618	for_each_gcwq_cpu(cpu)
				2619	ret \|= wait_on_cpu_work(get_gcwq(cpu), work);
				2620	return ret;
				2621	}
				2622
				2623	/**
				2624	* flush_work_sync - wait until a work has finished execution
				2625	* @work: the work to flush
				2626	*
				2627	* Wait until @work has finished execution. On return, it's
				2628	* guaranteed that all queueing instances of @work which happened
				2629	* before this function is called are finished. In other words, if
				2630	* @work hasn't been requeued since this function was called, @work is
				2631	* guaranteed to be idle on return.
				2632	*
				2633	* RETURNS:
				2634	* %true if flush_work_sync() waited for the work to finish execution,
				2635	* %false if it was already idle.
				2636	*/
				2637	bool flush_work_sync(struct work_struct *work)
				2638	{
				2639	struct wq_barrier barr;
				2640	bool pending, waited;
				2641
				2642	/* we'll wait for executions separately, queue barr only if pending */
				2643	pending = start_flush_work(work, &barr, false);
				2644
				2645	/* wait for executions to finish */
				2646	waited = wait_on_work(work);
				2647
				2648	/* wait for the pending one */
				2649	if (pending) {
				2650	wait_for_completion(&barr.done);
				2651	destroy_work_on_stack(&barr.work);
				2652	}
				2653
				2654	return pending \|\| waited;
				2655	}
				2656	EXPORT_SYMBOL_GPL(flush_work_sync);
				2657
				2658	/*
				2659	* Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
				2660	* so this work can't be re-armed in any way.
				2661	*/
				2662	static int try_to_grab_pending(struct work_struct *work)
				2663	{
				2664	struct global_cwq *gcwq;
				2665	int ret = -1;
				2666
				2667	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
				2668	return 0;
				2669
				2670	/*
				2671	* The queueing is in progress, or it is already queued. Try to
				2672	* steal it from ->worklist without clearing WORK_STRUCT_PENDING.
				2673	*/
				2674	gcwq = get_work_gcwq(work);
				2675	if (!gcwq)
				2676	return ret;
				2677
				2678	spin_lock_irq(&gcwq->lock);
				2679	if (!list_empty(&work->entry)) {
				2680	/*
				2681	* This work is queued, but perhaps we locked the wrong gcwq.
				2682	* In that case we must see the new value after rmb(), see
				2683	* insert_work()->wmb().
				2684	*/
				2685	smp_rmb();
				2686	if (gcwq == get_work_gcwq(work)) {
				2687	debug_work_deactivate(work);
				2688
				2689	/*
				2690	* A delayed work item cannot be grabbed directly
				2691	* because it might have linked NO_COLOR work items
				2692	* which, if left on the delayed_list, will confuse
				2693	* cwq->nr_active management later on and cause
				2694	* stall. Make sure the work item is activated
				2695	* before grabbing.
				2696	*/
				2697	if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
				2698	cwq_activate_delayed_work(work);
				2699
				2700	list_del_init(&work->entry);
				2701	cwq_dec_nr_in_flight(get_work_cwq(work),
				2702	get_work_color(work),
				2703	*work_data_bits(work) & WORK_STRUCT_DELAYED);
				2704	ret = 1;
				2705	}
				2706	}
				2707	spin_unlock_irq(&gcwq->lock);
				2708
				2709	return ret;
				2710	}
				2711
				2712	static bool __cancel_work_timer(struct work_struct *work,
				2713	struct timer_list* timer)
				2714	{
				2715	int ret;
				2716
				2717	do {
				2718	ret = (timer && likely(del_timer(timer)));
				2719	if (!ret)
				2720	ret = try_to_grab_pending(work);
				2721	wait_on_work(work);
				2722	} while (unlikely(ret < 0));
				2723
				2724	clear_work_data(work);
				2725	return ret;
				2726	}
				2727
				2728	/**
				2729	* cancel_work_sync - cancel a work and wait for it to finish
				2730	* @work: the work to cancel
				2731	*
				2732	* Cancel @work and wait for its execution to finish. This function
				2733	* can be used even if the work re-queues itself or migrates to
				2734	* another workqueue. On return from this function, @work is
				2735	* guaranteed to be not pending or executing on any CPU.
				2736	*
				2737	* cancel_work_sync(&delayed_work->work) must not be used for
				2738	* delayed_work's. Use cancel_delayed_work_sync() instead.
				2739	*
				2740	* The caller must ensure that the workqueue on which @work was last
				2741	* queued can't be destroyed before this function returns.
				2742	*
				2743	* RETURNS:
				2744	* %true if @work was pending, %false otherwise.
				2745	*/
				2746	bool cancel_work_sync(struct work_struct *work)
				2747	{
				2748	return __cancel_work_timer(work, NULL);
				2749	}
				2750	EXPORT_SYMBOL_GPL(cancel_work_sync);
				2751
				2752	/**
				2753	* flush_delayed_work - wait for a dwork to finish executing the last queueing
				2754	* @dwork: the delayed work to flush
				2755	*
				2756	* Delayed timer is cancelled and the pending work is queued for
				2757	* immediate execution. Like flush_work(), this function only
				2758	* considers the last queueing instance of @dwork.
				2759	*
				2760	* RETURNS:
				2761	* %true if flush_work() waited for the work to finish execution,
				2762	* %false if it was already idle.
				2763	*/
				2764	bool flush_delayed_work(struct delayed_work *dwork)
				2765	{
				2766	if (del_timer_sync(&dwork->timer))
				2767	__queue_work(raw_smp_processor_id(),
				2768	get_work_cwq(&dwork->work)->wq, &dwork->work);
				2769	return flush_work(&dwork->work);
				2770	}
				2771	EXPORT_SYMBOL(flush_delayed_work);
				2772
				2773	/**
				2774	* flush_delayed_work_sync - wait for a dwork to finish
				2775	* @dwork: the delayed work to flush
				2776	*
				2777	* Delayed timer is cancelled and the pending work is queued for
				2778	* execution immediately. Other than timer handling, its behavior
				2779	* is identical to flush_work_sync().
				2780	*
				2781	* RETURNS:
				2782	* %true if flush_work_sync() waited for the work to finish execution,
				2783	* %false if it was already idle.
				2784	*/
				2785	bool flush_delayed_work_sync(struct delayed_work *dwork)
				2786	{
				2787	if (del_timer_sync(&dwork->timer))
				2788	__queue_work(raw_smp_processor_id(),
				2789	get_work_cwq(&dwork->work)->wq, &dwork->work);
				2790	return flush_work_sync(&dwork->work);
				2791	}
				2792	EXPORT_SYMBOL(flush_delayed_work_sync);
				2793
				2794	/**
				2795	* cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
				2796	* @dwork: the delayed work cancel
				2797	*
				2798	* This is cancel_work_sync() for delayed works.
				2799	*
				2800	* RETURNS:
				2801	* %true if @dwork was pending, %false otherwise.
				2802	*/
				2803	bool cancel_delayed_work_sync(struct delayed_work *dwork)
				2804	{
				2805	return __cancel_work_timer(&dwork->work, &dwork->timer);
				2806	}
				2807	EXPORT_SYMBOL(cancel_delayed_work_sync);
				2808
				2809	/**
				2810	* schedule_work - put work task in global workqueue
				2811	* @work: job to be done
				2812	*
				2813	* Returns zero if @work was already on the kernel-global workqueue and
				2814	* non-zero otherwise.
				2815	*
				2816	* This puts a job in the kernel-global workqueue if it was not already
				2817	* queued and leaves it in the same position on the kernel-global
				2818	* workqueue otherwise.
				2819	*/
				2820	int schedule_work(struct work_struct *work)
				2821	{
				2822	return queue_work(system_wq, work);
				2823	}
				2824	EXPORT_SYMBOL(schedule_work);
				2825
				2826	/*
				2827	* schedule_work_on - put work task on a specific cpu
				2828	* @cpu: cpu to put the work task on
				2829	* @work: job to be done
				2830	*
				2831	* This puts a job on a specific cpu
				2832	*/
				2833	int schedule_work_on(int cpu, struct work_struct *work)
				2834	{
				2835	return queue_work_on(cpu, system_wq, work);
				2836	}
				2837	EXPORT_SYMBOL(schedule_work_on);
				2838
				2839	/**
				2840	* schedule_delayed_work - put work task in global workqueue after delay
				2841	* @dwork: job to be done
				2842	* @delay: number of jiffies to wait or 0 for immediate execution
				2843	*
				2844	* After waiting for a given time this puts a job in the kernel-global
				2845	* workqueue.
				2846	*/
				2847	int schedule_delayed_work(struct delayed_work *dwork,
				2848	unsigned long delay)
				2849	{
				2850	return queue_delayed_work(system_wq, dwork, delay);
				2851	}
				2852	EXPORT_SYMBOL(schedule_delayed_work);
				2853
				2854	/**
				2855	* schedule_delayed_work_on - queue work in global workqueue on CPU after delay
				2856	* @cpu: cpu to use
				2857	* @dwork: job to be done
				2858	* @delay: number of jiffies to wait
				2859	*
				2860	* After waiting for a given time this puts a job in the kernel-global
				2861	* workqueue on the specified CPU.
				2862	*/
				2863	int schedule_delayed_work_on(int cpu,
				2864	struct delayed_work *dwork, unsigned long delay)
				2865	{
				2866	return queue_delayed_work_on(cpu, system_wq, dwork, delay);
				2867	}
				2868	EXPORT_SYMBOL(schedule_delayed_work_on);
				2869
				2870	/**
				2871	* schedule_on_each_cpu - execute a function synchronously on each online CPU
				2872	* @func: the function to call
				2873	*
				2874	* schedule_on_each_cpu() executes @func on each online CPU using the
				2875	* system workqueue and blocks until all CPUs have completed.
				2876	* schedule_on_each_cpu() is very slow.
				2877	*
				2878	* RETURNS:
				2879	* 0 on success, -errno on failure.
				2880	*/
				2881	int schedule_on_each_cpu(work_func_t func)
				2882	{
				2883	int cpu;
				2884	struct work_struct __percpu *works;
				2885
				2886	works = alloc_percpu(struct work_struct);
				2887	if (!works)
				2888	return -ENOMEM;
				2889
				2890	get_online_cpus();
				2891
				2892	for_each_online_cpu(cpu) {
				2893	struct work_struct *work = per_cpu_ptr(works, cpu);
				2894
				2895	INIT_WORK(work, func);
				2896	schedule_work_on(cpu, work);
				2897	}
				2898
				2899	for_each_online_cpu(cpu)
				2900	flush_work(per_cpu_ptr(works, cpu));
				2901
				2902	put_online_cpus();
				2903	free_percpu(works);
				2904	return 0;
				2905	}
				2906
				2907	/**
				2908	* flush_scheduled_work - ensure that any scheduled work has run to completion.
				2909	*
				2910	* Forces execution of the kernel-global workqueue and blocks until its
				2911	* completion.
				2912	*
				2913	* Think twice before calling this function! It's very easy to get into
				2914	* trouble if you don't take great care. Either of the following situations
				2915	* will lead to deadlock:
				2916	*
				2917	* One of the work items currently on the workqueue needs to acquire
				2918	* a lock held by your code or its caller.
				2919	*
				2920	* Your code is running in the context of a work routine.
				2921	*
				2922	* They will be detected by lockdep when they occur, but the first might not
				2923	* occur very often. It depends on what work items are on the workqueue and
				2924	* what locks they need, which you have no control over.
				2925	*
				2926	* In most situations flushing the entire workqueue is overkill; you merely
				2927	* need to know that a particular work item isn't queued and isn't running.
				2928	* In such cases you should use cancel_delayed_work_sync() or
				2929	* cancel_work_sync() instead.
				2930	*/
				2931	void flush_scheduled_work(void)
				2932	{
				2933	flush_workqueue(system_wq);
				2934	}
				2935	EXPORT_SYMBOL(flush_scheduled_work);
				2936
				2937	/**
				2938	* execute_in_process_context - reliably execute the routine with user context
				2939	* @fn: the function to execute
				2940	* @ew: guaranteed storage for the execute work structure (must
				2941	* be available when the work executes)
				2942	*
				2943	* Executes the function immediately if process context is available,
				2944	* otherwise schedules the function for delayed execution.
				2945	*
				2946	* Returns: 0 - function was executed
				2947	* 1 - function was scheduled for execution
				2948	*/
				2949	int execute_in_process_context(work_func_t fn, struct execute_work *ew)
				2950	{
				2951	if (!in_interrupt()) {
				2952	fn(&ew->work);
				2953	return 0;
				2954	}
				2955
				2956	INIT_WORK(&ew->work, fn);
				2957	schedule_work(&ew->work);
				2958
				2959	return 1;
				2960	}
				2961	EXPORT_SYMBOL_GPL(execute_in_process_context);
				2962
				2963	int keventd_up(void)
				2964	{
				2965	return system_wq != NULL;
				2966	}
				2967
				2968	static int alloc_cwqs(struct workqueue_struct *wq)
				2969	{
				2970	/*
				2971	* cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
				2972	* Make sure that the alignment isn't lower than that of
				2973	* unsigned long long.
				2974	*/
				2975	const size_t size = sizeof(struct cpu_workqueue_struct);
				2976	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
				2977	__alignof__(unsigned long long));
				2978
				2979	if (!(wq->flags & WQ_UNBOUND))
				2980	wq->cpu_wq.pcpu = __alloc_percpu(size, align);
				2981	else {
				2982	void *ptr;
				2983
				2984	/*
				2985	* Allocate enough room to align cwq and put an extra
				2986	* pointer at the end pointing back to the originally
				2987	* allocated pointer which will be used for free.
				2988	*/
				2989	ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
				2990	if (ptr) {
				2991	wq->cpu_wq.single = PTR_ALIGN(ptr, align);
				2992	(void *)(wq->cpu_wq.single + 1) = ptr;
				2993	}
				2994	}
				2995
				2996	/* just in case, make sure it's actually aligned */
				2997	BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
				2998	return wq->cpu_wq.v ? 0 : -ENOMEM;
				2999	}
				3000
				3001	static void free_cwqs(struct workqueue_struct *wq)
				3002	{
				3003	if (!(wq->flags & WQ_UNBOUND))
				3004	free_percpu(wq->cpu_wq.pcpu);
				3005	else if (wq->cpu_wq.single) {
				3006	/* the pointer to free is stored right after the cwq */
				3007	kfree((void *)(wq->cpu_wq.single + 1));
				3008	}
				3009	}
				3010
				3011	static int wq_clamp_max_active(int max_active, unsigned int flags,
				3012	const char *name)
				3013	{
				3014	int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
				3015
				3016	if (max_active < 1 \|\| max_active > lim)
				3017	printk(KERN_WARNING "workqueue: max_active %d requested for %s "
				3018	"is out of range, clamping between %d and %d\n",
				3019	max_active, name, 1, lim);
				3020
				3021	return clamp_val(max_active, 1, lim);
				3022	}
				3023
				3024	struct workqueue_struct __alloc_workqueue_key(const char fmt,
				3025	unsigned int flags,
				3026	int max_active,
				3027	struct lock_class_key *key,
				3028	const char *lock_name, ...)
				3029	{
				3030	va_list args, args1;
				3031	struct workqueue_struct *wq;
				3032	unsigned int cpu;
				3033	size_t namelen;
				3034
				3035	/* determine namelen, allocate wq and format name */
				3036	va_start(args, lock_name);
				3037	va_copy(args1, args);
				3038	namelen = vsnprintf(NULL, 0, fmt, args) + 1;
				3039
				3040	wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
				3041	if (!wq)
				3042	goto err;
				3043
				3044	vsnprintf(wq->name, namelen, fmt, args1);
				3045	va_end(args);
				3046	va_end(args1);
				3047
				3048	/*
				3049	* Workqueues which may be used during memory reclaim should
				3050	* have a rescuer to guarantee forward progress.
				3051	*/
				3052	if (flags & WQ_MEM_RECLAIM)
				3053	flags \|= WQ_RESCUER;
				3054
				3055	/*
				3056	* Unbound workqueues aren't concurrency managed and should be
				3057	* dispatched to workers immediately.
				3058	*/
				3059	if (flags & WQ_UNBOUND)
				3060	flags \|= WQ_HIGHPRI;
				3061
				3062	max_active = max_active ?: WQ_DFL_ACTIVE;
				3063	max_active = wq_clamp_max_active(max_active, flags, wq->name);
				3064
				3065	/* init wq */
				3066	wq->flags = flags;
				3067	wq->saved_max_active = max_active;
				3068	mutex_init(&wq->flush_mutex);
				3069	atomic_set(&wq->nr_cwqs_to_flush, 0);
				3070	INIT_LIST_HEAD(&wq->flusher_queue);
				3071	INIT_LIST_HEAD(&wq->flusher_overflow);
				3072
				3073	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
				3074	INIT_LIST_HEAD(&wq->list);
				3075
				3076	if (alloc_cwqs(wq) < 0)
				3077	goto err;
				3078
				3079	for_each_cwq_cpu(cpu, wq) {
				3080	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3081	struct global_cwq *gcwq = get_gcwq(cpu);
				3082
				3083	BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
				3084	cwq->gcwq = gcwq;
				3085	cwq->wq = wq;
				3086	cwq->flush_color = -1;
				3087	cwq->max_active = max_active;
				3088	INIT_LIST_HEAD(&cwq->delayed_works);
				3089	}
				3090
				3091	if (flags & WQ_RESCUER) {
				3092	struct worker *rescuer;
				3093
				3094	if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL))
				3095	goto err;
				3096
				3097	wq->rescuer = rescuer = alloc_worker();
				3098	if (!rescuer)
				3099	goto err;
				3100
				3101	rescuer->task = kthread_create(rescuer_thread, wq, "%s",
				3102	wq->name);
				3103	if (IS_ERR(rescuer->task))
				3104	goto err;
				3105
				3106	rescuer->task->flags \|= PF_THREAD_BOUND;
				3107	wake_up_process(rescuer->task);
				3108	}
				3109
				3110	/*
				3111	* workqueue_lock protects global freeze state and workqueues
				3112	* list. Grab it, set max_active accordingly and add the new
				3113	* workqueue to workqueues list.
				3114	*/
				3115	spin_lock(&workqueue_lock);
				3116
				3117	if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
				3118	for_each_cwq_cpu(cpu, wq)
				3119	get_cwq(cpu, wq)->max_active = 0;
				3120
				3121	list_add(&wq->list, &workqueues);
				3122
				3123	spin_unlock(&workqueue_lock);
				3124
				3125	return wq;
				3126	err:
				3127	if (wq) {
				3128	free_cwqs(wq);
				3129	free_mayday_mask(wq->mayday_mask);
				3130	kfree(wq->rescuer);
				3131	kfree(wq);
				3132	}
				3133	return NULL;
				3134	}
				3135	EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
				3136
				3137	/**
				3138	* destroy_workqueue - safely terminate a workqueue
				3139	* @wq: target workqueue
				3140	*
				3141	* Safely destroy a workqueue. All work currently pending will be done first.
				3142	*/
				3143	void destroy_workqueue(struct workqueue_struct *wq)
				3144	{
				3145	unsigned int cpu;
				3146
				3147	/* drain it before proceeding with destruction */
				3148	drain_workqueue(wq);
				3149
				3150	/*
				3151	* wq list is used to freeze wq, remove from list after
				3152	* flushing is complete in case freeze races us.
				3153	*/
				3154	spin_lock(&workqueue_lock);
				3155	list_del(&wq->list);
				3156	spin_unlock(&workqueue_lock);
				3157
				3158	/* sanity check */
				3159	for_each_cwq_cpu(cpu, wq) {
				3160	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3161	int i;
				3162
				3163	for (i = 0; i < WORK_NR_COLORS; i++)
				3164	BUG_ON(cwq->nr_in_flight[i]);
				3165	BUG_ON(cwq->nr_active);
				3166	BUG_ON(!list_empty(&cwq->delayed_works));
				3167	}
				3168
				3169	if (wq->flags & WQ_RESCUER) {
				3170	kthread_stop(wq->rescuer->task);
				3171	free_mayday_mask(wq->mayday_mask);
				3172	kfree(wq->rescuer);
				3173	}
				3174
				3175	free_cwqs(wq);
				3176	kfree(wq);
				3177	}
				3178	EXPORT_SYMBOL_GPL(destroy_workqueue);
				3179
				3180	/**
				3181	* workqueue_set_max_active - adjust max_active of a workqueue
				3182	* @wq: target workqueue
				3183	* @max_active: new max_active value.
				3184	*
				3185	* Set max_active of @wq to @max_active.
				3186	*
				3187	* CONTEXT:
				3188	* Don't call from IRQ context.
				3189	*/
				3190	void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
				3191	{
				3192	unsigned int cpu;
				3193
				3194	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
				3195
				3196	spin_lock(&workqueue_lock);
				3197
				3198	wq->saved_max_active = max_active;
				3199
				3200	for_each_cwq_cpu(cpu, wq) {
				3201	struct global_cwq *gcwq = get_gcwq(cpu);
				3202
				3203	spin_lock_irq(&gcwq->lock);
				3204
				3205	if (!(wq->flags & WQ_FREEZABLE) \|\|
				3206	!(gcwq->flags & GCWQ_FREEZING))
				3207	get_cwq(gcwq->cpu, wq)->max_active = max_active;
				3208
				3209	spin_unlock_irq(&gcwq->lock);
				3210	}
				3211
				3212	spin_unlock(&workqueue_lock);
				3213	}
				3214	EXPORT_SYMBOL_GPL(workqueue_set_max_active);
				3215
				3216	/**
				3217	* workqueue_congested - test whether a workqueue is congested
				3218	* @cpu: CPU in question
				3219	* @wq: target workqueue
				3220	*
				3221	* Test whether @wq's cpu workqueue for @cpu is congested. There is
				3222	* no synchronization around this function and the test result is
				3223	* unreliable and only useful as advisory hints or for debugging.
				3224	*
				3225	* RETURNS:
				3226	* %true if congested, %false otherwise.
				3227	*/
				3228	bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
				3229	{
				3230	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3231
				3232	return !list_empty(&cwq->delayed_works);
				3233	}
				3234	EXPORT_SYMBOL_GPL(workqueue_congested);
				3235
				3236	/**
				3237	* work_cpu - return the last known associated cpu for @work
				3238	* @work: the work of interest
				3239	*
				3240	* RETURNS:
				3241	* CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
				3242	*/
				3243	unsigned int work_cpu(struct work_struct *work)
				3244	{
				3245	struct global_cwq *gcwq = get_work_gcwq(work);
				3246
				3247	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
				3248	}
				3249	EXPORT_SYMBOL_GPL(work_cpu);
				3250
				3251	/**
				3252	* work_busy - test whether a work is currently pending or running
				3253	* @work: the work to be tested
				3254	*
				3255	* Test whether @work is currently pending or running. There is no
				3256	* synchronization around this function and the test result is
				3257	* unreliable and only useful as advisory hints or for debugging.
				3258	* Especially for reentrant wqs, the pending state might hide the
				3259	* running state.
				3260	*
				3261	* RETURNS:
				3262	* OR'd bitmask of WORK_BUSY_* bits.
				3263	*/
				3264	unsigned int work_busy(struct work_struct *work)
				3265	{
				3266	struct global_cwq *gcwq = get_work_gcwq(work);
				3267	unsigned long flags;
				3268	unsigned int ret = 0;
				3269
				3270	if (!gcwq)
				3271	return false;
				3272
				3273	spin_lock_irqsave(&gcwq->lock, flags);
				3274
				3275	if (work_pending(work))
				3276	ret \|= WORK_BUSY_PENDING;
				3277	if (find_worker_executing_work(gcwq, work))
				3278	ret \|= WORK_BUSY_RUNNING;
				3279
				3280	spin_unlock_irqrestore(&gcwq->lock, flags);
				3281
				3282	return ret;
				3283	}
				3284	EXPORT_SYMBOL_GPL(work_busy);
				3285
				3286	/*
				3287	* CPU hotplug.
				3288	*
				3289	* There are two challenges in supporting CPU hotplug. Firstly, there
				3290	* are a lot of assumptions on strong associations among work, cwq and
				3291	* gcwq which make migrating pending and scheduled works very
				3292	* difficult to implement without impacting hot paths. Secondly,
				3293	* gcwqs serve mix of short, long and very long running works making
				3294	* blocked draining impractical.
				3295	*
				3296	* This is solved by allowing a gcwq to be detached from CPU, running
				3297	* it with unbound (rogue) workers and allowing it to be reattached
				3298	* later if the cpu comes back online. A separate thread is created
				3299	* to govern a gcwq in such state and is called the trustee of the
				3300	* gcwq.
				3301	*
				3302	* Trustee states and their descriptions.
				3303	*
				3304	* START Command state used on startup. On CPU_DOWN_PREPARE, a
				3305	* new trustee is started with this state.
				3306	*
				3307	* IN_CHARGE Once started, trustee will enter this state after
				3308	* assuming the manager role and making all existing
				3309	* workers rogue. DOWN_PREPARE waits for trustee to
				3310	* enter this state. After reaching IN_CHARGE, trustee
				3311	* tries to execute the pending worklist until it's empty
				3312	* and the state is set to BUTCHER, or the state is set
				3313	* to RELEASE.
				3314	*
				3315	* BUTCHER Command state which is set by the cpu callback after
				3316	* the cpu has gone down. Once this state is set trustee
				3317	* knows that there will be no new works on the worklist
				3318	* and once the worklist is empty it can proceed to
				3319	* killing idle workers.
				3320	*
				3321	* RELEASE Command state which is set by the cpu callback if the
				3322	* cpu down has been canceled or it has come online
				3323	* again. After recognizing this state, trustee stops
				3324	* trying to drain or butcher and clears ROGUE, rebinds
				3325	* all remaining workers back to the cpu and releases
				3326	* manager role.
				3327	*
				3328	* DONE Trustee will enter this state after BUTCHER or RELEASE
				3329	* is complete.
				3330	*
				3331	*          trustee                 CPU                draining
				3332	*         took over                down               complete
				3333	* START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
				3334	*                        |                     |                  ^
				3335	*                        | CPU is back online  v   return workers |
				3336	*                         ----------------> RELEASE --------------
				3337	*/
				3338
				3339	/**
				3340	* trustee_wait_event_timeout - timed event wait for trustee
				3341	* @cond: condition to wait for
				3342	* @timeout: timeout in jiffies
				3343	*
				3344	* wait_event_timeout() for trustee to use. Handles locking and
				3345	* checks for RELEASE request.
				3346	*
				3347	* CONTEXT:
				3348	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				3349	* multiple times. To be used by trustee.
				3350	*
				3351	* RETURNS:
				3352	* Positive indicating left time if @cond is satisfied, 0 if timed
				3353	* out, -1 if canceled.
				3354	*/
				3355	#define trustee_wait_event_timeout(cond, timeout) ({ \
				3356	long __ret = (timeout); \
				3357	while (!((cond) \|\| (gcwq->trustee_state == TRUSTEE_RELEASE)) && \
				3358	__ret) { \
				3359	spin_unlock_irq(&gcwq->lock); \
				3360	__wait_event_timeout(gcwq->trustee_wait, (cond) \|\| \
				3361	(gcwq->trustee_state == TRUSTEE_RELEASE), \
				3362	__ret); \
				3363	spin_lock_irq(&gcwq->lock); \
				3364	} \
				3365	gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \
				3366	})
				3367
				3368	/**
				3369	* trustee_wait_event - event wait for trustee
				3370	* @cond: condition to wait for
				3371	*
				3372	* wait_event() for trustee to use. Automatically handles locking and
				3373	* checks for CANCEL request.
				3374	*
				3375	* CONTEXT:
				3376	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				3377	* multiple times. To be used by trustee.
				3378	*
				3379	* RETURNS:
				3380	* 0 if @cond is satisfied, -1 if canceled.
				3381	*/
				3382	#define trustee_wait_event(cond) ({ \
				3383	long __ret1; \
				3384	__ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
				3385	__ret1 < 0 ? -1 : 0; \
				3386	})
				3387
				3388	static int __cpuinit trustee_thread(void *__gcwq)
				3389	{
				3390	struct global_cwq *gcwq = __gcwq;
				3391	struct worker *worker;
				3392	struct work_struct *work;
				3393	struct hlist_node *pos;
				3394	long rc;
				3395	int i;
				3396
				3397	BUG_ON(gcwq->cpu != smp_processor_id());
				3398
				3399	spin_lock_irq(&gcwq->lock);
				3400	/*
				3401	* Claim the manager position and make all workers rogue.
				3402	* Trustee must be bound to the target cpu and can't be
				3403	* cancelled.
				3404	*/
				3405	BUG_ON(gcwq->cpu != smp_processor_id());
				3406	rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS));
				3407	BUG_ON(rc < 0);
				3408
				3409	gcwq->flags \|= GCWQ_MANAGING_WORKERS;
				3410
				3411	list_for_each_entry(worker, &gcwq->idle_list, entry)
				3412	worker->flags \|= WORKER_ROGUE;
				3413
				3414	for_each_busy_worker(worker, i, pos, gcwq)
				3415	worker->flags \|= WORKER_ROGUE;
				3416
				3417	/*
				3418	* Call schedule() so that we cross rq->lock and thus can
				3419	* guarantee sched callbacks see the rogue flag. This is
				3420	* necessary as scheduler callbacks may be invoked from other
				3421	* cpus.
				3422	*/
				3423	spin_unlock_irq(&gcwq->lock);
				3424	schedule();
				3425	spin_lock_irq(&gcwq->lock);
				3426
				3427	/*
				3428	* Sched callbacks are disabled now. Zap nr_running. After
				3429	* this, nr_running stays zero and need_more_worker() and
				3430	* keep_working() are always true as long as the worklist is
				3431	* not empty.
				3432	*/
				3433	atomic_set(get_gcwq_nr_running(gcwq->cpu), 0);
				3434
				3435	spin_unlock_irq(&gcwq->lock);
				3436	del_timer_sync(&gcwq->idle_timer);
				3437	spin_lock_irq(&gcwq->lock);
				3438
				3439	/*
				3440	* We're now in charge. Notify and proceed to drain. We need
				3441	* to keep the gcwq running during the whole CPU down
				3442	* procedure as other cpu hotunplug callbacks may need to
				3443	* flush currently running tasks.
				3444	*/
				3445	gcwq->trustee_state = TRUSTEE_IN_CHARGE;
				3446	wake_up_all(&gcwq->trustee_wait);
				3447
				3448	/*
				3449	* The original cpu is in the process of dying and may go away
				3450	* anytime now. When that happens, we and all workers would
				3451	* be migrated to other cpus. Try draining any left work. We
				3452	* want to get it over with ASAP - spam rescuers, wake up as
				3453	* many idlers as necessary and create new ones till the
				3454	* worklist is empty. Note that if the gcwq is frozen, there
				3455	* may be frozen works in freezable cwqs. Don't declare
				3456	* completion while frozen.
				3457	*/
				3458	while (gcwq->nr_workers != gcwq->nr_idle \|\|
				3459	gcwq->flags & GCWQ_FREEZING \|\|
				3460	gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
				3461	int nr_works = 0;
				3462
				3463	list_for_each_entry(work, &gcwq->worklist, entry) {
				3464	send_mayday(work);
				3465	nr_works++;
				3466	}
				3467
				3468	list_for_each_entry(worker, &gcwq->idle_list, entry) {
				3469	if (!nr_works--)
				3470	break;
				3471	wake_up_process(worker->task);
				3472	}
				3473
				3474	if (need_to_create_worker(gcwq)) {
				3475	spin_unlock_irq(&gcwq->lock);
				3476	worker = create_worker(gcwq, false);
				3477	spin_lock_irq(&gcwq->lock);
				3478	if (worker) {
				3479	worker->flags \|= WORKER_ROGUE;
				3480	start_worker(worker);
				3481	}
				3482	}
				3483
				3484	/* give a breather */
				3485	if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
				3486	break;
				3487	}
				3488
				3489	/*
				3490	* Either all works have been scheduled and cpu is down, or
				3491	* cpu down has already been canceled. Wait for and butcher
				3492	* all workers till we're canceled.
				3493	*/
				3494	do {
				3495	rc = trustee_wait_event(!list_empty(&gcwq->idle_list));
				3496	while (!list_empty(&gcwq->idle_list))
				3497	destroy_worker(list_first_entry(&gcwq->idle_list,
				3498	struct worker, entry));
				3499	} while (gcwq->nr_workers && rc >= 0);
				3500
				3501	/*
				3502	* At this point, either draining has completed and no worker
				3503	* is left, or cpu down has been canceled or the cpu is being
				3504	* brought back up. There shouldn't be any idle one left.
				3505	* Tell the remaining busy ones to rebind once it finishes the
				3506	* currently scheduled works by scheduling the rebind_work.
				3507	*/
				3508	WARN_ON(!list_empty(&gcwq->idle_list));
				3509
				3510	for_each_busy_worker(worker, i, pos, gcwq) {
				3511	struct work_struct *rebind_work = &worker->rebind_work;
				3512	unsigned long worker_flags = worker->flags;
				3513
				3514	/*
				3515	* Rebind_work may race with future cpu hotplug
				3516	* operations. Use a separate flag to mark that
				3517	* rebinding is scheduled. The morphing should
				3518	* be atomic.
				3519	*/
				3520	worker_flags \|= WORKER_REBIND;
				3521	worker_flags &= ~WORKER_ROGUE;
				3522	ACCESS_ONCE(worker->flags) = worker_flags;
				3523
				3524	/* queue rebind_work, wq doesn't matter, use the default one */
				3525	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
				3526	work_data_bits(rebind_work)))
				3527	continue;
				3528
				3529	debug_work_activate(rebind_work);
				3530	insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
				3531	worker->scheduled.next,
				3532	work_color_to_flags(WORK_NO_COLOR));
				3533	}
				3534
				3535	/* relinquish manager role */
				3536	gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
				3537
				3538	/* notify completion */
				3539	gcwq->trustee = NULL;
				3540	gcwq->trustee_state = TRUSTEE_DONE;
				3541	wake_up_all(&gcwq->trustee_wait);
				3542	spin_unlock_irq(&gcwq->lock);
				3543	return 0;
				3544	}
				3545
				3546	/**
				3547	* wait_trustee_state - wait for trustee to enter the specified state
				3548	* @gcwq: gcwq the trustee of interest belongs to
				3549	* @state: target state to wait for
				3550	*
				3551	* Wait for the trustee to reach @state. DONE is already matched.
				3552	*
				3553	* CONTEXT:
				3554	* spin_lock_irq(gcwq->lock) which may be released and regrabbed
				3555	* multiple times. To be used by cpu_callback.
				3556	*/
				3557	static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
				3558	__releases(&gcwq->lock)
				3559	__acquires(&gcwq->lock)
				3560	{
				3561	if (!(gcwq->trustee_state == state \|\|
				3562	gcwq->trustee_state == TRUSTEE_DONE)) {
				3563	spin_unlock_irq(&gcwq->lock);
				3564	__wait_event(gcwq->trustee_wait,
				3565	gcwq->trustee_state == state \|\|
				3566	gcwq->trustee_state == TRUSTEE_DONE);
				3567	spin_lock_irq(&gcwq->lock);
				3568	}
				3569	}
				3570
				3571	static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
				3572	unsigned long action,
				3573	void *hcpu)
				3574	{
				3575	unsigned int cpu = (unsigned long)hcpu;
				3576	struct global_cwq *gcwq = get_gcwq(cpu);
				3577	struct task_struct *new_trustee = NULL;
				3578	struct worker *uninitialized_var(new_worker);
				3579	unsigned long flags;
				3580
				3581	action &= ~CPU_TASKS_FROZEN;
				3582
				3583	switch (action) {
				3584	case CPU_DOWN_PREPARE:
				3585	new_trustee = kthread_create(trustee_thread, gcwq,
				3586	"workqueue_trustee/%d\n", cpu);
				3587	if (IS_ERR(new_trustee))
				3588	return notifier_from_errno(PTR_ERR(new_trustee));
				3589	kthread_bind(new_trustee, cpu);
				3590	/* fall through */
				3591	case CPU_UP_PREPARE:
				3592	BUG_ON(gcwq->first_idle);
				3593	new_worker = create_worker(gcwq, false);
				3594	if (!new_worker) {
				3595	if (new_trustee)
				3596	kthread_stop(new_trustee);
				3597	return NOTIFY_BAD;
				3598	}
				3599	break;
				3600	case CPU_POST_DEAD:
				3601	case CPU_UP_CANCELED:
				3602	case CPU_DOWN_FAILED:
				3603	case CPU_ONLINE:
				3604	break;
				3605	case CPU_DYING:
				3606	/*
				3607	* We access this lockless. We are on the dying CPU
				3608	* and called from stomp machine.
				3609	*
				3610	* Before this, the trustee and all workers except for
				3611	* the ones which are still executing works from
				3612	* before the last CPU down must be on the cpu. After
				3613	* this, they'll all be diasporas.
				3614	*/
				3615	gcwq->flags \|= GCWQ_DISASSOCIATED;
				3616	default:
				3617	goto out;
				3618	}
				3619
				3620	/* some are called w/ irq disabled, don't disturb irq status */
				3621	spin_lock_irqsave(&gcwq->lock, flags);
				3622
				3623	switch (action) {
				3624	case CPU_DOWN_PREPARE:
				3625	/* initialize trustee and tell it to acquire the gcwq */
				3626	BUG_ON(gcwq->trustee \|\| gcwq->trustee_state != TRUSTEE_DONE);
				3627	gcwq->trustee = new_trustee;
				3628	gcwq->trustee_state = TRUSTEE_START;
				3629	wake_up_process(gcwq->trustee);
				3630	wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
				3631	/* fall through */
				3632	case CPU_UP_PREPARE:
				3633	BUG_ON(gcwq->first_idle);
				3634	gcwq->first_idle = new_worker;
				3635	break;
				3636
				3637	case CPU_POST_DEAD:
				3638	gcwq->trustee_state = TRUSTEE_BUTCHER;
				3639	/* fall through */
				3640	case CPU_UP_CANCELED:
				3641	destroy_worker(gcwq->first_idle);
				3642	gcwq->first_idle = NULL;
				3643	break;
				3644
				3645	case CPU_DOWN_FAILED:
				3646	case CPU_ONLINE:
				3647	gcwq->flags &= ~GCWQ_DISASSOCIATED;
				3648	if (gcwq->trustee_state != TRUSTEE_DONE) {
				3649	gcwq->trustee_state = TRUSTEE_RELEASE;
				3650	wake_up_process(gcwq->trustee);
				3651	wait_trustee_state(gcwq, TRUSTEE_DONE);
				3652	}
				3653
				3654	/*
				3655	* Trustee is done and there might be no worker left.
				3656	* Put the first_idle in and request a real manager to
				3657	* take a look.
				3658	*/
				3659	spin_unlock_irq(&gcwq->lock);
				3660	kthread_bind(gcwq->first_idle->task, cpu);
				3661	spin_lock_irq(&gcwq->lock);
				3662	gcwq->flags \|= GCWQ_MANAGE_WORKERS;
				3663	start_worker(gcwq->first_idle);
				3664	gcwq->first_idle = NULL;
				3665	break;
				3666	}
				3667
				3668	spin_unlock_irqrestore(&gcwq->lock, flags);
				3669
				3670	out:
				3671	return notifier_from_errno(0);
				3672	}
				3673
				3674	/*
				3675	* Workqueues should be brought up before normal priority CPU notifiers.
				3676	* This will be registered high priority CPU notifier.
				3677	*/
				3678	static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
				3679	unsigned long action,
				3680	void *hcpu)
				3681	{
				3682	switch (action & ~CPU_TASKS_FROZEN) {
				3683	case CPU_UP_PREPARE:
				3684	case CPU_UP_CANCELED:
				3685	case CPU_DOWN_FAILED:
				3686	case CPU_ONLINE:
				3687	return workqueue_cpu_callback(nfb, action, hcpu);
				3688	}
				3689	return NOTIFY_OK;
				3690	}
				3691
				3692	/*
				3693	* Workqueues should be brought down after normal priority CPU notifiers.
				3694	* This will be registered as low priority CPU notifier.
				3695	*/
				3696	static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
				3697	unsigned long action,
				3698	void *hcpu)
				3699	{
				3700	switch (action & ~CPU_TASKS_FROZEN) {
				3701	case CPU_DOWN_PREPARE:
				3702	case CPU_DYING:
				3703	case CPU_POST_DEAD:
				3704	return workqueue_cpu_callback(nfb, action, hcpu);
				3705	}
				3706	return NOTIFY_OK;
				3707	}
				3708
				3709	#ifdef CONFIG_SMP
				3710
				3711	struct work_for_cpu {
				3712	struct work_struct work;
				3713	long (fn)(void );
				3714	void *arg;
				3715	long ret;
				3716	};
				3717
				3718	static void work_for_cpu_fn(struct work_struct *work)
				3719	{
				3720	struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
				3721
				3722	wfc->ret = wfc->fn(wfc->arg);
				3723	}
				3724
				3725	/**
				3726	* work_on_cpu - run a function in user context on a particular cpu
				3727	* @cpu: the cpu to run on
				3728	* @fn: the function to run
				3729	* @arg: the function arg
				3730	*
				3731	* This will return the value @fn returns.
				3732	* It is up to the caller to ensure that the cpu doesn't go offline.
				3733	* The caller must not hold any locks which would prevent @fn from completing.
				3734	*/
				3735	long work_on_cpu(unsigned int cpu, long (fn)(void ), void *arg)
				3736	{
				3737	struct work_for_cpu wfc = { .fn = fn, .arg = arg };
				3738
				3739	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
				3740	schedule_work_on(cpu, &wfc.work);
				3741	flush_work(&wfc.work);
				3742	return wfc.ret;
				3743	}
				3744	EXPORT_SYMBOL_GPL(work_on_cpu);
				3745	#endif /* CONFIG_SMP */
				3746
				3747	#ifdef CONFIG_FREEZER
				3748
				3749	/**
				3750	* freeze_workqueues_begin - begin freezing workqueues
				3751	*
				3752	* Start freezing workqueues. After this function returns, all freezable
				3753	* workqueues will queue new works to their frozen_works list instead of
				3754	* gcwq->worklist.
				3755	*
				3756	* CONTEXT:
				3757	* Grabs and releases workqueue_lock and gcwq->lock's.
				3758	*/
				3759	void freeze_workqueues_begin(void)
				3760	{
				3761	unsigned int cpu;
				3762
				3763	spin_lock(&workqueue_lock);
				3764
				3765	BUG_ON(workqueue_freezing);
				3766	workqueue_freezing = true;
				3767
				3768	for_each_gcwq_cpu(cpu) {
				3769	struct global_cwq *gcwq = get_gcwq(cpu);
				3770	struct workqueue_struct *wq;
				3771
				3772	spin_lock_irq(&gcwq->lock);
				3773
				3774	BUG_ON(gcwq->flags & GCWQ_FREEZING);
				3775	gcwq->flags \|= GCWQ_FREEZING;
				3776
				3777	list_for_each_entry(wq, &workqueues, list) {
				3778	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3779
				3780	if (cwq && wq->flags & WQ_FREEZABLE)
				3781	cwq->max_active = 0;
				3782	}
				3783
				3784	spin_unlock_irq(&gcwq->lock);
				3785	}
				3786
				3787	spin_unlock(&workqueue_lock);
				3788	}
				3789
				3790	/**
				3791	* freeze_workqueues_busy - are freezable workqueues still busy?
				3792	*
				3793	* Check whether freezing is complete. This function must be called
				3794	* between freeze_workqueues_begin() and thaw_workqueues().
				3795	*
				3796	* CONTEXT:
				3797	* Grabs and releases workqueue_lock.
				3798	*
				3799	* RETURNS:
				3800	* %true if some freezable workqueues are still busy. %false if freezing
				3801	* is complete.
				3802	*/
				3803	bool freeze_workqueues_busy(void)
				3804	{
				3805	unsigned int cpu;
				3806	bool busy = false;
				3807
				3808	spin_lock(&workqueue_lock);
				3809
				3810	BUG_ON(!workqueue_freezing);
				3811
				3812	for_each_gcwq_cpu(cpu) {
				3813	struct workqueue_struct *wq;
				3814	/*
				3815	* nr_active is monotonically decreasing. It's safe
				3816	* to peek without lock.
				3817	*/
				3818	list_for_each_entry(wq, &workqueues, list) {
				3819	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3820
				3821	if (!cwq \|\| !(wq->flags & WQ_FREEZABLE))
				3822	continue;
				3823
				3824	BUG_ON(cwq->nr_active < 0);
				3825	if (cwq->nr_active) {
				3826	busy = true;
				3827	goto out_unlock;
				3828	}
				3829	}
				3830	}
				3831	out_unlock:
				3832	spin_unlock(&workqueue_lock);
				3833	return busy;
				3834	}
				3835
				3836	/**
				3837	* thaw_workqueues - thaw workqueues
				3838	*
				3839	* Thaw workqueues. Normal queueing is restored and all collected
				3840	* frozen works are transferred to their respective gcwq worklists.
				3841	*
				3842	* CONTEXT:
				3843	* Grabs and releases workqueue_lock and gcwq->lock's.
				3844	*/
				3845	void thaw_workqueues(void)
				3846	{
				3847	unsigned int cpu;
				3848
				3849	spin_lock(&workqueue_lock);
				3850
				3851	if (!workqueue_freezing)
				3852	goto out_unlock;
				3853
				3854	for_each_gcwq_cpu(cpu) {
				3855	struct global_cwq *gcwq = get_gcwq(cpu);
				3856	struct workqueue_struct *wq;
				3857
				3858	spin_lock_irq(&gcwq->lock);
				3859
				3860	BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
				3861	gcwq->flags &= ~GCWQ_FREEZING;
				3862
				3863	list_for_each_entry(wq, &workqueues, list) {
				3864	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
				3865
				3866	if (!cwq \|\| !(wq->flags & WQ_FREEZABLE))
				3867	continue;
				3868
				3869	/* restore max_active and repopulate worklist */
				3870	cwq->max_active = wq->saved_max_active;
				3871
				3872	while (!list_empty(&cwq->delayed_works) &&
				3873	cwq->nr_active < cwq->max_active)
				3874	cwq_activate_first_delayed(cwq);
				3875	}
				3876
				3877	wake_up_worker(gcwq);
				3878
				3879	spin_unlock_irq(&gcwq->lock);
				3880	}
				3881
				3882	workqueue_freezing = false;
				3883	out_unlock:
				3884	spin_unlock(&workqueue_lock);
				3885	}
				3886	#endif /* CONFIG_FREEZER */
				3887
				3888	static int __init init_workqueues(void)
				3889	{
				3890	unsigned int cpu;
				3891	int i;
				3892
				3893	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
				3894	cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
				3895
				3896	/* initialize gcwqs */
				3897	for_each_gcwq_cpu(cpu) {
				3898	struct global_cwq *gcwq = get_gcwq(cpu);
				3899
				3900	spin_lock_init(&gcwq->lock);
				3901	INIT_LIST_HEAD(&gcwq->worklist);
				3902	gcwq->cpu = cpu;
				3903	gcwq->flags \|= GCWQ_DISASSOCIATED;
				3904
				3905	INIT_LIST_HEAD(&gcwq->idle_list);
				3906	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
				3907	INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
				3908
				3909	init_timer_deferrable(&gcwq->idle_timer);
				3910	gcwq->idle_timer.function = idle_worker_timeout;
				3911	gcwq->idle_timer.data = (unsigned long)gcwq;
				3912
				3913	setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout,
				3914	(unsigned long)gcwq);
				3915
				3916	ida_init(&gcwq->worker_ida);
				3917
				3918	gcwq->trustee_state = TRUSTEE_DONE;
				3919	init_waitqueue_head(&gcwq->trustee_wait);
				3920	}
				3921
				3922	/* create the initial worker */
				3923	for_each_online_gcwq_cpu(cpu) {
				3924	struct global_cwq *gcwq = get_gcwq(cpu);
				3925	struct worker *worker;
				3926
				3927	if (cpu != WORK_CPU_UNBOUND)
				3928	gcwq->flags &= ~GCWQ_DISASSOCIATED;
				3929	worker = create_worker(gcwq, true);
				3930	BUG_ON(!worker);
				3931	spin_lock_irq(&gcwq->lock);
				3932	start_worker(worker);
				3933	spin_unlock_irq(&gcwq->lock);
				3934	}
				3935
				3936	system_wq = alloc_workqueue("events", 0, 0);
				3937	system_long_wq = alloc_workqueue("events_long", 0, 0);
				3938	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
				3939	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
				3940	WQ_UNBOUND_MAX_ACTIVE);
				3941	system_freezable_wq = alloc_workqueue("events_freezable",
				3942	WQ_FREEZABLE, 0);
				3943	system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
				3944	WQ_NON_REENTRANT \| WQ_FREEZABLE, 0);
				3945	BUG_ON(!system_wq \|\| !system_long_wq \|\| !system_nrt_wq \|\|
				3946	!system_unbound_wq \|\| !system_freezable_wq \|\|
				3947	!system_nrt_freezable_wq);
				3948	return 0;
				3949	}
				3950	early_initcall(init_workqueues);