Blame - ap/os/linux/linux-3.4.x/kernel/rcutree.c - T106_DC

blob: 055268b44f8bdd0e262a50a6aa21e6dd2c4ecda5 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	/*
				2	* Read-Copy Update mechanism for mutual exclusion
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				17	*
				18	* Copyright IBM Corporation, 2008
				19	*
				20	* Authors: Dipankar Sarma <dipankar@in.ibm.com>
				21	* Manfred Spraul <manfred@colorfullife.com>
				22	* Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
				23	*
				24	* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
				25	* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
				26	*
				27	* For detailed explanation of Read-Copy Update mechanism see -
				28	* Documentation/RCU
				29	*/
				30	#include <linux/types.h>
				31	#include <linux/kernel.h>
				32	#include <linux/init.h>
				33	#include <linux/spinlock.h>
				34	#include <linux/smp.h>
				35	#include <linux/rcupdate.h>
				36	#include <linux/interrupt.h>
				37	#include <linux/sched.h>
				38	#include <linux/nmi.h>
				39	#include <linux/atomic.h>
				40	#include <linux/bitops.h>
				41	#include <linux/export.h>
				42	#include <linux/completion.h>
				43	#include <linux/moduleparam.h>
				44	#include <linux/percpu.h>
				45	#include <linux/notifier.h>
				46	#include <linux/cpu.h>
				47	#include <linux/mutex.h>
				48	#include <linux/time.h>
				49	#include <linux/kernel_stat.h>
				50	#include <linux/wait.h>
				51	#include <linux/kthread.h>
				52	#include <linux/prefetch.h>
				53	#include <linux/delay.h>
				54	#include <linux/stop_machine.h>
				55
				56	#include "rcutree.h"
				57	#include <trace/events/rcu.h>
				58
				59	#include "rcu.h"
				60
				61	/* Data structures. */
				62
				63	static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
				64
				/*
				 * Static initializer for the rcu_state structure named <structname>_state.
				 * ->level[0] is pointed at the structure's own node[0], ->levelcnt is
				 * filled from the NUM_RCU_LVL_* constants, both grace-period counters
				 * start at -300, both raw spinlocks start out unlocked, and ->name is the
				 * stringified structname.
				 */
				#define RCU_STATE_INITIALIZER(structname) { \
					.name = #structname, \
					.level = { &structname##_state.node[0] }, \
					.levelcnt = { \
						NUM_RCU_LVL_0,  /* root of hierarchy. */ \
						NUM_RCU_LVL_1, \
						NUM_RCU_LVL_2, \
						NUM_RCU_LVL_3, \
						NUM_RCU_LVL_4,  /* == MAX_RCU_LVLS */ \
					}, \
					.gpnum = -300, \
					.completed = -300, \
					.fqs_state = RCU_GP_IDLE, \
					.n_force_qs = 0, \
					.n_force_qs_ngp = 0, \
					.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
					.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
				}
				83
				84	struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
				85	DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
				86
				87	struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
				88	DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
				89
				90	static struct rcu_state *rcu_state;
				91
				92	/*
				93	* The rcu_scheduler_active variable transitions from zero to one just
				94	* before the first task is spawned. So when this variable is zero, RCU
				95	* can assume that there is but one task, allowing RCU to (for example)
				96	* optimize synchronize_sched() to a simple barrier(). When this variable
				97	* is one, RCU must actually do all the hard work required to detect real
				98	* grace periods. This variable is also used to suppress boot-time false
				99	* positives from lockdep-RCU error checking.
				100	*/
				101	int rcu_scheduler_active __read_mostly;
				102	EXPORT_SYMBOL_GPL(rcu_scheduler_active);
				103
				104	/*
				105	* The rcu_scheduler_fully_active variable transitions from zero to one
				106	* during the early_initcall() processing, which is after the scheduler
				107	* is capable of creating new tasks. So RCU processing (for example,
				108	* creating tasks for RCU priority boosting) must be delayed until after
				109	* rcu_scheduler_fully_active transitions from zero to one. We also
				110	* currently delay invocation of any RCU callbacks until after this point.
				111	*
				112	* It might later prove better for people registering RCU callbacks during
				113	* early boot to take responsibility for these callbacks, but one step at
				114	* a time.
				115	*/
				116	static int rcu_scheduler_fully_active __read_mostly;
				117
				118	#ifdef CONFIG_RCU_BOOST
				119
				120	/*
				121	* Control variables for per-CPU and per-rcu_node kthreads. These
				122	* handle all flavors of RCU.
				123	*/
				124	static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
				125	DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
				126	DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
				127	DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
				128	DEFINE_PER_CPU(char, rcu_cpu_has_work);
				129
				130	#endif /* #ifdef CONFIG_RCU_BOOST */
				131
				/*
				 * Forward declarations.  The rcu_state and rcu_data structures are
				 * always handed around by pointer in this file, never by value.
				 */
				static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
				static void invoke_rcu_core(void);
				static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
				135
				136	/*
				137	* Track the rcutorture test sequence number and the update version
				138	* number within a given test. The rcutorture_testseq is incremented
				139	* on every rcutorture module load and unload, so has an odd value
				140	* when a test is running. The rcutorture_vernum is set to zero
				141	* when rcutorture starts and is incremented on each rcutorture update.
				142	* These variables enable correlating rcutorture output with the
				143	* RCU tracing information.
				144	*/
				145	unsigned long rcutorture_testseq;
				146	unsigned long rcutorture_vernum;
				147
				148	/*
				149	* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
				150	* permit this function to be invoked without holding the root rcu_node
				151	* structure's ->lock, but of course results can be subject to change.
				152	*/
				153	static int rcu_gp_in_progress(struct rcu_state *rsp)
				154	{
				155	return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
				156	}
				157
				158	/*
				159	* Note a quiescent state. Because we do not need to know
				160	* how many quiescent states passed, just if there was at least
				161	* one since the start of the grace period, this just sets a flag.
				162	* The caller must have disabled preemption.
				163	*/
				164	void rcu_sched_qs(int cpu)
				165	{
				166	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
				167
				168	rdp->passed_quiesce_gpnum = rdp->gpnum;
				169	barrier();
				170	if (rdp->passed_quiesce == 0)
				171	trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
				172	rdp->passed_quiesce = 1;
				173	}
				174
				175	#ifdef CONFIG_PREEMPT_RT_FULL
				176	static void rcu_preempt_qs(int cpu);
				177
				178	void rcu_bh_qs(int cpu)
				179	{
				180	unsigned long flags;
				181
				182	/* Callers to this function, rcu_preempt_qs(), must disable irqs. */
				183	local_irq_save(flags);
				184	rcu_preempt_qs(cpu);
				185	local_irq_restore(flags);
				186	}
				187	#else
				188	void rcu_bh_qs(int cpu)
				189	{
				190	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
				191
				192	rdp->passed_quiesce_gpnum = rdp->gpnum;
				193	barrier();
				194	if (rdp->passed_quiesce == 0)
				195	trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
				196	rdp->passed_quiesce = 1;
				197	}
				198	#endif
				199
				/*
				 * Tell RCU that @cpu is performing a context switch.  A context switch
				 * is a quiescent state for RCU-sched, and preemptible RCU gets its own
				 * hook for the special handling it needs.  The whole operation is
				 * bracketed by utilization trace events.
				 * The caller must have disabled preemption.
				 */
				void rcu_note_context_switch(int cpu)
				{
					trace_rcu_utilization("Start context switch");

					rcu_sched_qs(cpu);
					rcu_preempt_note_context_switch(cpu);

					trace_rcu_utilization("End context switch");
				}
				EXPORT_SYMBOL_GPL(rcu_note_context_switch);
				213
				214	DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
				215	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
				216	.dynticks = ATOMIC_INIT(1),
				217	};
				218
				219	static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
				220	static long qhimark = 10000; /* If this many pending, ignore blimit. */
				221	static long qlowmark = 100; /* Once only this many pending, use blimit. */
				222
				223	module_param(blimit, long, 0);
				224	module_param(qhimark, long, 0);
				225	module_param(qlowmark, long, 0);
				226
				227	int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
				228	int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
				229
				230	module_param(rcu_cpu_stall_suppress, int, 0644);
				231	module_param(rcu_cpu_stall_timeout, int, 0644);
				232
				233	static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
				234	static int rcu_pending(int cpu);
				235
				236	/*
				237	* Return the number of RCU-sched batches processed thus far for debug & stats.
				238	*/
				239	long rcu_batches_completed_sched(void)
				240	{
				241	return rcu_sched_state.completed;
				242	}
				243	EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
				244
				245	#ifndef CONFIG_PREEMPT_RT_FULL
				246	/*
				247	* Return the number of RCU BH batches processed thus far for debug & stats.
				248	*/
				249	long rcu_batches_completed_bh(void)
				250	{
				251	return rcu_bh_state.completed;
				252	}
				253	EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
				254
				255	/*
				256	* Force a quiescent state for RCU BH.
				257	*/
				258	void rcu_bh_force_quiescent_state(void)
				259	{
				260	force_quiescent_state(&rcu_bh_state, 0);
				261	}
				262	EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
				263	#endif
				264
				265	/*
				266	* Record the number of times rcutorture tests have been initiated and
				267	* terminated. This information allows the debugfs tracing stats to be
				268	* correlated to the rcutorture messages, even when the rcutorture module
				269	* is being repeatedly loaded and unloaded. In other words, we cannot
				270	* store this state in rcutorture itself.
				271	*/
				272	void rcutorture_record_test_transition(void)
				273	{
				274	rcutorture_testseq++;
				275	rcutorture_vernum = 0;
				276	}
				277	EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
				278
				279	/*
				280	* Record the number of writer passes through the current rcutorture test.
				281	* This is also used to correlate debugfs tracing stats with the rcutorture
				282	* messages.
				283	*/
				284	void rcutorture_record_progress(unsigned long vernum)
				285	{
				286	rcutorture_vernum++;
				287	}
				288	EXPORT_SYMBOL_GPL(rcutorture_record_progress);
				289
				290	/*
				291	* Force a quiescent state for RCU-sched.
				292	*/
				293	void rcu_sched_force_quiescent_state(void)
				294	{
				295	force_quiescent_state(&rcu_sched_state, 0);
				296	}
				297	EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
				298
				299	/*
				300	* Does the CPU have callbacks ready to be invoked?
				301	*/
				302	static int
				303	cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
				304	{
				305	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
				306	}
				307
				308	/*
				309	* Does the current CPU require a yet-as-unscheduled grace period?
				310	*/
				311	static int
				312	cpu_needs_another_gp(struct rcu_state rsp, struct rcu_data rdp)
				313	{
				314	return *rdp->nxttail[RCU_DONE_TAIL +
				315	ACCESS_ONCE(rsp->completed) != rdp->completed] &&
				316	!rcu_gp_in_progress(rsp);
				317	}
				318
				319	/*
				320	* Return the root node of the specified rcu_state structure.
				321	*/
				322	static struct rcu_node rcu_get_root(struct rcu_state rsp)
				323	{
				324	return &rsp->node[0];
				325	}
				326
				327	/*
				328	* If the specified CPU is offline, tell the caller that it is in
				329	* a quiescent state. Otherwise, whack it with a reschedule IPI.
				330	* Grace periods can end up waiting on an offline CPU when that
				331	* CPU is in the process of coming online -- it will be added to the
				332	* rcu_node bitmasks before it actually makes it online. The same thing
				333	* can happen while a CPU is in the process of coming online. Because this
				334	* race is quite rare, we check for it after detecting that the grace
				335	* period has been delayed rather than checking each and every CPU
				336	* each and every time we start a new grace period.
				337	*/
				338	static int rcu_implicit_offline_qs(struct rcu_data *rdp)
				339	{
				340	/*
				341	* If the CPU is offline for more than a jiffy, it is in a quiescent
				342	* state. We can trust its state not to change because interrupts
				343	* are disabled. The reason for the jiffy's worth of slack is to
				344	* handle CPUs initializing on the way up and finding their way
				345	* to the idle loop on the way down.
				346	*/
				347	if (cpu_is_offline(rdp->cpu) &&
				348	ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
				349	trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
				350	rdp->offline_fqs++;
				351	return 1;
				352	}
				353	return 0;
				354	}
				355
				356	/*
				357	* rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
				358	*
				359	* If the new value of the ->dynticks_nesting counter now is zero,
				360	* we really have entered idle, and must do the appropriate accounting.
				361	* The caller must have disabled interrupts.
				362	*/
				363	static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
				364	{
				365	trace_rcu_dyntick("Start", oldval, 0);
				366	if (!is_idle_task(current)) {
				367	struct task_struct *idle = idle_task(smp_processor_id());
				368
				369	trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
				370	ftrace_dump(DUMP_ALL);
				371	WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
				372	current->pid, current->comm,
				373	idle->pid, idle->comm); /* must be idle task! */
				374	}
				375	rcu_prepare_for_idle(smp_processor_id());
				376	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
				377	smp_mb__before_atomic_inc(); /* See above. */
				378	atomic_inc(&rdtp->dynticks);
				379	smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
				380	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
				381
				382	/*
				383	* The idle task is not permitted to enter the idle loop while
				384	* in an RCU read-side critical section.
				385	*/
				386	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
				387	"Illegal idle entry in RCU read-side critical section.");
				388	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
				389	"Illegal idle entry in RCU-bh read-side critical section.");
				390	rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
				391	"Illegal idle entry in RCU-sched read-side critical section.");
				392	}
				393
				394	/**
				395	* rcu_idle_enter - inform RCU that current CPU is entering idle
				396	*
				397	* Enter idle mode, in other words, -leave- the mode in which RCU
				398	* read-side critical sections can occur. (Though RCU read-side
				399	* critical sections can occur in irq handlers in idle, a possibility
				400	* handled by irq_enter() and irq_exit().)
				401	*
				402	* We crowbar the ->dynticks_nesting field to zero to allow for
				403	* the possibility of usermode upcalls having messed up our count
				404	* of interrupt nesting level during the prior busy period.
				405	*/
				406	void rcu_idle_enter(void)
				407	{
				408	unsigned long flags;
				409	long long oldval;
				410	struct rcu_dynticks *rdtp;
				411
				412	local_irq_save(flags);
				413	rdtp = &__get_cpu_var(rcu_dynticks);
				414	oldval = rdtp->dynticks_nesting;
				415	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
				416	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
				417	rdtp->dynticks_nesting = 0;
				418	else
				419	rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
				420	rcu_idle_enter_common(rdtp, oldval);
				421	local_irq_restore(flags);
				422	}
				423	EXPORT_SYMBOL_GPL(rcu_idle_enter);
				424
				425	/**
				426	* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
				427	*
				428	* Exit from an interrupt handler, which might possibly result in entering
				429	* idle mode, in other words, leaving the mode in which read-side critical
				430	* sections can occur.
				431	*
				432	* This code assumes that the idle loop never does anything that might
				433	* result in unbalanced calls to irq_enter() and irq_exit(). If your
				434	* architecture violates this assumption, RCU will give you what you
				435	* deserve, good and hard. But very infrequently and irreproducibly.
				436	*
				437	* Use things like work queues to work around this limitation.
				438	*
				439	* You have been warned.
				440	*/
				441	void rcu_irq_exit(void)
				442	{
				443	unsigned long flags;
				444	long long oldval;
				445	struct rcu_dynticks *rdtp;
				446
				447	local_irq_save(flags);
				448	rdtp = &__get_cpu_var(rcu_dynticks);
				449	oldval = rdtp->dynticks_nesting;
				450	rdtp->dynticks_nesting--;
				451	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
				452	if (rdtp->dynticks_nesting)
				453	trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
				454	else
				455	rcu_idle_enter_common(rdtp, oldval);
				456	local_irq_restore(flags);
				457	}
				458
				459	/*
				460	* rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
				461	*
				462	* If the new value of the ->dynticks_nesting counter was previously zero,
				463	* we really have exited idle, and must do the appropriate accounting.
				464	* The caller must have disabled interrupts.
				465	*/
				466	static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
				467	{
				468	smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
				469	atomic_inc(&rdtp->dynticks);
				470	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
				471	smp_mb__after_atomic_inc(); /* See above. */
				472	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
				473	rcu_cleanup_after_idle(smp_processor_id());
				474	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
				475	if (!is_idle_task(current)) {
				476	struct task_struct *idle = idle_task(smp_processor_id());
				477
				478	trace_rcu_dyntick("Error on exit: not idle task",
				479	oldval, rdtp->dynticks_nesting);
				480	ftrace_dump(DUMP_ALL);
				481	WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
				482	current->pid, current->comm,
				483	idle->pid, idle->comm); /* must be idle task! */
				484	}
				485	}
				486
				487	/**
				488	* rcu_idle_exit - inform RCU that current CPU is leaving idle
				489	*
				490	* Exit idle mode, in other words, -enter- the mode in which RCU
				491	* read-side critical sections can occur.
				492	*
				493	* We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
				494	* allow for the possibility of usermode upcalls messing up our count
				495	* of interrupt nesting level during the busy period that is just
				496	* now starting.
				497	*/
				498	void rcu_idle_exit(void)
				499	{
				500	unsigned long flags;
				501	struct rcu_dynticks *rdtp;
				502	long long oldval;
				503
				504	local_irq_save(flags);
				505	rdtp = &__get_cpu_var(rcu_dynticks);
				506	oldval = rdtp->dynticks_nesting;
				507	WARN_ON_ONCE(oldval < 0);
				508	if (oldval & DYNTICK_TASK_NEST_MASK)
				509	rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
				510	else
				511	rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
				512	rcu_idle_exit_common(rdtp, oldval);
				513	local_irq_restore(flags);
				514	}
				515	EXPORT_SYMBOL_GPL(rcu_idle_exit);
				516
				517	/**
				518	* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
				519	*
				520	* Enter an interrupt handler, which might possibly result in exiting
				521	* idle mode, in other words, entering the mode in which read-side critical
				522	* sections can occur.
				523	*
				524	* Note that the Linux kernel is fully capable of entering an interrupt
				525	* handler that it never exits, for example when doing upcalls to
				526	* user mode! This code assumes that the idle loop never does upcalls to
				527	* user mode. If your architecture does do upcalls from the idle loop (or
				528	* does anything else that results in unbalanced calls to the irq_enter()
				529	* and irq_exit() functions), RCU will give you what you deserve, good
				530	* and hard. But very infrequently and irreproducibly.
				531	*
				532	* Use things like work queues to work around this limitation.
				533	*
				534	* You have been warned.
				535	*/
				536	void rcu_irq_enter(void)
				537	{
				538	unsigned long flags;
				539	struct rcu_dynticks *rdtp;
				540	long long oldval;
				541
				542	local_irq_save(flags);
				543	rdtp = &__get_cpu_var(rcu_dynticks);
				544	oldval = rdtp->dynticks_nesting;
				545	rdtp->dynticks_nesting++;
				546	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
				547	if (oldval)
				548	trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
				549	else
				550	rcu_idle_exit_common(rdtp, oldval);
				551	local_irq_restore(flags);
				552	}
				553
				554	/**
				555	* rcu_nmi_enter - inform RCU of entry to NMI context
				556	*
				557	* If the CPU was idle with dynamic ticks active, and there is no
				558	* irq handler running, this updates rdtp->dynticks_nmi to let the
				559	* RCU grace-period handling know that the CPU is active.
				560	*/
				561	void rcu_nmi_enter(void)
				562	{
				563	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
				564
				565	if (rdtp->dynticks_nmi_nesting == 0 &&
				566	(atomic_read(&rdtp->dynticks) & 0x1))
				567	return;
				568	rdtp->dynticks_nmi_nesting++;
				569	smp_mb__before_atomic_inc(); /* Force delay from prior write. */
				570	atomic_inc(&rdtp->dynticks);
				571	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
				572	smp_mb__after_atomic_inc(); /* See above. */
				573	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
				574	}
				575
				576	/**
				577	* rcu_nmi_exit - inform RCU of exit from NMI context
				578	*
				579	* If the CPU was idle with dynamic ticks active, and there is no
				580	* irq handler running, this updates rdtp->dynticks_nmi to let the
				581	* RCU grace-period handling know that the CPU is no longer active.
				582	*/
				583	void rcu_nmi_exit(void)
				584	{
				585	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
				586
				587	if (rdtp->dynticks_nmi_nesting == 0 \|\|
				588	--rdtp->dynticks_nmi_nesting != 0)
				589	return;
				590	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
				591	smp_mb__before_atomic_inc(); /* See above. */
				592	atomic_inc(&rdtp->dynticks);
				593	smp_mb__after_atomic_inc(); /* Force delay to next write. */
				594	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
				595	}
				596
				597	#ifdef CONFIG_PROVE_RCU
				598
				599	/**
				600	* rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
				601	*
				602	* If the current CPU is in its idle loop and is neither in an interrupt
				603	* or NMI handler, return true.
				604	*/
				605	int rcu_is_cpu_idle(void)
				606	{
				607	int ret;
				608
				609	preempt_disable();
				610	ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
				611	preempt_enable();
				612	return ret;
				613	}
				614	EXPORT_SYMBOL(rcu_is_cpu_idle);
				615
				616	#ifdef CONFIG_HOTPLUG_CPU
				617
				618	/*
				619	* Is the current CPU online? Disable preemption to avoid false positives
				620	* that could otherwise happen due to the current CPU number being sampled,
				621	* this task being preempted, its old CPU being taken offline, resuming
				622	* on some other CPU, then determining that its old CPU is now offline.
				623	* It is OK to use RCU on an offline processor during initial boot, hence
				624	* the check for rcu_scheduler_fully_active. Note also that it is OK
				625	* for a CPU coming online to use RCU for one jiffy prior to marking itself
				626	* online in the cpu_online_mask. Similarly, it is OK for a CPU going
				627	* offline to continue to use RCU for one jiffy after marking itself
				628	* offline in the cpu_online_mask. This leniency is necessary given the
				629	* non-atomic nature of the online and offline processing, for example,
				630	* the fact that a CPU enters the scheduler after completing the CPU_DYING
				631	* notifiers.
				632	*
				633	* This is also why RCU internally marks CPUs online during the
				634	* CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
				635	*
				636	* Disable checking if in an NMI handler because we cannot safely report
				637	* errors from NMI handlers anyway.
				638	*/
				639	bool rcu_lockdep_current_cpu_online(void)
				640	{
				641	struct rcu_data *rdp;
				642	struct rcu_node *rnp;
				643	bool ret;
				644
				645	if (in_nmi())
				646	return 1;
				647	preempt_disable();
				648	rdp = &__get_cpu_var(rcu_sched_data);
				649	rnp = rdp->mynode;
				650	ret = (rdp->grpmask & rnp->qsmaskinit) \|\|
				651	!rcu_scheduler_fully_active;
				652	preempt_enable();
				653	return ret;
				654	}
				655	EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
				656
				657	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
				658
				659	#endif /* #ifdef CONFIG_PROVE_RCU */
				660
				661	/**
				662	* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
				663	*
				664	* If the current CPU is idle or running at a first-level (not nested)
				665	* interrupt from idle, return true. The caller must have at least
				666	* disabled preemption.
				667	*/
				668	int rcu_is_cpu_rrupt_from_idle(void)
				669	{
				670	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
				671	}
				672
				673	/*
				674	* Snapshot the specified CPU's dynticks counter so that we can later
				675	* credit them with an implicit quiescent state. Return 1 if this CPU
				676	* is in dynticks idle mode, which is an extended quiescent state.
				677	*/
				678	static int dyntick_save_progress_counter(struct rcu_data *rdp)
				679	{
				680	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
				681	return (rdp->dynticks_snap & 0x1) == 0;
				682	}
				683
				684	/*
				685	* Return true if the specified CPU has passed through a quiescent
				686	* state by virtue of being in or having passed through an dynticks
				687	* idle state since the last call to dyntick_save_progress_counter()
				688	* for this same CPU.
				689	*/
				690	static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
				691	{
				692	unsigned int curr;
				693	unsigned int snap;
				694
				695	curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
				696	snap = (unsigned int)rdp->dynticks_snap;
				697
				698	/*
				699	* If the CPU passed through or entered a dynticks idle phase with
				700	* no active irq/NMI handlers, then we can safely pretend that the CPU
				701	* already acknowledged the request to pass through a quiescent
				702	* state. Either way, that CPU cannot possibly be in an RCU
				703	* read-side critical section that started before the beginning
				704	* of the current RCU grace period.
				705	*/
				706	if ((curr & 0x1) == 0 \|\| UINT_CMP_GE(curr, snap + 2)) {
				707	trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
				708	rdp->dynticks_fqs++;
				709	return 1;
				710	}
				711
				712	/* Go check for the CPU being offline. */
				713	return rcu_implicit_offline_qs(rdp);
				714	}
				715
				716	static int jiffies_till_stall_check(void)
				717	{
				718	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
				719
				720	/*
				721	* Limit check must be consistent with the Kconfig limits
				722	* for CONFIG_RCU_CPU_STALL_TIMEOUT.
				723	*/
				724	if (till_stall_check < 3) {
				725	ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
				726	till_stall_check = 3;
				727	} else if (till_stall_check > 300) {
				728	ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
				729	till_stall_check = 300;
				730	}
				731	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
				732	}
				733
				734	static void record_gp_stall_check_time(struct rcu_state *rsp)
				735	{
				736	rsp->gp_start = jiffies;
				737	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
				738	}
				739
				740	static void print_other_cpu_stall(struct rcu_state *rsp)
				741	{
				742	int cpu;
				743	long delta;
				744	unsigned long flags;
				745	int ndetected;
				746	struct rcu_node *rnp = rcu_get_root(rsp);
				747
				748	/* Only let one CPU complain about others per time interval. */
				749
				750	raw_spin_lock_irqsave(&rnp->lock, flags);
				751	delta = jiffies - rsp->jiffies_stall;
				752	if (delta < RCU_STALL_RAT_DELAY \|\| !rcu_gp_in_progress(rsp)) {
				753	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				754	return;
				755	}
				756	rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
				757	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				758
				759	/*
				760	* OK, time to rat on our buddy...
				761	* See Documentation/RCU/stallwarn.txt for info on how to debug
				762	* RCU CPU stall warnings.
				763	*/
				764	printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
				765	rsp->name);
				766	print_cpu_stall_info_begin();
				767	rcu_for_each_leaf_node(rsp, rnp) {
				768	raw_spin_lock_irqsave(&rnp->lock, flags);
				769	ndetected += rcu_print_task_stall(rnp);
				770	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				771	if (rnp->qsmask == 0)
				772	continue;
				773	for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
				774	if (rnp->qsmask & (1UL << cpu)) {
				775	print_cpu_stall_info(rsp, rnp->grplo + cpu);
				776	ndetected++;
				777	}
				778	}
				779
				780	/*
				781	* Now rat on any tasks that got kicked up to the root rcu_node
				782	* due to CPU offlining.
				783	*/
				784	rnp = rcu_get_root(rsp);
				785	raw_spin_lock_irqsave(&rnp->lock, flags);
				786	ndetected = rcu_print_task_stall(rnp);
				787	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				788
				789	print_cpu_stall_info_end();
				790	printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
				791	smp_processor_id(), (long)(jiffies - rsp->gp_start));
				792	if (ndetected == 0)
				793	printk(KERN_ERR "INFO: Stall ended before state dump start\n");
				794	else if (!trigger_all_cpu_backtrace())
				795	dump_stack();
				796
				797	/* If so configured, complain about tasks blocking the grace period. */
				798
				799	rcu_print_detail_task_stall(rsp);
				800
				801	force_quiescent_state(rsp, 0); /* Kick them all. */
				802	}
				803
				804	static void print_cpu_stall(struct rcu_state *rsp)
				805	{
				806	unsigned long flags;
				807	struct rcu_node *rnp = rcu_get_root(rsp);
				808
				809	/*
				810	* OK, time to rat on ourselves...
				811	* See Documentation/RCU/stallwarn.txt for info on how to debug
				812	* RCU CPU stall warnings.
				813	*/
				814	printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
				815	print_cpu_stall_info_begin();
				816	print_cpu_stall_info(rsp, smp_processor_id());
				817	print_cpu_stall_info_end();
				818	printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
				819	if (!trigger_all_cpu_backtrace())
				820	dump_stack();
				821
				822	raw_spin_lock_irqsave(&rnp->lock, flags);
				823	if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
				824	rsp->jiffies_stall = jiffies +
				825	3 * jiffies_till_stall_check() + 3;
				826	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				827
				828	set_need_resched(); /* kick ourselves to get things going. */
				829	}
				830
				831	static void check_cpu_stall(struct rcu_state rsp, struct rcu_data rdp)
				832	{
				833	unsigned long j;
				834	unsigned long js;
				835	struct rcu_node *rnp;
				836
				837	if (rcu_cpu_stall_suppress)
				838	return;
				839	j = ACCESS_ONCE(jiffies);
				840	js = ACCESS_ONCE(rsp->jiffies_stall);
				841	rnp = rdp->mynode;
				842	if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
				843
				844	/* We haven't checked in, so go dump stack. */
				845	print_cpu_stall(rsp);
				846
				847	} else if (rcu_gp_in_progress(rsp) &&
				848	ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
				849
				850	/* They had a few time units to dump stack, so complain. */
				851	print_other_cpu_stall(rsp);
				852	}
				853	}
				854
				855	static int rcu_panic(struct notifier_block this, unsigned long ev, void ptr)
				856	{
				857	rcu_cpu_stall_suppress = 1;
				858	return NOTIFY_DONE;
				859	}
				860
				861	/**
				862	* rcu_cpu_stall_reset - prevent further stall warnings in current grace period
				863	*
				864	* Set the stall-warning timeout way off into the future, thus preventing
				865	* any RCU CPU stall-warning messages from appearing in the current set of
				866	* RCU grace periods.
				867	*
				868	* The caller must disable hard irqs.
				869	*/
				870	void rcu_cpu_stall_reset(void)
				871	{
				872	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
				873	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
				874	rcu_preempt_stall_reset();
				875	}
				876
				877	static struct notifier_block rcu_panic_block = {
				878	.notifier_call = rcu_panic,
				879	};
				880
				881	static void __init check_cpu_stall_init(void)
				882	{
				883	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
				884	}
				885
				886	/*
				887	* Update CPU-local rcu_data state to record the newly noticed grace period.
				888	* This is used both when we started the grace period and when we notice
				889	* that someone else started the grace period. The caller must hold the
				890	* ->lock of the leaf rcu_node structure corresponding to the current CPU,
				891	* and must have irqs disabled.
				892	*/
				893	static void __note_new_gpnum(struct rcu_state rsp, struct rcu_node rnp, struct rcu_data *rdp)
				894	{
				895	if (rdp->gpnum != rnp->gpnum) {
				896	/*
				897	* If the current grace period is waiting for this CPU,
				898	* set up to detect a quiescent state, otherwise don't
				899	* go looking for one.
				900	*/
				901	rdp->gpnum = rnp->gpnum;
				902	trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
				903	if (rnp->qsmask & rdp->grpmask) {
				904	rdp->qs_pending = 1;
				905	rdp->passed_quiesce = 0;
				906	} else
				907	rdp->qs_pending = 0;
				908	zero_cpu_stall_ticks(rdp);
				909	}
				910	}
				911
				912	static void note_new_gpnum(struct rcu_state rsp, struct rcu_data rdp)
				913	{
				914	unsigned long flags;
				915	struct rcu_node *rnp;
				916
				917	local_irq_save(flags);
				918	rnp = rdp->mynode;
				919	if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) \|\| /* outside lock. */
				920	!raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
				921	local_irq_restore(flags);
				922	return;
				923	}
				924	__note_new_gpnum(rsp, rnp, rdp);
				925	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				926	}
				927
				928	/*
				929	* Did someone else start a new RCU grace period start since we last
				930	* checked? Update local state appropriately if so. Must be called
				931	* on the CPU corresponding to rdp.
				932	*/
				933	static int
				934	check_for_new_grace_period(struct rcu_state rsp, struct rcu_data rdp)
				935	{
				936	unsigned long flags;
				937	int ret = 0;
				938
				939	local_irq_save(flags);
				940	if (rdp->gpnum != rsp->gpnum) {
				941	note_new_gpnum(rsp, rdp);
				942	ret = 1;
				943	}
				944	local_irq_restore(flags);
				945	return ret;
				946	}
				947
				948	/*
				949	* Advance this CPU's callbacks, but only if the current grace period
				950	* has ended. This may be called only from the CPU to whom the rdp
				951	* belongs. In addition, the corresponding leaf rcu_node structure's
				952	* ->lock must be held by the caller, with irqs disabled.
				953	*/
				954	static void
				955	__rcu_process_gp_end(struct rcu_state rsp, struct rcu_node rnp, struct rcu_data *rdp)
				956	{
				957	/* Did another grace period end? */
				958	if (rdp->completed != rnp->completed) {
				959
				960	/* Advance callbacks. No harm if list empty. */
				961	rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
				962	rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
				963	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
				964
				965	/* Remember that we saw this grace-period completion. */
				966	rdp->completed = rnp->completed;
				967	trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
				968
				969	/*
				970	* If we were in an extended quiescent state, we may have
				971	* missed some grace periods that others CPUs handled on
				972	* our behalf. Catch up with this state to avoid noting
				973	* spurious new grace periods. If another grace period
				974	* has started, then rnp->gpnum will have advanced, so
				975	* we will detect this later on.
				976	*/
				977	if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
				978	rdp->gpnum = rdp->completed;
				979
				980	/*
				981	* If RCU does not need a quiescent state from this CPU,
				982	* then make sure that this CPU doesn't go looking for one.
				983	*/
				984	if ((rnp->qsmask & rdp->grpmask) == 0)
				985	rdp->qs_pending = 0;
				986	}
				987	}
				988
				989	/*
				990	* Advance this CPU's callbacks, but only if the current grace period
				991	* has ended. This may be called only from the CPU to whom the rdp
				992	* belongs.
				993	*/
				994	static void
				995	rcu_process_gp_end(struct rcu_state rsp, struct rcu_data rdp)
				996	{
				997	unsigned long flags;
				998	struct rcu_node *rnp;
				999
				1000	local_irq_save(flags);
				1001	rnp = rdp->mynode;
				1002	if (rdp->completed == ACCESS_ONCE(rnp->completed) \|\| /* outside lock. */
				1003	!raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
				1004	local_irq_restore(flags);
				1005	return;
				1006	}
				1007	__rcu_process_gp_end(rsp, rnp, rdp);
				1008	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1009	}
				1010
				1011	/*
				1012	* Do per-CPU grace-period initialization for running CPU. The caller
				1013	* must hold the lock of the leaf rcu_node structure corresponding to
				1014	* this CPU.
				1015	*/
				1016	static void
				1017	rcu_start_gp_per_cpu(struct rcu_state rsp, struct rcu_node rnp, struct rcu_data *rdp)
				1018	{
				1019	/* Prior grace period ended, so advance callbacks for current CPU. */
				1020	__rcu_process_gp_end(rsp, rnp, rdp);
				1021
				1022	/*
				1023	* Because this CPU just now started the new grace period, we know
				1024	* that all of its callbacks will be covered by this upcoming grace
				1025	* period, even the ones that were registered arbitrarily recently.
				1026	* Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
				1027	*
				1028	* Other CPUs cannot be sure exactly when the grace period started.
				1029	* Therefore, their recently registered callbacks must pass through
				1030	* an additional RCU_NEXT_READY stage, so that they will be handled
				1031	* by the next RCU grace period.
				1032	*/
				1033	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
				1034	rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
				1035
				1036	/* Set state so that this CPU will detect the next quiescent state. */
				1037	__note_new_gpnum(rsp, rnp, rdp);
				1038	}
				1039
				1040	/*
				1041	* Start a new RCU grace period if warranted, re-initializing the hierarchy
				1042	* in preparation for detecting the next grace period. The caller must hold
				1043	* the root node's ->lock, which is released before return. Hard irqs must
				1044	* be disabled.
				1045	*
				1046	* Note that it is legal for a dying CPU (which is marked as offline) to
				1047	* invoke this function. This can happen when the dying CPU reports its
				1048	* quiescent state.
				1049	*/
				1050	static void
				1051	rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
				1052	__releases(rcu_get_root(rsp)->lock)
				1053	{
				1054	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
				1055	struct rcu_node *rnp = rcu_get_root(rsp);
				1056
				1057	if (!rcu_scheduler_fully_active \|\|
				1058	!cpu_needs_another_gp(rsp, rdp)) {
				1059	/*
				1060	* Either the scheduler hasn't yet spawned the first
				1061	* non-idle task or this CPU does not need another
				1062	* grace period. Either way, don't start a new grace
				1063	* period.
				1064	*/
				1065	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1066	return;
				1067	}
				1068
				1069	if (rsp->fqs_active) {
				1070	/*
				1071	* This CPU needs a grace period, but force_quiescent_state()
				1072	* is running. Tell it to start one on this CPU's behalf.
				1073	*/
				1074	rsp->fqs_need_gp = 1;
				1075	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1076	return;
				1077	}
				1078
				1079	/* Advance to a new grace period and initialize state. */
				1080	rsp->gpnum++;
				1081	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
				1082	WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
				1083	rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
				1084	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
				1085	record_gp_stall_check_time(rsp);
				1086	raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
				1087
				1088	/* Exclude any concurrent CPU-hotplug operations. */
				1089	raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
				1090
				1091	/*
				1092	* Set the quiescent-state-needed bits in all the rcu_node
				1093	* structures for all currently online CPUs in breadth-first
				1094	* order, starting from the root rcu_node structure. This
				1095	* operation relies on the layout of the hierarchy within the
				1096	* rsp->node[] array. Note that other CPUs will access only
				1097	* the leaves of the hierarchy, which still indicate that no
				1098	* grace period is in progress, at least until the corresponding
				1099	* leaf node has been initialized. In addition, we have excluded
				1100	* CPU-hotplug operations.
				1101	*
				1102	* Note that the grace period cannot complete until we finish
				1103	* the initialization process, as there will be at least one
				1104	* qsmask bit set in the root node until that time, namely the
				1105	* one corresponding to this CPU, due to the fact that we have
				1106	* irqs disabled.
				1107	*/
				1108	rcu_for_each_node_breadth_first(rsp, rnp) {
				1109	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				1110	rcu_preempt_check_blocked_tasks(rnp);
				1111	rnp->qsmask = rnp->qsmaskinit;
				1112	rnp->gpnum = rsp->gpnum;
				1113	rnp->completed = rsp->completed;
				1114	if (rnp == rdp->mynode)
				1115	rcu_start_gp_per_cpu(rsp, rnp, rdp);
				1116	rcu_preempt_boost_start_gp(rnp);
				1117	trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
				1118	rnp->level, rnp->grplo,
				1119	rnp->grphi, rnp->qsmask);
				1120	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1121	}
				1122
				1123	rnp = rcu_get_root(rsp);
				1124	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				1125	rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
				1126	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1127	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
				1128	}
				1129
				1130	/*
				1131	* Report a full set of quiescent states to the specified rcu_state
				1132	* data structure. This involves cleaning up after the prior grace
				1133	* period and letting rcu_start_gp() start up the next grace period
				1134	* if one is needed. Note that the caller must hold rnp->lock, as
				1135	* required by rcu_start_gp(), which will release it.
				1136	*/
				1137	static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
				1138	__releases(rcu_get_root(rsp)->lock)
				1139	{
				1140	unsigned long gp_duration;
				1141	struct rcu_node *rnp = rcu_get_root(rsp);
				1142	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
				1143
				1144	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
				1145
				1146	/*
				1147	* Ensure that all grace-period and pre-grace-period activity
				1148	* is seen before the assignment to rsp->completed.
				1149	*/
				1150	smp_mb(); /* See above block comment. */
				1151	gp_duration = jiffies - rsp->gp_start;
				1152	if (gp_duration > rsp->gp_max)
				1153	rsp->gp_max = gp_duration;
				1154
				1155	/*
				1156	* We know the grace period is complete, but to everyone else
				1157	* it appears to still be ongoing. But it is also the case
				1158	* that to everyone else it looks like there is nothing that
				1159	* they can do to advance the grace period. It is therefore
				1160	* safe for us to drop the lock in order to mark the grace
				1161	* period as completed in all of the rcu_node structures.
				1162	*
				1163	* But if this CPU needs another grace period, it will take
				1164	* care of this while initializing the next grace period.
				1165	* We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
				1166	* because the callbacks have not yet been advanced: Those
				1167	* callbacks are waiting on the grace period that just now
				1168	* completed.
				1169	*/
				1170	if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
				1171	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1172
				1173	/*
				1174	* Propagate new ->completed value to rcu_node structures
				1175	* so that other CPUs don't have to wait until the start
				1176	* of the next grace period to process their callbacks.
				1177	*/
				1178	rcu_for_each_node_breadth_first(rsp, rnp) {
				1179	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				1180	rnp->completed = rsp->gpnum;
				1181	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1182	}
				1183	rnp = rcu_get_root(rsp);
				1184	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				1185	}
				1186
				1187	rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
				1188	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
				1189	rsp->fqs_state = RCU_GP_IDLE;
				1190	rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
				1191	}
				1192
				1193	/*
				1194	* Similar to rcu_report_qs_rdp(), for which it is a helper function.
				1195	* Allows quiescent states for a group of CPUs to be reported at one go
				1196	* to the specified rcu_node structure, though all the CPUs in the group
				1197	* must be represented by the same rcu_node structure (which need not be
				1198	* a leaf rcu_node structure, though it often will be). That structure's
				1199	* lock must be held upon entry, and it is released before return.
				1200	*/
				1201	static void
				1202	rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
				1203	struct rcu_node *rnp, unsigned long flags)
				1204	__releases(rnp->lock)
				1205	{
				1206	struct rcu_node *rnp_c;
				1207
				1208	/* Walk up the rcu_node hierarchy. */
				1209	for (;;) {
				1210	if (!(rnp->qsmask & mask)) {
				1211
				1212	/* Our bit has already been cleared, so done. */
				1213	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1214	return;
				1215	}
				1216	rnp->qsmask &= ~mask;
				1217	trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
				1218	mask, rnp->qsmask, rnp->level,
				1219	rnp->grplo, rnp->grphi,
				1220	!!rnp->gp_tasks);
				1221	if (rnp->qsmask != 0 \|\| rcu_preempt_blocked_readers_cgp(rnp)) {
				1222
				1223	/* Other bits still set at this level, so done. */
				1224	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1225	return;
				1226	}
				1227	mask = rnp->grpmask;
				1228	if (rnp->parent == NULL) {
				1229
				1230	/* No more levels. Exit loop holding root lock. */
				1231
				1232	break;
				1233	}
				1234	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1235	rnp_c = rnp;
				1236	rnp = rnp->parent;
				1237	raw_spin_lock_irqsave(&rnp->lock, flags);
				1238	WARN_ON_ONCE(rnp_c->qsmask);
				1239	}
				1240
				1241	/*
				1242	* Get here if we are the last CPU to pass through a quiescent
				1243	* state for this grace period. Invoke rcu_report_qs_rsp()
				1244	* to clean up and start the next grace period if one is needed.
				1245	*/
				1246	rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
				1247	}
				1248
				1249	/*
				1250	* Record a quiescent state for the specified CPU to that CPU's rcu_data
				1251	* structure. This must be either called from the specified CPU, or
				1252	* called when the specified CPU is known to be offline (and when it is
				1253	* also known that no other CPU is concurrently trying to help the offline
				1254	* CPU). The lastcomp argument is used to make sure we are still in the
				1255	* grace period of interest. We don't want to end the current grace period
				1256	* based on quiescent states detected in an earlier grace period!
				1257	*/
				1258	static void
				1259	rcu_report_qs_rdp(int cpu, struct rcu_state rsp, struct rcu_data rdp, long lastgp)
				1260	{
				1261	unsigned long flags;
				1262	unsigned long mask;
				1263	struct rcu_node *rnp;
				1264
				1265	rnp = rdp->mynode;
				1266	raw_spin_lock_irqsave(&rnp->lock, flags);
				1267	if (lastgp != rnp->gpnum \|\| rnp->completed == rnp->gpnum) {
				1268
				1269	/*
				1270	* The grace period in which this quiescent state was
				1271	* recorded has ended, so don't report it upwards.
				1272	* We will instead need a new quiescent state that lies
				1273	* within the current grace period.
				1274	*/
				1275	rdp->passed_quiesce = 0; /* need qs for new gp. */
				1276	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1277	return;
				1278	}
				1279	mask = rdp->grpmask;
				1280	if ((rnp->qsmask & mask) == 0) {
				1281	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1282	} else {
				1283	rdp->qs_pending = 0;
				1284
				1285	/*
				1286	* This GP can't end until cpu checks in, so all of our
				1287	* callbacks can be processed during the next GP.
				1288	*/
				1289	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
				1290
				1291	rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
				1292	}
				1293	}
				1294
				1295	/*
				1296	* Check to see if there is a new grace period of which this CPU
				1297	* is not yet aware, and if so, set up local rcu_data state for it.
				1298	* Otherwise, see if this CPU has just passed through its first
				1299	* quiescent state for this grace period, and record that fact if so.
				1300	*/
				1301	static void
				1302	rcu_check_quiescent_state(struct rcu_state rsp, struct rcu_data rdp)
				1303	{
				1304	/* If there is now a new grace period, record and return. */
				1305	if (check_for_new_grace_period(rsp, rdp))
				1306	return;
				1307
				1308	/*
				1309	* Does this CPU still need to do its part for current grace period?
				1310	* If no, return and let the other CPUs do their part as well.
				1311	*/
				1312	if (!rdp->qs_pending)
				1313	return;
				1314
				1315	/*
				1316	* Was there a quiescent state since the beginning of the grace
				1317	* period? If no, then exit and wait for the next call.
				1318	*/
				1319	if (!rdp->passed_quiesce)
				1320	return;
				1321
				1322	/*
				1323	* Tell RCU we are done (but rcu_report_qs_rdp() will be the
				1324	* judge of that).
				1325	*/
				1326	rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum);
				1327	}
				1328
				1329	#ifdef CONFIG_HOTPLUG_CPU
				1330
				1331	/*
				1332	* Move a dying CPU's RCU callbacks to online CPU's callback list.
				1333	* Also record a quiescent state for this CPU for the current grace period.
				1334	* Synchronization and interrupt disabling are not required because
				1335	* this function executes in stop_machine() context. Therefore, cleanup
				1336	* operations that might block must be done later from the CPU_DEAD
				1337	* notifier.
				1338	*
				1339	* Note that the outgoing CPU's bit has already been cleared in the
				1340	* cpu_online_mask. This allows us to randomly pick a callback
				1341	* destination from the bits set in that mask.
				1342	*/
				1343	static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
				1344	{
				1345	int i;
				1346	unsigned long mask;
				1347	int receive_cpu = cpumask_any(cpu_online_mask);
				1348	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
				1349	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
				1350	RCU_TRACE(struct rcu_node rnp = rdp->mynode); / For dying CPU. */
				1351
				1352	/* First, adjust the counts. */
				1353	if (rdp->nxtlist != NULL) {
				1354	receive_rdp->qlen_lazy += rdp->qlen_lazy;
				1355	receive_rdp->qlen += rdp->qlen;
				1356	rdp->qlen_lazy = 0;
				1357	rdp->qlen = 0;
				1358	}
				1359
				1360	/*
				1361	* Next, move ready-to-invoke callbacks to be invoked on some
				1362	* other CPU. These will not be required to pass through another
				1363	* grace period: They are done, regardless of CPU.
				1364	*/
				1365	if (rdp->nxtlist != NULL &&
				1366	rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
				1367	struct rcu_head *oldhead;
				1368	struct rcu_head **oldtail;
				1369	struct rcu_head **newtail;
				1370
				1371	oldhead = rdp->nxtlist;
				1372	oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
				1373	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
				1374	rdp->nxttail[RCU_DONE_TAIL] = oldtail;
				1375	*receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
				1376	newtail = rdp->nxttail[RCU_DONE_TAIL];
				1377	for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
				1378	if (receive_rdp->nxttail[i] == oldtail)
				1379	receive_rdp->nxttail[i] = newtail;
				1380	if (rdp->nxttail[i] == newtail)
				1381	rdp->nxttail[i] = &rdp->nxtlist;
				1382	}
				1383	}
				1384
				1385	/*
				1386	* Finally, put the rest of the callbacks at the end of the list.
				1387	* The ones that made it partway through get to start over: We
				1388	* cannot assume that grace periods are synchronized across CPUs.
				1389	* (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
				1390	* this does not seem compelling. Not yet, anyway.)
				1391	*/
				1392	if (rdp->nxtlist != NULL) {
				1393	*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
				1394	receive_rdp->nxttail[RCU_NEXT_TAIL] =
				1395	rdp->nxttail[RCU_NEXT_TAIL];
				1396	receive_rdp->n_cbs_adopted += rdp->qlen;
				1397	rdp->n_cbs_orphaned += rdp->qlen;
				1398
				1399	rdp->nxtlist = NULL;
				1400	for (i = 0; i < RCU_NEXT_SIZE; i++)
				1401	rdp->nxttail[i] = &rdp->nxtlist;
				1402	}
				1403
				1404	/*
				1405	* Record a quiescent state for the dying CPU. This is safe
				1406	* only because we have already cleared out the callbacks.
				1407	* (Otherwise, the RCU core might try to schedule the invocation
				1408	* of callbacks on this now-offline CPU, which would be bad.)
				1409	*/
				1410	mask = rdp->grpmask; /* rnp->grplo is constant. */
				1411	trace_rcu_grace_period(rsp->name,
				1412	rnp->gpnum + 1 - !!(rnp->qsmask & mask),
				1413	"cpuofl");
				1414	rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
				1415	/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
				1416	}
				1417
				1418	/*
				1419	* The CPU has been completely removed, and some other CPU is reporting
				1420	* this fact from process context. Do the remainder of the cleanup.
				1421	* There can only be one CPU hotplug operation at a time, so no other
				1422	* CPU can be attempting to update rcu_cpu_kthread_task.
				1423	*/
				1424	static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
				1425	{
				1426	unsigned long flags;
				1427	unsigned long mask;
				1428	int need_report = 0;
				1429	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
				1430	struct rcu_node rnp = rdp->mynode; / Outgoing CPU's rnp. */
				1431
				1432	/* Adjust any no-longer-needed kthreads. */
				1433	rcu_stop_cpu_kthread(cpu);
				1434	rcu_node_kthread_setaffinity(rnp, -1);
				1435
				1436	/* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
				1437
				1438	/* Exclude any attempts to start a new grace period. */
				1439	raw_spin_lock_irqsave(&rsp->onofflock, flags);
				1440
				1441	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
				1442	mask = rdp->grpmask; /* rnp->grplo is constant. */
				1443	do {
				1444	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				1445	rnp->qsmaskinit &= ~mask;
				1446	if (rnp->qsmaskinit != 0) {
				1447	if (rnp != rdp->mynode)
				1448	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1449	break;
				1450	}
				1451	if (rnp == rdp->mynode)
				1452	need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
				1453	else
				1454	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				1455	mask = rnp->grpmask;
				1456	rnp = rnp->parent;
				1457	} while (rnp != NULL);
				1458
				1459	/*
				1460	* We still hold the leaf rcu_node structure lock here, and
				1461	* irqs are still disabled. The reason for this subterfuge is
				1462	* because invoking rcu_report_unblock_qs_rnp() with ->onofflock
				1463	* held leads to deadlock.
				1464	*/
				1465	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
				1466	rnp = rdp->mynode;
				1467	if (need_report & RCU_OFL_TASKS_NORM_GP)
				1468	rcu_report_unblock_qs_rnp(rnp, flags);
				1469	else
				1470	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1471	if (need_report & RCU_OFL_TASKS_EXP_GP)
				1472	rcu_report_exp_rnp(rsp, rnp, true);
				1473	}
				1474
				1475	#else /* #ifdef CONFIG_HOTPLUG_CPU */
				1476
				1477	static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
				1478	{
				1479	}
				1480
				1481	static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
				1482	{
				1483	}
				1484
				1485	#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
				1486
				1487	/*
				1488	* Invoke any RCU callbacks that have made it to the end of their grace
				1489	* period. Thottle as specified by rdp->blimit.
				1490	*/
				1491	static void rcu_do_batch(struct rcu_state rsp, struct rcu_data rdp)
				1492	{
				1493	unsigned long flags;
				1494	struct rcu_head next, list, **tail;
				1495	long bl, count, count_lazy;
				1496
				1497	/* If no callbacks are ready, just return.*/
				1498	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
				1499	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
				1500	trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
				1501	need_resched(), is_idle_task(current),
				1502	rcu_is_callbacks_kthread());
				1503	return;
				1504	}
				1505
				1506	/*
				1507	* Extract the list of ready callbacks, disabling to prevent
				1508	* races with call_rcu() from interrupt handlers.
				1509	*/
				1510	local_irq_save(flags);
				1511	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
				1512	bl = rdp->blimit;
				1513	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
				1514	list = rdp->nxtlist;
				1515	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
				1516	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
				1517	tail = rdp->nxttail[RCU_DONE_TAIL];
				1518	for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
				1519	if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
				1520	rdp->nxttail[count] = &rdp->nxtlist;
				1521	local_irq_restore(flags);
				1522
				1523	/* Invoke callbacks. */
				1524	count = count_lazy = 0;
				1525	while (list) {
				1526	next = list->next;
				1527	prefetch(next);
				1528	debug_rcu_head_unqueue(list);
				1529	if (__rcu_reclaim(rsp->name, list))
				1530	count_lazy++;
				1531	list = next;
				1532	/* Stop only if limit reached and CPU has something to do. */
				1533	if (++count >= bl &&
				1534	(need_resched() \|\|
				1535	(!is_idle_task(current) && !rcu_is_callbacks_kthread())))
				1536	break;
				1537	}
				1538
				1539	local_irq_save(flags);
				1540	trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
				1541	is_idle_task(current),
				1542	rcu_is_callbacks_kthread());
				1543
				1544	/* Update count, and requeue any remaining callbacks. */
				1545	rdp->qlen_lazy -= count_lazy;
				1546	rdp->qlen -= count;
				1547	rdp->n_cbs_invoked += count;
				1548	if (list != NULL) {
				1549	*tail = rdp->nxtlist;
				1550	rdp->nxtlist = list;
				1551	for (count = 0; count < RCU_NEXT_SIZE; count++)
				1552	if (&rdp->nxtlist == rdp->nxttail[count])
				1553	rdp->nxttail[count] = tail;
				1554	else
				1555	break;
				1556	}
				1557
				1558	/* Reinstate batch limit if we have worked down the excess. */
				1559	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
				1560	rdp->blimit = blimit;
				1561
				1562	/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
				1563	if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
				1564	rdp->qlen_last_fqs_check = 0;
				1565	rdp->n_force_qs_snap = rsp->n_force_qs;
				1566	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
				1567	rdp->qlen_last_fqs_check = rdp->qlen;
				1568
				1569	local_irq_restore(flags);
				1570
				1571	/* Re-invoke RCU core processing if there are callbacks remaining. */
				1572	if (cpu_has_callbacks_ready_to_invoke(rdp))
				1573	invoke_rcu_core();
				1574	}
				1575
				1576	/*
				1577	* Check to see if this CPU is in a non-context-switch quiescent state
				1578	* (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
				1579	* Also schedule RCU core processing.
				1580	*
				1581	* This function must be called from hardirq context. It is normally
				1582	* invoked from the scheduling-clock interrupt. If rcu_pending returns
				1583	* false, there is no point in invoking rcu_check_callbacks().
				1584	*/
				1585	void rcu_check_callbacks(int cpu, int user)
				1586	{
				1587	trace_rcu_utilization("Start scheduler-tick");
				1588	increment_cpu_stall_ticks();
				1589	if (user \|\| rcu_is_cpu_rrupt_from_idle()) {
				1590
				1591	/*
				1592	* Get here if this CPU took its interrupt from user
				1593	* mode or from the idle loop, and if this is not a
				1594	* nested interrupt. In this case, the CPU is in
				1595	* a quiescent state, so note it.
				1596	*
				1597	* No memory barrier is required here because both
				1598	* rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
				1599	* variables that other CPUs neither access nor modify,
				1600	* at least not while the corresponding CPU is online.
				1601	*/
				1602
				1603	rcu_sched_qs(cpu);
				1604	rcu_bh_qs(cpu);
				1605
				1606	} else if (!in_softirq()) {
				1607
				1608	/*
				1609	* Get here if this CPU did not take its interrupt from
				1610	* softirq, in other words, if it is not interrupting
				1611	* a rcu_bh read-side critical section. This is an _bh
				1612	* critical section, so note it.
				1613	*/
				1614
				1615	rcu_bh_qs(cpu);
				1616	}
				1617	rcu_preempt_check_callbacks(cpu);
				1618	if (rcu_pending(cpu))
				1619	invoke_rcu_core();
				1620	trace_rcu_utilization("End scheduler-tick");
				1621	}
				1622
				1623	/*
				1624	* Scan the leaf rcu_node structures, processing dyntick state for any that
				1625	* have not yet encountered a quiescent state, using the function specified.
				1626	* Also initiate boosting for any threads blocked on the root rcu_node.
				1627	*
				1628	* The caller must have suppressed start of new grace periods.
				1629	*/
				1630	static void force_qs_rnp(struct rcu_state rsp, int (f)(struct rcu_data *))
				1631	{
				1632	unsigned long bit;
				1633	int cpu;
				1634	unsigned long flags;
				1635	unsigned long mask;
				1636	struct rcu_node *rnp;
				1637
				1638	rcu_for_each_leaf_node(rsp, rnp) {
				1639	mask = 0;
				1640	raw_spin_lock_irqsave(&rnp->lock, flags);
				1641	if (!rcu_gp_in_progress(rsp)) {
				1642	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1643	return;
				1644	}
				1645	if (rnp->qsmask == 0) {
				1646	rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
				1647	continue;
				1648	}
				1649	cpu = rnp->grplo;
				1650	bit = 1;
				1651	for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
				1652	if ((rnp->qsmask & bit) != 0 &&
				1653	f(per_cpu_ptr(rsp->rda, cpu)))
				1654	mask \|= bit;
				1655	}
				1656	if (mask != 0) {
				1657
				1658	/* rcu_report_qs_rnp() releases rnp->lock. */
				1659	rcu_report_qs_rnp(mask, rsp, rnp, flags);
				1660	continue;
				1661	}
				1662	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				1663	}
				1664	rnp = rcu_get_root(rsp);
				1665	if (rnp->qsmask == 0) {
				1666	raw_spin_lock_irqsave(&rnp->lock, flags);
				1667	rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
				1668	}
				1669	}
				1670
				1671	/*
				1672	* Force quiescent states on reluctant CPUs, and also detect which
				1673	* CPUs are in dyntick-idle mode.
					*
					* @rsp: RCU flavor whose current grace period is to be pushed along.
					* @relaxed: if nonzero, return without scanning while
					*	rsp->jiffies_force_qs has not yet passed; if zero, scan
					*	regardless of that deadline.
					*
					* Only one caller at a time does the work: rsp->fqslock is taken with
					* a trylock, and a caller that loses the race bumps rsp->n_force_qs_lh
					* and returns. The root rcu_node lock is held for the state checks but
					* is dropped around each force_qs_rnp() scan and retaken afterwards.
				1674	*/
				1675	static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
				1676	{
				1677	unsigned long flags;
				1678	struct rcu_node *rnp = rcu_get_root(rsp);
				1679
				1680	trace_rcu_utilization("Start fqs");
				1681	if (!rcu_gp_in_progress(rsp)) {
				1682	trace_rcu_utilization("End fqs");
				1683	return; /* No grace period in progress, nothing to force. */
				1684	}
				1685	if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
				1686	rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
				1687	trace_rcu_utilization("End fqs");
				1688	return; /* Someone else is already on the job. */
				1689	}
				1690	if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
				1691	goto unlock_fqs_ret; /* no emergency and done recently. */
				1692	rsp->n_force_qs++;
				1693	raw_spin_lock(&rnp->lock); /* irqs already disabled */
					/* Push the next forcing deadline out before re-checking for a GP. */
				1694	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
				1695	if(!rcu_gp_in_progress(rsp)) {
				1696	rsp->n_force_qs_ngp++;
				1697	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
				1698	goto unlock_fqs_ret; /* no GP in progress, time updated. */
				1699	}
				1700	rsp->fqs_active = 1; /* scan in flight; cleared again after the switch */
				1701	switch (rsp->fqs_state) {
				1702	case RCU_GP_IDLE:
				1703	case RCU_GP_INIT:
				1704
				1705	break; /* grace period idle or initializing, ignore. */
				1706
				1707	case RCU_SAVE_DYNTICK:
				1708	if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
				1709	break; /* So gcc recognizes the dead code. */
				1710
				1711	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
				1712
				1713	/* Record dyntick-idle state. */
				1714	force_qs_rnp(rsp, dyntick_save_progress_counter);
				1715	raw_spin_lock(&rnp->lock); /* irqs already disabled */
				1716	if (rcu_gp_in_progress(rsp))
				1717	rsp->fqs_state = RCU_FORCE_QS; /* a later call takes the RCU_FORCE_QS case */
				1718	break;
				1719
				1720	case RCU_FORCE_QS:
				1721
				1722	/* Check dyntick-idle state, send IPI to laggards. */
				1723	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
				1724	force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
				1725
				1726	/* Leave state in case more forcing is required. */
				1727
				1728	raw_spin_lock(&rnp->lock); /* irqs already disabled */
				1729	break;
				1730	}
				1731	rsp->fqs_active = 0;
					/*
					* If fqs_need_gp is set, clear it and start a grace period from here.
					* fqslock is dropped first without touching irq state, and
					* rcu_start_gp() is then responsible for releasing rnp->lock.
					* NOTE(review): presumably rcu_start_gp() also restores irqs from
					* flags -- confirm against its definition.
					*/
				1732	if (rsp->fqs_need_gp) {
				1733	raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
				1734	rsp->fqs_need_gp = 0;
				1735	rcu_start_gp(rsp, flags); /* releases rnp->lock */
				1736	trace_rcu_utilization("End fqs");
				1737	return;
				1738	}
				1739	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
				1740	unlock_fqs_ret:
				1741	raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
				1742	trace_rcu_utilization("End fqs");
				1743	}
				1744
				1745	/*
				1746	* This does the RCU core processing work for the specified rcu_state
				1747	* and rcu_data structures. This may be called only from the CPU to
				1748	* whom the rdp belongs.
				1749	*/
				1750	static void
				1751	__rcu_process_callbacks(struct rcu_state rsp, struct rcu_data rdp)
				1752	{
				1753	unsigned long flags;
				1754
				1755	WARN_ON_ONCE(rdp->beenonline == 0);
				1756
				1757	/*
				1758	* If an RCU GP has gone long enough, go check for dyntick
				1759	* idle CPUs and, if needed, send resched IPIs.
				1760	*/
				1761	if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
				1762	force_quiescent_state(rsp, 1);
				1763
				1764	/*
				1765	* Advance callbacks in response to end of earlier grace
				1766	* period that some other CPU ended.
				1767	*/
				1768	rcu_process_gp_end(rsp, rdp);
				1769
				1770	/* Update RCU state based on any recent quiescent states. */
				1771	rcu_check_quiescent_state(rsp, rdp);
				1772
				1773	/* Does this CPU require a not-yet-started grace period? */
				1774	if (cpu_needs_another_gp(rsp, rdp)) {
				1775	raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
				1776	rcu_start_gp(rsp, flags); /* releases above lock */
				1777	}
				1778
				1779	/* If there are callbacks ready, invoke them. */
				1780	if (cpu_has_callbacks_ready_to_invoke(rdp))
				1781	invoke_rcu_callbacks(rsp, rdp);
				1782	}
				1783
				1784	/*
				1785	* Do RCU core processing for the current CPU.
				1786	*/
				1787	static void rcu_process_callbacks(struct softirq_action *unused)
				1788	{
				1789	trace_rcu_utilization("Start RCU core");
				1790	__rcu_process_callbacks(&rcu_sched_state,
				1791	&__get_cpu_var(rcu_sched_data));
				1792	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
				1793	rcu_preempt_process_callbacks();
				1794	trace_rcu_utilization("End RCU core");
				1795	}
				1796
				1797	/*
				1798	* Schedule RCU callback invocation. If the specified type of RCU
				1799	* does not support RCU priority boosting, just do a direct call,
				1800	* otherwise wake up the per-CPU kernel kthread. Note that because we
				1801	* are running on the current CPU with interrupts disabled, the
				1802	* rcu_cpu_kthread_task cannot disappear out from under us.
				1803	*/
				1804	static void invoke_rcu_callbacks(struct rcu_state rsp, struct rcu_data rdp)
				1805	{
				1806	if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
				1807	return;
				1808	if (likely(!rsp->boost)) {
				1809	rcu_do_batch(rsp, rdp);
				1810	return;
				1811	}
				1812	invoke_rcu_callbacks_kthread();
				1813	}
				1814
				1815	static void invoke_rcu_core(void)
				1816	{
				1817	raise_softirq(RCU_SOFTIRQ);
				1818	}
				1819
				1820	static void
				1821	__call_rcu(struct rcu_head head, void (func)(struct rcu_head *rcu),
				1822	struct rcu_state *rsp, bool lazy)
				1823	{
				1824	unsigned long flags;
				1825	struct rcu_data *rdp;
				1826
				1827	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
				1828	debug_rcu_head_queue(head);
				1829	head->func = func;
				1830	head->next = NULL;
				1831
				1832	smp_mb(); /* Ensure RCU update seen before callback registry. */
				1833
				1834	/*
				1835	* Opportunistically note grace-period endings and beginnings.
				1836	* Note that we might see a beginning right after we see an
				1837	* end, but never vice versa, since this CPU has to pass through
				1838	* a quiescent state betweentimes.
				1839	*/
				1840	local_irq_save(flags);
				1841	rdp = this_cpu_ptr(rsp->rda);
				1842
				1843	/* Add the callback to our list. */
				1844	*rdp->nxttail[RCU_NEXT_TAIL] = head;
				1845	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
				1846	rdp->qlen++;
				1847	if (lazy)
				1848	rdp->qlen_lazy++;
				1849
				1850	if (__is_kfree_rcu_offset((unsigned long)func))
				1851	trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
				1852	rdp->qlen_lazy, rdp->qlen);
				1853	else
				1854	trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
				1855
				1856	/* If interrupts were disabled, don't dive into RCU core. */
				1857	if (irqs_disabled_flags(flags)) {
				1858	local_irq_restore(flags);
				1859	return;
				1860	}
				1861
				1862	/*
				1863	* Force the grace period if too many callbacks or too long waiting.
				1864	* Enforce hysteresis, and don't invoke force_quiescent_state()
				1865	* if some other CPU has recently done so. Also, don't bother
				1866	* invoking force_quiescent_state() if the newly enqueued callback
				1867	* is the only one waiting for a grace period to complete.
				1868	*/
				1869	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
				1870
				1871	/* Are we ignoring a completed grace period? */
				1872	rcu_process_gp_end(rsp, rdp);
				1873	check_for_new_grace_period(rsp, rdp);
				1874
				1875	/* Start a new grace period if one not already started. */
				1876	if (!rcu_gp_in_progress(rsp)) {
				1877	unsigned long nestflag;
				1878	struct rcu_node *rnp_root = rcu_get_root(rsp);
				1879
				1880	raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
				1881	rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
				1882	} else {
				1883	/* Give the grace period a kick. */
				1884	rdp->blimit = LONG_MAX;
				1885	if (rsp->n_force_qs == rdp->n_force_qs_snap &&
				1886	*rdp->nxttail[RCU_DONE_TAIL] != head)
				1887	force_quiescent_state(rsp, 0);
				1888	rdp->n_force_qs_snap = rsp->n_force_qs;
				1889	rdp->qlen_last_fqs_check = rdp->qlen;
				1890	}
				1891	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
				1892	force_quiescent_state(rsp, 1);
				1893	local_irq_restore(flags);
				1894	}
				1895
				1896	/*
				1897	* Queue an RCU-sched callback for invocation after a grace period.
				1898	*/
				1899	void call_rcu_sched(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1900	{
				1901	__call_rcu(head, func, &rcu_sched_state, 0);
				1902	}
				1903	EXPORT_SYMBOL_GPL(call_rcu_sched);
				1904
				1905	#ifndef CONFIG_PREEMPT_RT_FULL
				1906	/*
				1907	* Queue an RCU callback for invocation after a quicker grace period.
				1908	*/
				1909	void call_rcu_bh(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1910	{
				1911	__call_rcu(head, func, &rcu_bh_state, 0);
				1912	}
				1913	EXPORT_SYMBOL_GPL(call_rcu_bh);
				1914	#endif
				1915
				1916	/**
				1917	* synchronize_sched - wait until an rcu-sched grace period has elapsed.
				1918	*
				1919	* Control will return to the caller some time after a full rcu-sched
				1920	* grace period has elapsed, in other words after all currently executing
				1921	* rcu-sched read-side critical sections have completed. These read-side
				1922	* critical sections are delimited by rcu_read_lock_sched() and
				1923	* rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
				1924	* local_irq_disable(), and so on may be used in place of
				1925	* rcu_read_lock_sched().
				1926	*
				1927	* This means that all preempt_disable code sequences, including NMI and
				1928	* hardware-interrupt handlers, in progress on entry will have completed
				1929	* before this primitive returns. However, this does not guarantee that
				1930	* softirq handlers will have completed, since in some kernels, these
				1931	* handlers can run in process context, and can block.
				1932	*
				1933	* This primitive provides the guarantees made by the (now removed)
				1934	* synchronize_kernel() API. In contrast, synchronize_rcu() only
				1935	* guarantees that rcu_read_lock() sections will have completed.
				1936	* In "classic RCU", these two guarantees happen to be one and
				1937	* the same, but can differ in realtime RCU implementations.
				1938	*/
				1939	void synchronize_sched(void)
				1940	{
				1941	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
				1942	!lock_is_held(&rcu_lock_map) &&
				1943	!lock_is_held(&rcu_sched_lock_map),
				1944	"Illegal synchronize_sched() in RCU-sched read-side critical section");
				1945	if (rcu_blocking_is_gp())
				1946	return;
				1947	wait_rcu_gp(call_rcu_sched);
				1948	}
				1949	EXPORT_SYMBOL_GPL(synchronize_sched);
				1950
				1951	#ifndef CONFIG_PREEMPT_RT_FULL
				1952	/**
				1953	* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
				1954	*
				1955	* Control will return to the caller some time after a full rcu_bh grace
				1956	* period has elapsed, in other words after all currently executing rcu_bh
				1957	* read-side critical sections have completed. RCU read-side critical
				1958	* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
				1959	* and may be nested.
				1960	*/
				1961	void synchronize_rcu_bh(void)
				1962	{
				1963	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
				1964	!lock_is_held(&rcu_lock_map) &&
				1965	!lock_is_held(&rcu_sched_lock_map),
				1966	"Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
				1967	if (rcu_blocking_is_gp())
				1968	return;
				1969	wait_rcu_gp(call_rcu_bh);
				1970	}
				1971	EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
				1972	#endif
				1973
				1974	static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); /* incremented by each synchronize_sched_expedited() caller on entry */
				1975	static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); /* advanced to a caller's snapshot once its try_stop_cpus() pass has succeeded */
				1976
				/*
				 * Per-CPU stop function handed to try_stop_cpus() by
				 * synchronize_sched_expedited().  @data is unused; always returns 0.
				 */
				static int synchronize_sched_expedited_cpu_stop(void *data)
				{
					/*
					 * Each affected CPU must execute a full memory barrier at some
					 * point between the call to try_stop_cpus() and its return.
					 *
					 * With the current initial implementation of cpu_stop that
					 * requirement already holds by the time control gets here, so
					 * the smp_mb() below is not strictly needed.  It is kept as
					 * documentation and as protection against future changes to
					 * the cpu_stop implementation.
					 */
					smp_mb(); /* See above comment block. */

					return 0;
				}
				1993
				1994	/**
				1995	* synchronize_sched_expedited - Brute-force RCU-sched grace period
				1996	*
				1997	* Wait for an RCU-sched grace period to elapse, but use a "big hammer"
				1998	* approach to force the grace period to end quickly. This consumes
				1999	* significant time on all CPUs and is unfriendly to real-time workloads,
				2000	* so is thus not recommended for any sort of common-case code. In fact,
				2001	* if you are using synchronize_sched_expedited() in a loop, please
				2002	* restructure your code to batch your updates, and then use a single
				2003	* synchronize_sched() instead.
				2004	*
				2005	* Note that it is illegal to call this function while holding any lock
				2006	* that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
				2007	* to call this function from a CPU-hotplug notifier. Failing to observe
				2008	* these restrictions will result in deadlock.
				2009	*
				2010	* This implementation can be thought of as an application of ticket
				2011	* locking to RCU, with sync_sched_expedited_started and
				2012	* sync_sched_expedited_done taking on the roles of the halves
				2013	* of the ticket-lock word. Each task atomically increments
				2014	* sync_sched_expedited_started upon entry, snapshotting the old value,
				2015	* then attempts to stop all the CPUs. If this succeeds, then each
				2016	* CPU will have executed a context switch, resulting in an RCU-sched
				2017	* grace period. We are then done, so we use atomic_cmpxchg() to
				2018	* update sync_sched_expedited_done to match our snapshot -- but
				2019	* only if someone else has not already advanced past our snapshot.
				2020	*
				2021	* On the other hand, if try_stop_cpus() fails, we check the value
				2022	* of sync_sched_expedited_done. If it has advanced past our
				2023	* initial snapshot, then someone else must have forced a grace period
				2024	* some time after we took our snapshot. In this case, our work is
				2025	* done for us, and we can simply return. Otherwise, we try again,
				2026	* but keep our initial snapshot for purposes of checking for someone
				2027	* doing our work for us.
				2028	*
				2029	* If we fail too many times in a row, we fall back to synchronize_sched().
				2030	*/
				2031	void synchronize_sched_expedited(void)
				2032	{
					/*
					* firstsnap: ticket taken on entry, never refreshed.
					* snap: latest value read from sync_sched_expedited_started.
					* s: latest value read from sync_sched_expedited_done.
					* trycount: number of failed try_stop_cpus() attempts so far.
					*/
				2033	int firstsnap, s, snap, trycount = 0;
				2034
				2035	/* Note that atomic_inc_return() implies full memory barrier. */
				2036	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
				2037	get_online_cpus();
				2038	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
				2039
				2040	/*
				2041	* Each pass through the following loop attempts to force a
				2042	* context switch on each CPU.
				2043	*/
				2044	while (try_stop_cpus(cpu_online_mask,
				2045	synchronize_sched_expedited_cpu_stop,
				2046	NULL) == -EAGAIN) {
				2047	put_online_cpus(); /* dropped while backing off; retaken before the next attempt */
				2048
				2049	/* No joy, try again later. Or just synchronize_sched(). */
				2050	if (trycount++ < 10)
				2051	udelay(trycount * num_online_cpus());
				2052	else {
				2053	synchronize_sched();
				2054	return; /* put_online_cpus() was already called above */
				2055	}
				2056
				2057	/* Check to see if someone else did our work for us. */
				2058	s = atomic_read(&sync_sched_expedited_done);
				2059	if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
				2060	smp_mb(); /* ensure test happens before caller kfree */
				2061	return; /* put_online_cpus() was already called above */
				2062	}
				2063
				2064	/*
				2065	* Refetching sync_sched_expedited_started allows later
				2066	* callers to piggyback on our grace period. We subtract
				2067	* 1 to get the same token that the last incrementer got.
				2068	* We retry after they started, so our grace period works
				2069	* for them, and they started after our first try, so their
				2070	* grace period works for us.
					*
					* NOTE(review): the code below stores the value read from
					* sync_sched_expedited_started as-is; no subtraction of 1
					* is visible here -- confirm whether the text above is stale.
				2071	*/
				2072	get_online_cpus();
				2073	snap = atomic_read(&sync_sched_expedited_started);
				2074	smp_mb(); /* ensure read is before try_stop_cpus(). */
				2075	}
				2076
				2077	/*
				2078	* Everyone up to our most recent fetch is covered by our grace
				2079	* period. Update the counter, but only if our work is still
				2080	* relevant -- which it won't be if someone who started later
				2081	* than we did beat us to the punch.
				2082	*/
				2083	do {
				2084	s = atomic_read(&sync_sched_expedited_done);
				2085	if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
				2086	smp_mb(); /* ensure test happens before caller kfree */
				2087	break;
				2088	}
				2089	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); /* retry if the counter changed since it was read */
				2090
				2091	put_online_cpus();
				2092	}
				2093	EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
				2094
				2095	/*
				2096	* Check to see if there is any immediate RCU-related work to be done
				2097	* by the current CPU, for the specified type of RCU, returning 1 if so.
				2098	* The checks are in order of increasing expense: checks that can be
				2099	* carried out against CPU-local state are performed first. However,
				2100	* we must check for CPU stalls first, else we might not get a chance.
				2101	*/
				2102	static int __rcu_pending(struct rcu_state rsp, struct rcu_data rdp)
				2103	{
				2104	struct rcu_node *rnp = rdp->mynode;
				2105
				2106	rdp->n_rcu_pending++;
				2107
				2108	/* Check for CPU stalls, if enabled. */
				2109	check_cpu_stall(rsp, rdp);
				2110
				2111	/* Is the RCU core waiting for a quiescent state from this CPU? */
				2112	if (rcu_scheduler_fully_active &&
				2113	rdp->qs_pending && !rdp->passed_quiesce) {
				2114
				2115	/*
				2116	* If force_quiescent_state() coming soon and this CPU
				2117	* needs a quiescent state, and this is either RCU-sched
				2118	* or RCU-bh, force a local reschedule.
				2119	*/
				2120	rdp->n_rp_qs_pending++;
				2121	if (!rdp->preemptible &&
				2122	ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
				2123	jiffies))
				2124	set_need_resched();
				2125	} else if (rdp->qs_pending && rdp->passed_quiesce) {
				2126	rdp->n_rp_report_qs++;
				2127	return 1;
				2128	}
				2129
				2130	/* Does this CPU have callbacks ready to invoke? */
				2131	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
				2132	rdp->n_rp_cb_ready++;
				2133	return 1;
				2134	}
				2135
				2136	/* Has RCU gone idle with this CPU needing another grace period? */
				2137	if (cpu_needs_another_gp(rsp, rdp)) {
				2138	rdp->n_rp_cpu_needs_gp++;
				2139	return 1;
				2140	}
				2141
				2142	/* Has another RCU grace period completed? */
				2143	if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
				2144	rdp->n_rp_gp_completed++;
				2145	return 1;
				2146	}
				2147
				2148	/* Has a new RCU grace period started? */
				2149	if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
				2150	rdp->n_rp_gp_started++;
				2151	return 1;
				2152	}
				2153
				2154	/* Has an RCU GP gone long enough to send resched IPIs &c? */
				2155	if (rcu_gp_in_progress(rsp) &&
				2156	ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
				2157	rdp->n_rp_need_fqs++;
				2158	return 1;
				2159	}
				2160
				2161	/* nothing to do */
				2162	rdp->n_rp_need_nothing++;
				2163	return 0;
				2164	}
				2165
				2166	/*
				2167	* Check to see if there is any immediate RCU-related work to be done
				2168	* by the current CPU, returning 1 if so. This function is part of the
				2169	* RCU implementation; it is -not- an exported member of the RCU API.
				2170	*/
				2171	static int rcu_pending(int cpu)
				2172	{
				2173	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) \|\|
				2174	__rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) \|\|
				2175	rcu_preempt_pending(cpu);
				2176	}
				2177
				2178	/*
				2179	* Check to see if any future RCU-related work will need to be done
				2180	* by the current CPU, even if none need be done immediately, returning
				2181	* 1 if so.
				2182	*/
				2183	static int rcu_cpu_has_callbacks(int cpu)
				2184	{
				2185	/* RCU callbacks either ready or pending? */
				2186	return per_cpu(rcu_sched_data, cpu).nxtlist \|\|
				2187	per_cpu(rcu_bh_data, cpu).nxtlist \|\|
				2188	rcu_preempt_cpu_has_callbacks(cpu);
				2189	}
				2190
				2191	static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; /* per-CPU callback that rcu_barrier_func() queues */
				2192	static atomic_t rcu_barrier_cpu_count; /* set to 1 by _rcu_barrier(), +1 per rcu_barrier_func() call, -1 per rcu_barrier_callback() */
				2193	static DEFINE_MUTEX(rcu_barrier_mutex); /* held across _rcu_barrier() to serialize concurrent requests */
				2194	static struct completion rcu_barrier_completion; /* completed when rcu_barrier_cpu_count drops to zero */
				2195
				2196	static void rcu_barrier_callback(struct rcu_head *notused)
				2197	{
				2198	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
				2199	complete(&rcu_barrier_completion);
				2200	}
				2201
				2202	/*
				2203	* Called with preemption disabled, and from cross-cpu IRQ context.
				2204	*/
				2205	static void rcu_barrier_func(void *type)
				2206	{
				2207	int cpu = smp_processor_id();
				2208	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
				2209	void (call_rcu_func)(struct rcu_head head,
				2210	void (func)(struct rcu_head head));
				2211
				2212	atomic_inc(&rcu_barrier_cpu_count);
				2213	call_rcu_func = type;
				2214	call_rcu_func(head, rcu_barrier_callback);
				2215	}
				2216
				2217	/*
				2218	* Orchestrate the specified type of RCU barrier, waiting for all
				2219	* RCU callbacks of the specified type to complete.
				2220	*/
				2221	static void _rcu_barrier(struct rcu_state *rsp,
				2222	void (call_rcu_func)(struct rcu_head head,
				2223	void (func)(struct rcu_head head)))
				2224	{
				2225	BUG_ON(in_interrupt());
				2226	/* Take mutex to serialize concurrent rcu_barrier() requests. */
				2227	mutex_lock(&rcu_barrier_mutex);
				2228	init_completion(&rcu_barrier_completion);
				2229	/*
				2230	* Initialize rcu_barrier_cpu_count to 1, then invoke
				2231	* rcu_barrier_func() on each CPU, so that each CPU also has
				2232	* incremented rcu_barrier_cpu_count. Only then is it safe to
				2233	* decrement rcu_barrier_cpu_count -- otherwise the first CPU
				2234	* might complete its grace period before all of the other CPUs
				2235	* did their increment, causing this function to return too
				2236	* early. Note that on_each_cpu() disables irqs, which prevents
				2237	* any CPUs from coming online or going offline until each online
				2238	* CPU has queued its RCU-barrier callback.
				2239	*/
				2240	atomic_set(&rcu_barrier_cpu_count, 1);
				2241	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
				2242	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
				2243	complete(&rcu_barrier_completion);
				2244	wait_for_completion(&rcu_barrier_completion);
				2245	mutex_unlock(&rcu_barrier_mutex);
				2246	}
				2247
				2248	#ifndef CONFIG_PREEMPT_RT_FULL
				2249	/**
				2250	* rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
				2251	*/
				2252	void rcu_barrier_bh(void)
				2253	{
				2254	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
				2255	}
				2256	EXPORT_SYMBOL_GPL(rcu_barrier_bh);
				2257	#endif
				2258
				2259	/**
				2260	* rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
				2261	*/
				2262	void rcu_barrier_sched(void)
				2263	{
				2264	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
				2265	}
				2266	EXPORT_SYMBOL_GPL(rcu_barrier_sched);
				2267
				2268	/*
				2269	* Do boot-time initialization of a CPU's per-CPU RCU data.
				2270	*/
				2271	static void __init
				2272	rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
				2273	{
				2274	unsigned long flags;
				2275	int i;
				2276	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
				2277	struct rcu_node *rnp = rcu_get_root(rsp);
				2278
				2279	/* Set up local state, ensuring consistent view of global state. */
				2280	raw_spin_lock_irqsave(&rnp->lock, flags);
				2281	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
				2282	rdp->nxtlist = NULL;
				2283	for (i = 0; i < RCU_NEXT_SIZE; i++)
				2284	rdp->nxttail[i] = &rdp->nxtlist;
				2285	rdp->qlen_lazy = 0;
				2286	rdp->qlen = 0;
				2287	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
				2288	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
				2289	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
				2290	rdp->cpu = cpu;
				2291	rdp->rsp = rsp;
				2292	raw_spin_unlock_irqrestore(&rnp->lock, flags);
				2293	}
				2294
				2295	/*
				2296	* Initialize a CPU's per-CPU RCU data. Note that only one online or
				2297	* offline event can be happening at a given time. Note also that we
				2298	* can accept some slop in the rsp->completed access due to the fact
				2299	* that this CPU cannot possibly have any RCU callbacks in flight yet.
				2300	*/
				2301	static void __cpuinit
				2302	rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
				2303	{
				2304	unsigned long flags;
				2305	unsigned long mask;
				2306	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
				2307	struct rcu_node *rnp = rcu_get_root(rsp);
				2308
				2309	/* Set up local state, ensuring consistent view of global state. */
				2310	raw_spin_lock_irqsave(&rnp->lock, flags);
				2311	rdp->beenonline = 1; /* We have now been online. */
				2312	rdp->preemptible = preemptible;
				2313	rdp->qlen_last_fqs_check = 0;
				2314	rdp->n_force_qs_snap = rsp->n_force_qs;
				2315	rdp->blimit = blimit;
				2316	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
				2317	atomic_set(&rdp->dynticks->dynticks,
				2318	(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
				2319	rcu_prepare_for_idle_init(cpu);
				2320	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
				2321
				2322	/*
				2323	* A new grace period might start here. If so, we won't be part
				2324	* of it, but that is OK, as we are currently in a quiescent state.
				2325	*/
				2326
				2327	/* Exclude any attempts to start a new GP on large systems. */
				2328	raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
				2329
				2330	/* Add CPU to rcu_node bitmasks. */
				2331	rnp = rdp->mynode;
				2332	mask = rdp->grpmask;
				2333	do {
				2334	/* Exclude any attempts to start a new GP on small systems. */
				2335	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
				2336	rnp->qsmaskinit \|= mask;
				2337	mask = rnp->grpmask;
				2338	if (rnp == rdp->mynode) {
				2339	/*
				2340	* If there is a grace period in progress, we will
				2341	* set up to wait for it next time we run the
				2342	* RCU core code.
				2343	*/
				2344	rdp->gpnum = rnp->completed;
				2345	rdp->completed = rnp->completed;
				2346	rdp->passed_quiesce = 0;
				2347	rdp->qs_pending = 0;
				2348	rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
				2349	trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
				2350	}
				2351	raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
				2352	rnp = rnp->parent;
				2353	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
				2354
				2355	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
				2356	}
				2357
				2358	static void __cpuinit rcu_prepare_cpu(int cpu)
				2359	{
				2360	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
				2361	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
				2362	rcu_preempt_init_percpu_data(cpu);
				2363	}
				2364
				2365	/*
				2366	* Handle CPU online/offline notification events.
				2367	*/
				2368	static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				2369	unsigned long action, void *hcpu)
				2370	{
				2371	long cpu = (long)hcpu;
				2372	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
				2373	struct rcu_node *rnp = rdp->mynode;
				2374
				2375	trace_rcu_utilization("Start CPU hotplug");
				2376	switch (action) {
				2377	case CPU_UP_PREPARE:
				2378	case CPU_UP_PREPARE_FROZEN:
				2379	rcu_prepare_cpu(cpu);
				2380	rcu_prepare_kthreads(cpu);
				2381	break;
				2382	case CPU_ONLINE:
				2383	case CPU_DOWN_FAILED:
				2384	rcu_node_kthread_setaffinity(rnp, -1);
				2385	rcu_cpu_kthread_setrt(cpu, 1);
				2386	break;
				2387	case CPU_DOWN_PREPARE:
				2388	rcu_node_kthread_setaffinity(rnp, cpu);
				2389	rcu_cpu_kthread_setrt(cpu, 0);
				2390	break;
				2391	case CPU_DYING:
				2392	case CPU_DYING_FROZEN:
				2393	/*
				2394	* The whole machine is "stopped" except this CPU, so we can
				2395	* touch any data without introducing corruption. We send the
				2396	* dying CPU's callbacks to an arbitrarily chosen online CPU.
				2397	*/
				2398	rcu_cleanup_dying_cpu(&rcu_bh_state);
				2399	rcu_cleanup_dying_cpu(&rcu_sched_state);
				2400	rcu_preempt_cleanup_dying_cpu();
				2401	rcu_cleanup_after_idle(cpu);
				2402	break;
				2403	case CPU_DEAD:
				2404	case CPU_DEAD_FROZEN:
				2405	case CPU_UP_CANCELED:
				2406	case CPU_UP_CANCELED_FROZEN:
				2407	rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
				2408	rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
				2409	rcu_preempt_cleanup_dead_cpu(cpu);
				2410	break;
				2411	default:
				2412	break;
				2413	}
				2414	trace_rcu_utilization("End CPU hotplug");
				2415	return NOTIFY_OK;
				2416	}
				2417
				2418	/*
				2419	* This function is invoked towards the end of the scheduler's initialization
				2420	* process. Before this is called, the idle task might contain
				2421	* RCU read-side critical sections (during which time, this idle
				2422	* task is booting the system). After this function is called, the
				2423	* idle tasks are prohibited from containing RCU read-side critical
				2424	* sections. This function also enables RCU lockdep checking.
				2425	*/
				2426	void rcu_scheduler_starting(void)
				2427	{
				2428	WARN_ON(num_online_cpus() != 1);
				2429	WARN_ON(nr_context_switches() > 0);
				2430	rcu_scheduler_active = 1;
				2431	}
				2432
				2433	/*
				2434	* Compute the per-level fanout, either using the exact fanout specified
				2435	* or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
				2436	*/
				2437	#ifdef CONFIG_RCU_FANOUT_EXACT
				2438	static void __init rcu_init_levelspread(struct rcu_state *rsp)
				2439	{
				2440	int i;
				2441
				2442	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
				2443	rsp->levelspread[i] = CONFIG_RCU_FANOUT;
				2444	rsp->levelspread[0] = RCU_FANOUT_LEAF;
				2445	}
				2446	#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
				2447	static void __init rcu_init_levelspread(struct rcu_state *rsp)
				2448	{
				2449	int ccur;
				2450	int cprv;
				2451	int i;
				2452
				2453	cprv = NR_CPUS;
				2454	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
				2455	ccur = rsp->levelcnt[i];
				2456	rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
				2457	cprv = ccur;
				2458	}
				2459	}
				2460	#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
				2461
				2462	/*
				2463	* Helper function for rcu_init() that initializes one rcu_state structure.
				2464	*/
				2465	static void __init rcu_init_one(struct rcu_state *rsp,
				2466	struct rcu_data __percpu *rda)
				2467	{
				2468	static char *buf[] = { "rcu_node_level_0",
				2469	"rcu_node_level_1",
				2470	"rcu_node_level_2",
				2471	"rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
				2472	int cpustride = 1;
				2473	int i;
				2474	int j;
				2475	struct rcu_node *rnp;
				2476
				2477	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
				2478
				2479	/* Initialize the level-tracking arrays. */
				2480
				2481	for (i = 1; i < NUM_RCU_LVLS; i++)
				2482	rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
				2483	rcu_init_levelspread(rsp);
				2484
				2485	/* Initialize the elements themselves, starting from the leaves. */
				2486
				2487	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
				2488	cpustride *= rsp->levelspread[i];
				2489	rnp = rsp->level[i];
				2490	for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
				2491	raw_spin_lock_init(&rnp->lock);
				2492	lockdep_set_class_and_name(&rnp->lock,
				2493	&rcu_node_class[i], buf[i]);
				2494	rnp->gpnum = 0;
				2495	rnp->qsmask = 0;
				2496	rnp->qsmaskinit = 0;
				2497	rnp->grplo = j * cpustride;
				2498	rnp->grphi = (j + 1) * cpustride - 1;
				2499	if (rnp->grphi >= NR_CPUS)
				2500	rnp->grphi = NR_CPUS - 1;
				2501	if (i == 0) {
				2502	rnp->grpnum = 0;
				2503	rnp->grpmask = 0;
				2504	rnp->parent = NULL;
				2505	} else {
				2506	rnp->grpnum = j % rsp->levelspread[i - 1];
				2507	rnp->grpmask = 1UL << rnp->grpnum;
				2508	rnp->parent = rsp->level[i - 1] +
				2509	j / rsp->levelspread[i - 1];
				2510	}
				2511	rnp->level = i;
				2512	INIT_LIST_HEAD(&rnp->blkd_tasks);
				2513	}
				2514	}
				2515
				2516	rsp->rda = rda;
				2517	rnp = rsp->level[NUM_RCU_LVLS - 1];
				2518	for_each_possible_cpu(i) {
				2519	while (i > rnp->grphi)
				2520	rnp++;
				2521	per_cpu_ptr(rsp->rda, i)->mynode = rnp;
				2522	rcu_boot_init_percpu_data(i, rsp);
				2523	}
				2524	}
				2525
				2526	void __init rcu_init(void)
				2527	{
				2528	int cpu;
				2529
				2530	rcu_bootup_announce();
				2531	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
				2532	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
				2533	__rcu_init_preempt();
				2534	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
				2535
				2536	/*
				2537	* We don't need protection against CPU-hotplug here because
				2538	* this is called early in boot, before either interrupts
				2539	* or the scheduler are operational.
				2540	*/
				2541	cpu_notifier(rcu_cpu_notify, 0);
				2542	for_each_online_cpu(cpu)
				2543	rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
				2544	check_cpu_stall_init();
				2545	}
				2546
				2547	#include "rcutree_plugin.h"