Blame - ap/os/linux/linux-3.4.x/kernel/timer.c - T106_DC

blob: 4fef5e93fe7b077cc3b030287f237c7f8290ae04 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	/*
				2	* linux/kernel/timer.c
				3	*
				4	* Kernel internal timers, basic process system calls
				5	*
				6	* Copyright (C) 1991, 1992 Linus Torvalds
				7	*
				8	* 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
				9	*
				10	* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
				11	* "A Kernel Model for Precision Timekeeping" by Dave Mills
				12	* 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
				13	* serialize accesses to xtime/lost_ticks).
				14	* Copyright (C) 1998 Andrea Arcangeli
				15	* 1999-03-10 Improved NTP compatibility by Ulrich Windl
				16	* 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
				17	* 2000-10-05 Implemented scalable SMP per-CPU timer handling.
				18	* Copyright (C) 2000, 2001, 2002 Ingo Molnar
				19	* Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
				20	*/
				21
				22	#include <linux/kernel_stat.h>
				23	#include <linux/export.h>
				24	#include <linux/interrupt.h>
				25	#include <linux/percpu.h>
				26	#include <linux/init.h>
				27	#include <linux/mm.h>
				28	#include <linux/swap.h>
				29	#include <linux/pid_namespace.h>
				30	#include <linux/notifier.h>
				31	#include <linux/thread_info.h>
				32	#include <linux/time.h>
				33	#include <linux/jiffies.h>
				34	#include <linux/posix-timers.h>
				35	#include <linux/cpu.h>
				36	#include <linux/syscalls.h>
				37	#include <linux/delay.h>
				38	#include <linux/tick.h>
				39	#include <linux/kallsyms.h>
				40	#include <linux/irq_work.h>
				41	#include <linux/sched.h>
				42	#include <linux/slab.h>
				43
				44	#include <asm/uaccess.h>
				45	#include <asm/unistd.h>
				46	#include <asm/div64.h>
				47	#include <asm/timex.h>
				48	#include <asm/io.h>
				49
				50	#define CREATE_TRACE_POINTS
				51	#include <trace/events/timer.h>
				52
				53	u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
				54
				55	EXPORT_SYMBOL(jiffies_64);
				56
				/*
				 * Geometry of the per-CPU timer wheel.
				 *
				 * The root vector (tv1) has TVR_SIZE slots and each of the four outer
				 * vectors (tv2..tv5) has TVN_SIZE slots.  CONFIG_BASE_SMALL selects the
				 * smaller of the two geometries.
				 */
				#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
				#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
				#define TVN_SIZE (1 << TVN_BITS)
				#define TVR_SIZE (1 << TVR_BITS)
				#define TVN_MASK (TVN_SIZE - 1)
				#define TVR_MASK (TVR_SIZE - 1)
				/* Largest expiry distance internal_add_timer() files without clamping. */
				#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
				67
				68	struct tvec {
				69	struct list_head vec[TVN_SIZE];
				70	};
				71
				72	struct tvec_root {
				73	struct list_head vec[TVR_SIZE];
				74	};
				75
				76	struct tvec_base {
				77	spinlock_t lock;
				78	struct timer_list *running_timer;
				79	#ifdef CONFIG_PREEMPT_RT_FULL
				80	wait_queue_head_t wait_for_running_timer;
				81	#endif
				82	unsigned long timer_jiffies;
				83	unsigned long next_timer;
				84	struct tvec_root tv1;
				85	struct tvec tv2;
				86	struct tvec tv3;
				87	struct tvec tv4;
				88	struct tvec tv5;
				89	} ____cacheline_aligned;
				90
				91	struct tvec_base boot_tvec_bases;
				92	EXPORT_SYMBOL(boot_tvec_bases);
				93	static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
				94
				95	/* Functions below help us manage 'deferrable' flag */
				96	static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
				97	{
				98	return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
				99	}
				100
				101	static inline struct tvec_base tbase_get_base(struct tvec_base base)
				102	{
				103	return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
				104	}
				105
				106	static inline void timer_set_deferrable(struct timer_list *timer)
				107	{
				108	timer->base = TBASE_MAKE_DEFERRED(timer->base);
				109	}
				110
				111	static inline void
				112	timer_set_base(struct timer_list timer, struct tvec_base new_base)
				113	{
				114	timer->base = (struct tvec_base *)((unsigned long)(new_base) \|
				115	tbase_get_deferrable(timer->base));
				116	}
				117
				118	static unsigned long round_jiffies_common(unsigned long j, int cpu,
				119	bool force_up)
				120	{
				121	int rem;
				122	unsigned long original = j;
				123
				124	/*
				125	* We don't want all cpus firing their timers at once hitting the
				126	* same lock or cachelines, so we skew each extra cpu with an extra
				127	* 3 jiffies. This 3 jiffies came originally from the mm/ code which
				128	* already did this.
				129	* The skew is done by adding 3*cpunr, then round, then subtract this
				130	* extra offset again.
				131	*/
				132	j += cpu * 3;
				133
				134	rem = j % HZ;
				135
				136	/*
				137	* If the target jiffie is just after a whole second (which can happen
				138	* due to delays of the timer irq, long irq off times etc etc) then
				139	* we should round down to the whole second, not up. Use 1/4th second
				140	* as cutoff for this rounding as an extreme upper bound for this.
				141	* But never round down if @force_up is set.
				142	*/
				143	if (rem < HZ/4 && !force_up) /* round down */
				144	j = j - rem;
				145	else /* round up */
				146	j = j - rem + HZ;
				147
				148	/* now that we have rounded, subtract the extra skew again */
				149	j -= cpu * 3;
				150
				151	/*
				152	* Make sure j is still in the future. Otherwise return the
				153	* unmodified value.
				154	*/
				155	return time_is_after_jiffies(j) ? j : original;
				156	}
				157
				158	/**
				159	* __round_jiffies - function to round jiffies to a full second
				160	* @j: the time in (absolute) jiffies that should be rounded
				161	* @cpu: the processor number on which the timeout will happen
				162	*
				163	* __round_jiffies() rounds an absolute time in the future (in jiffies)
				164	* up or down to (approximately) full seconds. This is useful for timers
				165	* for which the exact time they fire does not matter too much, as long as
				166	* they fire approximately every X seconds.
				167	*
				168	* By rounding these timers to whole seconds, all such timers will fire
				169	* at the same time, rather than at various times spread out. The goal
				170	* of this is to have the CPU wake up less, which saves power.
				171	*
				172	* The exact rounding is skewed for each processor to avoid all
				173	* processors firing at the exact same time, which could lead
				174	* to lock contention or spurious cache line bouncing.
				175	*
				176	* The return value is the rounded version of the @j parameter.
				177	*/
				178	unsigned long __round_jiffies(unsigned long j, int cpu)
				179	{
				180	return round_jiffies_common(j, cpu, false);
				181	}
				182	EXPORT_SYMBOL_GPL(__round_jiffies);
				183
				184	/**
				185	* __round_jiffies_relative - function to round jiffies to a full second
				186	* @j: the time in (relative) jiffies that should be rounded
				187	* @cpu: the processor number on which the timeout will happen
				188	*
				189	* __round_jiffies_relative() rounds a time delta in the future (in jiffies)
				190	* up or down to (approximately) full seconds. This is useful for timers
				191	* for which the exact time they fire does not matter too much, as long as
				192	* they fire approximately every X seconds.
				193	*
				194	* By rounding these timers to whole seconds, all such timers will fire
				195	* at the same time, rather than at various times spread out. The goal
				196	* of this is to have the CPU wake up less, which saves power.
				197	*
				198	* The exact rounding is skewed for each processor to avoid all
				199	* processors firing at the exact same time, which could lead
				200	* to lock contention or spurious cache line bouncing.
				201	*
				202	* The return value is the rounded version of the @j parameter.
				203	*/
				204	unsigned long __round_jiffies_relative(unsigned long j, int cpu)
				205	{
				206	unsigned long j0 = jiffies;
				207
				208	/* Use j0 because jiffies might change while we run */
				209	return round_jiffies_common(j + j0, cpu, false) - j0;
				210	}
				211	EXPORT_SYMBOL_GPL(__round_jiffies_relative);
				212
				213	/**
				214	* round_jiffies - function to round jiffies to a full second
				215	* @j: the time in (absolute) jiffies that should be rounded
				216	*
				217	* round_jiffies() rounds an absolute time in the future (in jiffies)
				218	* up or down to (approximately) full seconds. This is useful for timers
				219	* for which the exact time they fire does not matter too much, as long as
				220	* they fire approximately every X seconds.
				221	*
				222	* By rounding these timers to whole seconds, all such timers will fire
				223	* at the same time, rather than at various times spread out. The goal
				224	* of this is to have the CPU wake up less, which saves power.
				225	*
				226	* The return value is the rounded version of the @j parameter.
				227	*/
				228	unsigned long round_jiffies(unsigned long j)
				229	{
				230	return round_jiffies_common(j, raw_smp_processor_id(), false);
				231	}
				232	EXPORT_SYMBOL_GPL(round_jiffies);
				233
				/**
				 * round_jiffies_relative - round a relative jiffies value to a full second
				 * @j: the time in (relative) jiffies that should be rounded
				 *
				 * Same as __round_jiffies_relative(), using the CPU the caller is
				 * currently running on for the skew.  Useful for timers that only need
				 * to fire roughly every X seconds; batching them lets the CPU wake up
				 * less often, which saves power.
				 *
				 * Returns the rounded version of @j, as a relative value.
				 */
				unsigned long round_jiffies_relative(unsigned long j)
				{
					int this_cpu = raw_smp_processor_id();

					return __round_jiffies_relative(j, this_cpu);
				}
				253	EXPORT_SYMBOL_GPL(round_jiffies_relative);
				254
				255	/**
				256	* __round_jiffies_up - function to round jiffies up to a full second
				257	* @j: the time in (absolute) jiffies that should be rounded
				258	* @cpu: the processor number on which the timeout will happen
				259	*
				260	* This is the same as __round_jiffies() except that it will never
				261	* round down. This is useful for timeouts for which the exact time
				262	* of firing does not matter too much, as long as they don't fire too
				263	* early.
				264	*/
				265	unsigned long __round_jiffies_up(unsigned long j, int cpu)
				266	{
				267	return round_jiffies_common(j, cpu, true);
				268	}
				269	EXPORT_SYMBOL_GPL(__round_jiffies_up);
				270
				271	/**
				272	* __round_jiffies_up_relative - function to round jiffies up to a full second
				273	* @j: the time in (relative) jiffies that should be rounded
				274	* @cpu: the processor number on which the timeout will happen
				275	*
				276	* This is the same as __round_jiffies_relative() except that it will never
				277	* round down. This is useful for timeouts for which the exact time
				278	* of firing does not matter too much, as long as they don't fire too
				279	* early.
				280	*/
				281	unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
				282	{
				283	unsigned long j0 = jiffies;
				284
				285	/* Use j0 because jiffies might change while we run */
				286	return round_jiffies_common(j + j0, cpu, true) - j0;
				287	}
				288	EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
				289
				290	/**
				291	* round_jiffies_up - function to round jiffies up to a full second
				292	* @j: the time in (absolute) jiffies that should be rounded
				293	*
				294	* This is the same as round_jiffies() except that it will never
				295	* round down. This is useful for timeouts for which the exact time
				296	* of firing does not matter too much, as long as they don't fire too
				297	* early.
				298	*/
				299	unsigned long round_jiffies_up(unsigned long j)
				300	{
				301	return round_jiffies_common(j, raw_smp_processor_id(), true);
				302	}
				303	EXPORT_SYMBOL_GPL(round_jiffies_up);
				304
				/**
				 * round_jiffies_up_relative - round a relative jiffies value up to a full second
				 * @j: the time in (relative) jiffies that should be rounded
				 *
				 * Like round_jiffies_relative(), but never rounds down.  Meant for
				 * timeouts whose exact firing time is unimportant as long as they do
				 * not fire too early.
				 */
				unsigned long round_jiffies_up_relative(unsigned long j)
				{
					int this_cpu = raw_smp_processor_id();

					return __round_jiffies_up_relative(j, this_cpu);
				}
				318	EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
				319
				320	/**
				321	* set_timer_slack - set the allowed slack for a timer
				322	* @timer: the timer to be modified
				323	* @slack_hz: the amount of time (in jiffies) allowed for rounding
				324	*
				325	* Set the amount of time, in jiffies, that a certain timer has
				326	* in terms of slack. By setting this value, the timer subsystem
				327	* will schedule the actual timer somewhere between
				328	* the time mod_timer() asks for, and that time plus the slack.
				329	*
				330	* By setting the slack to -1, a percentage of the delay is used
				331	* instead.
				332	*/
				333	void set_timer_slack(struct timer_list *timer, int slack_hz)
				334	{
				335	timer->slack = slack_hz;
				336	}
				337	EXPORT_SYMBOL_GPL(set_timer_slack);
				338
				339	static void internal_add_timer(struct tvec_base base, struct timer_list timer)
				340	{
				341	unsigned long expires = timer->expires;
				342	unsigned long idx = expires - base->timer_jiffies;
				343	struct list_head *vec;
				344
				345	if (idx < TVR_SIZE) {
				346	int i = expires & TVR_MASK;
				347	vec = base->tv1.vec + i;
				348	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
				349	int i = (expires >> TVR_BITS) & TVN_MASK;
				350	vec = base->tv2.vec + i;
				351	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
				352	int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
				353	vec = base->tv3.vec + i;
				354	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
				355	int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
				356	vec = base->tv4.vec + i;
				357	} else if ((signed long) idx < 0) {
				358	/*
				359	* Can happen if you add a timer with expires == jiffies,
				360	* or you set a timer to go off in the past
				361	*/
				362	vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
				363	} else {
				364	int i;
				365	/* If the timeout is larger than MAX_TVAL (on 64-bit
				366	* architectures or with CONFIG_BASE_SMALL=1) then we
				367	* use the maximum timeout.
				368	*/
				369	if (idx > MAX_TVAL) {
				370	idx = MAX_TVAL;
				371	expires = idx + base->timer_jiffies;
				372	}
				373	i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
				374	vec = base->tv5.vec + i;
				375	}
				376	/*
				377	* Timers are FIFO:
				378	*/
				379	list_add_tail(&timer->entry, vec);
				380	}
				381
				382	#ifdef CONFIG_TIMER_STATS
				383	void __timer_stats_timer_set_start_info(struct timer_list timer, void addr)
				384	{
				385	if (timer->start_site)
				386	return;
				387
				388	timer->start_site = addr;
				389	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
				390	timer->start_pid = current->pid;
				391	}
				392
				393	static void timer_stats_account_timer(struct timer_list *timer)
				394	{
				395	unsigned int flag = 0;
				396
				397	if (likely(!timer->start_site))
				398	return;
				399	if (unlikely(tbase_get_deferrable(timer->base)))
				400	flag \|= TIMER_STATS_FLAG_DEFERRABLE;
				401
				402	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				403	timer->function, timer->start_comm, flag);
				404	}
				405
				406	#else
				/* Without CONFIG_TIMER_STATS there is nothing to account. */
				static void timer_stats_account_timer(struct timer_list *timer)
				{
				}
				408	#endif
				409
				410	#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
				411
				412	static struct debug_obj_descr timer_debug_descr;
				413
				414	static void timer_debug_hint(void addr)
				415	{
				416	return ((struct timer_list *) addr)->function;
				417	}
				418
				419	/*
				420	* fixup_init is called when:
				421	* - an active object is initialized
				422	*/
				423	static int timer_fixup_init(void *addr, enum debug_obj_state state)
				424	{
				425	struct timer_list *timer = addr;
				426
				427	switch (state) {
				428	case ODEBUG_STATE_ACTIVE:
				429	del_timer_sync(timer);
				430	debug_object_init(timer, &timer_debug_descr);
				431	return 1;
				432	default:
				433	return 0;
				434	}
				435	}
				436
				/*
				 * Placeholder callback installed by the fixup handlers on timers that
				 * were used without proper initialisation; it only warns when it fires.
				 */
				static void stub_timer(unsigned long data)
				{
					WARN_ON(1);
				}
				442
				443	/*
				444	* fixup_activate is called when:
				445	* - an active object is activated
				446	* - an unknown object is activated (might be a statically initialized object)
				447	*/
				448	static int timer_fixup_activate(void *addr, enum debug_obj_state state)
				449	{
				450	struct timer_list *timer = addr;
				451
				452	switch (state) {
				453
				454	case ODEBUG_STATE_NOTAVAILABLE:
				455	/*
				456	* This is not really a fixup. The timer was
				457	* statically initialized. We just make sure that it
				458	* is tracked in the object tracker.
				459	*/
				460	if (timer->entry.next == NULL &&
				461	timer->entry.prev == TIMER_ENTRY_STATIC) {
				462	debug_object_init(timer, &timer_debug_descr);
				463	debug_object_activate(timer, &timer_debug_descr);
				464	return 0;
				465	} else {
				466	setup_timer(timer, stub_timer, 0);
				467	return 1;
				468	}
				469	return 0;
				470
				471	case ODEBUG_STATE_ACTIVE:
				472	WARN_ON(1);
				473
				474	default:
				475	return 0;
				476	}
				477	}
				478
				479	/*
				480	* fixup_free is called when:
				481	* - an active object is freed
				482	*/
				483	static int timer_fixup_free(void *addr, enum debug_obj_state state)
				484	{
				485	struct timer_list *timer = addr;
				486
				487	switch (state) {
				488	case ODEBUG_STATE_ACTIVE:
				489	del_timer_sync(timer);
				490	debug_object_free(timer, &timer_debug_descr);
				491	return 1;
				492	default:
				493	return 0;
				494	}
				495	}
				496
				497	/*
				498	* fixup_assert_init is called when:
				499	* - an untracked/uninit-ed object is found
				500	*/
				501	static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
				502	{
				503	struct timer_list *timer = addr;
				504
				505	switch (state) {
				506	case ODEBUG_STATE_NOTAVAILABLE:
				507	if (timer->entry.prev == TIMER_ENTRY_STATIC) {
				508	/*
				509	* This is not really a fixup. The timer was
				510	* statically initialized. We just make sure that it
				511	* is tracked in the object tracker.
				512	*/
				513	debug_object_init(timer, &timer_debug_descr);
				514	return 0;
				515	} else {
				516	setup_timer(timer, stub_timer, 0);
				517	return 1;
				518	}
				519	default:
				520	return 0;
				521	}
				522	}
				523
				524	static struct debug_obj_descr timer_debug_descr = {
				525	.name = "timer_list",
				526	.debug_hint = timer_debug_hint,
				527	.fixup_init = timer_fixup_init,
				528	.fixup_activate = timer_fixup_activate,
				529	.fixup_free = timer_fixup_free,
				530	.fixup_assert_init = timer_fixup_assert_init,
				531	};
				532
				533	static inline void debug_timer_init(struct timer_list *timer)
				534	{
				535	debug_object_init(timer, &timer_debug_descr);
				536	}
				537
				538	static inline void debug_timer_activate(struct timer_list *timer)
				539	{
				540	debug_object_activate(timer, &timer_debug_descr);
				541	}
				542
				543	static inline void debug_timer_deactivate(struct timer_list *timer)
				544	{
				545	debug_object_deactivate(timer, &timer_debug_descr);
				546	}
				547
				548	static inline void debug_timer_free(struct timer_list *timer)
				549	{
				550	debug_object_free(timer, &timer_debug_descr);
				551	}
				552
				553	static inline void debug_timer_assert_init(struct timer_list *timer)
				554	{
				555	debug_object_assert_init(timer, &timer_debug_descr);
				556	}
				557
				558	static void __init_timer(struct timer_list *timer,
				559	const char *name,
				560	struct lock_class_key *key);
				561
				562	void init_timer_on_stack_key(struct timer_list *timer,
				563	const char *name,
				564	struct lock_class_key *key)
				565	{
				566	debug_object_init_on_stack(timer, &timer_debug_descr);
				567	__init_timer(timer, name, key);
				568	}
				569	EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
				570
				571	void destroy_timer_on_stack(struct timer_list *timer)
				572	{
				573	debug_object_free(timer, &timer_debug_descr);
				574	}
				575	EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
				576
				577	#else
				/* Without CONFIG_DEBUG_OBJECTS_TIMERS the tracking hooks are no-ops. */
				static inline void debug_timer_init(struct timer_list *timer)
				{
				}

				static inline void debug_timer_activate(struct timer_list *timer)
				{
				}

				static inline void debug_timer_deactivate(struct timer_list *timer)
				{
				}

				static inline void debug_timer_assert_init(struct timer_list *timer)
				{
				}
				582	#endif
				583
				/* Initialisation hook: object tracking first, then the tracepoint. */
				static inline void debug_init(struct timer_list *timer)
				{
					debug_timer_init(timer);

					trace_timer_init(timer);
				}
				589
				/*
				 * Activation hook: object tracking first, then the timer_start
				 * tracepoint with the expiry the timer is being armed for.
				 */
				static inline void
				debug_activate(struct timer_list *timer, unsigned long expires)
				{
					debug_timer_activate(timer);

					trace_timer_start(timer, expires);
				}
				596
				/* Deactivation hook: object tracking first, then the cancel tracepoint. */
				static inline void debug_deactivate(struct timer_list *timer)
				{
					debug_timer_deactivate(timer);

					trace_timer_cancel(timer);
				}
				602
				/* Assert-initialised hook; only object tracking is involved. */
				static inline void debug_assert_init(struct timer_list *timer)
				{
					debug_timer_assert_init(timer);
				}
				607
				608	static void __init_timer(struct timer_list *timer,
				609	const char *name,
				610	struct lock_class_key *key)
				611	{
				612	timer->entry.next = NULL;
				613	timer->base = __raw_get_cpu_var(tvec_bases);
				614	timer->slack = -1;
				615	#ifdef CONFIG_TIMER_STATS
				616	timer->start_site = NULL;
				617	timer->start_pid = -1;
				618	memset(timer->start_comm, 0, TASK_COMM_LEN);
				619	#endif
				620	lockdep_init_map(&timer->lockdep_map, name, key, 0);
				621	}
				622
				623	void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
				624	const char *name,
				625	struct lock_class_key *key,
				626	void (*function)(unsigned long),
				627	unsigned long data)
				628	{
				629	timer->function = function;
				630	timer->data = data;
				631	init_timer_on_stack_key(timer, name, key);
				632	timer_set_deferrable(timer);
				633	}
				634	EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
				635
				/**
				 * init_timer_key - initialize a timer
				 * @timer: the timer to be initialized
				 * @name: name of the timer
				 * @key: lockdep class key of the fake lock used for tracking timer
				 *       sync lock dependencies
				 *
				 * A timer must be passed through init_timer_key() before any of the
				 * other timer functions is called on it.
				 */
				void init_timer_key(struct timer_list *timer,
						    const char *name,
						    struct lock_class_key *key)
				{
					/* debug/trace hooks first, then the actual initialisation */
					debug_init(timer);

					__init_timer(timer, name, key);
				}
				653	EXPORT_SYMBOL(init_timer_key);
				654
				/* Same as init_timer_key(), but additionally marks the timer deferrable. */
				void init_timer_deferrable_key(struct timer_list *timer,
							       const char *name,
							       struct lock_class_key *key)
				{
					init_timer_key(timer, name, key);

					timer_set_deferrable(timer);
				}
				662	EXPORT_SYMBOL(init_timer_deferrable_key);
				663
				664	static inline void detach_timer(struct timer_list *timer,
				665	int clear_pending)
				666	{
				667	struct list_head *entry = &timer->entry;
				668
				669	debug_deactivate(timer);
				670
				671	__list_del(entry->prev, entry->next);
				672	if (clear_pending)
				673	entry->next = NULL;
				674	entry->prev = LIST_POISON2;
				675	}
				676
				677	/*
				678	* We are using hashed locking: holding per_cpu(tvec_bases).lock
				679	* means that all timers which are tied to this base via timer->base are
				680	* locked, and the base itself is locked too.
				681	*
				682	* So __run_timers/migrate_timers can safely modify all timers which could
				683	* be found on ->tvX lists.
				684	*
				685	* When the timer's base is locked, and the timer removed from list, it is
				686	* possible to set timer->base = NULL and drop the lock: the timer remains
				687	* locked.
				688	*/
				689	static struct tvec_base lock_timer_base(struct timer_list timer,
				690	unsigned long *flags)
				691	__acquires(timer->base->lock)
				692	{
				693	struct tvec_base *base;
				694
				695	for (;;) {
				696	struct tvec_base *prelock_base = timer->base;
				697	base = tbase_get_base(prelock_base);
				698	if (likely(base != NULL)) {
				699	spin_lock_irqsave(&base->lock, *flags);
				700	if (likely(prelock_base == timer->base))
				701	return base;
				702	/* The timer has migrated to another CPU */
				703	spin_unlock_irqrestore(&base->lock, *flags);
				704	}
				705	cpu_relax();
				706	}
				707	}
				708
				709	#ifndef CONFIG_PREEMPT_RT_FULL
				710	static inline struct tvec_base switch_timer_base(struct timer_list timer,
				711	struct tvec_base *old,
				712	struct tvec_base *new)
				713	{
				714	/* See the comment in lock_timer_base() */
				715	timer_set_base(timer, NULL);
				716	spin_unlock(&old->lock);
				717	spin_lock(&new->lock);
				718	timer_set_base(timer, new);
				719	return new;
				720	}
				721	#else
				722	static inline struct tvec_base switch_timer_base(struct timer_list timer,
				723	struct tvec_base *old,
				724	struct tvec_base *new)
				725	{
				726	/*
				727	* We cannot do the above because we might be preempted and
				728	* then the preempter would see NULL and loop forever.
				729	*/
				730	if (spin_trylock(&new->lock)) {
				731	timer_set_base(timer, new);
				732	spin_unlock(&old->lock);
				733	return new;
				734	}
				735	return old;
				736	}
				737	#endif
				738
				739	static inline int
				740	__mod_timer(struct timer_list *timer, unsigned long expires,
				741	bool pending_only, int pinned)
				742	{
				743	struct tvec_base base, new_base;
				744	unsigned long flags;
				745	int ret = 0 , cpu;
				746
				747	timer_stats_timer_set_start_info(timer);
				748	BUG_ON(!timer->function);
				749
				750	base = lock_timer_base(timer, &flags);
				751
				752	if (timer_pending(timer)) {
				753	detach_timer(timer, 0);
				754	if (timer->expires == base->next_timer &&
				755	!tbase_get_deferrable(timer->base))
				756	base->next_timer = base->timer_jiffies;
				757	ret = 1;
				758	} else {
				759	if (pending_only)
				760	goto out_unlock;
				761	}
				762
				763	debug_activate(timer, expires);
				764
				765	preempt_disable_rt();
				766	cpu = smp_processor_id();
				767
				768	#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
				769	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
				770	cpu = get_nohz_timer_target();
				771	#endif
				772	preempt_enable_rt();
				773
				774	new_base = per_cpu(tvec_bases, cpu);
				775
				776	if (base != new_base) {
				777	/*
				778	* We are trying to schedule the timer on the local CPU.
				779	* However we can't change timer's base while it is running,
				780	* otherwise del_timer_sync() can't detect that the timer's
				781	* handler yet has not finished. This also guarantees that
				782	* the timer is serialized wrt itself.
				783	*/
				784	if (likely(base->running_timer != timer))
				785	base = switch_timer_base(timer, base, new_base);
				786	}
				787
				788	timer->expires = expires;
				789	if (time_before(timer->expires, base->next_timer) &&
				790	!tbase_get_deferrable(timer->base))
				791	base->next_timer = timer->expires;
				792	internal_add_timer(base, timer);
				793
				794	out_unlock:
				795	spin_unlock_irqrestore(&base->lock, flags);
				796
				797	return ret;
				798	}
				799
				800	/**
				801	* mod_timer_pending - modify a pending timer's timeout
				802	* @timer: the pending timer to be modified
				803	* @expires: new timeout in jiffies
				804	*
				805	* mod_timer_pending() is the same for pending timers as mod_timer(),
				806	* but will not re-activate and modify already deleted timers.
				807	*
				808	* It is useful for unserialized use of timers.
				809	*/
				810	int mod_timer_pending(struct timer_list *timer, unsigned long expires)
				811	{
				812	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
				813	}
				814	EXPORT_SYMBOL(mod_timer_pending);
				815
				816	/*
				817	* Decide where to put the timer while taking the slack into account
				818	*
				819	* Algorithm:
				820	* 1) calculate the maximum (absolute) time
				821	* 2) calculate the highest bit where the expires and new max are different
				822	* 3) use this bit to make a mask
				823	* 4) use the bitmask to round down the maximum time, so that all last
				824	* bits are zeros
				825	*/
				826	static inline
				827	unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
				828	{
				829	unsigned long expires_limit, mask;
				830	int bit;
				831
				832	if (timer->slack >= 0) {
				833	expires_limit = expires + timer->slack;
				834	} else {
				835	long delta = expires - jiffies;
				836
				837	if (delta < 256)
				838	return expires;
				839
				840	expires_limit = expires + delta / 256;
				841	}
				842	mask = expires ^ expires_limit;
				843	if (mask == 0)
				844	return expires;
				845
				846	bit = find_last_bit(&mask, BITS_PER_LONG);
				847
				848	mask = (1UL << bit) - 1;
				849
				850	expires_limit = expires_limit & ~(mask);
				851
				852	return expires_limit;
				853	}
				854
				855	/**
				856	* mod_timer - modify a timer's timeout
				857	* @timer: the timer to be modified
				858	* @expires: new timeout in jiffies
				859	*
				860	* mod_timer() is a more efficient way to update the expire field of an
				861	* active timer (if the timer is inactive it will be activated)
				862	*
				863	* mod_timer(timer, expires) is equivalent to:
				864	*
				865	* del_timer(timer); timer->expires = expires; add_timer(timer);
				866	*
				867	* Note that if there are multiple unserialized concurrent users of the
				868	* same timer, then mod_timer() is the only safe way to modify the timeout,
				869	* since add_timer() cannot modify an already running timer.
				870	*
				871	* The function returns whether it has modified a pending timer or not.
				872	* (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
				873	* active timer returns 1.)
				874	*/
				875	int mod_timer(struct timer_list *timer, unsigned long expires)
				876	{
				877	expires = apply_slack(timer, expires);
				878
				879	/*
				880	* This is a common optimization triggered by the
				881	* networking code - if the timer is re-modified
				882	* to be the same thing then just return:
				883	*/
				884	if (timer_pending(timer) && timer->expires == expires)
				885	return 1;
				886
				887	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
				888	}
				889	EXPORT_SYMBOL(mod_timer);
				890
				891	/**
				892	* mod_timer_pinned - modify a timer's timeout
				893	* @timer: the timer to be modified
				894	* @expires: new timeout in jiffies
				895	*
				896	* mod_timer_pinned() is a way to update the expire field of an
				897	* active timer (if the timer is inactive it will be activated)
				898	* and not allow the timer to be migrated to a different CPU.
				899	*
				900	* mod_timer_pinned(timer, expires) is equivalent to:
				901	*
				902	* del_timer(timer); timer->expires = expires; add_timer(timer);
				903	*/
				904	int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
				905	{
				906	if (timer->expires == expires && timer_pending(timer))
				907	return 1;
				908
				909	return __mod_timer(timer, expires, false, TIMER_PINNED);
				910	}
				911	EXPORT_SYMBOL(mod_timer_pinned);
				912
				913	/**
				914	* add_timer - start a timer
				915	* @timer: the timer to be added
				916	*
				917	* The kernel will do a ->function(->data) callback from the
				918	* timer interrupt at the ->expires point in the future. The
				919	* current time is 'jiffies'.
				920	*
				921	* The timer's ->expires, ->function (and if the handler uses it, ->data)
				922	* fields must be set prior calling this function.
				923	*
				924	* Timers with an ->expires field in the past will be executed in the next
				925	* timer tick.
				926	*/
				927	void add_timer(struct timer_list *timer)
				928	{
				929	BUG_ON(timer_pending(timer));
				930	mod_timer(timer, timer->expires);
				931	}
				932	EXPORT_SYMBOL(add_timer);
				933
				934	/**
				935	* add_timer_on - start a timer on a particular CPU
				936	* @timer: the timer to be added
				937	* @cpu: the CPU to start it on
				938	*
				939	* This is not very scalable on SMP. Double adds are not possible.
				940	*/
				941	void add_timer_on(struct timer_list *timer, int cpu)
				942	{
				943	struct tvec_base *base = per_cpu(tvec_bases, cpu);
				944	unsigned long flags;
				945
				946	timer_stats_timer_set_start_info(timer);
				947	BUG_ON(timer_pending(timer) \|\| !timer->function);
				948	spin_lock_irqsave(&base->lock, flags);
				949	timer_set_base(timer, base);
				950	debug_activate(timer, timer->expires);
				951	if (time_before(timer->expires, base->next_timer) &&
				952	!tbase_get_deferrable(timer->base))
				953	base->next_timer = timer->expires;
				954	internal_add_timer(base, timer);
				955	/*
				956	* Check whether the other CPU is idle and needs to be
				957	* triggered to reevaluate the timer wheel when nohz is
				958	* active. We are protected against the other CPU fiddling
				959	* with the timer by holding the timer base lock. This also
				960	* makes sure that a CPU on the way to idle can not evaluate
				961	* the timer wheel.
				962	*/
				963	wake_up_idle_cpu(cpu);
				964	spin_unlock_irqrestore(&base->lock, flags);
				965	}
				966	EXPORT_SYMBOL_GPL(add_timer_on);
				967
				968	#ifdef CONFIG_PREEMPT_RT_FULL
				969	/*
				970	* Wait for a running timer
				971	*/
				972	static void wait_for_running_timer(struct timer_list *timer)
				973	{
				974	struct tvec_base *base = timer->base;
				975
				976	if (base->running_timer == timer)
				977	wait_event(base->wait_for_running_timer,
				978	base->running_timer != timer);
				979	}
				980
				981	# define wakeup_timer_waiters(b) wake_up(&(b)->wait_for_running_timer)
				982	#else
				983	static inline void wait_for_running_timer(struct timer_list *timer)
				984	{
				985	cpu_relax();
				986	}
				987
				988	# define wakeup_timer_waiters(b) do { } while (0)
				989	#endif
				990
				991	/**
				992	* del_timer - deactive a timer.
				993	* @timer: the timer to be deactivated
				994	*
				995	* del_timer() deactivates a timer - this works on both active and inactive
				996	* timers.
				997	*
				998	* The function returns whether it has deactivated a pending timer or not.
				999	* (ie. del_timer() of an inactive timer returns 0, del_timer() of an
				1000	* active timer returns 1.)
				1001	*/
				1002	int del_timer(struct timer_list *timer)
				1003	{
				1004	struct tvec_base *base;
				1005	unsigned long flags;
				1006	int ret = 0;
				1007
				1008	debug_assert_init(timer);
				1009
				1010	timer_stats_timer_clear_start_info(timer);
				1011	if (timer_pending(timer)) {
				1012	base = lock_timer_base(timer, &flags);
				1013	if (timer_pending(timer)) {
				1014	detach_timer(timer, 1);
				1015	if (timer->expires == base->next_timer &&
				1016	!tbase_get_deferrable(timer->base))
				1017	base->next_timer = base->timer_jiffies;
				1018	ret = 1;
				1019	}
				1020	spin_unlock_irqrestore(&base->lock, flags);
				1021	}
				1022
				1023	return ret;
				1024	}
				1025	EXPORT_SYMBOL(del_timer);
				1026
				1027	/**
				1028	* try_to_del_timer_sync - Try to deactivate a timer
				1029	* @timer: timer do del
				1030	*
				1031	* This function tries to deactivate a timer. Upon successful (ret >= 0)
				1032	* exit the timer is not queued and the handler is not running on any CPU.
				1033	*/
				1034	int try_to_del_timer_sync(struct timer_list *timer)
				1035	{
				1036	struct tvec_base *base;
				1037	unsigned long flags;
				1038	int ret = -1;
				1039
				1040	debug_assert_init(timer);
				1041
				1042	base = lock_timer_base(timer, &flags);
				1043
				1044	if (base->running_timer == timer)
				1045	goto out;
				1046
				1047	timer_stats_timer_clear_start_info(timer);
				1048	ret = 0;
				1049	if (timer_pending(timer)) {
				1050	detach_timer(timer, 1);
				1051	if (timer->expires == base->next_timer &&
				1052	!tbase_get_deferrable(timer->base))
				1053	base->next_timer = base->timer_jiffies;
				1054	ret = 1;
				1055	}
				1056	out:
				1057	spin_unlock_irqrestore(&base->lock, flags);
				1058
				1059	return ret;
				1060	}
				1061	EXPORT_SYMBOL(try_to_del_timer_sync);
				1062
				1063	#if defined(CONFIG_SMP) \|\| defined(CONFIG_PREEMPT_RT_FULL)
				1064	/**
				1065	* del_timer_sync - deactivate a timer and wait for the handler to finish.
				1066	* @timer: the timer to be deactivated
				1067	*
				1068	* This function only differs from del_timer() on SMP: besides deactivating
				1069	* the timer it also makes sure the handler has finished executing on other
				1070	* CPUs.
				1071	*
				1072	* Synchronization rules: Callers must prevent restarting of the timer,
				1073	* otherwise this function is meaningless. It must not be called from
				1074	* interrupt contexts. The caller must not hold locks which would prevent
				1075	* completion of the timer's handler. The timer's handler must not call
				1076	* add_timer_on(). Upon exit the timer is not queued and the handler is
				1077	* not running on any CPU.
				1078	*
				1079	* Note: You must not hold locks that are held in interrupt context
				1080	* while calling this function. Even if the lock has nothing to do
				1081	* with the timer in question. Here's why:
				1082	*
				1083	* CPU0 CPU1
				1084	* ---- ----
				1085	* <SOFTIRQ>
				1086	* call_timer_fn();
				1087	* base->running_timer = mytimer;
				1088	* spin_lock_irq(somelock);
				1089	* <IRQ>
				1090	* spin_lock(somelock);
				1091	* del_timer_sync(mytimer);
				1092	* while (base->running_timer == mytimer);
				1093	*
				1094	* Now del_timer_sync() will never return and never release somelock.
				1095	* The interrupt on the other CPU is waiting to grab somelock but
				1096	* it has interrupted the softirq that CPU0 is waiting to finish.
				1097	*
				1098	* The function returns whether it has deactivated a pending timer or not.
				1099	*/
				1100	int del_timer_sync(struct timer_list *timer)
				1101	{
				1102	#ifdef CONFIG_LOCKDEP
				1103	unsigned long flags;
				1104
				1105	/*
				1106	* If lockdep gives a backtrace here, please reference
				1107	* the synchronization rules above.
				1108	*/
				1109	local_irq_save(flags);
				1110	lock_map_acquire(&timer->lockdep_map);
				1111	lock_map_release(&timer->lockdep_map);
				1112	local_irq_restore(flags);
				1113	#endif
				1114	/*
				1115	* don't use it in hardirq context, because it
				1116	* could lead to deadlock.
				1117	*/
				1118	WARN_ON(in_irq());
				1119	for (;;) {
				1120	int ret = try_to_del_timer_sync(timer);
				1121	if (ret >= 0)
				1122	return ret;
				1123	wait_for_running_timer(timer);
				1124	}
				1125	}
				1126	EXPORT_SYMBOL(del_timer_sync);
				1127	#endif
				1128
				1129	static int cascade(struct tvec_base base, struct tvec tv, int index)
				1130	{
				1131	/* cascade all the timers from tv up one level */
				1132	struct timer_list timer, tmp;
				1133	struct list_head tv_list;
				1134
				1135	list_replace_init(tv->vec + index, &tv_list);
				1136
				1137	/*
				1138	* We are removing _all_ timers from the list, so we
				1139	* don't have to detach them individually.
				1140	*/
				1141	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
				1142	BUG_ON(tbase_get_base(timer->base) != base);
				1143	internal_add_timer(base, timer);
				1144	}
				1145
				1146	return index;
				1147	}
				1148
				1149	static void call_timer_fn(struct timer_list timer, void (fn)(unsigned long),
				1150	unsigned long data)
				1151	{
				1152	int preempt_count = preempt_count();
				1153
				1154	#ifdef CONFIG_LOCKDEP
				1155	/*
				1156	* It is permissible to free the timer from inside the
				1157	* function that is called from it, this we need to take into
				1158	* account for lockdep too. To avoid bogus "held lock freed"
				1159	* warnings as well as problems when looking into
				1160	* timer->lockdep_map, make a copy and use that here.
				1161	*/
				1162	struct lockdep_map lockdep_map = timer->lockdep_map;
				1163	#endif
				1164	/*
				1165	* Couple the lock chain with the lock chain at
				1166	* del_timer_sync() by acquiring the lock_map around the fn()
				1167	* call here and in del_timer_sync().
				1168	*/
				1169	lock_map_acquire(&lockdep_map);
				1170
				1171	trace_timer_expire_entry(timer);
				1172	zxic_trace_timer_enter(fn);
				1173	fn(data);
				1174	zxic_trace_timer_exit(fn);
				1175	trace_timer_expire_exit(timer);
				1176
				1177	lock_map_release(&lockdep_map);
				1178
				1179	if (preempt_count != preempt_count()) {
				1180	WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
				1181	fn, preempt_count, preempt_count());
				1182	/*
				1183	* Restore the preempt count. That gives us a decent
				1184	* chance to survive and extract information. If the
				1185	* callback kept a lock held, bad luck, but not worse
				1186	* than the BUG() we had.
				1187	*/
				1188	preempt_count() = preempt_count;
				1189	}
				1190	}
				1191
				1192	#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
				1193
				1194	/**
				1195	* __run_timers - run all expired timers (if any) on this CPU.
				1196	* @base: the timer vector to be processed.
				1197	*
				1198	* This function cascades all vectors and executes all expired timer
				1199	* vectors.
				1200	*/
				1201	static inline void __run_timers(struct tvec_base *base)
				1202	{
				1203	struct timer_list *timer;
				1204
				1205	spin_lock_irq(&base->lock);
				1206	while (time_after_eq(jiffies, base->timer_jiffies)) {
				1207	struct list_head work_list;
				1208	struct list_head *head = &work_list;
				1209	int index = base->timer_jiffies & TVR_MASK;
				1210
				1211	/*
				1212	* Cascade timers:
				1213	*/
				1214	if (!index &&
				1215	(!cascade(base, &base->tv2, INDEX(0))) &&
				1216	(!cascade(base, &base->tv3, INDEX(1))) &&
				1217	!cascade(base, &base->tv4, INDEX(2)))
				1218	cascade(base, &base->tv5, INDEX(3));
				1219	++base->timer_jiffies;
				1220	list_replace_init(base->tv1.vec + index, &work_list);
				1221	while (!list_empty(head)) {
				1222	void (*fn)(unsigned long);
				1223	unsigned long data;
				1224
				1225	timer = list_first_entry(head, struct timer_list,entry);
				1226	fn = timer->function;
				1227	data = timer->data;
				1228
				1229	timer_stats_account_timer(timer);
				1230
				1231	base->running_timer = timer;
				1232	detach_timer(timer, 1);
				1233
				1234	spin_unlock_irq(&base->lock);
				1235	call_timer_fn(timer, fn, data);
				1236	base->running_timer = NULL;
				1237	spin_lock_irq(&base->lock);
				1238	}
				1239	}
				1240	wakeup_timer_waiters(base);
				1241	spin_unlock_irq(&base->lock);
				1242	}
				1243
				1244	#ifdef CONFIG_NO_HZ
				1245	/*
				1246	* Find out when the next timer event is due to happen. This
				1247	* is used on S/390 to stop all activity when a CPU is idle.
				1248	* This function needs to be called with interrupts disabled.
				1249	*/
				1250	static unsigned long __next_timer_interrupt(struct tvec_base *base)
				1251	{
				1252	unsigned long timer_jiffies = base->timer_jiffies;
				1253	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
				1254	int index, slot, array, found = 0;
				1255	struct timer_list *nte;
				1256	struct tvec *varray[4];
				1257
				1258	/* Look for timer events in tv1. */
				1259	index = slot = timer_jiffies & TVR_MASK;
				1260	do {
				1261	list_for_each_entry(nte, base->tv1.vec + slot, entry) {
				1262	if (tbase_get_deferrable(nte->base))
				1263	continue;
				1264
				1265	found = 1;
				1266	expires = nte->expires;
				1267	/* Look at the cascade bucket(s)? */
				1268	if (!index \|\| slot < index)
				1269	goto cascade;
				1270	return expires;
				1271	}
				1272	slot = (slot + 1) & TVR_MASK;
				1273	} while (slot != index);
				1274
				1275	cascade:
				1276	/* Calculate the next cascade event */
				1277	if (index)
				1278	timer_jiffies += TVR_SIZE - index;
				1279	timer_jiffies >>= TVR_BITS;
				1280
				1281	/* Check tv2-tv5. */
				1282	varray[0] = &base->tv2;
				1283	varray[1] = &base->tv3;
				1284	varray[2] = &base->tv4;
				1285	varray[3] = &base->tv5;
				1286
				1287	for (array = 0; array < 4; array++) {
				1288	struct tvec *varp = varray[array];
				1289
				1290	index = slot = timer_jiffies & TVN_MASK;
				1291	do {
				1292	list_for_each_entry(nte, varp->vec + slot, entry) {
				1293	if (tbase_get_deferrable(nte->base))
				1294	continue;
				1295
				1296	found = 1;
				1297	if (time_before(nte->expires, expires))
				1298	expires = nte->expires;
				1299	}
				1300	/*
				1301	* Do we still search for the first timer or are
				1302	* we looking up the cascade buckets ?
				1303	*/
				1304	if (found) {
				1305	/* Look at the cascade bucket(s)? */
				1306	if (!index \|\| slot < index)
				1307	break;
				1308	return expires;
				1309	}
				1310	slot = (slot + 1) & TVN_MASK;
				1311	} while (slot != index);
				1312
				1313	if (index)
				1314	timer_jiffies += TVN_SIZE - index;
				1315	timer_jiffies >>= TVN_BITS;
				1316	}
				1317	return expires;
				1318	}
				1319
				1320	/*
				1321	* Check, if the next hrtimer event is before the next timer wheel
				1322	* event:
				1323	*/
				1324	static unsigned long cmp_next_hrtimer_event(unsigned long now,
				1325	unsigned long expires)
				1326	{
				1327	ktime_t hr_delta = hrtimer_get_next_event();
				1328	struct timespec tsdelta;
				1329	unsigned long delta;
				1330
				1331	if (hr_delta.tv64 == KTIME_MAX)
				1332	return expires;
				1333
				1334	/*
				1335	* Expired timer available, let it expire in the next tick
				1336	*/
				1337	if (hr_delta.tv64 <= 0)
				1338	return now + 1;
				1339
				1340	tsdelta = ktime_to_timespec(hr_delta);
				1341	delta = timespec_to_jiffies(&tsdelta);
				1342
				1343	/*
				1344	* Limit the delta to the max value, which is checked in
				1345	* tick_nohz_stop_sched_tick():
				1346	*/
				1347	if (delta > NEXT_TIMER_MAX_DELTA)
				1348	delta = NEXT_TIMER_MAX_DELTA;
				1349
				1350	/*
				1351	* Take rounding errors in to account and make sure, that it
				1352	* expires in the next tick. Otherwise we go into an endless
				1353	* ping pong due to tick_nohz_stop_sched_tick() retriggering
				1354	* the timer softirq
				1355	*/
				1356	if (delta < 1)
				1357	delta = 1;
				1358	now += delta;
				1359	if (time_before(now, expires))
				1360	return now;
				1361	return expires;
				1362	}
				1363
				1364	/**
				1365	* get_next_timer_interrupt - return the jiffy of the next pending timer
				1366	* @now: current time (in jiffies)
				1367	*/
				1368	unsigned long get_next_timer_interrupt(unsigned long now)
				1369	{
				1370	struct tvec_base *base = __this_cpu_read(tvec_bases);
				1371	unsigned long expires;
				1372
				1373	/*
				1374	* Pretend that there is no timer pending if the cpu is offline.
				1375	* Possible pending timers will be migrated later to an active cpu.
				1376	*/
				1377	if (cpu_is_offline(smp_processor_id()))
				1378	return now + NEXT_TIMER_MAX_DELTA;
				1379
				1380	#ifdef CONFIG_PREEMPT_RT_FULL
				1381	/*
				1382	* On PREEMPT_RT we cannot sleep here. If the trylock does not
				1383	* succeed then we return the worst-case 'expires in 1 tick'
				1384	* value. We use the rt functions here directly to avoid a
				1385	* migrate_disable() call.
				1386	*/
				1387	if (spin_do_trylock(&base->lock)) {
				1388	if (time_before_eq(base->next_timer, base->timer_jiffies))
				1389	base->next_timer = __next_timer_interrupt(base);
				1390	expires = base->next_timer;
				1391	rt_spin_unlock_after_trylock_in_irq(&base->lock);
				1392	} else {
				1393	expires = now + 1;
				1394	}
				1395	#else
				1396	spin_lock(&base->lock);
				1397	if (time_before_eq(base->next_timer, base->timer_jiffies))
				1398	base->next_timer = __next_timer_interrupt(base);
				1399	expires = base->next_timer;
				1400	spin_unlock(&base->lock);
				1401
				1402	if (time_before_eq(expires, now))
				1403	return now;
				1404	#endif
				1405	return cmp_next_hrtimer_event(now, expires);
				1406	}
				1407	#endif
				1408
				1409	/*
				1410	* Called from the timer interrupt handler to charge one tick to the current
				1411	* process. user_tick is 1 if the tick is user time, 0 for system.
				1412	*/
				1413	void update_process_times(int user_tick)
				1414	{
				1415	struct task_struct *p = current;
				1416	int cpu = smp_processor_id();
				1417
				1418	/* Note: this timer irq context must be accounted for as well. */
				1419	account_process_tick(p, user_tick);
				1420	scheduler_tick();
				1421	run_local_timers();
				1422	rcu_check_callbacks(cpu, user_tick);
				1423	#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
				1424	if (in_irq())
				1425	irq_work_run();
				1426	#endif
				1427	run_posix_cpu_timers(p);
				1428	}
				1429
				1430	/*
				1431	* This function runs timers and the timer-tq in bottom half context.
				1432	*/
				1433	static void run_timer_softirq(struct softirq_action *h)
				1434	{
				1435	struct tvec_base *base = __this_cpu_read(tvec_bases);
				1436
				1437	#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
				1438	irq_work_run();
				1439	#endif
				1440
				1441	printk_tick();
				1442	hrtimer_run_pending();
				1443
				1444	if (time_after_eq(jiffies, base->timer_jiffies))
				1445	__run_timers(base);
				1446	}
				1447
				1448	/*
				1449	* Called by the local, per-CPU timer interrupt on SMP.
				1450	*/
				1451	void run_local_timers(void)
				1452	{
				1453	hrtimer_run_queues();
				1454	raise_softirq(TIMER_SOFTIRQ);
				1455	}
				1456
				1457	#ifdef __ARCH_WANT_SYS_ALARM
				1458
				1459	/*
				1460	* For backwards compatibility? This can be done in libc so Alpha
				1461	* and all newer ports shouldn't need it.
				1462	*/
				1463	SYSCALL_DEFINE1(alarm, unsigned int, seconds)
				1464	{
				1465	return alarm_setitimer(seconds);
				1466	}
				1467
				1468	#endif
				1469
				1470	#ifndef __alpha__
				1471
				1472	/*
				1473	* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
				1474	* should be moved into arch/i386 instead?
				1475	*/
				1476
				1477	/**
				1478	* sys_getpid - return the thread group id of the current process
				1479	*
				1480	* Note, despite the name, this returns the tgid not the pid. The tgid and
				1481	* the pid are identical unless CLONE_THREAD was specified on clone() in
				1482	* which case the tgid is the same in all threads of the same group.
				1483	*
				1484	* This is SMP safe as current->tgid does not change.
				1485	*/
				1486	SYSCALL_DEFINE0(getpid)
				1487	{
				1488	return task_tgid_vnr(current);
				1489	}
				1490
				1491	/*
				1492	* Accessing ->real_parent is not SMP-safe, it could
				1493	* change from under us. However, we can use a stale
				1494	* value of ->real_parent under rcu_read_lock(), see
				1495	* release_task()->call_rcu(delayed_put_task_struct).
				1496	*/
				1497	SYSCALL_DEFINE0(getppid)
				1498	{
				1499	int pid;
				1500
				1501	rcu_read_lock();
				1502	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
				1503	rcu_read_unlock();
				1504
				1505	return pid;
				1506	}
				1507
				1508	SYSCALL_DEFINE0(getuid)
				1509	{
				1510	/* Only we change this so SMP safe */
				1511	return current_uid();
				1512	}
				1513
				1514	SYSCALL_DEFINE0(geteuid)
				1515	{
				1516	/* Only we change this so SMP safe */
				1517	return current_euid();
				1518	}
				1519
				1520	SYSCALL_DEFINE0(getgid)
				1521	{
				1522	/* Only we change this so SMP safe */
				1523	return current_gid();
				1524	}
				1525
				1526	SYSCALL_DEFINE0(getegid)
				1527	{
				1528	/* Only we change this so SMP safe */
				1529	return current_egid();
				1530	}
				1531
				1532	#endif
				1533
				1534	static void process_timeout(unsigned long __data)
				1535	{
				1536	wake_up_process((struct task_struct *)__data);
				1537	}
				1538
				1539	/**
				1540	* schedule_timeout - sleep until timeout
				1541	* @timeout: timeout value in jiffies
				1542	*
				1543	* Make the current task sleep until @timeout jiffies have
				1544	* elapsed. The routine will return immediately unless
				1545	* the current task state has been set (see set_current_state()).
				1546	*
				1547	* You can set the task state as follows -
				1548	*
				1549	* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
				1550	* pass before the routine returns. The routine will return 0
				1551	*
				1552	* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
				1553	* delivered to the current task. In this case the remaining time
				1554	* in jiffies will be returned, or 0 if the timer expired in time
				1555	*
				1556	* The current task state is guaranteed to be TASK_RUNNING when this
				1557	* routine returns.
				1558	*
				1559	* Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
				1560	* the CPU away without a bound on the timeout. In this case the return
				1561	* value will be %MAX_SCHEDULE_TIMEOUT.
				1562	*
				1563	* In all cases the return value is guaranteed to be non-negative.
				1564	*/
				1565	signed long __sched schedule_timeout(signed long timeout)
				1566	{
				1567	struct timer_list timer;
				1568	unsigned long expire;
				1569
				1570	switch (timeout)
				1571	{
				1572	case MAX_SCHEDULE_TIMEOUT:
				1573	/*
				1574	* These two special cases are useful to be comfortable
				1575	* in the caller. Nothing more. We could take
				1576	* MAX_SCHEDULE_TIMEOUT from one of the negative value
				1577	* but I' d like to return a valid offset (>=0) to allow
				1578	* the caller to do everything it want with the retval.
				1579	*/
				1580	schedule();
				1581	goto out;
				1582	default:
				1583	/*
				1584	* Another bit of PARANOID. Note that the retval will be
				1585	* 0 since no piece of kernel is supposed to do a check
				1586	* for a negative retval of schedule_timeout() (since it
				1587	* should never happens anyway). You just have the printk()
				1588	* that will tell you if something is gone wrong and where.
				1589	*/
				1590	if (timeout < 0) {
				1591	printk(KERN_ERR "schedule_timeout: wrong timeout "
				1592	"value %lx\n", timeout);
				1593	dump_stack();
				1594	current->state = TASK_RUNNING;
				1595	goto out;
				1596	}
				1597	}
				1598
				1599	expire = timeout + jiffies;
				1600
				1601	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
				1602	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
				1603	schedule();
				1604	del_singleshot_timer_sync(&timer);
				1605
				1606	/* Remove the timer from the object tracker */
				1607	destroy_timer_on_stack(&timer);
				1608
				1609	timeout = expire - jiffies;
				1610
				1611	out:
				1612	return timeout < 0 ? 0 : timeout;
				1613	}
				1614	EXPORT_SYMBOL(schedule_timeout);
				1615
				1616	/*
				1617	* We can use __set_current_state() here because schedule_timeout() calls
				1618	* schedule() unconditionally.
				1619	*/
				1620	signed long __sched schedule_timeout_interruptible(signed long timeout)
				1621	{
				1622	__set_current_state(TASK_INTERRUPTIBLE);
				1623	return schedule_timeout(timeout);
				1624	}
				1625	EXPORT_SYMBOL(schedule_timeout_interruptible);
				1626
				1627	signed long __sched schedule_timeout_killable(signed long timeout)
				1628	{
				1629	__set_current_state(TASK_KILLABLE);
				1630	return schedule_timeout(timeout);
				1631	}
				1632	EXPORT_SYMBOL(schedule_timeout_killable);
				1633
				1634	signed long __sched schedule_timeout_uninterruptible(signed long timeout)
				1635	{
				1636	__set_current_state(TASK_UNINTERRUPTIBLE);
				1637	return schedule_timeout(timeout);
				1638	}
				1639	EXPORT_SYMBOL(schedule_timeout_uninterruptible);
				1640
				1641	/* Thread ID - the internal kernel "pid" */
				1642	SYSCALL_DEFINE0(gettid)
				1643	{
				1644	return task_pid_vnr(current);
				1645	}
				1646
				1647	/**
				1648	* do_sysinfo - fill in sysinfo struct
				1649	* @info: pointer to buffer to fill
				1650	*/
				1651	int do_sysinfo(struct sysinfo *info)
				1652	{
				1653	unsigned long mem_total, sav_total;
				1654	unsigned int mem_unit, bitcount;
				1655	struct timespec tp;
				1656
				1657	memset(info, 0, sizeof(struct sysinfo));
				1658
				1659	ktime_get_ts(&tp);
				1660	monotonic_to_bootbased(&tp);
				1661	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
				1662
				1663	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
				1664
				1665	info->procs = nr_threads;
				1666
				1667	si_meminfo(info);
				1668	si_swapinfo(info);
				1669
				1670	/*
				1671	* If the sum of all the available memory (i.e. ram + swap)
				1672	* is less than can be stored in a 32 bit unsigned long then
				1673	* we can be binary compatible with 2.2.x kernels. If not,
				1674	* well, in that case 2.2.x was broken anyways...
				1675	*
				1676	* -Erik Andersen <andersee@debian.org>
				1677	*/
				1678
				1679	mem_total = info->totalram + info->totalswap;
				1680	if (mem_total < info->totalram \|\| mem_total < info->totalswap)
				1681	goto out;
				1682	bitcount = 0;
				1683	mem_unit = info->mem_unit;
				1684	while (mem_unit > 1) {
				1685	bitcount++;
				1686	mem_unit >>= 1;
				1687	sav_total = mem_total;
				1688	mem_total <<= 1;
				1689	if (mem_total < sav_total)
				1690	goto out;
				1691	}
				1692
				1693	/*
				1694	* If mem_total did not overflow, multiply all memory values by
				1695	* info->mem_unit and set it to 1. This leaves things compatible
				1696	* with 2.2.x, and also retains compatibility with earlier 2.4.x
				1697	* kernels...
				1698	*/
				1699
				1700	info->mem_unit = 1;
				1701	info->totalram <<= bitcount;
				1702	info->freeram <<= bitcount;
				1703	info->sharedram <<= bitcount;
				1704	info->bufferram <<= bitcount;
				1705	info->totalswap <<= bitcount;
				1706	info->freeswap <<= bitcount;
				1707	info->totalhigh <<= bitcount;
				1708	info->freehigh <<= bitcount;
				1709
				1710	out:
				1711	return 0;
				1712	}
				1713
				1714	SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
				1715	{
				1716	struct sysinfo val;
				1717
				1718	do_sysinfo(&val);
				1719
				1720	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
				1721	return -EFAULT;
				1722
				1723	return 0;
				1724	}
				1725
				1726	static int __cpuinit init_timers_cpu(int cpu)
				1727	{
				1728	int j;
				1729	struct tvec_base *base;
				1730	static char __cpuinitdata tvec_base_done[NR_CPUS];
				1731
				1732	if (!tvec_base_done[cpu]) {
				1733	static char boot_done;
				1734
				1735	if (boot_done) {
				1736	/*
				1737	* The APs use this path later in boot
				1738	*/
				1739	base = kmalloc_node(sizeof(*base),
				1740	GFP_KERNEL \| __GFP_ZERO,
				1741	cpu_to_node(cpu));
				1742	if (!base)
				1743	return -ENOMEM;
				1744
				1745	/* Make sure that tvec_base is 2 byte aligned */
				1746	if (tbase_get_deferrable(base)) {
				1747	WARN_ON(1);
				1748	kfree(base);
				1749	return -ENOMEM;
				1750	}
				1751	per_cpu(tvec_bases, cpu) = base;
				1752	} else {
				1753	/*
				1754	* This is for the boot CPU - we use compile-time
				1755	* static initialisation because per-cpu memory isn't
				1756	* ready yet and because the memory allocators are not
				1757	* initialised either.
				1758	*/
				1759	boot_done = 1;
				1760	base = &boot_tvec_bases;
				1761	}
				1762	spin_lock_init(&base->lock);
				1763	#ifdef CONFIG_PREEMPT_RT_FULL
				1764	init_waitqueue_head(&base->wait_for_running_timer);
				1765	#endif
				1766	tvec_base_done[cpu] = 1;
				1767	} else {
				1768	base = per_cpu(tvec_bases, cpu);
				1769	}
				1770
				1771	for (j = 0; j < TVN_SIZE; j++) {
				1772	INIT_LIST_HEAD(base->tv5.vec + j);
				1773	INIT_LIST_HEAD(base->tv4.vec + j);
				1774	INIT_LIST_HEAD(base->tv3.vec + j);
				1775	INIT_LIST_HEAD(base->tv2.vec + j);
				1776	}
				1777	for (j = 0; j < TVR_SIZE; j++)
				1778	INIT_LIST_HEAD(base->tv1.vec + j);
				1779
				1780	base->timer_jiffies = jiffies;
				1781	base->next_timer = base->timer_jiffies;
				1782	return 0;
				1783	}
				1784
				1785	#ifdef CONFIG_HOTPLUG_CPU
				1786	static void migrate_timer_list(struct tvec_base new_base, struct list_head head)
				1787	{
				1788	struct timer_list *timer;
				1789
				1790	while (!list_empty(head)) {
				1791	timer = list_first_entry(head, struct timer_list, entry);
				1792	detach_timer(timer, 0);
				1793	timer_set_base(timer, new_base);
				1794	if (time_before(timer->expires, new_base->next_timer) &&
				1795	!tbase_get_deferrable(timer->base))
				1796	new_base->next_timer = timer->expires;
				1797	internal_add_timer(new_base, timer);
				1798	}
				1799	}
				1800
				1801	static void __cpuinit migrate_timers(int cpu)
				1802	{
				1803	struct tvec_base *old_base;
				1804	struct tvec_base *new_base;
				1805	int i;
				1806
				1807	BUG_ON(cpu_online(cpu));
				1808	old_base = per_cpu(tvec_bases, cpu);
				1809	new_base = get_local_var(tvec_bases);
				1810	/*
				1811	* The caller is globally serialized and nobody else
				1812	* takes two locks at once, deadlock is not possible.
				1813	*/
				1814	spin_lock_irq(&new_base->lock);
				1815	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
				1816
				1817	BUG_ON(old_base->running_timer);
				1818
				1819	for (i = 0; i < TVR_SIZE; i++)
				1820	migrate_timer_list(new_base, old_base->tv1.vec + i);
				1821	for (i = 0; i < TVN_SIZE; i++) {
				1822	migrate_timer_list(new_base, old_base->tv2.vec + i);
				1823	migrate_timer_list(new_base, old_base->tv3.vec + i);
				1824	migrate_timer_list(new_base, old_base->tv4.vec + i);
				1825	migrate_timer_list(new_base, old_base->tv5.vec + i);
				1826	}
				1827
				1828	spin_unlock(&old_base->lock);
				1829	spin_unlock_irq(&new_base->lock);
				1830	put_local_var(tvec_bases);
				1831	}
				1832	#endif /* CONFIG_HOTPLUG_CPU */
				1833
				1834	static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				1835	unsigned long action, void *hcpu)
				1836	{
				1837	long cpu = (long)hcpu;
				1838	int err;
				1839
				1840	switch(action) {
				1841	case CPU_UP_PREPARE:
				1842	case CPU_UP_PREPARE_FROZEN:
				1843	err = init_timers_cpu(cpu);
				1844	if (err < 0)
				1845	return notifier_from_errno(err);
				1846	break;
				1847	#ifdef CONFIG_HOTPLUG_CPU
				1848	case CPU_DEAD:
				1849	case CPU_DEAD_FROZEN:
				1850	migrate_timers(cpu);
				1851	break;
				1852	#endif
				1853	default:
				1854	break;
				1855	}
				1856	return NOTIFY_OK;
				1857	}
				1858
				1859	static struct notifier_block __cpuinitdata timers_nb = {
				1860	.notifier_call = timer_cpu_notify,
				1861	};
				1862
				1863
				1864	void __init init_timers(void)
				1865	{
				1866	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				1867	(void *)(long)smp_processor_id());
				1868
				1869	init_timer_stats();
				1870
				1871	BUG_ON(err != NOTIFY_OK);
				1872	register_cpu_notifier(&timers_nb);
				1873	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
				1874	}
				1875
				/**
				 * msleep - sleep uninterruptibly for a number of milliseconds
				 * @msecs: requested sleep duration in milliseconds
				 *
				 * The request is converted to jiffies and padded by one jiffy.
				 * schedule_timeout_uninterruptible() is then re-issued with whatever
				 * it hands back until nothing remains.
				 */
				void msleep(unsigned int msecs)
				{
					unsigned long remaining = msecs_to_jiffies(msecs) + 1;

					while (remaining != 0) {
						remaining = schedule_timeout_uninterruptible(remaining);
					}
				}

				EXPORT_SYMBOL(msleep);
				1889
				1890	/**
				1891	* msleep_interruptible - sleep waiting for signals
				1892	* @msecs: Time in milliseconds to sleep for
				1893	*/
				1894	unsigned long msleep_interruptible(unsigned int msecs)
				1895	{
				1896	unsigned long timeout = msecs_to_jiffies(msecs) + 1;
				1897
				1898	while (timeout && !signal_pending(current))
				1899	timeout = schedule_timeout_interruptible(timeout);
				1900	return jiffies_to_msecs(timeout);
				1901	}
				1902
				1903	EXPORT_SYMBOL(msleep_interruptible);
				1904
				1905	static int __sched do_usleep_range(unsigned long min, unsigned long max)
				1906	{
				1907	ktime_t kmin;
				1908	unsigned long delta;
				1909
				1910	kmin = ktime_set(0, min * NSEC_PER_USEC);
				1911	delta = (max - min) * NSEC_PER_USEC;
				1912	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
				1913	}
				1914
				1915	/**
				1916	* usleep_range - Drop in replacement for udelay where wakeup is flexible
				1917	* @min: Minimum time in usecs to sleep
				1918	* @max: Maximum time in usecs to sleep
				1919	*/
				1920	void usleep_range(unsigned long min, unsigned long max)
				1921	{
				1922	__set_current_state(TASK_UNINTERRUPTIBLE);
				1923	do_usleep_range(min, max);
				1924	}
				1925	EXPORT_SYMBOL(usleep_range);