Blame - ap/os/linux/linux-3.4.x/kernel/hrtimer.c - R306

blob: 3463f46d49fea7c25c8870a55dbe7f884ef5f351 [file] [log] [blame]

yuezonghe	824eb0c	2024-06-27 02:32:26 -0700	[diff] [blame]	1	/*
				2	* linux/kernel/hrtimer.c
				3	*
				4	* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
				5	* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
				6	* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
				7	*
				8	* High-resolution kernel timers
				9	*
				10	* In contrast to the low-resolution timeout API implemented in
				11	* kernel/timer.c, hrtimers provide finer resolution and accuracy
				12	* depending on system configuration and capabilities.
				13	*
				14	* These timers are currently used for:
				15	* - itimers
				16	* - POSIX timers
				17	* - nanosleep
				18	* - precise in-kernel timing
				19	*
				20	* Started by: Thomas Gleixner and Ingo Molnar
				21	*
				22	* Credits:
				23	* based on kernel/timer.c
				24	*
				25	* Help, testing, suggestions, bugfixes, improvements were
				26	* provided by:
				27	*
				28	* George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
				29	* et. al.
				30	*
				31	* For licencing details see kernel-base/COPYING
				32	*/
				33
				34	#include <linux/cpu.h>
				35	#include <linux/export.h>
				36	#include <linux/percpu.h>
				37	#include <linux/hrtimer.h>
				38	#include <linux/notifier.h>
				39	#include <linux/syscalls.h>
				40	#include <linux/kallsyms.h>
				41	#include <linux/interrupt.h>
				42	#include <linux/tick.h>
				43	#include <linux/seq_file.h>
				44	#include <linux/err.h>
				45	#include <linux/debugobjects.h>
				46	#include <linux/sched.h>
				47	#include <linux/timer.h>
				48
				49	#include <asm/uaccess.h>
				50
				51	#include <trace/events/timer.h>
				52	#include <trace/events/hist.h>
				53
				54	/*
				55	* The timer bases:
				56	*
				57	* There are more clockids then hrtimer bases. Thus, we index
				58	* into the timer bases by the hrtimer_base_type enum. When trying
				59	* to reach a base using a clockid, hrtimer_clockid_to_base()
				60	* is used to convert from clockid to the proper hrtimer_base_type.
				61	*/
				62	DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
				63	{
				64
				65	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
				66	.clock_base =
				67	{
				68	{
				69	.index = HRTIMER_BASE_MONOTONIC,
				70	.clockid = CLOCK_MONOTONIC,
				71	.get_time = &ktime_get,
				72	.resolution = KTIME_LOW_RES,
				73	},
				74	{
				75	.index = HRTIMER_BASE_REALTIME,
				76	.clockid = CLOCK_REALTIME,
				77	.get_time = &ktime_get_real,
				78	.resolution = KTIME_LOW_RES,
				79	},
				80	{
				81	.index = HRTIMER_BASE_BOOTTIME,
				82	.clockid = CLOCK_BOOTTIME,
				83	.get_time = &ktime_get_boottime,
				84	.resolution = KTIME_LOW_RES,
				85	},
				86	}
				87	};
				88
				89	static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
				90	[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
				91	[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
				92	[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
				93	};
				94
				95	static inline int hrtimer_clockid_to_base(clockid_t clock_id)
				96	{
				97	return hrtimer_clock_to_base_table[clock_id];
				98	}
				99
				100
				101	/*
				102	* Get the coarse grained time at the softirq based on xtime and
				103	* wall_to_monotonic.
				104	*/
				105	static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
				106	{
				107	ktime_t xtim, mono, boot;
				108	struct timespec xts, tom, slp;
				109
				110	get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
				111
				112	xtim = timespec_to_ktime(xts);
				113	mono = ktime_add(xtim, timespec_to_ktime(tom));
				114	boot = ktime_add(mono, timespec_to_ktime(slp));
				115	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
				116	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
				117	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
				118	}
				119
				120	/*
				121	* Functions and macros which are different for UP/SMP systems are kept in a
				122	* single place
				123	*/
				124	#ifdef CONFIG_SMP
				125
				126	/*
				127	* We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
				128	* means that all timers which are tied to this base via timer->base are
				129	* locked, and the base itself is locked too.
				130	*
				131	* So __run_timers/migrate_timers can safely modify all timers which could
				132	* be found on the lists/queues.
				133	*
				134	* When the timer's base is locked, and the timer removed from list, it is
				135	* possible to set timer->base = NULL and drop the lock: the timer remains
				136	* locked.
				137	*/
				138	static
				139	struct hrtimer_clock_base lock_hrtimer_base(const struct hrtimer timer,
				140	unsigned long *flags)
				141	{
				142	struct hrtimer_clock_base *base;
				143
				144	for (;;) {
				145	base = timer->base;
				146	if (likely(base != NULL)) {
				147	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
				148	if (likely(base == timer->base))
				149	return base;
				150	/* The timer has migrated to another CPU: */
				151	raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
				152	}
				153	cpu_relax();
				154	}
				155	}
				156
				157
				158	/*
				159	* Get the preferred target CPU for NOHZ
				160	*/
				161	static int hrtimer_get_target(int this_cpu, int pinned)
				162	{
				163	#ifdef CONFIG_NO_HZ
				164	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
				165	return get_nohz_timer_target();
				166	#endif
				167	return this_cpu;
				168	}
				169
				170	/*
				171	* With HIGHRES=y we do not migrate the timer when it is expiring
				172	* before the next event on the target cpu because we cannot reprogram
				173	* the target cpu hardware and we would cause it to fire late.
				174	*
				175	* Called with cpu_base->lock of target cpu held.
				176	*/
				177	static int
				178	hrtimer_check_target(struct hrtimer timer, struct hrtimer_clock_base new_base)
				179	{
				180	#ifdef CONFIG_HIGH_RES_TIMERS
				181	ktime_t expires;
				182
				183	if (!new_base->cpu_base->hres_active)
				184	return 0;
				185
				186	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
				187	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
				188	#else
				189	return 0;
				190	#endif
				191	}
				192
				193	/*
				194	* Switch the timer base to the current CPU when possible.
				195	*/
				196	static inline struct hrtimer_clock_base *
				197	switch_hrtimer_base(struct hrtimer timer, struct hrtimer_clock_base base,
				198	int pinned)
				199	{
				200	struct hrtimer_clock_base *new_base;
				201	struct hrtimer_cpu_base *new_cpu_base;
				202	int this_cpu = smp_processor_id();
				203	int cpu = hrtimer_get_target(this_cpu, pinned);
				204	int basenum = base->index;
				205
				206	again:
				207	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
				208	new_base = &new_cpu_base->clock_base[basenum];
				209
				210	if (base != new_base) {
				211	/*
				212	* We are trying to move timer to new_base.
				213	* However we can't change timer's base while it is running,
				214	* so we keep it on the same CPU. No hassle vs. reprogramming
				215	* the event source in the high resolution case. The softirq
				216	* code will take care of this when the timer function has
				217	* completed. There is no conflict as we hold the lock until
				218	* the timer is enqueued.
				219	*/
				220	if (unlikely(hrtimer_callback_running(timer)))
				221	return base;
				222
				223	/* See the comment in lock_timer_base() */
				224	timer->base = NULL;
				225	raw_spin_unlock(&base->cpu_base->lock);
				226	raw_spin_lock(&new_base->cpu_base->lock);
				227
				228	if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
				229	cpu = this_cpu;
				230	raw_spin_unlock(&new_base->cpu_base->lock);
				231	raw_spin_lock(&base->cpu_base->lock);
				232	timer->base = base;
				233	goto again;
				234	}
				235	timer->base = new_base;
				236	} else {
				237	if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
				238	cpu = this_cpu;
				239	goto again;
				240	}
				241	}
				242	return new_base;
				243	}
				244
				245	#else /* CONFIG_SMP */
				246
				247	static inline struct hrtimer_clock_base *
				248	lock_hrtimer_base(const struct hrtimer timer, unsigned long flags)
				249	{
				250	struct hrtimer_clock_base *base = timer->base;
				251
				252	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
				253
				254	return base;
				255	}
				256
				257	# define switch_hrtimer_base(t, b, p) (b)
				258
				259	#endif /* !CONFIG_SMP */
				260
				261	/*
				262	* Functions for the union type storage format of ktime_t which are
				263	* too large for inlining:
				264	*/
				265	#if BITS_PER_LONG < 64
				266	# ifndef CONFIG_KTIME_SCALAR
				267	/**
				268	* ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable
				269	* @kt: addend
				270	* @nsec: the scalar nsec value to add
				271	*
				272	* Returns the sum of kt and nsec in ktime_t format
				273	*/
				274	ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
				275	{
				276	ktime_t tmp;
				277
				278	if (likely(nsec < NSEC_PER_SEC)) {
				279	tmp.tv64 = nsec;
				280	} else {
				281	unsigned long rem = do_div(nsec, NSEC_PER_SEC);
				282
				283	tmp = ktime_set((long)nsec, rem);
				284	}
				285
				286	return ktime_add(kt, tmp);
				287	}
				288
				289	EXPORT_SYMBOL_GPL(ktime_add_ns);
				290
				291	/**
				292	* ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
				293	* @kt: minuend
				294	* @nsec: the scalar nsec value to subtract
				295	*
				296	* Returns the subtraction of @nsec from @kt in ktime_t format
				297	*/
				298	ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
				299	{
				300	ktime_t tmp;
				301
				302	if (likely(nsec < NSEC_PER_SEC)) {
				303	tmp.tv64 = nsec;
				304	} else {
				305	unsigned long rem = do_div(nsec, NSEC_PER_SEC);
				306
				307	/* Make sure nsec fits into long */
				308	if (unlikely(nsec > KTIME_SEC_MAX))
				309	return (ktime_t){ .tv64 = KTIME_MAX };
				310
				311	tmp = ktime_set((long)nsec, rem);
				312	}
				313
				314	return ktime_sub(kt, tmp);
				315	}
				316
				317	EXPORT_SYMBOL_GPL(ktime_sub_ns);
				318	# endif /* !CONFIG_KTIME_SCALAR */
				319
				320	/*
				321	* Divide a ktime value by a nanosecond value
				322	*/
				323	u64 ktime_divns(const ktime_t kt, s64 div)
				324	{
				325	u64 dclc;
				326	int sft = 0;
				327
				328	dclc = ktime_to_ns(kt);
				329	/* Make sure the divisor is less than 2^32: */
				330	while (div >> 32) {
				331	sft++;
				332	div >>= 1;
				333	}
				334	dclc >>= sft;
				335	do_div(dclc, (unsigned long) div);
				336
				337	return dclc;
				338	}
				339	#endif /* BITS_PER_LONG >= 64 */
				340
				341	/*
				342	* Add two ktime values and do a safety check for overflow:
				343	*/
				344	ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
				345	{
				346	ktime_t res = ktime_add(lhs, rhs);
				347
				348	/*
				349	* We use KTIME_SEC_MAX here, the maximum timeout which we can
				350	* return to user space in a timespec:
				351	*/
				352	if (res.tv64 < 0 \|\| res.tv64 < lhs.tv64 \|\| res.tv64 < rhs.tv64)
				353	res = ktime_set(KTIME_SEC_MAX, 0);
				354
				355	return res;
				356	}
				357
				358	EXPORT_SYMBOL_GPL(ktime_add_safe);
				359
				360	#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
				361
				362	static struct debug_obj_descr hrtimer_debug_descr;
				363
				364	static void hrtimer_debug_hint(void addr)
				365	{
				366	return ((struct hrtimer *) addr)->function;
				367	}
				368
				369	/*
				370	* fixup_init is called when:
				371	* - an active object is initialized
				372	*/
				373	static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
				374	{
				375	struct hrtimer *timer = addr;
				376
				377	switch (state) {
				378	case ODEBUG_STATE_ACTIVE:
				379	hrtimer_cancel(timer);
				380	debug_object_init(timer, &hrtimer_debug_descr);
				381	return 1;
				382	default:
				383	return 0;
				384	}
				385	}
				386
				387	/*
				388	* fixup_activate is called when:
				389	* - an active object is activated
				390	* - an unknown object is activated (might be a statically initialized object)
				391	*/
				392	static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
				393	{
				394	switch (state) {
				395
				396	case ODEBUG_STATE_NOTAVAILABLE:
				397	WARN_ON_ONCE(1);
				398	return 0;
				399
				400	case ODEBUG_STATE_ACTIVE:
				401	WARN_ON(1);
				402
				403	default:
				404	return 0;
				405	}
				406	}
				407
				408	/*
				409	* fixup_free is called when:
				410	* - an active object is freed
				411	*/
				412	static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
				413	{
				414	struct hrtimer *timer = addr;
				415
				416	switch (state) {
				417	case ODEBUG_STATE_ACTIVE:
				418	hrtimer_cancel(timer);
				419	debug_object_free(timer, &hrtimer_debug_descr);
				420	return 1;
				421	default:
				422	return 0;
				423	}
				424	}
				425
				426	static struct debug_obj_descr hrtimer_debug_descr = {
				427	.name = "hrtimer",
				428	.debug_hint = hrtimer_debug_hint,
				429	.fixup_init = hrtimer_fixup_init,
				430	.fixup_activate = hrtimer_fixup_activate,
				431	.fixup_free = hrtimer_fixup_free,
				432	};
				433
				434	static inline void debug_hrtimer_init(struct hrtimer *timer)
				435	{
				436	debug_object_init(timer, &hrtimer_debug_descr);
				437	}
				438
				439	static inline void debug_hrtimer_activate(struct hrtimer *timer)
				440	{
				441	debug_object_activate(timer, &hrtimer_debug_descr);
				442	}
				443
				444	static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
				445	{
				446	debug_object_deactivate(timer, &hrtimer_debug_descr);
				447	}
				448
				449	static inline void debug_hrtimer_free(struct hrtimer *timer)
				450	{
				451	debug_object_free(timer, &hrtimer_debug_descr);
				452	}
				453
				454	static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
				455	enum hrtimer_mode mode);
				456
				457	void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
				458	enum hrtimer_mode mode)
				459	{
				460	debug_object_init_on_stack(timer, &hrtimer_debug_descr);
				461	__hrtimer_init(timer, clock_id, mode);
				462	}
				463	EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
				464
				465	void destroy_hrtimer_on_stack(struct hrtimer *timer)
				466	{
				467	debug_object_free(timer, &hrtimer_debug_descr);
				468	}
				469
				470	#else
				471	static inline void debug_hrtimer_init(struct hrtimer *timer) { }
				472	static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
				473	static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
				474	#endif
				475
				476	static inline void
				477	debug_init(struct hrtimer *timer, clockid_t clockid,
				478	enum hrtimer_mode mode)
				479	{
				480	debug_hrtimer_init(timer);
				481	trace_hrtimer_init(timer, clockid, mode);
				482	}
				483
				484	static inline void debug_activate(struct hrtimer *timer)
				485	{
				486	debug_hrtimer_activate(timer);
				487	trace_hrtimer_start(timer);
				488	}
				489
				490	static inline void debug_deactivate(struct hrtimer *timer)
				491	{
				492	debug_hrtimer_deactivate(timer);
				493	trace_hrtimer_cancel(timer);
				494	}
				495
				496	/* High resolution timer related functions */
				497	#ifdef CONFIG_HIGH_RES_TIMERS
				498
				499	/*
				500	* High resolution timer enabled ?
				501	*/
				502	static int hrtimer_hres_enabled __read_mostly = 1;
				503
				504	/*
				505	* Enable / Disable high resolution mode
				506	*/
				507	static int __init setup_hrtimer_hres(char *str)
				508	{
				509	if (!strcmp(str, "off"))
				510	hrtimer_hres_enabled = 0;
				511	else if (!strcmp(str, "on"))
				512	hrtimer_hres_enabled = 1;
				513	else
				514	return 0;
				515	return 1;
				516	}
				517
				518	__setup("highres=", setup_hrtimer_hres);
				519
				520	/*
				521	* hrtimer_high_res_enabled - query, if the highres mode is enabled
				522	*/
				523	static inline int hrtimer_is_hres_enabled(void)
				524	{
				525	return hrtimer_hres_enabled;
				526	}
				527
				528	/*
				529	* Is the high resolution mode active ?
				530	*/
				531	static inline int hrtimer_hres_active(void)
				532	{
				533	return __this_cpu_read(hrtimer_bases.hres_active);
				534	}
				535
				536	/*
				537	* Reprogram the event source with checking both queues for the
				538	* next event
				539	* Called with interrupts disabled and base->lock held
				540	*/
				541	static void
				542	hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
				543	{
				544	int i;
				545	struct hrtimer_clock_base *base = cpu_base->clock_base;
				546	ktime_t expires, expires_next;
				547
				548	expires_next.tv64 = KTIME_MAX;
				549
				550	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
				551	struct hrtimer *timer;
				552	struct timerqueue_node *next;
				553
				554	next = timerqueue_getnext(&base->active);
				555	if (!next)
				556	continue;
				557	timer = container_of(next, struct hrtimer, node);
				558
				559	expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
				560	/*
				561	* clock_was_set() has changed base->offset so the
				562	* result might be negative. Fix it up to prevent a
				563	* false positive in clockevents_program_event()
				564	*/
				565	if (expires.tv64 < 0)
				566	expires.tv64 = 0;
				567	if (expires.tv64 < expires_next.tv64)
				568	expires_next = expires;
				569	}
				570
				571	if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
				572	return;
				573
				574	cpu_base->expires_next.tv64 = expires_next.tv64;
				575
				576	/*
				577	* If a hang was detected in the last timer interrupt then we
				578	* leave the hang delay active in the hardware. We want the
				579	* system to make progress. That also prevents the following
				580	* scenario:
				581	* T1 expires 50ms from now
				582	* T2 expires 5s from now
				583	*
				584	* T1 is removed, so this code is called and would reprogram
				585	* the hardware to 5s from now. Any hrtimer_start after that
				586	* will not reprogram the hardware due to hang_detected being
				587	* set. So we'd effectivly block all timers until the T2 event
				588	* fires.
				589	*/
				590	if (cpu_base->hang_detected)
				591	return;
				592
				593	if (cpu_base->expires_next.tv64 != KTIME_MAX)
				594	tick_program_event(cpu_base->expires_next, 1);
				595	}
				596
				597	/*
				598	* Shared reprogramming for clock_realtime and clock_monotonic
				599	*
				600	* When a timer is enqueued and expires earlier than the already enqueued
				601	* timers, we have to check, whether it expires earlier than the timer for
				602	* which the clock event device was armed.
				603	*
				604	* Called with interrupts disabled and base->cpu_base.lock held
				605	*/
				606	static int hrtimer_reprogram(struct hrtimer *timer,
				607	struct hrtimer_clock_base *base)
				608	{
				609	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
				610	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
				611	int res;
				612
				613	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
				614
				615	/*
				616	* When the callback is running, we do not reprogram the clock event
				617	* device. The timer callback is either running on a different CPU or
				618	* the callback is executed in the hrtimer_interrupt context. The
				619	* reprogramming is handled at the end of the hrtimer_interrupt.
				620	*/
				621	if (hrtimer_callback_running(timer))
				622	return 0;
				623
				624	/*
				625	* CLOCK_REALTIME timer might be requested with an absolute
				626	* expiry time which is less than base->offset. Nothing wrong
				627	* about that, just avoid to call into the tick code, which
				628	* has now objections against negative expiry values.
				629	*/
				630	if (expires.tv64 < 0)
				631	return -ETIME;
				632
				633	if (expires.tv64 >= cpu_base->expires_next.tv64)
				634	return 0;
				635
				636	/*
				637	* If a hang was detected in the last timer interrupt then we
				638	* do not schedule a timer which is earlier than the expiry
				639	* which we enforced in the hang detection. We want the system
				640	* to make progress.
				641	*/
				642	if (cpu_base->hang_detected)
				643	return 0;
				644
				645	/*
				646	* Clockevents returns -ETIME, when the event was in the past.
				647	*/
				648	res = tick_program_event(expires, 0);
				649	if (!IS_ERR_VALUE(res))
				650	cpu_base->expires_next = expires;
				651	return res;
				652	}
				653
				654	static void __run_hrtimer(struct hrtimer timer, ktime_t now);
				655	static int hrtimer_rt_defer(struct hrtimer *timer);
				656
				657	/*
				658	* Initialize the high resolution related parts of cpu_base
				659	*/
				660	static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
				661	{
				662	base->expires_next.tv64 = KTIME_MAX;
				663	base->hres_active = 0;
				664	}
				665
				666	/*
				667	* When High resolution timers are active, try to reprogram. Note, that in case
				668	* the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
				669	* check happens. The timer gets enqueued into the rbtree. The reprogramming
				670	* and expiry check is done in the hrtimer_interrupt or in the softirq.
				671	*/
				672	static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
				673	struct hrtimer_clock_base *base)
				674	{
				675	return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
				676	}
				677
				678	static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
				679	{
				680	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
				681	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
				682
				683	return ktime_get_update_offsets(offs_real, offs_boot);
				684	}
				685
				686	/*
				687	* Retrigger next event is called after clock was set
				688	*
				689	* Called with interrupts disabled via on_each_cpu()
				690	*/
				691	static void retrigger_next_event(void *arg)
				692	{
				693	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
				694
				695	if (!hrtimer_hres_active())
				696	return;
				697
				698	raw_spin_lock(&base->lock);
				699	hrtimer_update_base(base);
				700	hrtimer_force_reprogram(base, 0);
				701	raw_spin_unlock(&base->lock);
				702	}
				703
				704	/*
				705	* Switch to high resolution mode
				706	*/
				707	static int hrtimer_switch_to_hres(void)
				708	{
				709	int i, cpu = smp_processor_id();
				710	struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
				711	unsigned long flags;
				712
				713	if (base->hres_active)
				714	return 1;
				715
				716	local_irq_save(flags);
				717
				718	if (tick_init_highres()) {
				719	local_irq_restore(flags);
				720	printk(KERN_WARNING "Could not switch to high resolution "
				721	"mode on CPU %d\n", cpu);
				722	return 0;
				723	}
				724	base->hres_active = 1;
				725	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
				726	base->clock_base[i].resolution = KTIME_HIGH_RES;
				727
				728	tick_setup_sched_timer();
				729	/* "Retrigger" the interrupt to get things going */
				730	retrigger_next_event(NULL);
				731	local_irq_restore(flags);
				732	return 1;
				733	}
				734
				735	static void clock_was_set_work(struct work_struct *work)
				736	{
				737	clock_was_set();
				738	}
				739
				740	static DECLARE_WORK(hrtimer_work, clock_was_set_work);
				741
				742	/*
				743	* Called from timekeeping and resume code to reprogramm the hrtimer
				744	* interrupt device on all cpus.
				745	*/
				746	void clock_was_set_delayed(void)
				747	{
				748	schedule_work(&hrtimer_work);
				749	}
				750
				751	#else
				752
				753	static inline int hrtimer_hres_active(void) { return 0; }
				754	static inline int hrtimer_is_hres_enabled(void) { return 0; }
				755	static inline int hrtimer_switch_to_hres(void) { return 0; }
				756	static inline void
				757	hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
				758	static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
				759	struct hrtimer_clock_base *base)
				760	{
				761	return 0;
				762	}
				763	static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
				764	static inline void retrigger_next_event(void *arg) { }
				765	static inline int hrtimer_reprogram(struct hrtimer *timer,
				766	struct hrtimer_clock_base *base)
				767	{
				768	return 0;
				769	}
				770
				771	#endif /* CONFIG_HIGH_RES_TIMERS */
				772
				773	/*
				774	* Clock realtime was set
				775	*
				776	* Change the offset of the realtime clock vs. the monotonic
				777	* clock.
				778	*
				779	* We might have to reprogram the high resolution timer interrupt. On
				780	* SMP we call the architecture specific code to retrigger _all_ high
				781	* resolution timer interrupts. On UP we just disable interrupts and
				782	* call the high resolution interrupt code.
				783	*/
				784	void clock_was_set(void)
				785	{
				786	#ifdef CONFIG_HIGH_RES_TIMERS
				787	/* Retrigger the CPU local events everywhere */
				788	on_each_cpu(retrigger_next_event, NULL, 1);
				789	#endif
				790	timerfd_clock_was_set();
				791	}
				792
				793	/*
				794	* During resume we might have to reprogram the high resolution timer
				795	* interrupt (on the local CPU):
				796	*/
				797	void hrtimers_resume(void)
				798	{
				799	WARN_ONCE(!irqs_disabled(),
				800	KERN_INFO "hrtimers_resume() called with IRQs enabled!");
				801
				802	/* Retrigger on the local CPU */
				803	retrigger_next_event(NULL);
				804	/* And schedule a retrigger for all others */
				805	clock_was_set_delayed();
				806	}
				807
				808	static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
				809	{
				810	#ifdef CONFIG_TIMER_STATS
				811	if (timer->start_site)
				812	return;
				813	timer->start_site = __builtin_return_address(0);
				814	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
				815	timer->start_pid = current->pid;
				816	#endif
				817	}
				818
				819	static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
				820	{
				821	#ifdef CONFIG_TIMER_STATS
				822	timer->start_site = NULL;
				823	#endif
				824	}
				825
				826	static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
				827	{
				828	#ifdef CONFIG_TIMER_STATS
				829	if (likely(!timer_stats_active))
				830	return;
				831	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
				832	timer->function, timer->start_comm, 0);
				833	#endif
				834	}
				835
				836	/*
				837	* Counterpart to lock_hrtimer_base above:
				838	*/
				839	static inline
				840	void unlock_hrtimer_base(const struct hrtimer timer, unsigned long flags)
				841	{
				842	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
				843	}
				844
				845	/**
				846	* hrtimer_forward - forward the timer expiry
				847	* @timer: hrtimer to forward
				848	* @now: forward past this time
				849	* @interval: the interval to forward
				850	*
				851	* Forward the timer expiry so it will expire in the future.
				852	* Returns the number of overruns.
				853	*/
				854	u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
				855	{
				856	u64 orun = 1;
				857	ktime_t delta;
				858
				859	delta = ktime_sub(now, hrtimer_get_expires(timer));
				860
				861	if (delta.tv64 < 0)
				862	return 0;
				863
				864	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
				865	return 0;
				866
				867	if (interval.tv64 < timer->base->resolution.tv64)
				868	interval.tv64 = timer->base->resolution.tv64;
				869
				870	if (unlikely(delta.tv64 >= interval.tv64)) {
				871	s64 incr = ktime_to_ns(interval);
				872
				873	orun = ktime_divns(delta, incr);
				874	hrtimer_add_expires_ns(timer, incr * orun);
				875	if (hrtimer_get_expires_tv64(timer) > now.tv64)
				876	return orun;
				877	/*
				878	* This (and the ktime_add() below) is the
				879	* correction for exact:
				880	*/
				881	orun++;
				882	}
				883	hrtimer_add_expires(timer, interval);
				884
				885	return orun;
				886	}
				887	EXPORT_SYMBOL_GPL(hrtimer_forward);
				888
				889	#ifdef CONFIG_PREEMPT_RT_BASE
				890	# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
				891
				892	/**
				893	* hrtimer_wait_for_timer - Wait for a running timer
				894	*
				895	* @timer: timer to wait for
				896	*
				897	* The function waits in case the timers callback function is
				898	* currently executed on the waitqueue of the timer base. The
				899	* waitqueue is woken up after the timer callback function has
				900	* finished execution.
				901	*/
				902	void hrtimer_wait_for_timer(const struct hrtimer *timer)
				903	{
				904	struct hrtimer_clock_base *base = timer->base;
				905
				906	if (base && base->cpu_base && !timer->irqsafe)
				907	wait_event(base->cpu_base->wait,
				908	!(timer->state & HRTIMER_STATE_CALLBACK));
				909	}
				910
				911	#else
				912	# define wake_up_timer_waiters(b) do { } while (0)
				913	#endif
				914
				915	/*
				916	* enqueue_hrtimer - internal function to (re)start a timer
				917	*
				918	* The timer is inserted in expiry order. Insertion into the
				919	* red black tree is O(log(n)). Must hold the base lock.
				920	*
				921	* Returns 1 when the new timer is the leftmost timer in the tree.
				922	*/
				923	static int enqueue_hrtimer(struct hrtimer *timer,
				924	struct hrtimer_clock_base *base)
				925	{
				926	debug_activate(timer);
				927
				928	timerqueue_add(&base->active, &timer->node);
				929	base->cpu_base->active_bases \|= 1 << base->index;
				930
				931	/*
				932	* HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
				933	* state of a possibly running callback.
				934	*/
				935	timer->state \|= HRTIMER_STATE_ENQUEUED;
				936
				937	return (&timer->node == base->active.next);
				938	}
				939
				940	/*
				941	* __remove_hrtimer - internal function to remove a timer
				942	*
				943	* Caller must hold the base lock.
				944	*
				945	* High resolution timer mode reprograms the clock event device when the
				946	* timer is the one which expires next. The caller can disable this by setting
				947	* reprogram to zero. This is useful, when the context does a reprogramming
				948	* anyway (e.g. timer interrupt)
				949	*/
				950	static void __remove_hrtimer(struct hrtimer *timer,
				951	struct hrtimer_clock_base *base,
				952	unsigned long newstate, int reprogram)
				953	{
				954	struct timerqueue_node *next_timer;
				955	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
				956	goto out;
				957
				958	if (unlikely(!list_empty(&timer->cb_entry))) {
				959	list_del_init(&timer->cb_entry);
				960	goto out;
				961	}
				962
				963	next_timer = timerqueue_getnext(&base->active);
				964	timerqueue_del(&base->active, &timer->node);
				965	if (&timer->node == next_timer) {
				966	#ifdef CONFIG_HIGH_RES_TIMERS
				967	/* Reprogram the clock event device. if enabled */
				968	if (reprogram && hrtimer_hres_active()) {
				969	ktime_t expires;
				970
				971	expires = ktime_sub(hrtimer_get_expires(timer),
				972	base->offset);
				973	if (base->cpu_base->expires_next.tv64 == expires.tv64)
				974	hrtimer_force_reprogram(base->cpu_base, 1);
				975	}
				976	#endif
				977	}
				978	if (!timerqueue_getnext(&base->active))
				979	base->cpu_base->active_bases &= ~(1 << base->index);
				980	out:
				981	timer->state = newstate;
				982	}
				983
				984	/*
				985	* remove hrtimer, called with base lock held
				986	*/
				987	static inline int
				988	remove_hrtimer(struct hrtimer timer, struct hrtimer_clock_base base)
				989	{
				990	if (hrtimer_is_queued(timer)) {
				991	unsigned long state;
				992	int reprogram;
				993
				994	/*
				995	* Remove the timer and force reprogramming when high
				996	* resolution mode is active and the timer is on the current
				997	* CPU. If we remove a timer on another CPU, reprogramming is
				998	* skipped. The interrupt event on this CPU is fired and
				999	* reprogramming happens in the interrupt handler. This is a
				1000	* rare case and less expensive than a smp call.
				1001	*/
				1002	debug_deactivate(timer);
				1003	timer_stats_hrtimer_clear_start_info(timer);
				1004	reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
				1005	/*
				1006	* We must preserve the CALLBACK state flag here,
				1007	* otherwise we could move the timer base in
				1008	* switch_hrtimer_base.
				1009	*/
				1010	state = timer->state & HRTIMER_STATE_CALLBACK;
				1011	__remove_hrtimer(timer, base, state, reprogram);
				1012	return 1;
				1013	}
				1014	return 0;
				1015	}
				1016
				1017	int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
				1018	unsigned long delta_ns, const enum hrtimer_mode mode,
				1019	int wakeup)
				1020	{
				1021	struct hrtimer_clock_base base, new_base;
				1022	unsigned long flags;
				1023	int ret, leftmost;
				1024
				1025	base = lock_hrtimer_base(timer, &flags);
				1026
				1027	/* Remove an active timer from the queue: */
				1028	ret = remove_hrtimer(timer, base);
				1029
				1030	if (mode & HRTIMER_MODE_REL) {
				1031	tim = ktime_add_safe(tim, base->get_time());
				1032	/*
				1033	* CONFIG_TIME_LOW_RES is a temporary way for architectures
				1034	* to signal that they simply return xtime in
				1035	* do_gettimeoffset(). In this case we want to round up by
				1036	* resolution when starting a relative timer, to avoid short
				1037	* timeouts. This will go away with the GTOD framework.
				1038	*/
				1039	#ifdef CONFIG_TIME_LOW_RES
				1040	tim = ktime_add_safe(tim, base->resolution);
				1041	#endif
				1042	}
				1043
				1044	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
				1045
				1046	/* Switch the timer base, if necessary: */
				1047	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
				1048
				1049	#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
				1050	{
				1051	ktime_t now = new_base->get_time();
				1052
				1053	if (ktime_to_ns(tim) < ktime_to_ns(now))
				1054	timer->praecox = now;
				1055	else
				1056	timer->praecox = ktime_set(0, 0);
				1057	}
				1058	#endif
				1059
				1060	timer_stats_hrtimer_set_start_info(timer);
				1061
				1062	leftmost = enqueue_hrtimer(timer, new_base);
				1063
				1064	/*
				1065	* Only allow reprogramming if the new base is on this CPU.
				1066	* (it might still be on another CPU if the timer was pending)
				1067	*
				1068	* XXX send_remote_softirq() ?
				1069	*/
				1070	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
				1071	&& hrtimer_enqueue_reprogram(timer, new_base)) {
				1072
				1073	if (wakeup
				1074	#ifdef CONFIG_PREEMPT_RT_BASE
				1075	/*
				1076	* Move softirq based timers away from the rbtree in
				1077	* case it expired already. Otherwise we would have a
				1078	* stale base->first entry until the softirq runs.
				1079	*/
				1080	&& hrtimer_rt_defer(timer)
				1081	#endif
				1082	) {
				1083	/*
				1084	* We need to drop cpu_base->lock to avoid a
				1085	* lock ordering issue vs. rq->lock.
				1086	*/
				1087	raw_spin_unlock(&new_base->cpu_base->lock);
				1088	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
				1089	local_irq_restore(flags);
				1090	return ret;
				1091	}
				1092
				1093	/*
				1094	* In case we failed to reprogram the timer (mostly
				1095	* because out current timer is already elapsed),
				1096	* remove it again and report a failure. This avoids
				1097	* stale base->first entries.
				1098	*/
				1099	debug_deactivate(timer);
				1100	__remove_hrtimer(timer, new_base,
				1101	timer->state & HRTIMER_STATE_CALLBACK, 0);
				1102	ret = -ETIME;
				1103	}
				1104
				1105	unlock_hrtimer_base(timer, &flags);
				1106
				1107	return ret;
				1108	}
				1109
				1110	/**
				1111	* hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
				1112	* @timer: the timer to be added
				1113	* @tim: expiry time
				1114	* @delta_ns: "slack" range for the timer
				1115	* @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
				1116	*
				1117	* Returns:
				1118	* 0 on success
				1119	* 1 when the timer was active
				1120	*/
				1121	int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
				1122	unsigned long delta_ns, const enum hrtimer_mode mode)
				1123	{
				1124	return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
				1125	}
				1126	EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
				1127
				1128	/**
				1129	* hrtimer_start - (re)start an hrtimer on the current CPU
				1130	* @timer: the timer to be added
				1131	* @tim: expiry time
				1132	* @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
				1133	*
				1134	* Returns:
				1135	* 0 on success
				1136	* 1 when the timer was active
				1137	*/
				1138	int
				1139	hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
				1140	{
				1141	return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
				1142	}
				1143	EXPORT_SYMBOL_GPL(hrtimer_start);
				1144
				1145
				1146	/**
				1147	* hrtimer_try_to_cancel - try to deactivate a timer
				1148	* @timer: hrtimer to stop
				1149	*
				1150	* Returns:
				1151	* 0 when the timer was not active
				1152	* 1 when the timer was active
				1153	* -1 when the timer is currently excuting the callback function and
				1154	* cannot be stopped
				1155	*/
				1156	int hrtimer_try_to_cancel(struct hrtimer *timer)
				1157	{
				1158	struct hrtimer_clock_base *base;
				1159	unsigned long flags;
				1160	int ret = -1;
				1161
				1162	base = lock_hrtimer_base(timer, &flags);
				1163
				1164	if (!hrtimer_callback_running(timer))
				1165	ret = remove_hrtimer(timer, base);
				1166
				1167	unlock_hrtimer_base(timer, &flags);
				1168
				1169	return ret;
				1170
				1171	}
				1172	EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
				1173
				1174	/**
				1175	* hrtimer_cancel - cancel a timer and wait for the handler to finish.
				1176	* @timer: the timer to be cancelled
				1177	*
				1178	* Returns:
				1179	* 0 when the timer was not active
				1180	* 1 when the timer was active
				1181	*/
				1182	int hrtimer_cancel(struct hrtimer *timer)
				1183	{
				1184	for (;;) {
				1185	int ret = hrtimer_try_to_cancel(timer);
				1186
				1187	if (ret >= 0)
				1188	return ret;
				1189	hrtimer_wait_for_timer(timer);
				1190	}
				1191	}
				1192	EXPORT_SYMBOL_GPL(hrtimer_cancel);
				1193
				1194	/**
				1195	* hrtimer_get_remaining - get remaining time for the timer
				1196	* @timer: the timer to read
				1197	*/
				1198	ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
				1199	{
				1200	unsigned long flags;
				1201	ktime_t rem;
				1202
				1203	lock_hrtimer_base(timer, &flags);
				1204	rem = hrtimer_expires_remaining(timer);
				1205	unlock_hrtimer_base(timer, &flags);
				1206
				1207	return rem;
				1208	}
				1209	EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
				1210
				1211	#ifdef CONFIG_NO_HZ
				1212	/**
				1213	* hrtimer_get_next_event - get the time until next expiry event
				1214	*
				1215	* Returns the delta to the next expiry event or KTIME_MAX if no timer
				1216	* is pending.
				1217	*/
				1218	ktime_t hrtimer_get_next_event(void)
				1219	{
				1220	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
				1221	struct hrtimer_clock_base *base = cpu_base->clock_base;
				1222	ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
				1223	unsigned long flags;
				1224	int i;
				1225
				1226	raw_spin_lock_irqsave(&cpu_base->lock, flags);
				1227
				1228	if (!hrtimer_hres_active()) {
				1229	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
				1230	struct hrtimer *timer;
				1231	struct timerqueue_node *next;
				1232
				1233	next = timerqueue_getnext(&base->active);
				1234	if (!next)
				1235	continue;
				1236
				1237	timer = container_of(next, struct hrtimer, node);
				1238	delta.tv64 = hrtimer_get_expires_tv64(timer);
				1239	delta = ktime_sub(delta, base->get_time());
				1240	if (delta.tv64 < mindelta.tv64)
				1241	mindelta.tv64 = delta.tv64;
				1242	}
				1243	}
				1244
				1245	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
				1246
				1247	if (mindelta.tv64 < 0)
				1248	mindelta.tv64 = 0;
				1249	return mindelta;
				1250	}
				1251	#endif
				1252
				1253	static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
				1254	enum hrtimer_mode mode)
				1255	{
				1256	struct hrtimer_cpu_base *cpu_base;
				1257	int base;
				1258
				1259	memset(timer, 0, sizeof(struct hrtimer));
				1260
				1261	cpu_base = &__raw_get_cpu_var(hrtimer_bases);
				1262
				1263	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
				1264	clock_id = CLOCK_MONOTONIC;
				1265
				1266	base = hrtimer_clockid_to_base(clock_id);
				1267	timer->base = &cpu_base->clock_base[base];
				1268	INIT_LIST_HEAD(&timer->cb_entry);
				1269	timerqueue_init(&timer->node);
				1270
				1271	#ifdef CONFIG_TIMER_STATS
				1272	timer->start_site = NULL;
				1273	timer->start_pid = -1;
				1274	memset(timer->start_comm, 0, TASK_COMM_LEN);
				1275	#endif
				1276	}
				1277
				1278	/**
				1279	* hrtimer_init - initialize a timer to the given clock
				1280	* @timer: the timer to be initialized
				1281	* @clock_id: the clock to be used
				1282	* @mode: timer mode abs/rel
				1283	*/
				1284	void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
				1285	enum hrtimer_mode mode)
				1286	{
				1287	debug_init(timer, clock_id, mode);
				1288	__hrtimer_init(timer, clock_id, mode);
				1289	}
				1290	EXPORT_SYMBOL_GPL(hrtimer_init);
				1291
				1292	/**
				1293	* hrtimer_get_res - get the timer resolution for a clock
				1294	* @which_clock: which clock to query
				1295	* @tp: pointer to timespec variable to store the resolution
				1296	*
				1297	* Store the resolution of the clock selected by @which_clock in the
				1298	* variable pointed to by @tp.
				1299	*/
				1300	int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
				1301	{
				1302	struct hrtimer_cpu_base *cpu_base;
				1303	int base = hrtimer_clockid_to_base(which_clock);
				1304
				1305	cpu_base = &__raw_get_cpu_var(hrtimer_bases);
				1306	*tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
				1307
				1308	return 0;
				1309	}
				1310	EXPORT_SYMBOL_GPL(hrtimer_get_res);
				1311
				1312	static void __run_hrtimer(struct hrtimer timer, ktime_t now)
				1313	{
				1314	struct hrtimer_clock_base *base = timer->base;
				1315	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
				1316	enum hrtimer_restart (fn)(struct hrtimer );
				1317	int restart;
				1318
				1319	WARN_ON(!irqs_disabled());
				1320
				1321	debug_deactivate(timer);
				1322	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
				1323	timer_stats_account_hrtimer(timer);
				1324	fn = timer->function;
				1325
				1326	/*
				1327	* Because we run timers from hardirq context, there is no chance
				1328	* they get migrated to another cpu, therefore its safe to unlock
				1329	* the timer base.
				1330	*/
				1331	raw_spin_unlock(&cpu_base->lock);
				1332	trace_hrtimer_expire_entry(timer, now);
				1333	restart = fn(timer);
				1334	trace_hrtimer_expire_exit(timer);
				1335	raw_spin_lock(&cpu_base->lock);
				1336
				1337	/*
				1338	* Note: We clear the CALLBACK bit after enqueue_hrtimer and
				1339	* we do not reprogramm the event hardware. Happens either in
				1340	* hrtimer_start_range_ns() or in hrtimer_interrupt()
				1341	*
				1342	* Note: Because we dropped the cpu_base->lock above,
				1343	* hrtimer_start_range_ns() can have popped in and enqueued the timer
				1344	* for us already.
				1345	*/
				1346	if (restart != HRTIMER_NORESTART &&
				1347	!(timer->state & HRTIMER_STATE_ENQUEUED))
				1348	enqueue_hrtimer(timer, base);
				1349
				1350	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
				1351
				1352	timer->state &= ~HRTIMER_STATE_CALLBACK;
				1353	}
				1354
				1355	static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
				1356
				1357	#ifdef CONFIG_PREEMPT_RT_BASE
				1358	static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
				1359	struct hrtimer_clock_base *base)
				1360	{
				1361	/*
				1362	* Note, we clear the callback flag before we requeue the
				1363	* timer otherwise we trigger the callback_running() check
				1364	* in hrtimer_reprogram().
				1365	*/
				1366	timer->state &= ~HRTIMER_STATE_CALLBACK;
				1367
				1368	if (restart != HRTIMER_NORESTART) {
				1369	BUG_ON(hrtimer_active(timer));
				1370	/*
				1371	* Enqueue the timer, if it's the leftmost timer then
				1372	* we need to reprogram it.
				1373	*/
				1374	if (!enqueue_hrtimer(timer, base))
				1375	return;
				1376
				1377	#ifndef CONFIG_HIGH_RES_TIMERS
				1378	}
				1379	#else
				1380	if (base->cpu_base->hres_active &&
				1381	hrtimer_reprogram(timer, base))
				1382	goto requeue;
				1383
				1384	} else if (hrtimer_active(timer)) {
				1385	/*
				1386	* If the timer was rearmed on another CPU, reprogram
				1387	* the event device.
				1388	*/
				1389	if (&timer->node == base->active.next &&
				1390	base->cpu_base->hres_active &&
				1391	hrtimer_reprogram(timer, base))
				1392	goto requeue;
				1393	}
				1394	return;
				1395
				1396	requeue:
				1397	/*
				1398	* Timer is expired. Thus move it from tree to pending list
				1399	* again.
				1400	*/
				1401	__remove_hrtimer(timer, base, timer->state, 0);
				1402	list_add_tail(&timer->cb_entry, &base->expired);
				1403	#endif
				1404	}
				1405
				1406	/*
				1407	* The changes in mainline which removed the callback modes from
				1408	* hrtimer are not yet working with -rt. The non wakeup_process()
				1409	* based callbacks which involve sleeping locks need to be treated
				1410	* seperately.
				1411	*/
				1412	static void hrtimer_rt_run_pending(void)
				1413	{
				1414	enum hrtimer_restart (fn)(struct hrtimer );
				1415	struct hrtimer_cpu_base *cpu_base;
				1416	struct hrtimer_clock_base *base;
				1417	struct hrtimer *timer;
				1418	int index, restart;
				1419
				1420	local_irq_disable();
				1421	cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
				1422
				1423	raw_spin_lock(&cpu_base->lock);
				1424
				1425	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
				1426	base = &cpu_base->clock_base[index];
				1427
				1428	while (!list_empty(&base->expired)) {
				1429	timer = list_first_entry(&base->expired,
				1430	struct hrtimer, cb_entry);
				1431
				1432	/*
				1433	* Same as the above __run_hrtimer function
				1434	* just we run with interrupts enabled.
				1435	*/
				1436	debug_hrtimer_deactivate(timer);
				1437	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
				1438	timer_stats_account_hrtimer(timer);
				1439	fn = timer->function;
				1440
				1441	raw_spin_unlock_irq(&cpu_base->lock);
				1442	restart = fn(timer);
				1443	raw_spin_lock_irq(&cpu_base->lock);
				1444
				1445	hrtimer_rt_reprogram(restart, timer, base);
				1446	}
				1447	}
				1448
				1449	raw_spin_unlock_irq(&cpu_base->lock);
				1450
				1451	wake_up_timer_waiters(cpu_base);
				1452	}
				1453
				1454	static int hrtimer_rt_defer(struct hrtimer *timer)
				1455	{
				1456	if (timer->irqsafe)
				1457	return 0;
				1458
				1459	__remove_hrtimer(timer, timer->base, timer->state, 0);
				1460	list_add_tail(&timer->cb_entry, &timer->base->expired);
				1461	return 1;
				1462	}
				1463
				1464	#else
				1465
				1466	static inline void hrtimer_rt_run_pending(void)
				1467	{
				1468	hrtimer_peek_ahead_timers();
				1469	}
				1470
				1471	static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
				1472
				1473	#endif
				1474
				1475	#ifdef CONFIG_HIGH_RES_TIMERS
				1476
				1477	/*
				1478	* High resolution timer interrupt
				1479	* Called with interrupts disabled
				1480	*/
				1481	void hrtimer_interrupt(struct clock_event_device *dev)
				1482	{
				1483	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
				1484	ktime_t expires_next, now, entry_time, delta;
				1485	int i, retries = 0, raise = 0;
				1486
				1487	BUG_ON(!cpu_base->hres_active);
				1488	cpu_base->nr_events++;
				1489	dev->next_event.tv64 = KTIME_MAX;
				1490
				1491	raw_spin_lock(&cpu_base->lock);
				1492	entry_time = now = hrtimer_update_base(cpu_base);
				1493	retry:
				1494	expires_next.tv64 = KTIME_MAX;
				1495	/*
				1496	* We set expires_next to KTIME_MAX here with cpu_base->lock
				1497	* held to prevent that a timer is enqueued in our queue via
				1498	* the migration code. This does not affect enqueueing of
				1499	* timers which run their callback and need to be requeued on
				1500	* this CPU.
				1501	*/
				1502	cpu_base->expires_next.tv64 = KTIME_MAX;
				1503
				1504	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
				1505	struct hrtimer_clock_base *base;
				1506	struct timerqueue_node *node;
				1507	ktime_t basenow;
				1508
				1509	if (!(cpu_base->active_bases & (1 << i)))
				1510	continue;
				1511
				1512	base = cpu_base->clock_base + i;
				1513	basenow = ktime_add(now, base->offset);
				1514
				1515	while ((node = timerqueue_getnext(&base->active))) {
				1516	struct hrtimer *timer;
				1517
				1518	timer = container_of(node, struct hrtimer, node);
				1519
				1520	trace_hrtimer_interrupt(raw_smp_processor_id(),
				1521	ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
				1522	timer->praecox : hrtimer_get_expires(timer),
				1523	basenow)),
				1524	current,
				1525	timer->function == hrtimer_wakeup ?
				1526	container_of(timer, struct hrtimer_sleeper,
				1527	timer)->task : NULL);
				1528
				1529	/*
				1530	* The immediate goal for using the softexpires is
				1531	* minimizing wakeups, not running timers at the
				1532	* earliest interrupt after their soft expiration.
				1533	* This allows us to avoid using a Priority Search
				1534	* Tree, which can answer a stabbing querry for
				1535	* overlapping intervals and instead use the simple
				1536	* BST we already have.
				1537	* We don't add extra wakeups by delaying timers that
				1538	* are right-of a not yet expired timer, because that
				1539	* timer will have to trigger a wakeup anyway.
				1540	*/
				1541
				1542	if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
				1543	ktime_t expires;
				1544
				1545	expires = ktime_sub(hrtimer_get_expires(timer),
				1546	base->offset);
				1547	if (expires.tv64 < 0)
				1548	expires.tv64 = KTIME_MAX;
				1549	if (expires.tv64 < expires_next.tv64)
				1550	expires_next = expires;
				1551	break;
				1552	}
				1553
				1554	if (!hrtimer_rt_defer(timer))
				1555	__run_hrtimer(timer, &basenow);
				1556	else
				1557	raise = 1;
				1558	}
				1559	}
				1560
				1561	/*
				1562	* Store the new expiry value so the migration code can verify
				1563	* against it.
				1564	*/
				1565	cpu_base->expires_next = expires_next;
				1566	raw_spin_unlock(&cpu_base->lock);
				1567
				1568	/* Reprogramming necessary ? */
				1569	if (expires_next.tv64 == KTIME_MAX \|\|
				1570	!tick_program_event(expires_next, 0)) {
				1571	cpu_base->hang_detected = 0;
				1572	goto out;
				1573	}
				1574
				1575	/*
				1576	* The next timer was already expired due to:
				1577	* - tracing
				1578	* - long lasting callbacks
				1579	* - being scheduled away when running in a VM
				1580	*
				1581	* We need to prevent that we loop forever in the hrtimer
				1582	* interrupt routine. We give it 3 attempts to avoid
				1583	* overreacting on some spurious event.
				1584	*
				1585	* Acquire base lock for updating the offsets and retrieving
				1586	* the current time.
				1587	*/
				1588	raw_spin_lock(&cpu_base->lock);
				1589	now = hrtimer_update_base(cpu_base);
				1590	cpu_base->nr_retries++;
				1591	if (++retries < 3)
				1592	goto retry;
				1593	/*
				1594	* Give the system a chance to do something else than looping
				1595	* here. We stored the entry time, so we know exactly how long
				1596	* we spent here. We schedule the next event this amount of
				1597	* time away.
				1598	*/
				1599	cpu_base->nr_hangs++;
				1600	cpu_base->hang_detected = 1;
				1601	raw_spin_unlock(&cpu_base->lock);
				1602	delta = ktime_sub(now, entry_time);
				1603	if (delta.tv64 > cpu_base->max_hang_time.tv64)
				1604	cpu_base->max_hang_time = delta;
				1605	/*
				1606	* Limit it to a sensible value as we enforce a longer
				1607	* delay. Give the CPU at least 100ms to catch up.
				1608	*/
				1609	if (delta.tv64 > 100 * NSEC_PER_MSEC)
				1610	expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
				1611	else
				1612	expires_next = ktime_add(now, delta);
				1613	tick_program_event(expires_next, 1);
				1614	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
				1615	ktime_to_ns(delta));
				1616	out:
				1617	if (raise)
				1618	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
				1619	}
				1620
				1621	/*
				1622	* local version of hrtimer_peek_ahead_timers() called with interrupts
				1623	* disabled.
				1624	*/
				1625	static void __hrtimer_peek_ahead_timers(void)
				1626	{
				1627	struct tick_device *td;
				1628
				1629	if (!hrtimer_hres_active())
				1630	return;
				1631
				1632	td = &__get_cpu_var(tick_cpu_device);
				1633	if (td && td->evtdev)
				1634	hrtimer_interrupt(td->evtdev);
				1635	}
				1636
				1637	/**
				1638	* hrtimer_peek_ahead_timers -- run soft-expired timers now
				1639	*
				1640	* hrtimer_peek_ahead_timers will peek at the timer queue of
				1641	* the current cpu and check if there are any timers for which
				1642	* the soft expires time has passed. If any such timers exist,
				1643	* they are run immediately and then removed from the timer queue.
				1644	*
				1645	*/
				1646	void hrtimer_peek_ahead_timers(void)
				1647	{
				1648	unsigned long flags;
				1649
				1650	local_irq_save(flags);
				1651	__hrtimer_peek_ahead_timers();
				1652	local_irq_restore(flags);
				1653	}
				1654
				1655	#else /* CONFIG_HIGH_RES_TIMERS */
				1656
				1657	static inline void __hrtimer_peek_ahead_timers(void) { }
				1658
				1659	#endif /* !CONFIG_HIGH_RES_TIMERS */
				1660
				1661	static void run_hrtimer_softirq(struct softirq_action *h)
				1662	{
				1663	hrtimer_rt_run_pending();
				1664	}
				1665
				1666	/*
				1667	* Called from timer softirq every jiffy, expire hrtimers:
				1668	*
				1669	* For HRT its the fall back code to run the softirq in the timer
				1670	* softirq context in case the hrtimer initialization failed or has
				1671	* not been done yet.
				1672	*/
				1673	void hrtimer_run_pending(void)
				1674	{
				1675	if (hrtimer_hres_active())
				1676	return;
				1677
				1678	/*
				1679	* This _is_ ugly: We have to check in the softirq context,
				1680	* whether we can switch to highres and / or nohz mode. The
				1681	* clocksource switch happens in the timer interrupt with
				1682	* xtime_lock held. Notification from there only sets the
				1683	* check bit in the tick_oneshot code, otherwise we might
				1684	* deadlock vs. xtime_lock.
				1685	*/
				1686	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
				1687	hrtimer_switch_to_hres();
				1688	}
				1689
				1690	/*
				1691	* Called from hardirq context every jiffy
				1692	*/
				1693	void hrtimer_run_queues(void)
				1694	{
				1695	struct timerqueue_node *node;
				1696	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
				1697	struct hrtimer_clock_base *base;
				1698	int index, gettime = 1, raise = 0;
				1699
				1700	if (hrtimer_hres_active())
				1701	return;
				1702
				1703	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
				1704	base = &cpu_base->clock_base[index];
				1705	if (!timerqueue_getnext(&base->active))
				1706	continue;
				1707
				1708	if (gettime) {
				1709	hrtimer_get_softirq_time(cpu_base);
				1710	gettime = 0;
				1711	}
				1712
				1713	raw_spin_lock(&cpu_base->lock);
				1714
				1715	while ((node = timerqueue_getnext(&base->active))) {
				1716	struct hrtimer *timer;
				1717
				1718	timer = container_of(node, struct hrtimer, node);
				1719	if (base->softirq_time.tv64 <=
				1720	hrtimer_get_expires_tv64(timer))
				1721	break;
				1722
				1723	if (!hrtimer_rt_defer(timer))
				1724	__run_hrtimer(timer, &base->softirq_time);
				1725	else
				1726	raise = 1;
				1727	}
				1728	raw_spin_unlock(&cpu_base->lock);
				1729	}
				1730
				1731	if (raise)
				1732	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
				1733	}
				1734
				1735	/*
				1736	* Sleep related functions:
				1737	*/
				1738	static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
				1739	{
				1740	struct hrtimer_sleeper *t =
				1741	container_of(timer, struct hrtimer_sleeper, timer);
				1742	struct task_struct *task = t->task;
				1743
				1744	t->task = NULL;
				1745	if (task)
				1746	wake_up_process(task);
				1747
				1748	return HRTIMER_NORESTART;
				1749	}
				1750
				1751	void hrtimer_init_sleeper(struct hrtimer_sleeper sl, struct task_struct task)
				1752	{
				1753	sl->timer.function = hrtimer_wakeup;
				1754	sl->timer.irqsafe = 1;
				1755	sl->task = task;
				1756	}
				1757	EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
				1758
				1759	static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
				1760	unsigned long state)
				1761	{
				1762	hrtimer_init_sleeper(t, current);
				1763
				1764	do {
				1765	set_current_state(state);
				1766	hrtimer_start_expires(&t->timer, mode);
				1767	if (!hrtimer_active(&t->timer))
				1768	t->task = NULL;
				1769
				1770	if (likely(t->task))
				1771	schedule();
				1772
				1773	hrtimer_cancel(&t->timer);
				1774	mode = HRTIMER_MODE_ABS;
				1775
				1776	} while (t->task && !signal_pending(current));
				1777
				1778	__set_current_state(TASK_RUNNING);
				1779
				1780	return t->task == NULL;
				1781	}
				1782
				1783	static int update_rmtp(struct hrtimer timer, struct timespec __user rmtp)
				1784	{
				1785	struct timespec rmt;
				1786	ktime_t rem;
				1787
				1788	rem = hrtimer_expires_remaining(timer);
				1789	if (rem.tv64 <= 0)
				1790	return 0;
				1791	rmt = ktime_to_timespec(rem);
				1792
				1793	if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
				1794	return -EFAULT;
				1795
				1796	return 1;
				1797	}
				1798
				1799	long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
				1800	{
				1801	struct hrtimer_sleeper t;
				1802	struct timespec __user *rmtp;
				1803	int ret = 0;
				1804
				1805	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
				1806	HRTIMER_MODE_ABS);
				1807	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
				1808
				1809	/* cpu_chill() does not care about restart state. */
				1810	if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
				1811	goto out;
				1812
				1813	rmtp = restart->nanosleep.rmtp;
				1814	if (rmtp) {
				1815	ret = update_rmtp(&t.timer, rmtp);
				1816	if (ret <= 0)
				1817	goto out;
				1818	}
				1819
				1820	/* The other values in restart are already filled in */
				1821	ret = -ERESTART_RESTARTBLOCK;
				1822	out:
				1823	destroy_hrtimer_on_stack(&t.timer);
				1824	return ret;
				1825	}
				1826
				1827	static long
				1828	__hrtimer_nanosleep(struct timespec rqtp, struct timespec __user rmtp,
				1829	const enum hrtimer_mode mode, const clockid_t clockid,
				1830	unsigned long state)
				1831	{
				1832	struct restart_block *restart;
				1833	struct hrtimer_sleeper t;
				1834	int ret = 0;
				1835	unsigned long slack;
				1836
				1837	slack = current->timer_slack_ns;
				1838	if (rt_task(current))
				1839	slack = 0;
				1840
				1841	hrtimer_init_on_stack(&t.timer, clockid, mode);
				1842	hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
				1843	if (do_nanosleep(&t, mode, state))
				1844	goto out;
				1845
				1846	/* Absolute timers do not update the rmtp value and restart: */
				1847	if (mode == HRTIMER_MODE_ABS) {
				1848	ret = -ERESTARTNOHAND;
				1849	goto out;
				1850	}
				1851
				1852	if (rmtp) {
				1853	ret = update_rmtp(&t.timer, rmtp);
				1854	if (ret <= 0)
				1855	goto out;
				1856	}
				1857
				1858	restart = &current_thread_info()->restart_block;
				1859	restart->fn = hrtimer_nanosleep_restart;
				1860	restart->nanosleep.clockid = t.timer.base->clockid;
				1861	restart->nanosleep.rmtp = rmtp;
				1862	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
				1863
				1864	ret = -ERESTART_RESTARTBLOCK;
				1865	out:
				1866	destroy_hrtimer_on_stack(&t.timer);
				1867	return ret;
				1868	}
				1869
				1870	long hrtimer_nanosleep(struct timespec rqtp, struct timespec __user rmtp,
				1871	const enum hrtimer_mode mode, const clockid_t clockid)
				1872	{
				1873	return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
				1874	}
				1875
				1876	SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
				1877	struct timespec __user *, rmtp)
				1878	{
				1879	struct timespec tu;
				1880
				1881	if (copy_from_user(&tu, rqtp, sizeof(tu)))
				1882	return -EFAULT;
				1883
				1884	if (!timespec_valid(&tu))
				1885	return -EINVAL;
				1886
				1887	return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
				1888	}
				1889
				1890	#ifdef CONFIG_PREEMPT_RT_FULL
				1891	/*
				1892	* Sleep for 1 ms in hope whoever holds what we want will let it go.
				1893	*/
				1894	void cpu_chill(void)
				1895	{
				1896	struct timespec tu = {
				1897	.tv_nsec = NSEC_PER_MSEC,
				1898	};
				1899	unsigned int freeze_flag = current->flags & PF_NOFREEZE;
				1900
				1901	current->flags \|= PF_NOFREEZE;
				1902	__hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
				1903	TASK_UNINTERRUPTIBLE);
				1904	if (!freeze_flag)
				1905	current->flags &= ~PF_NOFREEZE;
				1906	}
				1907	EXPORT_SYMBOL(cpu_chill);
				1908	#endif
				1909
				1910	/*
				1911	* Functions related to boot-time initialization:
				1912	*/
				1913	static void __cpuinit init_hrtimers_cpu(int cpu)
				1914	{
				1915	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
				1916	int i;
				1917
				1918	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
				1919	cpu_base->clock_base[i].cpu_base = cpu_base;
				1920	timerqueue_init_head(&cpu_base->clock_base[i].active);
				1921	INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
				1922	}
				1923
				1924	hrtimer_init_hres(cpu_base);
				1925	#ifdef CONFIG_PREEMPT_RT_BASE
				1926	init_waitqueue_head(&cpu_base->wait);
				1927	#endif
				1928	}
				1929
				1930	#ifdef CONFIG_HOTPLUG_CPU
				1931
				1932	static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
				1933	struct hrtimer_clock_base *new_base)
				1934	{
				1935	struct hrtimer *timer;
				1936	struct timerqueue_node *node;
				1937
				1938	while ((node = timerqueue_getnext(&old_base->active))) {
				1939	timer = container_of(node, struct hrtimer, node);
				1940	BUG_ON(hrtimer_callback_running(timer));
				1941	debug_deactivate(timer);
				1942
				1943	/*
				1944	* Mark it as STATE_MIGRATE not INACTIVE otherwise the
				1945	* timer could be seen as !active and just vanish away
				1946	* under us on another CPU
				1947	*/
				1948	__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
				1949	timer->base = new_base;
				1950	/*
				1951	* Enqueue the timers on the new cpu. This does not
				1952	* reprogram the event device in case the timer
				1953	* expires before the earliest on this CPU, but we run
				1954	* hrtimer_interrupt after we migrated everything to
				1955	* sort out already expired timers and reprogram the
				1956	* event device.
				1957	*/
				1958	enqueue_hrtimer(timer, new_base);
				1959
				1960	/* Clear the migration state bit */
				1961	timer->state &= ~HRTIMER_STATE_MIGRATE;
				1962	}
				1963	}
				1964
				1965	static void migrate_hrtimers(int scpu)
				1966	{
				1967	struct hrtimer_cpu_base old_base, new_base;
				1968	int i;
				1969
				1970	BUG_ON(cpu_online(scpu));
				1971	tick_cancel_sched_timer(scpu);
				1972
				1973	local_irq_disable();
				1974	old_base = &per_cpu(hrtimer_bases, scpu);
				1975	new_base = &__get_cpu_var(hrtimer_bases);
				1976	/*
				1977	* The caller is globally serialized and nobody else
				1978	* takes two locks at once, deadlock is not possible.
				1979	*/
				1980	raw_spin_lock(&new_base->lock);
				1981	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
				1982
				1983	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
				1984	migrate_hrtimer_list(&old_base->clock_base[i],
				1985	&new_base->clock_base[i]);
				1986	}
				1987
				1988	raw_spin_unlock(&old_base->lock);
				1989	raw_spin_unlock(&new_base->lock);
				1990
				1991	/* Check, if we got expired work to do */
				1992	__hrtimer_peek_ahead_timers();
				1993	local_irq_enable();
				1994	}
				1995
				1996	#endif /* CONFIG_HOTPLUG_CPU */
				1997
				1998	static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
				1999	unsigned long action, void *hcpu)
				2000	{
				2001	int scpu = (long)hcpu;
				2002
				2003	switch (action) {
				2004
				2005	case CPU_UP_PREPARE:
				2006	case CPU_UP_PREPARE_FROZEN:
				2007	init_hrtimers_cpu(scpu);
				2008	break;
				2009
				2010	#ifdef CONFIG_HOTPLUG_CPU
				2011	case CPU_DYING:
				2012	case CPU_DYING_FROZEN:
				2013	clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
				2014	break;
				2015	case CPU_DEAD:
				2016	case CPU_DEAD_FROZEN:
				2017	{
				2018	clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
				2019	migrate_hrtimers(scpu);
				2020	break;
				2021	}
				2022	#endif
				2023
				2024	default:
				2025	break;
				2026	}
				2027
				2028	return NOTIFY_OK;
				2029	}
				2030
				2031	static struct notifier_block __cpuinitdata hrtimers_nb = {
				2032	.notifier_call = hrtimer_cpu_notify,
				2033	};
				2034
				2035	void __init hrtimers_init(void)
				2036	{
				2037	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
				2038	(void *)(long)smp_processor_id());
				2039	register_cpu_notifier(&hrtimers_nb);
				2040	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
				2041	}
				2042
				2043	/**
				2044	* schedule_hrtimeout_range_clock - sleep until timeout
				2045	* @expires: timeout value (ktime_t)
				2046	* @delta: slack in expires timeout (ktime_t)
				2047	* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
				2048	* @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
				2049	*/
				2050	int __sched
				2051	schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
				2052	const enum hrtimer_mode mode, int clock)
				2053	{
				2054	struct hrtimer_sleeper t;
				2055
				2056	/*
				2057	* Optimize when a zero timeout value is given. It does not
				2058	* matter whether this is an absolute or a relative time.
				2059	*/
				2060	if (expires && !expires->tv64) {
				2061	__set_current_state(TASK_RUNNING);
				2062	return 0;
				2063	}
				2064
				2065	/*
				2066	* A NULL parameter means "infinite"
				2067	*/
				2068	if (!expires) {
				2069	schedule();
				2070	__set_current_state(TASK_RUNNING);
				2071	return -EINTR;
				2072	}
				2073
				2074	hrtimer_init_on_stack(&t.timer, clock, mode);
				2075	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
				2076
				2077	hrtimer_init_sleeper(&t, current);
				2078
				2079	hrtimer_start_expires(&t.timer, mode);
				2080	if (!hrtimer_active(&t.timer))
				2081	t.task = NULL;
				2082
				2083	if (likely(t.task))
				2084	schedule();
				2085
				2086	hrtimer_cancel(&t.timer);
				2087	destroy_hrtimer_on_stack(&t.timer);
				2088
				2089	__set_current_state(TASK_RUNNING);
				2090
				2091	return !t.task ? 0 : -EINTR;
				2092	}
				2093
				2094	/**
				2095	* schedule_hrtimeout_range - sleep until timeout
				2096	* @expires: timeout value (ktime_t)
				2097	* @delta: slack in expires timeout (ktime_t)
				2098	* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
				2099	*
				2100	* Make the current task sleep until the given expiry time has
				2101	* elapsed. The routine will return immediately unless
				2102	* the current task state has been set (see set_current_state()).
				2103	*
				2104	* The @delta argument gives the kernel the freedom to schedule the
				2105	* actual wakeup to a time that is both power and performance friendly.
				2106	* The kernel give the normal best effort behavior for "@expires+@delta",
				2107	* but may decide to fire the timer earlier, but no earlier than @expires.
				2108	*
				2109	* You can set the task state as follows -
				2110	*
				2111	* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
				2112	* pass before the routine returns.
				2113	*
				2114	* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
				2115	* delivered to the current task.
				2116	*
				2117	* The current task state is guaranteed to be TASK_RUNNING when this
				2118	* routine returns.
				2119	*
				2120	* Returns 0 when the timer has expired otherwise -EINTR
				2121	*/
				2122	int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
				2123	const enum hrtimer_mode mode)
				2124	{
				2125	return schedule_hrtimeout_range_clock(expires, delta, mode,
				2126	CLOCK_MONOTONIC);
				2127	}
				2128	EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
				2129
				2130	/**
				2131	* schedule_hrtimeout - sleep until timeout
				2132	* @expires: timeout value (ktime_t)
				2133	* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
				2134	*
				2135	* Make the current task sleep until the given expiry time has
				2136	* elapsed. The routine will return immediately unless
				2137	* the current task state has been set (see set_current_state()).
				2138	*
				2139	* You can set the task state as follows -
				2140	*
				2141	* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
				2142	* pass before the routine returns.
				2143	*
				2144	* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
				2145	* delivered to the current task.
				2146	*
				2147	* The current task state is guaranteed to be TASK_RUNNING when this
				2148	* routine returns.
				2149	*
				2150	* Returns 0 when the timer has expired otherwise -EINTR
				2151	*/
				2152	int __sched schedule_hrtimeout(ktime_t *expires,
				2153	const enum hrtimer_mode mode)
				2154	{
				2155	return schedule_hrtimeout_range(expires, 0, mode);
				2156	}
				2157	EXPORT_SYMBOL_GPL(schedule_hrtimeout);