blob: 4fef5e93fe7b077cc3b030287f237c7f8290ae04 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * linux/kernel/timer.c
3 *
4 * Kernel internal timers, basic process system calls
5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds
7 *
8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
9 *
10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
11 * "A Kernel Model for Precision Timekeeping" by Dave Mills
12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
13 * serialize accesses to xtime/lost_ticks).
14 * Copyright (C) 1998 Andrea Arcangeli
15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
16 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
17 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
18 * Copyright (C) 2000, 2001, 2002 Ingo Molnar
19 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
20 */
21
22#include <linux/kernel_stat.h>
23#include <linux/export.h>
24#include <linux/interrupt.h>
25#include <linux/percpu.h>
26#include <linux/init.h>
27#include <linux/mm.h>
28#include <linux/swap.h>
29#include <linux/pid_namespace.h>
30#include <linux/notifier.h>
31#include <linux/thread_info.h>
32#include <linux/time.h>
33#include <linux/jiffies.h>
34#include <linux/posix-timers.h>
35#include <linux/cpu.h>
36#include <linux/syscalls.h>
37#include <linux/delay.h>
38#include <linux/tick.h>
39#include <linux/kallsyms.h>
40#include <linux/irq_work.h>
41#include <linux/sched.h>
42#include <linux/slab.h>
43
44#include <asm/uaccess.h>
45#include <asm/unistd.h>
46#include <asm/div64.h>
47#include <asm/timex.h>
48#include <asm/io.h>
49
50#define CREATE_TRACE_POINTS
51#include <trace/events/timer.h>
52
/*
 * 64-bit jiffies counter. It starts at INITIAL_JIFFIES and is exported
 * so that modules can reference it.
 */
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);
56
/*
 * per-CPU timer vector definitions:
 *
 * TVR_BITS is the index width of the root vector (tv1) and TVN_BITS the
 * index width of each of the further vectors (tv2..tv5); both are
 * smaller when CONFIG_BASE_SMALL is set. The *_SIZE values are the
 * resulting number of list heads per vector and the *_MASK values the
 * matching index masks. MAX_TVAL is the largest offset, in jiffies,
 * that the five vectors together can represent.
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
67
/* One non-root timer vector (used for tv2..tv5): TVN_SIZE list heads. */
struct tvec {
	struct list_head vec[TVN_SIZE];
};
71
/* The root timer vector (used for tv1): TVR_SIZE list heads. */
struct tvec_root {
	struct list_head vec[TVR_SIZE];
};
75
/*
 * Timer wheel state. Instances are reached through the per-CPU
 * tvec_bases pointer and through timer->base.
 */
struct tvec_base {
	/* Protects the base and every timer whose timer->base points here. */
	spinlock_t lock;
	/* Compared against a timer to tell whether its handler is running. */
	struct timer_list *running_timer;
#ifdef CONFIG_PREEMPT_RT_FULL
	/* Wait queue used by wait_for_running_timer() (RT only). */
	wait_queue_head_t wait_for_running_timer;
#endif
	/*
	 * Reference time of the wheel: internal_add_timer() sorts a timer
	 * by (expires - timer_jiffies).
	 */
	unsigned long timer_jiffies;
	/*
	 * Lowered to the expiry of a newly queued non-deferrable timer that
	 * expires earlier; reset to timer_jiffies when a non-deferrable
	 * timer expiring at exactly next_timer is removed.
	 */
	unsigned long next_timer;
	/* Wheel levels, from the shortest (tv1) to the longest (tv5) range. */
	struct tvec_root tv1;
	struct tvec tv2;
	struct tvec tv3;
	struct tvec tv4;
	struct tvec tv5;
} ____cacheline_aligned;
90
/*
 * Statically allocated base. Every CPU's tvec_bases pointer initially
 * refers to it.
 */
struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
94
95/* Functions below help us manage 'deferrable' flag */
96static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
97{
98 return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
99}
100
101static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
102{
103 return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
104}
105
106static inline void timer_set_deferrable(struct timer_list *timer)
107{
108 timer->base = TBASE_MAKE_DEFERRED(timer->base);
109}
110
111static inline void
112timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
113{
114 timer->base = (struct tvec_base *)((unsigned long)(new_base) |
115 tbase_get_deferrable(timer->base));
116}
117
118static unsigned long round_jiffies_common(unsigned long j, int cpu,
119 bool force_up)
120{
121 int rem;
122 unsigned long original = j;
123
124 /*
125 * We don't want all cpus firing their timers at once hitting the
126 * same lock or cachelines, so we skew each extra cpu with an extra
127 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
128 * already did this.
129 * The skew is done by adding 3*cpunr, then round, then subtract this
130 * extra offset again.
131 */
132 j += cpu * 3;
133
134 rem = j % HZ;
135
136 /*
137 * If the target jiffie is just after a whole second (which can happen
138 * due to delays of the timer irq, long irq off times etc etc) then
139 * we should round down to the whole second, not up. Use 1/4th second
140 * as cutoff for this rounding as an extreme upper bound for this.
141 * But never round down if @force_up is set.
142 */
143 if (rem < HZ/4 && !force_up) /* round down */
144 j = j - rem;
145 else /* round up */
146 j = j - rem + HZ;
147
148 /* now that we have rounded, subtract the extra skew again */
149 j -= cpu * 3;
150
151 /*
152 * Make sure j is still in the future. Otherwise return the
153 * unmodified value.
154 */
155 return time_is_after_jiffies(j) ? j : original;
156}
157
158/**
159 * __round_jiffies - function to round jiffies to a full second
160 * @j: the time in (absolute) jiffies that should be rounded
161 * @cpu: the processor number on which the timeout will happen
162 *
163 * __round_jiffies() rounds an absolute time in the future (in jiffies)
164 * up or down to (approximately) full seconds. This is useful for timers
165 * for which the exact time they fire does not matter too much, as long as
166 * they fire approximately every X seconds.
167 *
168 * By rounding these timers to whole seconds, all such timers will fire
169 * at the same time, rather than at various times spread out. The goal
170 * of this is to have the CPU wake up less, which saves power.
171 *
172 * The exact rounding is skewed for each processor to avoid all
173 * processors firing at the exact same time, which could lead
174 * to lock contention or spurious cache line bouncing.
175 *
176 * The return value is the rounded version of the @j parameter.
177 */
178unsigned long __round_jiffies(unsigned long j, int cpu)
179{
180 return round_jiffies_common(j, cpu, false);
181}
182EXPORT_SYMBOL_GPL(__round_jiffies);
183
184/**
185 * __round_jiffies_relative - function to round jiffies to a full second
186 * @j: the time in (relative) jiffies that should be rounded
187 * @cpu: the processor number on which the timeout will happen
188 *
189 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
190 * up or down to (approximately) full seconds. This is useful for timers
191 * for which the exact time they fire does not matter too much, as long as
192 * they fire approximately every X seconds.
193 *
194 * By rounding these timers to whole seconds, all such timers will fire
195 * at the same time, rather than at various times spread out. The goal
196 * of this is to have the CPU wake up less, which saves power.
197 *
198 * The exact rounding is skewed for each processor to avoid all
199 * processors firing at the exact same time, which could lead
200 * to lock contention or spurious cache line bouncing.
201 *
202 * The return value is the rounded version of the @j parameter.
203 */
204unsigned long __round_jiffies_relative(unsigned long j, int cpu)
205{
206 unsigned long j0 = jiffies;
207
208 /* Use j0 because jiffies might change while we run */
209 return round_jiffies_common(j + j0, cpu, false) - j0;
210}
211EXPORT_SYMBOL_GPL(__round_jiffies_relative);
212
213/**
214 * round_jiffies - function to round jiffies to a full second
215 * @j: the time in (absolute) jiffies that should be rounded
216 *
217 * round_jiffies() rounds an absolute time in the future (in jiffies)
218 * up or down to (approximately) full seconds. This is useful for timers
219 * for which the exact time they fire does not matter too much, as long as
220 * they fire approximately every X seconds.
221 *
222 * By rounding these timers to whole seconds, all such timers will fire
223 * at the same time, rather than at various times spread out. The goal
224 * of this is to have the CPU wake up less, which saves power.
225 *
226 * The return value is the rounded version of the @j parameter.
227 */
228unsigned long round_jiffies(unsigned long j)
229{
230 return round_jiffies_common(j, raw_smp_processor_id(), false);
231}
232EXPORT_SYMBOL_GPL(round_jiffies);
233
/**
 * round_jiffies_relative - round a relative jiffies value to a full second
 * @j: the time delta in jiffies that should be rounded
 *
 * Rounds a future time delta up or down so that the resulting absolute
 * time falls on (approximately) a whole second, using the skew of the
 * processor this code currently runs on. Use it for timers that only
 * need to fire roughly every X seconds: once rounded, such timers
 * expire together instead of being spread out, so the CPU wakes up less
 * often, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	int this_cpu = raw_smp_processor_id();

	return __round_jiffies_relative(j, this_cpu);
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
254
255/**
256 * __round_jiffies_up - function to round jiffies up to a full second
257 * @j: the time in (absolute) jiffies that should be rounded
258 * @cpu: the processor number on which the timeout will happen
259 *
260 * This is the same as __round_jiffies() except that it will never
261 * round down. This is useful for timeouts for which the exact time
262 * of firing does not matter too much, as long as they don't fire too
263 * early.
264 */
265unsigned long __round_jiffies_up(unsigned long j, int cpu)
266{
267 return round_jiffies_common(j, cpu, true);
268}
269EXPORT_SYMBOL_GPL(__round_jiffies_up);
270
271/**
272 * __round_jiffies_up_relative - function to round jiffies up to a full second
273 * @j: the time in (relative) jiffies that should be rounded
274 * @cpu: the processor number on which the timeout will happen
275 *
276 * This is the same as __round_jiffies_relative() except that it will never
277 * round down. This is useful for timeouts for which the exact time
278 * of firing does not matter too much, as long as they don't fire too
279 * early.
280 */
281unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
282{
283 unsigned long j0 = jiffies;
284
285 /* Use j0 because jiffies might change while we run */
286 return round_jiffies_common(j + j0, cpu, true) - j0;
287}
288EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
289
290/**
291 * round_jiffies_up - function to round jiffies up to a full second
292 * @j: the time in (absolute) jiffies that should be rounded
293 *
294 * This is the same as round_jiffies() except that it will never
295 * round down. This is useful for timeouts for which the exact time
296 * of firing does not matter too much, as long as they don't fire too
297 * early.
298 */
299unsigned long round_jiffies_up(unsigned long j)
300{
301 return round_jiffies_common(j, raw_smp_processor_id(), true);
302}
303EXPORT_SYMBOL_GPL(round_jiffies_up);
304
/**
 * round_jiffies_up_relative - round a relative jiffies value up to a full second
 * @j: the time delta in jiffies that should be rounded
 *
 * Behaves like round_jiffies_relative() but never rounds down. Use it
 * for timeouts whose exact firing time does not matter much as long as
 * they do not fire too early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	int this_cpu = raw_smp_processor_id();

	return __round_jiffies_up_relative(j, this_cpu);
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
319
/**
 * set_timer_slack - set the allowed slack for a timer
 * @timer: the timer to be modified
 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 *
 * Set the amount of slack, in jiffies, that a certain timer has.
 * With a slack set, the timer subsystem will schedule the actual
 * timer somewhere between the time mod_timer() asks for and that
 * time plus the slack.
 *
 * A negative slack (-1 is the value a freshly initialized timer
 * carries) makes the timer subsystem use a fraction of the
 * remaining delay instead; see apply_slack().
 */
void set_timer_slack(struct timer_list *timer, int slack_hz)
{
	timer->slack = slack_hz;
}
EXPORT_SYMBOL_GPL(set_timer_slack);
338
339static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
340{
341 unsigned long expires = timer->expires;
342 unsigned long idx = expires - base->timer_jiffies;
343 struct list_head *vec;
344
345 if (idx < TVR_SIZE) {
346 int i = expires & TVR_MASK;
347 vec = base->tv1.vec + i;
348 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
349 int i = (expires >> TVR_BITS) & TVN_MASK;
350 vec = base->tv2.vec + i;
351 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
352 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
353 vec = base->tv3.vec + i;
354 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
355 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
356 vec = base->tv4.vec + i;
357 } else if ((signed long) idx < 0) {
358 /*
359 * Can happen if you add a timer with expires == jiffies,
360 * or you set a timer to go off in the past
361 */
362 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
363 } else {
364 int i;
365 /* If the timeout is larger than MAX_TVAL (on 64-bit
366 * architectures or with CONFIG_BASE_SMALL=1) then we
367 * use the maximum timeout.
368 */
369 if (idx > MAX_TVAL) {
370 idx = MAX_TVAL;
371 expires = idx + base->timer_jiffies;
372 }
373 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
374 vec = base->tv5.vec + i;
375 }
376 /*
377 * Timers are FIFO:
378 */
379 list_add_tail(&timer->entry, vec);
380}
381
382#ifdef CONFIG_TIMER_STATS
383void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
384{
385 if (timer->start_site)
386 return;
387
388 timer->start_site = addr;
389 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
390 timer->start_pid = current->pid;
391}
392
393static void timer_stats_account_timer(struct timer_list *timer)
394{
395 unsigned int flag = 0;
396
397 if (likely(!timer->start_site))
398 return;
399 if (unlikely(tbase_get_deferrable(timer->base)))
400 flag |= TIMER_STATS_FLAG_DEFERRABLE;
401
402 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
403 timer->function, timer->start_comm, flag);
404}
405
406#else
/* Timer statistics are compiled out: accounting is a no-op. */
static void timer_stats_account_timer(struct timer_list *timer) {}
408#endif
409
410#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
411
/*
 * Forward declaration: the fixup callbacks below refer to the
 * descriptor, which is defined after them.
 */
static struct debug_obj_descr timer_debug_descr;
413
414static void *timer_debug_hint(void *addr)
415{
416 return ((struct timer_list *) addr)->function;
417}
418
419/*
420 * fixup_init is called when:
421 * - an active object is initialized
422 */
423static int timer_fixup_init(void *addr, enum debug_obj_state state)
424{
425 struct timer_list *timer = addr;
426
427 switch (state) {
428 case ODEBUG_STATE_ACTIVE:
429 del_timer_sync(timer);
430 debug_object_init(timer, &timer_debug_descr);
431 return 1;
432 default:
433 return 0;
434 }
435}
436
/*
 * Stub timer callback for improperly used timers. The fixup callbacks
 * install it on timers that were never set up properly; it only warns
 * when it fires.
 */
static void stub_timer(unsigned long data)
{
	WARN_ON(1);
}
442
443/*
444 * fixup_activate is called when:
445 * - an active object is activated
446 * - an unknown object is activated (might be a statically initialized object)
447 */
448static int timer_fixup_activate(void *addr, enum debug_obj_state state)
449{
450 struct timer_list *timer = addr;
451
452 switch (state) {
453
454 case ODEBUG_STATE_NOTAVAILABLE:
455 /*
456 * This is not really a fixup. The timer was
457 * statically initialized. We just make sure that it
458 * is tracked in the object tracker.
459 */
460 if (timer->entry.next == NULL &&
461 timer->entry.prev == TIMER_ENTRY_STATIC) {
462 debug_object_init(timer, &timer_debug_descr);
463 debug_object_activate(timer, &timer_debug_descr);
464 return 0;
465 } else {
466 setup_timer(timer, stub_timer, 0);
467 return 1;
468 }
469 return 0;
470
471 case ODEBUG_STATE_ACTIVE:
472 WARN_ON(1);
473
474 default:
475 return 0;
476 }
477}
478
479/*
480 * fixup_free is called when:
481 * - an active object is freed
482 */
483static int timer_fixup_free(void *addr, enum debug_obj_state state)
484{
485 struct timer_list *timer = addr;
486
487 switch (state) {
488 case ODEBUG_STATE_ACTIVE:
489 del_timer_sync(timer);
490 debug_object_free(timer, &timer_debug_descr);
491 return 1;
492 default:
493 return 0;
494 }
495}
496
497/*
498 * fixup_assert_init is called when:
499 * - an untracked/uninit-ed object is found
500 */
501static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
502{
503 struct timer_list *timer = addr;
504
505 switch (state) {
506 case ODEBUG_STATE_NOTAVAILABLE:
507 if (timer->entry.prev == TIMER_ENTRY_STATIC) {
508 /*
509 * This is not really a fixup. The timer was
510 * statically initialized. We just make sure that it
511 * is tracked in the object tracker.
512 */
513 debug_object_init(timer, &timer_debug_descr);
514 return 0;
515 } else {
516 setup_timer(timer, stub_timer, 0);
517 return 1;
518 }
519 default:
520 return 0;
521 }
522}
523
/*
 * Object-tracker descriptor for timers: ties the debug hint and the
 * fixup callbacks defined above to the "timer_list" object type.
 */
static struct debug_obj_descr timer_debug_descr = {
	.name = "timer_list",
	.debug_hint = timer_debug_hint,
	.fixup_init = timer_fixup_init,
	.fixup_activate = timer_fixup_activate,
	.fixup_free = timer_fixup_free,
	.fixup_assert_init = timer_fixup_assert_init,
};
532
/*
 * Thin wrappers that forward a timer state change to the object tracker
 * using the timer descriptor.
 */

/* Report initialization of @timer. */
static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

/* Report activation of @timer. */
static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

/* Report deactivation of @timer. */
static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

/* Report that @timer is being freed. */
static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

/* Have the tracker verify that @timer has been initialized. */
static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}
557
/* Defined further down; needed here by init_timer_on_stack_key(). */
static void __init_timer(struct timer_list *timer,
			 const char *name,
			 struct lock_class_key *key);

/*
 * Initialize a timer that lives on the stack: register it with the
 * object tracker as an on-stack object, then do the common timer
 * initialization. @name and @key are handed on to __init_timer().
 */
void init_timer_on_stack_key(struct timer_list *timer,
			     const char *name,
			     struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
570
/* Remove an on-stack @timer from the object tracker. */
void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
576
577#else
/* Timer object debugging is compiled out: the hooks are no-ops. */
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
582#endif
583
/* Run the object-debug hook and the tracepoint for timer initialization. */
static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}
589
/*
 * Run the object-debug hook and the tracepoint for starting @timer;
 * @expires is the expiry value reported to the tracepoint.
 */
static inline void
debug_activate(struct timer_list *timer, unsigned long expires)
{
	debug_timer_activate(timer);
	trace_timer_start(timer, expires);
}
596
/* Run the object-debug hook and the tracepoint for cancelling @timer. */
static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}
602
/* Run the object-debug check that @timer has been initialized. */
static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}
607
608static void __init_timer(struct timer_list *timer,
609 const char *name,
610 struct lock_class_key *key)
611{
612 timer->entry.next = NULL;
613 timer->base = __raw_get_cpu_var(tvec_bases);
614 timer->slack = -1;
615#ifdef CONFIG_TIMER_STATS
616 timer->start_site = NULL;
617 timer->start_pid = -1;
618 memset(timer->start_comm, 0, TASK_COMM_LEN);
619#endif
620 lockdep_init_map(&timer->lockdep_map, name, key, 0);
621}
622
/*
 * Set up a deferrable timer that lives on the stack: store the callback
 * @function and its @data argument, initialize the timer as an on-stack
 * timer (@name and @key are passed through), then mark it deferrable.
 */
void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
					 const char *name,
					 struct lock_class_key *key,
					 void (*function)(unsigned long),
					 unsigned long data)
{
	timer->function = function;
	timer->data = data;
	init_timer_on_stack_key(timer, name, key);
	/* Must follow the init: initialization rewrites timer->base. */
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
635
/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of
 * the other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    const char *name,
		    struct lock_class_key *key)
{
	debug_init(timer);
	__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);
654
/*
 * Same as init_timer_key(), but the timer is additionally marked
 * deferrable.
 */
void init_timer_deferrable_key(struct timer_list *timer,
			       const char *name,
			       struct lock_class_key *key)
{
	init_timer_key(timer, name, key);
	/* Must follow the init: initialization rewrites timer->base. */
	timer_set_deferrable(timer);
}
EXPORT_SYMBOL(init_timer_deferrable_key);
663
664static inline void detach_timer(struct timer_list *timer,
665 int clear_pending)
666{
667 struct list_head *entry = &timer->entry;
668
669 debug_deactivate(timer);
670
671 __list_del(entry->prev, entry->next);
672 if (clear_pending)
673 entry->next = NULL;
674 entry->prev = LIST_POISON2;
675}
676
677/*
678 * We are using hashed locking: holding per_cpu(tvec_bases).lock
679 * means that all timers which are tied to this base via timer->base are
680 * locked, and the base itself is locked too.
681 *
682 * So __run_timers/migrate_timers can safely modify all timers which could
683 * be found on ->tvX lists.
684 *
685 * When the timer's base is locked, and the timer removed from list, it is
686 * possible to set timer->base = NULL and drop the lock: the timer remains
687 * locked.
688 */
689static struct tvec_base *lock_timer_base(struct timer_list *timer,
690 unsigned long *flags)
691 __acquires(timer->base->lock)
692{
693 struct tvec_base *base;
694
695 for (;;) {
696 struct tvec_base *prelock_base = timer->base;
697 base = tbase_get_base(prelock_base);
698 if (likely(base != NULL)) {
699 spin_lock_irqsave(&base->lock, *flags);
700 if (likely(prelock_base == timer->base))
701 return base;
702 /* The timer has migrated to another CPU */
703 spin_unlock_irqrestore(&base->lock, *flags);
704 }
705 cpu_relax();
706 }
707}
708
709#ifndef CONFIG_PREEMPT_RT_FULL
710static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
711 struct tvec_base *old,
712 struct tvec_base *new)
713{
714 /* See the comment in lock_timer_base() */
715 timer_set_base(timer, NULL);
716 spin_unlock(&old->lock);
717 spin_lock(&new->lock);
718 timer_set_base(timer, new);
719 return new;
720}
721#else
722static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
723 struct tvec_base *old,
724 struct tvec_base *new)
725{
726 /*
727 * We cannot do the above because we might be preempted and
728 * then the preempter would see NULL and loop forever.
729 */
730 if (spin_trylock(&new->lock)) {
731 timer_set_base(timer, new);
732 spin_unlock(&old->lock);
733 return new;
734 }
735 return old;
736}
737#endif
738
739static inline int
740__mod_timer(struct timer_list *timer, unsigned long expires,
741 bool pending_only, int pinned)
742{
743 struct tvec_base *base, *new_base;
744 unsigned long flags;
745 int ret = 0 , cpu;
746
747 timer_stats_timer_set_start_info(timer);
748 BUG_ON(!timer->function);
749
750 base = lock_timer_base(timer, &flags);
751
752 if (timer_pending(timer)) {
753 detach_timer(timer, 0);
754 if (timer->expires == base->next_timer &&
755 !tbase_get_deferrable(timer->base))
756 base->next_timer = base->timer_jiffies;
757 ret = 1;
758 } else {
759 if (pending_only)
760 goto out_unlock;
761 }
762
763 debug_activate(timer, expires);
764
765 preempt_disable_rt();
766 cpu = smp_processor_id();
767
768#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
769 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
770 cpu = get_nohz_timer_target();
771#endif
772 preempt_enable_rt();
773
774 new_base = per_cpu(tvec_bases, cpu);
775
776 if (base != new_base) {
777 /*
778 * We are trying to schedule the timer on the local CPU.
779 * However we can't change timer's base while it is running,
780 * otherwise del_timer_sync() can't detect that the timer's
781 * handler yet has not finished. This also guarantees that
782 * the timer is serialized wrt itself.
783 */
784 if (likely(base->running_timer != timer))
785 base = switch_timer_base(timer, base, new_base);
786 }
787
788 timer->expires = expires;
789 if (time_before(timer->expires, base->next_timer) &&
790 !tbase_get_deferrable(timer->base))
791 base->next_timer = timer->expires;
792 internal_add_timer(base, timer);
793
794out_unlock:
795 spin_unlock_irqrestore(&base->lock, flags);
796
797 return ret;
798}
799
800/**
801 * mod_timer_pending - modify a pending timer's timeout
802 * @timer: the pending timer to be modified
803 * @expires: new timeout in jiffies
804 *
805 * mod_timer_pending() is the same for pending timers as mod_timer(),
806 * but will not re-activate and modify already deleted timers.
807 *
808 * It is useful for unserialized use of timers.
809 */
810int mod_timer_pending(struct timer_list *timer, unsigned long expires)
811{
812 return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
813}
814EXPORT_SYMBOL(mod_timer_pending);
815
816/*
817 * Decide where to put the timer while taking the slack into account
818 *
819 * Algorithm:
820 * 1) calculate the maximum (absolute) time
821 * 2) calculate the highest bit where the expires and new max are different
822 * 3) use this bit to make a mask
823 * 4) use the bitmask to round down the maximum time, so that all last
824 * bits are zeros
825 */
826static inline
827unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
828{
829 unsigned long expires_limit, mask;
830 int bit;
831
832 if (timer->slack >= 0) {
833 expires_limit = expires + timer->slack;
834 } else {
835 long delta = expires - jiffies;
836
837 if (delta < 256)
838 return expires;
839
840 expires_limit = expires + delta / 256;
841 }
842 mask = expires ^ expires_limit;
843 if (mask == 0)
844 return expires;
845
846 bit = find_last_bit(&mask, BITS_PER_LONG);
847
848 mask = (1UL << bit) - 1;
849
850 expires_limit = expires_limit & ~(mask);
851
852 return expires_limit;
853}
854
855/**
856 * mod_timer - modify a timer's timeout
857 * @timer: the timer to be modified
858 * @expires: new timeout in jiffies
859 *
860 * mod_timer() is a more efficient way to update the expire field of an
861 * active timer (if the timer is inactive it will be activated)
862 *
863 * mod_timer(timer, expires) is equivalent to:
864 *
865 * del_timer(timer); timer->expires = expires; add_timer(timer);
866 *
867 * Note that if there are multiple unserialized concurrent users of the
868 * same timer, then mod_timer() is the only safe way to modify the timeout,
869 * since add_timer() cannot modify an already running timer.
870 *
871 * The function returns whether it has modified a pending timer or not.
872 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
873 * active timer returns 1.)
874 */
875int mod_timer(struct timer_list *timer, unsigned long expires)
876{
877 expires = apply_slack(timer, expires);
878
879 /*
880 * This is a common optimization triggered by the
881 * networking code - if the timer is re-modified
882 * to be the same thing then just return:
883 */
884 if (timer_pending(timer) && timer->expires == expires)
885 return 1;
886
887 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
888}
889EXPORT_SYMBOL(mod_timer);
890
891/**
892 * mod_timer_pinned - modify a timer's timeout
893 * @timer: the timer to be modified
894 * @expires: new timeout in jiffies
895 *
896 * mod_timer_pinned() is a way to update the expire field of an
897 * active timer (if the timer is inactive it will be activated)
898 * and not allow the timer to be migrated to a different CPU.
899 *
900 * mod_timer_pinned(timer, expires) is equivalent to:
901 *
902 * del_timer(timer); timer->expires = expires; add_timer(timer);
903 */
904int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
905{
906 if (timer->expires == expires && timer_pending(timer))
907 return 1;
908
909 return __mod_timer(timer, expires, false, TIMER_PINNED);
910}
911EXPORT_SYMBOL(mod_timer_pinned);
912
/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior to calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	/* Adding a timer that is already pending is a bug. */
	BUG_ON(timer_pending(timer));
	mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);
933
934/**
935 * add_timer_on - start a timer on a particular CPU
936 * @timer: the timer to be added
937 * @cpu: the CPU to start it on
938 *
939 * This is not very scalable on SMP. Double adds are not possible.
940 */
941void add_timer_on(struct timer_list *timer, int cpu)
942{
943 struct tvec_base *base = per_cpu(tvec_bases, cpu);
944 unsigned long flags;
945
946 timer_stats_timer_set_start_info(timer);
947 BUG_ON(timer_pending(timer) || !timer->function);
948 spin_lock_irqsave(&base->lock, flags);
949 timer_set_base(timer, base);
950 debug_activate(timer, timer->expires);
951 if (time_before(timer->expires, base->next_timer) &&
952 !tbase_get_deferrable(timer->base))
953 base->next_timer = timer->expires;
954 internal_add_timer(base, timer);
955 /*
956 * Check whether the other CPU is idle and needs to be
957 * triggered to reevaluate the timer wheel when nohz is
958 * active. We are protected against the other CPU fiddling
959 * with the timer by holding the timer base lock. This also
960 * makes sure that a CPU on the way to idle can not evaluate
961 * the timer wheel.
962 */
963 wake_up_idle_cpu(cpu);
964 spin_unlock_irqrestore(&base->lock, flags);
965}
966EXPORT_SYMBOL_GPL(add_timer_on);
967
#ifdef CONFIG_PREEMPT_RT_FULL
/*
 * Wait for a running timer
 *
 * If @timer is the timer currently marked as running on its base, sleep
 * on the base's wait queue until that is no longer the case.
 *
 * timer->base can carry the deferrable flag inside the pointer value
 * (see timer_set_deferrable()), so the flag has to be stripped with
 * tbase_get_base() before the base is dereferenced.
 */
static void wait_for_running_timer(struct timer_list *timer)
{
	struct tvec_base *base = tbase_get_base(timer->base);

	if (base->running_timer == timer)
		wait_event(base->wait_for_running_timer,
			   base->running_timer != timer);
}

/* Wake everybody sleeping in wait_for_running_timer() on base @b. */
# define wakeup_timer_waiters(b)	wake_up(&(b)->wait_for_running_timer)
#else
/* Without PREEMPT_RT_FULL there is no wait queue: just relax the CPU. */
static inline void wait_for_running_timer(struct timer_list *timer)
{
	cpu_relax();
}

/* Nobody sleeps on a base in this configuration: nothing to wake. */
# define wakeup_timer_waiters(b)	do { } while (0)
#endif
990
991/**
992 * del_timer - deactive a timer.
993 * @timer: the timer to be deactivated
994 *
995 * del_timer() deactivates a timer - this works on both active and inactive
996 * timers.
997 *
998 * The function returns whether it has deactivated a pending timer or not.
999 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
1000 * active timer returns 1.)
1001 */
1002int del_timer(struct timer_list *timer)
1003{
1004 struct tvec_base *base;
1005 unsigned long flags;
1006 int ret = 0;
1007
1008 debug_assert_init(timer);
1009
1010 timer_stats_timer_clear_start_info(timer);
1011 if (timer_pending(timer)) {
1012 base = lock_timer_base(timer, &flags);
1013 if (timer_pending(timer)) {
1014 detach_timer(timer, 1);
1015 if (timer->expires == base->next_timer &&
1016 !tbase_get_deferrable(timer->base))
1017 base->next_timer = base->timer_jiffies;
1018 ret = 1;
1019 }
1020 spin_unlock_irqrestore(&base->lock, flags);
1021 }
1022
1023 return ret;
1024}
1025EXPORT_SYMBOL(del_timer);
1026
1027/**
1028 * try_to_del_timer_sync - Try to deactivate a timer
1029 * @timer: timer do del
1030 *
1031 * This function tries to deactivate a timer. Upon successful (ret >= 0)
1032 * exit the timer is not queued and the handler is not running on any CPU.
1033 */
1034int try_to_del_timer_sync(struct timer_list *timer)
1035{
1036 struct tvec_base *base;
1037 unsigned long flags;
1038 int ret = -1;
1039
1040 debug_assert_init(timer);
1041
1042 base = lock_timer_base(timer, &flags);
1043
1044 if (base->running_timer == timer)
1045 goto out;
1046
1047 timer_stats_timer_clear_start_info(timer);
1048 ret = 0;
1049 if (timer_pending(timer)) {
1050 detach_timer(timer, 1);
1051 if (timer->expires == base->next_timer &&
1052 !tbase_get_deferrable(timer->base))
1053 base->next_timer = base->timer_jiffies;
1054 ret = 1;
1055 }
1056out:
1057 spin_unlock_irqrestore(&base->lock, flags);
1058
1059 return ret;
1060}
1061EXPORT_SYMBOL(try_to_del_timer_sync);
1062
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
 * completion of the timer's handler. The timer's handler must not call
 * add_timer_on(). Upon exit the timer is not queued and the handler is
 * not running on any CPU.
 *
 * Note: You must not hold locks that are held in interrupt context
 *   while calling this function. Even if the lock has nothing to do
 *   with the timer in question.  Here's why:
 *
 *    CPU0                             CPU1
 *    ----                             ----
 *                                   <SOFTIRQ>
 *                                   call_timer_fn();
 *                                     base->running_timer = mytimer;
 *  spin_lock_irq(somelock);
 *                                     <IRQ>
 *                                        spin_lock(somelock);
 *  del_timer_sync(mytimer);
 *   while (base->running_timer == mytimer);
 *
 * Now del_timer_sync() will never return and never release somelock.
 * The interrupt on the other CPU is waiting to grab somelock but
 * it has interrupted the softirq that CPU0 is waiting to finish.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	int ret;
#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * If lockdep gives a backtrace here, please reference
	 * the synchronization rules above.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif
	/*
	 * don't use it in hardirq context, because it
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq());

	/* Retry for as long as the handler is reported as running (-1). */
	while ((ret = try_to_del_timer_sync(timer)) < 0)
		wait_for_running_timer(timer);

	return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
1128
1129static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1130{
1131 /* cascade all the timers from tv up one level */
1132 struct timer_list *timer, *tmp;
1133 struct list_head tv_list;
1134
1135 list_replace_init(tv->vec + index, &tv_list);
1136
1137 /*
1138 * We are removing _all_ timers from the list, so we
1139 * don't have to detach them individually.
1140 */
1141 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1142 BUG_ON(tbase_get_base(timer->base) != base);
1143 internal_add_timer(base, timer);
1144 }
1145
1146 return index;
1147}
1148
1149static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1150 unsigned long data)
1151{
1152 int preempt_count = preempt_count();
1153
1154#ifdef CONFIG_LOCKDEP
1155 /*
1156 * It is permissible to free the timer from inside the
1157 * function that is called from it, this we need to take into
1158 * account for lockdep too. To avoid bogus "held lock freed"
1159 * warnings as well as problems when looking into
1160 * timer->lockdep_map, make a copy and use that here.
1161 */
1162 struct lockdep_map lockdep_map = timer->lockdep_map;
1163#endif
1164 /*
1165 * Couple the lock chain with the lock chain at
1166 * del_timer_sync() by acquiring the lock_map around the fn()
1167 * call here and in del_timer_sync().
1168 */
1169 lock_map_acquire(&lockdep_map);
1170
1171 trace_timer_expire_entry(timer);
1172 zxic_trace_timer_enter(fn);
1173 fn(data);
1174 zxic_trace_timer_exit(fn);
1175 trace_timer_expire_exit(timer);
1176
1177 lock_map_release(&lockdep_map);
1178
1179 if (preempt_count != preempt_count()) {
1180 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1181 fn, preempt_count, preempt_count());
1182 /*
1183 * Restore the preempt count. That gives us a decent
1184 * chance to survive and extract information. If the
1185 * callback kept a lock held, bad luck, but not worse
1186 * than the BUG() we had.
1187 */
1188 preempt_count() = preempt_count;
1189 }
1190}
1191
1192#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
1193
1194/**
1195 * __run_timers - run all expired timers (if any) on this CPU.
1196 * @base: the timer vector to be processed.
1197 *
1198 * This function cascades all vectors and executes all expired timer
1199 * vectors.
1200 */
1201static inline void __run_timers(struct tvec_base *base)
1202{
1203 struct timer_list *timer;
1204
1205 spin_lock_irq(&base->lock);
1206 while (time_after_eq(jiffies, base->timer_jiffies)) {
1207 struct list_head work_list;
1208 struct list_head *head = &work_list;
1209 int index = base->timer_jiffies & TVR_MASK;
1210
1211 /*
1212 * Cascade timers:
1213 */
1214 if (!index &&
1215 (!cascade(base, &base->tv2, INDEX(0))) &&
1216 (!cascade(base, &base->tv3, INDEX(1))) &&
1217 !cascade(base, &base->tv4, INDEX(2)))
1218 cascade(base, &base->tv5, INDEX(3));
1219 ++base->timer_jiffies;
1220 list_replace_init(base->tv1.vec + index, &work_list);
1221 while (!list_empty(head)) {
1222 void (*fn)(unsigned long);
1223 unsigned long data;
1224
1225 timer = list_first_entry(head, struct timer_list,entry);
1226 fn = timer->function;
1227 data = timer->data;
1228
1229 timer_stats_account_timer(timer);
1230
1231 base->running_timer = timer;
1232 detach_timer(timer, 1);
1233
1234 spin_unlock_irq(&base->lock);
1235 call_timer_fn(timer, fn, data);
1236 base->running_timer = NULL;
1237 spin_lock_irq(&base->lock);
1238 }
1239 }
1240 wakeup_timer_waiters(base);
1241 spin_unlock_irq(&base->lock);
1242}
1243
1244#ifdef CONFIG_NO_HZ
1245/*
1246 * Find out when the next timer event is due to happen. This
1247 * is used on S/390 to stop all activity when a CPU is idle.
1248 * This function needs to be called with interrupts disabled.
1249 */
1250static unsigned long __next_timer_interrupt(struct tvec_base *base)
1251{
1252 unsigned long timer_jiffies = base->timer_jiffies;
1253 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1254 int index, slot, array, found = 0;
1255 struct timer_list *nte;
1256 struct tvec *varray[4];
1257
1258 /* Look for timer events in tv1. */
1259 index = slot = timer_jiffies & TVR_MASK;
1260 do {
1261 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
1262 if (tbase_get_deferrable(nte->base))
1263 continue;
1264
1265 found = 1;
1266 expires = nte->expires;
1267 /* Look at the cascade bucket(s)? */
1268 if (!index || slot < index)
1269 goto cascade;
1270 return expires;
1271 }
1272 slot = (slot + 1) & TVR_MASK;
1273 } while (slot != index);
1274
1275cascade:
1276 /* Calculate the next cascade event */
1277 if (index)
1278 timer_jiffies += TVR_SIZE - index;
1279 timer_jiffies >>= TVR_BITS;
1280
1281 /* Check tv2-tv5. */
1282 varray[0] = &base->tv2;
1283 varray[1] = &base->tv3;
1284 varray[2] = &base->tv4;
1285 varray[3] = &base->tv5;
1286
1287 for (array = 0; array < 4; array++) {
1288 struct tvec *varp = varray[array];
1289
1290 index = slot = timer_jiffies & TVN_MASK;
1291 do {
1292 list_for_each_entry(nte, varp->vec + slot, entry) {
1293 if (tbase_get_deferrable(nte->base))
1294 continue;
1295
1296 found = 1;
1297 if (time_before(nte->expires, expires))
1298 expires = nte->expires;
1299 }
1300 /*
1301 * Do we still search for the first timer or are
1302 * we looking up the cascade buckets ?
1303 */
1304 if (found) {
1305 /* Look at the cascade bucket(s)? */
1306 if (!index || slot < index)
1307 break;
1308 return expires;
1309 }
1310 slot = (slot + 1) & TVN_MASK;
1311 } while (slot != index);
1312
1313 if (index)
1314 timer_jiffies += TVN_SIZE - index;
1315 timer_jiffies >>= TVN_BITS;
1316 }
1317 return expires;
1318}
1319
1320/*
1321 * Check, if the next hrtimer event is before the next timer wheel
1322 * event:
1323 */
1324static unsigned long cmp_next_hrtimer_event(unsigned long now,
1325 unsigned long expires)
1326{
1327 ktime_t hr_delta = hrtimer_get_next_event();
1328 struct timespec tsdelta;
1329 unsigned long delta;
1330
1331 if (hr_delta.tv64 == KTIME_MAX)
1332 return expires;
1333
1334 /*
1335 * Expired timer available, let it expire in the next tick
1336 */
1337 if (hr_delta.tv64 <= 0)
1338 return now + 1;
1339
1340 tsdelta = ktime_to_timespec(hr_delta);
1341 delta = timespec_to_jiffies(&tsdelta);
1342
1343 /*
1344 * Limit the delta to the max value, which is checked in
1345 * tick_nohz_stop_sched_tick():
1346 */
1347 if (delta > NEXT_TIMER_MAX_DELTA)
1348 delta = NEXT_TIMER_MAX_DELTA;
1349
1350 /*
1351 * Take rounding errors in to account and make sure, that it
1352 * expires in the next tick. Otherwise we go into an endless
1353 * ping pong due to tick_nohz_stop_sched_tick() retriggering
1354 * the timer softirq
1355 */
1356 if (delta < 1)
1357 delta = 1;
1358 now += delta;
1359 if (time_before(now, expires))
1360 return now;
1361 return expires;
1362}
1363
1364/**
1365 * get_next_timer_interrupt - return the jiffy of the next pending timer
1366 * @now: current time (in jiffies)
1367 */
1368unsigned long get_next_timer_interrupt(unsigned long now)
1369{
1370 struct tvec_base *base = __this_cpu_read(tvec_bases);
1371 unsigned long expires;
1372
1373 /*
1374 * Pretend that there is no timer pending if the cpu is offline.
1375 * Possible pending timers will be migrated later to an active cpu.
1376 */
1377 if (cpu_is_offline(smp_processor_id()))
1378 return now + NEXT_TIMER_MAX_DELTA;
1379
1380#ifdef CONFIG_PREEMPT_RT_FULL
1381 /*
1382 * On PREEMPT_RT we cannot sleep here. If the trylock does not
1383 * succeed then we return the worst-case 'expires in 1 tick'
1384 * value. We use the rt functions here directly to avoid a
1385 * migrate_disable() call.
1386 */
1387 if (spin_do_trylock(&base->lock)) {
1388 if (time_before_eq(base->next_timer, base->timer_jiffies))
1389 base->next_timer = __next_timer_interrupt(base);
1390 expires = base->next_timer;
1391 rt_spin_unlock_after_trylock_in_irq(&base->lock);
1392 } else {
1393 expires = now + 1;
1394 }
1395#else
1396 spin_lock(&base->lock);
1397 if (time_before_eq(base->next_timer, base->timer_jiffies))
1398 base->next_timer = __next_timer_interrupt(base);
1399 expires = base->next_timer;
1400 spin_unlock(&base->lock);
1401
1402 if (time_before_eq(expires, now))
1403 return now;
1404#endif
1405 return cmp_next_hrtimer_event(now, expires);
1406}
1407#endif
1408
1409/*
1410 * Called from the timer interrupt handler to charge one tick to the current
1411 * process. user_tick is 1 if the tick is user time, 0 for system.
1412 */
1413void update_process_times(int user_tick)
1414{
1415 struct task_struct *p = current;
1416 int cpu = smp_processor_id();
1417
1418 /* Note: this timer irq context must be accounted for as well. */
1419 account_process_tick(p, user_tick);
1420 scheduler_tick();
1421 run_local_timers();
1422 rcu_check_callbacks(cpu, user_tick);
1423#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL)
1424 if (in_irq())
1425 irq_work_run();
1426#endif
1427 run_posix_cpu_timers(p);
1428}
1429
1430/*
1431 * This function runs timers and the timer-tq in bottom half context.
1432 */
1433static void run_timer_softirq(struct softirq_action *h)
1434{
1435 struct tvec_base *base = __this_cpu_read(tvec_bases);
1436
1437#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
1438 irq_work_run();
1439#endif
1440
1441 printk_tick();
1442 hrtimer_run_pending();
1443
1444 if (time_after_eq(jiffies, base->timer_jiffies))
1445 __run_timers(base);
1446}
1447
1448/*
1449 * Called by the local, per-CPU timer interrupt on SMP.
1450 */
1451void run_local_timers(void)
1452{
1453 hrtimer_run_queues();
1454 raise_softirq(TIMER_SOFTIRQ);
1455}
1456
1457#ifdef __ARCH_WANT_SYS_ALARM
1458
1459/*
1460 * For backwards compatibility? This can be done in libc so Alpha
1461 * and all newer ports shouldn't need it.
1462 */
1463SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1464{
1465 return alarm_setitimer(seconds);
1466}
1467
1468#endif
1469
1470#ifndef __alpha__
1471
1472/*
1473 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
1474 * should be moved into arch/i386 instead?
1475 */
1476
1477/**
1478 * sys_getpid - return the thread group id of the current process
1479 *
1480 * Note, despite the name, this returns the tgid not the pid. The tgid and
1481 * the pid are identical unless CLONE_THREAD was specified on clone() in
1482 * which case the tgid is the same in all threads of the same group.
1483 *
1484 * This is SMP safe as current->tgid does not change.
1485 */
1486SYSCALL_DEFINE0(getpid)
1487{
1488 return task_tgid_vnr(current);
1489}
1490
1491/*
1492 * Accessing ->real_parent is not SMP-safe, it could
1493 * change from under us. However, we can use a stale
1494 * value of ->real_parent under rcu_read_lock(), see
1495 * release_task()->call_rcu(delayed_put_task_struct).
1496 */
1497SYSCALL_DEFINE0(getppid)
1498{
1499 int pid;
1500
1501 rcu_read_lock();
1502 pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1503 rcu_read_unlock();
1504
1505 return pid;
1506}
1507
1508SYSCALL_DEFINE0(getuid)
1509{
1510 /* Only we change this so SMP safe */
1511 return current_uid();
1512}
1513
1514SYSCALL_DEFINE0(geteuid)
1515{
1516 /* Only we change this so SMP safe */
1517 return current_euid();
1518}
1519
1520SYSCALL_DEFINE0(getgid)
1521{
1522 /* Only we change this so SMP safe */
1523 return current_gid();
1524}
1525
1526SYSCALL_DEFINE0(getegid)
1527{
1528 /* Only we change this so SMP safe */
1529 return current_egid();
1530}
1531
1532#endif
1533
/*
 * Timer handler used by schedule_timeout(): @__data carries the
 * task_struct pointer of the task to wake up.
 */
static void process_timeout(unsigned long __data)
{
	struct task_struct *sleeper = (struct task_struct *)__data;

	wake_up_process(sleeper);
}
1538
1539/**
1540 * schedule_timeout - sleep until timeout
1541 * @timeout: timeout value in jiffies
1542 *
1543 * Make the current task sleep until @timeout jiffies have
1544 * elapsed. The routine will return immediately unless
1545 * the current task state has been set (see set_current_state()).
1546 *
1547 * You can set the task state as follows -
1548 *
1549 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1550 * pass before the routine returns. The routine will return 0
1551 *
1552 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1553 * delivered to the current task. In this case the remaining time
1554 * in jiffies will be returned, or 0 if the timer expired in time
1555 *
1556 * The current task state is guaranteed to be TASK_RUNNING when this
1557 * routine returns.
1558 *
1559 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1560 * the CPU away without a bound on the timeout. In this case the return
1561 * value will be %MAX_SCHEDULE_TIMEOUT.
1562 *
1563 * In all cases the return value is guaranteed to be non-negative.
1564 */
1565signed long __sched schedule_timeout(signed long timeout)
1566{
1567 struct timer_list timer;
1568 unsigned long expire;
1569
1570 switch (timeout)
1571 {
1572 case MAX_SCHEDULE_TIMEOUT:
1573 /*
1574 * These two special cases are useful to be comfortable
1575 * in the caller. Nothing more. We could take
1576 * MAX_SCHEDULE_TIMEOUT from one of the negative value
1577 * but I' d like to return a valid offset (>=0) to allow
1578 * the caller to do everything it want with the retval.
1579 */
1580 schedule();
1581 goto out;
1582 default:
1583 /*
1584 * Another bit of PARANOID. Note that the retval will be
1585 * 0 since no piece of kernel is supposed to do a check
1586 * for a negative retval of schedule_timeout() (since it
1587 * should never happens anyway). You just have the printk()
1588 * that will tell you if something is gone wrong and where.
1589 */
1590 if (timeout < 0) {
1591 printk(KERN_ERR "schedule_timeout: wrong timeout "
1592 "value %lx\n", timeout);
1593 dump_stack();
1594 current->state = TASK_RUNNING;
1595 goto out;
1596 }
1597 }
1598
1599 expire = timeout + jiffies;
1600
1601 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1602 __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1603 schedule();
1604 del_singleshot_timer_sync(&timer);
1605
1606 /* Remove the timer from the object tracker */
1607 destroy_timer_on_stack(&timer);
1608
1609 timeout = expire - jiffies;
1610
1611 out:
1612 return timeout < 0 ? 0 : timeout;
1613}
1614EXPORT_SYMBOL(schedule_timeout);
1615
1616/*
1617 * We can use __set_current_state() here because schedule_timeout() calls
1618 * schedule() unconditionally.
1619 */
1620signed long __sched schedule_timeout_interruptible(signed long timeout)
1621{
1622 __set_current_state(TASK_INTERRUPTIBLE);
1623 return schedule_timeout(timeout);
1624}
1625EXPORT_SYMBOL(schedule_timeout_interruptible);
1626
1627signed long __sched schedule_timeout_killable(signed long timeout)
1628{
1629 __set_current_state(TASK_KILLABLE);
1630 return schedule_timeout(timeout);
1631}
1632EXPORT_SYMBOL(schedule_timeout_killable);
1633
1634signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1635{
1636 __set_current_state(TASK_UNINTERRUPTIBLE);
1637 return schedule_timeout(timeout);
1638}
1639EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1640
1641/* Thread ID - the internal kernel "pid" */
1642SYSCALL_DEFINE0(gettid)
1643{
1644 return task_pid_vnr(current);
1645}
1646
1647/**
1648 * do_sysinfo - fill in sysinfo struct
1649 * @info: pointer to buffer to fill
1650 */
1651int do_sysinfo(struct sysinfo *info)
1652{
1653 unsigned long mem_total, sav_total;
1654 unsigned int mem_unit, bitcount;
1655 struct timespec tp;
1656
1657 memset(info, 0, sizeof(struct sysinfo));
1658
1659 ktime_get_ts(&tp);
1660 monotonic_to_bootbased(&tp);
1661 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1662
1663 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1664
1665 info->procs = nr_threads;
1666
1667 si_meminfo(info);
1668 si_swapinfo(info);
1669
1670 /*
1671 * If the sum of all the available memory (i.e. ram + swap)
1672 * is less than can be stored in a 32 bit unsigned long then
1673 * we can be binary compatible with 2.2.x kernels. If not,
1674 * well, in that case 2.2.x was broken anyways...
1675 *
1676 * -Erik Andersen <andersee@debian.org>
1677 */
1678
1679 mem_total = info->totalram + info->totalswap;
1680 if (mem_total < info->totalram || mem_total < info->totalswap)
1681 goto out;
1682 bitcount = 0;
1683 mem_unit = info->mem_unit;
1684 while (mem_unit > 1) {
1685 bitcount++;
1686 mem_unit >>= 1;
1687 sav_total = mem_total;
1688 mem_total <<= 1;
1689 if (mem_total < sav_total)
1690 goto out;
1691 }
1692
1693 /*
1694 * If mem_total did not overflow, multiply all memory values by
1695 * info->mem_unit and set it to 1. This leaves things compatible
1696 * with 2.2.x, and also retains compatibility with earlier 2.4.x
1697 * kernels...
1698 */
1699
1700 info->mem_unit = 1;
1701 info->totalram <<= bitcount;
1702 info->freeram <<= bitcount;
1703 info->sharedram <<= bitcount;
1704 info->bufferram <<= bitcount;
1705 info->totalswap <<= bitcount;
1706 info->freeswap <<= bitcount;
1707 info->totalhigh <<= bitcount;
1708 info->freehigh <<= bitcount;
1709
1710out:
1711 return 0;
1712}
1713
1714SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
1715{
1716 struct sysinfo val;
1717
1718 do_sysinfo(&val);
1719
1720 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1721 return -EFAULT;
1722
1723 return 0;
1724}
1725
1726static int __cpuinit init_timers_cpu(int cpu)
1727{
1728 int j;
1729 struct tvec_base *base;
1730 static char __cpuinitdata tvec_base_done[NR_CPUS];
1731
1732 if (!tvec_base_done[cpu]) {
1733 static char boot_done;
1734
1735 if (boot_done) {
1736 /*
1737 * The APs use this path later in boot
1738 */
1739 base = kmalloc_node(sizeof(*base),
1740 GFP_KERNEL | __GFP_ZERO,
1741 cpu_to_node(cpu));
1742 if (!base)
1743 return -ENOMEM;
1744
1745 /* Make sure that tvec_base is 2 byte aligned */
1746 if (tbase_get_deferrable(base)) {
1747 WARN_ON(1);
1748 kfree(base);
1749 return -ENOMEM;
1750 }
1751 per_cpu(tvec_bases, cpu) = base;
1752 } else {
1753 /*
1754 * This is for the boot CPU - we use compile-time
1755 * static initialisation because per-cpu memory isn't
1756 * ready yet and because the memory allocators are not
1757 * initialised either.
1758 */
1759 boot_done = 1;
1760 base = &boot_tvec_bases;
1761 }
1762 spin_lock_init(&base->lock);
1763#ifdef CONFIG_PREEMPT_RT_FULL
1764 init_waitqueue_head(&base->wait_for_running_timer);
1765#endif
1766 tvec_base_done[cpu] = 1;
1767 } else {
1768 base = per_cpu(tvec_bases, cpu);
1769 }
1770
1771 for (j = 0; j < TVN_SIZE; j++) {
1772 INIT_LIST_HEAD(base->tv5.vec + j);
1773 INIT_LIST_HEAD(base->tv4.vec + j);
1774 INIT_LIST_HEAD(base->tv3.vec + j);
1775 INIT_LIST_HEAD(base->tv2.vec + j);
1776 }
1777 for (j = 0; j < TVR_SIZE; j++)
1778 INIT_LIST_HEAD(base->tv1.vec + j);
1779
1780 base->timer_jiffies = jiffies;
1781 base->next_timer = base->timer_jiffies;
1782 return 0;
1783}
1784
1785#ifdef CONFIG_HOTPLUG_CPU
1786static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1787{
1788 struct timer_list *timer;
1789
1790 while (!list_empty(head)) {
1791 timer = list_first_entry(head, struct timer_list, entry);
1792 detach_timer(timer, 0);
1793 timer_set_base(timer, new_base);
1794 if (time_before(timer->expires, new_base->next_timer) &&
1795 !tbase_get_deferrable(timer->base))
1796 new_base->next_timer = timer->expires;
1797 internal_add_timer(new_base, timer);
1798 }
1799}
1800
1801static void __cpuinit migrate_timers(int cpu)
1802{
1803 struct tvec_base *old_base;
1804 struct tvec_base *new_base;
1805 int i;
1806
1807 BUG_ON(cpu_online(cpu));
1808 old_base = per_cpu(tvec_bases, cpu);
1809 new_base = get_local_var(tvec_bases);
1810 /*
1811 * The caller is globally serialized and nobody else
1812 * takes two locks at once, deadlock is not possible.
1813 */
1814 spin_lock_irq(&new_base->lock);
1815 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1816
1817 BUG_ON(old_base->running_timer);
1818
1819 for (i = 0; i < TVR_SIZE; i++)
1820 migrate_timer_list(new_base, old_base->tv1.vec + i);
1821 for (i = 0; i < TVN_SIZE; i++) {
1822 migrate_timer_list(new_base, old_base->tv2.vec + i);
1823 migrate_timer_list(new_base, old_base->tv3.vec + i);
1824 migrate_timer_list(new_base, old_base->tv4.vec + i);
1825 migrate_timer_list(new_base, old_base->tv5.vec + i);
1826 }
1827
1828 spin_unlock(&old_base->lock);
1829 spin_unlock_irq(&new_base->lock);
1830 put_local_var(tvec_bases);
1831}
1832#endif /* CONFIG_HOTPLUG_CPU */
1833
1834static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1835 unsigned long action, void *hcpu)
1836{
1837 long cpu = (long)hcpu;
1838 int err;
1839
1840 switch(action) {
1841 case CPU_UP_PREPARE:
1842 case CPU_UP_PREPARE_FROZEN:
1843 err = init_timers_cpu(cpu);
1844 if (err < 0)
1845 return notifier_from_errno(err);
1846 break;
1847#ifdef CONFIG_HOTPLUG_CPU
1848 case CPU_DEAD:
1849 case CPU_DEAD_FROZEN:
1850 migrate_timers(cpu);
1851 break;
1852#endif
1853 default:
1854 break;
1855 }
1856 return NOTIFY_OK;
1857}
1858
1859static struct notifier_block __cpuinitdata timers_nb = {
1860 .notifier_call = timer_cpu_notify,
1861};
1862
1863
1864void __init init_timers(void)
1865{
1866 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1867 (void *)(long)smp_processor_id());
1868
1869 init_timer_stats();
1870
1871 BUG_ON(err != NOTIFY_OK);
1872 register_cpu_notifier(&timers_nb);
1873 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1874}
1875
/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 *
 * Keeps calling schedule_timeout_uninterruptible() until it reports no
 * remaining jiffies.
 */
void msleep(unsigned int msecs)
{
	unsigned long left;

	for (left = msecs_to_jiffies(msecs) + 1; left; )
		left = schedule_timeout_uninterruptible(left);
}

EXPORT_SYMBOL(msleep);
1889
1890/**
1891 * msleep_interruptible - sleep waiting for signals
1892 * @msecs: Time in milliseconds to sleep for
1893 */
1894unsigned long msleep_interruptible(unsigned int msecs)
1895{
1896 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1897
1898 while (timeout && !signal_pending(current))
1899 timeout = schedule_timeout_interruptible(timeout);
1900 return jiffies_to_msecs(timeout);
1901}
1902
1903EXPORT_SYMBOL(msleep_interruptible);
1904
1905static int __sched do_usleep_range(unsigned long min, unsigned long max)
1906{
1907 ktime_t kmin;
1908 unsigned long delta;
1909
1910 kmin = ktime_set(0, min * NSEC_PER_USEC);
1911 delta = (max - min) * NSEC_PER_USEC;
1912 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1913}
1914
1915/**
1916 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1917 * @min: Minimum time in usecs to sleep
1918 * @max: Maximum time in usecs to sleep
1919 */
1920void usleep_range(unsigned long min, unsigned long max)
1921{
1922 __set_current_state(TASK_UNINTERRUPTIBLE);
1923 do_usleep_range(min, max);
1924}
1925EXPORT_SYMBOL(usleep_range);