/*
2 * linux/kernel/softirq.c
3 *
4 * Copyright (C) 1992 Linus Torvalds
5 *
6 * Distribute under GPLv2.
7 *
8 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9 *
10 * Remote softirq infrastructure is by Jens Axboe.
11 */
12
13#include <linux/export.h>
14#include <linux/kernel_stat.h>
15#include <linux/interrupt.h>
16#include <linux/init.h>
17#include <linux/mm.h>
18#include <linux/notifier.h>
19#include <linux/percpu.h>
20#include <linux/cpu.h>
21#include <linux/freezer.h>
22#include <linux/kthread.h>
23#include <linux/rcupdate.h>
24#include <linux/delay.h>
25#include <linux/ftrace.h>
26#include <linux/smp.h>
27#include <linux/tick.h>
28#include <linux/locallock.h>
29
30#define CREATE_TRACE_POINTS
31#include <trace/events/irq.h>
32
33#include <asm/irq.h>
34/*
35 - No shared variables, all the data are CPU local.
36 - If a softirq needs serialization, let it serialize itself
37 by its own spinlocks.
38 - Even if softirq is serialized, only local cpu is marked for
39 execution. Hence, we get something sort of weak cpu binding.
40 Though it is still not clear, will it result in better locality
41 or will not.
42
43 Examples:
44 - NET RX softirq. It is multithreaded and does not require
45 any global serialization.
46 - NET TX softirq. It kicks software netdevice queues, hence
47 it is logically serialized per device, but this serialization
48 is invisible to common code.
49 - Tasklets: serialized wrt itself.
50 */
51#define CONFIG_SOFTIRQ_PATCH /* for SOFTIRQ PATCH*/
52
53#ifndef __ARCH_IRQ_STAT
54irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
55EXPORT_SYMBOL(irq_stat);
56#endif
57
58static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
59
60DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
61
62char *softirq_to_name[NR_SOFTIRQS] = {
63 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
64 "TASKLET", "SCHED", "HRTIMER", "RCU"
65};
66
67#ifdef CONFIG_NO_HZ
68# ifdef CONFIG_PREEMPT_RT_FULL
/*
 * On preempt-rt a softirq might be blocked on a lock. There might be
 * no other runnable task on this CPU because the lock owner runs on
 * some other CPU. So we have to go into idle with the pending bit
 * set. Therefore we need to check this otherwise we warn about false
 * positives which confuses users and defeats the whole purpose of
 * this test.
 *
 * This code is called with interrupts disabled.
 */
void softirq_check_pending_idle(void)
{
	static int rate_limit;	/* caps the warning at 10 prints per boot */
	u32 warnpending = 0, pending = local_softirq_pending();

	if (rate_limit >= 10)
		return;

	if (pending) {
		struct task_struct *tsk;

		tsk = __get_cpu_var(ksoftirqd);
		/*
		 * The wakeup code in rtmutex.c wakes up the task
		 * _before_ it sets pi_blocked_on to NULL under
		 * tsk->pi_lock. So we need to check for both: state
		 * and pi_blocked_on.
		 */
		raw_spin_lock(&tsk->pi_lock);

		/* Warn only if ksoftirqd is neither runnable nor lock-blocked */
		if (!tsk->pi_blocked_on && !(tsk->state == TASK_RUNNING))
			warnpending = 1;

		raw_spin_unlock(&tsk->pi_lock);
	}

	if (warnpending) {
		printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
		       pending);
		rate_limit++;
	}
}
111# else
112/*
113 * On !PREEMPT_RT we just printk rate limited:
114 */
115void softirq_check_pending_idle(void)
116{
117 static int rate_limit;
118
119 if (rate_limit < 10) {
120 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
121 local_softirq_pending());
122 rate_limit++;
123 }
124}
125# endif
126#endif
127
128/*
129 * we cannot loop indefinitely here to avoid userspace starvation,
130 * but we also don't want to introduce a worst case 1/HZ latency
131 * to the pending events, so lets the scheduler to balance
132 * the softirq load for us.
133 */
134static void wakeup_softirqd(void)
135{
136 /* Interrupts are disabled: no need to stop preemption */
137 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
138
139 if (tsk && tsk->state != TASK_RUNNING)
140 wake_up_process(tsk);
141}
142
/*
 * Run the handler of every softirq set in @pending.
 *
 * Called with interrupts disabled; irqs are enabled around the actual
 * handler invocations and disabled again before returning.
 *
 * @pending:	     snapshot of the pending softirq bitmask
 * @cpu:	     current cpu, forwarded to rcu_bh_qs()
 * @need_rcu_bh_qs: when set, note an RCU-bh quiescent state after each
 *		     handler (the RT path passes 0 to suppress this)
 */
static void handle_pending_softirqs(u32 pending, int cpu, int need_rcu_bh_qs)
{
	struct softirq_action *h = softirq_vec;
	unsigned int prev_count = preempt_count();

	local_irq_enable();
	for ( ; pending; h++, pending >>= 1) {
		unsigned int vec_nr = h - softirq_vec;

		if (!(pending & 1))
			continue;

		kstat_incr_softirqs_this_cpu(vec_nr);
		trace_softirq_entry(vec_nr);
		zxic_trace_softirq_enter(vec_nr);
		h->action(h);
		zxic_trace_softirq_exit(vec_nr);
		trace_softirq_exit(vec_nr);
		/*
		 * A handler that returns with a different preempt_count
		 * leaked a preempt disable/enable; complain and repair
		 * the count so processing can continue.
		 */
		if (unlikely(prev_count != preempt_count())) {
			printk(KERN_ERR
 "huh, entered softirq %u %s %p with preempt_count %08x exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, (unsigned int) preempt_count());
			preempt_count() = prev_count;
		}
		if (need_rcu_bh_qs)
			rcu_bh_qs(cpu);
	}
	local_irq_disable();
}
173
174#ifndef CONFIG_PREEMPT_RT_FULL
175/*
176 * preempt_count and SOFTIRQ_OFFSET usage:
177 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
178 * softirq processing.
179 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
180 * on local_bh_disable or local_bh_enable.
181 * This lets us distinguish between whether we are currently processing
182 * softirq and whether we just have bh disabled.
183 */
184
185/*
186 * This one is for softirq.c-internal use,
187 * where hardirqs are disabled legitimately:
188 */
189#ifdef CONFIG_TRACE_IRQFLAGS
/*
 * Disable bottom halves by adding @cnt to preempt_count, doing the
 * lockdep and preempt-tracer bookkeeping by hand (see comment below).
 * @ip is the caller address reported to the softirq tracer.
 */
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	/* Must not be entered from hardirq context */
	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	preempt_count() += cnt;
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == cnt)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt)
		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
}
215#else /* !CONFIG_TRACE_IRQFLAGS */
/* Non-lockdep variant: bump preempt_count and order against later code */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	add_preempt_count(cnt);
	barrier();
}
221#endif /* CONFIG_TRACE_IRQFLAGS */
222
223void local_bh_disable(void)
224{
225 __local_bh_disable((unsigned long)__builtin_return_address(0),
226 SOFTIRQ_DISABLE_OFFSET);
227}
228
229EXPORT_SYMBOL(local_bh_disable);
230
/*
 * Undo __local_bh_disable(): drop @cnt from preempt_count and notify
 * lockdep when softirqs become enabled again. Caller must have irqs
 * disabled and must not be in hardirq context.
 */
static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == cnt)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(cnt);
}
240
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
250
251EXPORT_SYMBOL(_local_bh_enable);
252
/*
 * Re-enable bottom halves and, unless we are already in interrupt
 * context, run any pending softirqs. Must be called with irqs enabled
 * and outside hardirq context. @ip is the caller reported to lockdep.
 */
static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	/* Drop the single count kept across do_softirq() above */
	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
279
/* Public bh-enable entry point: forwards the caller address to lockdep */
void local_bh_enable(void)
{
	unsigned long ip = (unsigned long)__builtin_return_address(0);

	_local_bh_enable_ip(ip);
}
284EXPORT_SYMBOL(local_bh_enable);
285
/* bh-enable variant for callers that supply their own trace ip */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
290EXPORT_SYMBOL(local_bh_enable_ip);
291
292/*
293 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
294 * but break the loop if need_resched() is set or after 2 ms.
295 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
296 * certain cases, such as stop_machine(), jiffies may cease to
297 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
298 * well to make sure we eventually return from this method.
299 *
300 * These limits have been established via experimentation.
301 * The two things to balance is latency against fairness -
302 * we want to handle softirqs as soon as possible, but they
303 * should not be able to lock up the box.
304 */
305#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
306#define MAX_SOFTIRQ_RESTART 10
307
308#ifdef CONFIG_TRACE_IRQFLAGS
309/*
310 * Convoluted means of passing __do_softirq() a message through the various
311 * architecture execute_on_stack() bits.
312 *
313 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
314 * to keep the lockdep irq context tracking as tight as possible in order to
315 * not miss-qualify lock contexts and miss possible deadlocks.
316 */
/* Nonzero when the current softirq run was entered from a hardirq */
static DEFINE_PER_CPU(int, softirq_from_hardirq);

/* Note that the upcoming softirq run is triggered from hardirq exit */
static inline void lockdep_softirq_from_hardirq(void)
{
	this_cpu_write(softirq_from_hardirq, 1);
}

/* Leave the lockdep hardirq context (if any) and enter softirq context */
static inline void lockdep_softirq_start(void)
{
	if (this_cpu_read(softirq_from_hardirq))
		trace_hardirq_exit();
	lockdep_softirq_enter();
}

/* Leave softirq context and restore the hardirq context we came from */
static inline void lockdep_softirq_end(void)
{
	lockdep_softirq_exit();
	if (this_cpu_read(softirq_from_hardirq)) {
		this_cpu_write(softirq_from_hardirq, 0);
		trace_hardirq_enter();
	}
}

#else
static inline void lockdep_softirq_from_hardirq(void) { }
static inline void lockdep_softirq_start(void) { }
static inline void lockdep_softirq_end(void) { }
#endif
345
/*
 * Core softirq loop (!RT). Runs with SOFTIRQ_OFFSET accounted in
 * preempt_count; restarts while work keeps arriving, bounded by
 * MAX_SOFTIRQ_RESTART iterations and MAX_SOFTIRQ_TIME. Whatever is
 * still pending after that is handed to ksoftirqd.
 */
asmlinkage void __do_softirq(void)
{
	__u32 pending;
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	int cpu;
	int max_restart = MAX_SOFTIRQ_RESTART;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
	lockdep_softirq_start();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	handle_pending_softirqs(pending, cpu, 1);

	pending = local_softirq_pending();
	if (pending) {
		/* Restart while within budget, otherwise punt to ksoftirqd */
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end();

	account_system_vtime(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
}
381
/*
 * Called with preemption disabled from run_ksoftirqd()
 */
static int ksoftirqd_do_softirq(int cpu)
{
	/*
	 * Preempt disable stops cpu going offline.
	 * If already offline, we'll be on wrong CPU:
	 * don't process.
	 */
	if (!cpu_is_offline(cpu)) {
		local_irq_disable();
		if (local_softirq_pending())
			__do_softirq();
		local_irq_enable();
		return 0;
	}

	return -1;
}
401
402#ifndef __ARCH_HAS_DO_SOFTIRQ
403
/*
 * Generic (non arch-stack) entry: run pending softirqs from process
 * context with interrupts disabled around the dispatch.
 */
asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	/* irq/softirq exit paths will handle anything raised there */
	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}
421
422#endif
423
/* !RT: the "nort" helpers map to the real bh primitives; on RT (below)
 * they are empty because softirqs run in thread context there. */
static inline void local_bh_disable_nort(void) { local_bh_disable(); }
static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
/* !RT: ksoftirqd keeps the default scheduling policy */
static inline void ksoftirqd_set_sched_params(void) { }
static inline void ksoftirqd_clr_sched_params(void) { }
428
429#else /* !PREEMPT_RT_FULL */
430
431/*
432 * On RT we serialize softirq execution with a cpu local lock
433 */
/* Per-cpu lock serializing all softirq execution on this cpu (RT) */
static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock);
/* Task currently executing softirqs on this cpu, NULL when none */
static DEFINE_PER_CPU(struct task_struct *, local_softirq_runner);

static void __do_softirq_common(int need_rcu_bh_qs);
438
/* RT entry point: run softirqs without noting RCU-bh quiescent states */
void __do_softirq(void)
{
	__do_softirq_common(0);
}
443
/* Initialize the per-cpu softirq lock before any bh usage (RT only) */
void __init softirq_early_init(void)
{
	local_irq_lock_init(local_softirq_lock);
}
448
/*
 * RT bh-disable: pin the task to this cpu and bump the per-task nest
 * count. With the vendor CONFIG_SOFTIRQ_PATCH this also takes the
 * per-cpu softirq lock, serializing against ksoftirqd instead of
 * touching preempt_count.
 */
void local_bh_disable(void)
{
	migrate_disable();
	current->softirq_nestcnt++;
#ifdef CONFIG_SOFTIRQ_PATCH
	local_lock(local_softirq_lock);
#endif
}
457EXPORT_SYMBOL(local_bh_disable);
458
459void local_bh_enable(void)
460{
461 long soft_cnt;
462
463 if (WARN_ON(current->softirq_nestcnt == 0))
464 return;
465
466#ifdef CONFIG_SOFTIRQ_PATCH
467 local_unlock(local_softirq_lock);/*first realse the lock to ksoftirqd*/
468 if ((current->softirq_nestcnt == 1) &&
469 local_softirq_pending()) {
470 wakeup_softirqd(); /*in order to avoid ksoftirqd occur deadlock it just wakeup ksoftirqd*/
471 WARN_ON(current->softirq_nestcnt != 1);
472 }
473#else
474 if ((current->softirq_nestcnt == 1) &&
475 local_softirq_pending() &&
476 local_trylock(local_softirq_lock)) {
477
478 local_irq_disable();
479 if (local_softirq_pending())
480 __do_softirq();
481 local_irq_enable();
482 local_unlock(local_softirq_lock);
483 WARN_ON(current->softirq_nestcnt != 1);
484 }
485#endif
486 current->softirq_nestcnt--;
487 migrate_enable();
488}
489EXPORT_SYMBOL(local_bh_enable);
490
/* RT: same as local_bh_enable(); the caller ip is not used here */
void local_bh_enable_ip(unsigned long ip)
{
	local_bh_enable();
}
495EXPORT_SYMBOL(local_bh_enable_ip);
496
/*
 * RT: like local_bh_enable() but never runs or wakes softirq
 * processing.
 * NOTE(review): the unlock happens after migrate_enable() here - the
 * reverse of the disable-side ordering - and the nest count is not
 * checked for underflow; presumably intentional in the vendor patch,
 * but worth confirming.
 */
void _local_bh_enable(void)
{
	current->softirq_nestcnt--;
	migrate_enable();
#ifdef CONFIG_SOFTIRQ_PATCH
	local_unlock(local_softirq_lock);	/* release the lock so ksoftirqd can take it */
#endif
}
505EXPORT_SYMBOL(_local_bh_enable);
506
/* For tracing: softirq-lock nest depth if we own it, else 0 */
int notrace __in_softirq(void)
{
	if (__get_cpu_var(local_softirq_lock).owner == current)
		return __get_cpu_var(local_softirq_lock).nestcnt;
	return 0;
}
514
515int in_serving_softirq(void)
516{
517 int res;
518
519 preempt_disable();
520 res = __get_cpu_var(local_softirq_runner) == current;
521 preempt_enable();
522 return res;
523}
524EXPORT_SYMBOL(in_serving_softirq);
525
/*
 * Called with bh and local interrupts disabled. For full RT cpu must
 * be pinned.
 */
static void __do_softirq_common(int need_rcu_bh_qs)
{
	u32 pending = local_softirq_pending();
	int cpu = smp_processor_id();

	/* Mark this task as serving softirqs (see in_serving_softirq()) */
	current->softirq_nestcnt++;

	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	__get_cpu_var(local_softirq_runner) = current;

	lockdep_softirq_enter();

	handle_pending_softirqs(pending, cpu, need_rcu_bh_qs);

	/* Anything raised while we ran is deferred to ksoftirqd */
	pending = local_softirq_pending();
	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();
	__get_cpu_var(local_softirq_runner) = NULL;

	current->softirq_nestcnt--;
}
555
/*
 * Run pending softirqs in thread context, serialized on the per-cpu
 * softirq lock. @cpu >= 0 when called from ksoftirqd (used only for
 * the offline check and the rcu-bh qs decision), -1 otherwise.
 * Returns -1 if the cpu went offline under us, 0 on success.
 * Entered and left with preemption disabled.
 */
static int __thread_do_softirq(int cpu)
{
	/*
	 * Prevent the current cpu from going offline.
	 * pin_current_cpu() can reenable preemption and block on the
	 * hotplug mutex. When it returns, the current cpu is
	 * pinned. It might be the wrong one, but the offline check
	 * below catches that.
	 */
	pin_current_cpu();
	/*
	 * If called from ksoftirqd (cpu >= 0) we need to check
	 * whether we are on the wrong cpu due to cpu offlining. If
	 * called via thread_do_softirq() no action required.
	 */
	if (cpu >= 0 && cpu_is_offline(cpu)) {
		unpin_current_cpu();
		return -1;
	}
	preempt_enable();
	local_lock(local_softirq_lock);
	local_irq_disable();
	/*
	 * We cannot switch stacks on RT as we want to be able to
	 * schedule!
	 */
	if (local_softirq_pending())
		__do_softirq_common(cpu >= 0);
	/*
	 * NOTE(review): unpin happens before irq-enable/unlock here,
	 * which differs from some upstream RT variants - confirm this
	 * ordering is intended.
	 */
	unpin_current_cpu();
	local_irq_enable();
	local_unlock(local_softirq_lock);
	preempt_disable();
	return 0;
}
590
/*
 * Called from netif_rx_ni(). Preemption enabled.
 */
 #ifdef CONFIG_SOFTIRQ_PATCH
 /*
  * Vendor patch: if the current task is not already serving softirqs,
  * defer to ksoftirqd instead of running them inline (avoids taking
  * the softirq lock recursively from this context).
  */
void thread_do_softirq(void)
{
	if (!in_serving_softirq()) {
		wakeup_softirqd();
	}
}
602
 #else
/* !patch: run the pending softirqs synchronously in this thread */
void thread_do_softirq(void)
{
	if (!in_serving_softirq()) {
		preempt_disable();
		__thread_do_softirq(-1);	/* -1: not bound to a particular cpu */
		preempt_enable();
	}
}

 #endif
614
/* RT ksoftirqd worker: drain softirqs serialized on the softirq lock */
static int ksoftirqd_do_softirq(int cpu)
{
	return __thread_do_softirq(cpu);
}
619
/* RT: bh "nort" helpers are no-ops (softirqs run in threads anyway) */
static inline void local_bh_disable_nort(void) { }
static inline void _local_bh_enable_nort(void) { }
622
/* RT: ksoftirqd runs realtime, SCHED_FIFO priority 1 */
static inline void ksoftirqd_set_sched_params(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler(current, SCHED_FIFO, &param);
}
629
/* RT: drop back to SCHED_NORMAL when the thread is shutting down */
static inline void ksoftirqd_clr_sched_params(void)
{
	struct sched_param param = { .sched_priority = 0 };

	sched_setscheduler(current, SCHED_NORMAL, &param);
}
636
637#endif /* PREEMPT_RT_FULL */
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable_nort();
		tick_check_idle(cpu);
		_local_bh_enable_nort();
	}

	/* Account the hardirq in preempt_count / irq tracing */
	__irq_enter();
}
658
/*
 * Kick softirq processing from irq_exit(): run it directly on !RT
 * (unless force_irqthreads), always defer to ksoftirqd on RT.
 */
static inline void invoke_softirq(void)
{
#ifndef CONFIG_PREEMPT_RT_FULL
	if (!force_irqthreads) {
		lockdep_softirq_from_hardirq();
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
		__do_softirq();
#else
		do_softirq();
#endif
	} else {
		/* Mark bh busy so raise_softirq() won't re-wake ksoftirqd */
		__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
		wakeup_softirqd();
		__local_bh_enable(SOFTIRQ_OFFSET);
	}
#else
	wakeup_softirqd();
#endif
}
679
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_irq_exit();
#endif
	rcu_irq_exit();
	trace_hardirq_exit(); /* must be last! */
	/* Drop the remaining preempt count without a reschedule check */
	sched_preempt_enable_no_resched();
}
699
/*
 * Mark softirq @nr pending and wake ksoftirqd when nobody else will
 * run it. This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
719
/* Raise softirq @nr from any context; disables irqs around the raise */
void raise_softirq(unsigned int nr)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	raise_softirq_irqoff(nr);
	local_irq_restore(irqflags);
}
728
/* Set bit @nr in the local pending mask; caller handles any wakeup */
void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
734
/* Register @action as the handler for softirq vector @nr */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
739
740/*
741 * Tasklets
742 */
/* Per-cpu singly-linked list of scheduled tasklets with O(1) append */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;	/* -> last ->next, or &head when empty */
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
751
/*
 * Append @t to @head and raise softirq @nr, but only if we win the
 * tasklet lock and its SCHED bit is (still) set.
 */
static void inline
__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
{
	if (tasklet_trylock(t)) {
again:
		/* We may have been preempted before tasklet_trylock
		 * and __tasklet_action may have already run.
		 * So double check the sched bit while the tasklet
		 * is locked before adding it to the list.
		 */
		if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
			t->next = NULL;
			*head->tail = t;
			head->tail = &(t->next);
			raise_softirq_irqoff(nr);
			tasklet_unlock(t);
		} else {
			/* This is subtle. If we hit the corner case above
			 * It is possible that we get preempted right here,
			 * and another task has successfully called
			 * tasklet_schedule(), then this function, and
			 * failed on the trylock. Thus we must be sure
			 * before releasing the tasklet lock, that the
			 * SCHED_BIT is clear. Otherwise the tasklet
			 * may get its SCHED_BIT set, but not added to the
			 * list
			 */
			if (!tasklet_tryunlock(t))
				goto again;
		}
	}
}
784
/* Queue @t on this cpu's normal tasklet list (raises TASKLET_SOFTIRQ) */
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	__tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}
793
794EXPORT_SYMBOL(__tasklet_schedule);
795
/* Queue @t on this cpu's high-priority tasklet list (raises HI_SOFTIRQ) */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	__tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ);
	local_irq_restore(flags);
}
804
805EXPORT_SYMBOL(__tasklet_hi_schedule);
806
/* RT-safe variant: no head insertion, identical to __tasklet_hi_schedule() */
void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	__tasklet_hi_schedule(t);
}
811
812EXPORT_SYMBOL(__tasklet_hi_schedule_first);
813
814void tasklet_enable(struct tasklet_struct *t)
815{
816 if (!atomic_dec_and_test(&t->count))
817 return;
818 if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
819 tasklet_schedule(t);
820}
821
822EXPORT_SYMBOL(tasklet_enable);
823
824void tasklet_hi_enable(struct tasklet_struct *t)
825{
826 if (!atomic_dec_and_test(&t->count))
827 return;
828 if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
829 tasklet_hi_schedule(t);
830}
831
832EXPORT_SYMBOL(tasklet_hi_enable);
833
/*
 * Run every tasklet on @list. Handles the RT corner cases: a disabled
 * tasklet is re-marked PENDING for tasklet_enable(); one rescheduled
 * while running is re-executed before its lock is dropped.
 */
static void
__tasklet_action(struct softirq_action *a, struct tasklet_struct *list)
{
	int loops = 1000000;	/* sanity bound on the unlock retry loop */

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		/*
		 * Should always succeed - after a tasklist got on the
		 * list (after getting the SCHED bit set from 0 to 1),
		 * nothing but the tasklet softirq it got queued to can
		 * lock it:
		 */
		if (!tasklet_trylock(t)) {
			WARN_ON(1);
			continue;
		}

		t->next = NULL;

		/*
		 * If we cannot handle the tasklet because it's disabled,
		 * mark it as pending. tasklet_enable() will later
		 * re-schedule the tasklet.
		 */
		if (unlikely(atomic_read(&t->count))) {
out_disabled:
			/* implicit unlock: */
			wmb();
			t->state = TASKLET_STATEF_PENDING;
			continue;
		}

		/*
		 * After this point on the tasklet might be rescheduled
		 * on another CPU, but it can only be added to another
		 * CPU's tasklet list if we unlock the tasklet (which we
		 * dont do yet).
		 */
		if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
			WARN_ON(1);

again:
		t->func(t->data);

		/*
		 * Try to unlock the tasklet. We must use cmpxchg, because
		 * another CPU might have scheduled or disabled the tasklet.
		 * We only allow the STATE_RUN -> 0 transition here.
		 */
		while (!tasklet_tryunlock(t)) {
			/*
			 * If it got disabled meanwhile, bail out:
			 */
			if (atomic_read(&t->count))
				goto out_disabled;
			/*
			 * If it got scheduled meanwhile, re-execute
			 * the tasklet function:
			 */
			if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
				goto again;
			if (!--loops) {
				printk("hm, tasklet state: %08lx\n", t->state);
				WARN_ON(1);
				tasklet_unlock(t);
				break;
			}
		}
	}
}
908
909static void tasklet_action(struct softirq_action *a)
910{
911 struct tasklet_struct *list;
912
913 local_irq_disable();
914 list = __get_cpu_var(tasklet_vec).head;
915 __get_cpu_var(tasklet_vec).head = NULL;
916 __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
917 local_irq_enable();
918
919 __tasklet_action(a, list);
920}
921
/* HI_SOFTIRQ handler: atomically steal this cpu's hi list, then run it */
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	/* NOTE(review): mixes __this_cpu_* and __get_cpu_var accessors;
	 * equivalent with irqs off, but tasklet_action() uses only
	 * __get_cpu_var - consider unifying. */
	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
	local_irq_enable();

	__tasklet_action(a, list);
}
934
935
936void tasklet_init(struct tasklet_struct *t,
937 void (*func)(unsigned long), unsigned long data)
938{
939 t->next = NULL;
940 t->state = 0;
941 atomic_set(&t->count, 0);
942 t->func = func;
943 t->data = data;
944}
945
946EXPORT_SYMBOL(tasklet_init);
947
/*
 * Wait until @t is neither scheduled nor running, leaving it
 * unscheduled. Sleeps, so must not be called from interrupt context.
 */
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	/* Wait for any pending schedule to drain before claiming SCHED */
	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			msleep(1);
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}
961
962EXPORT_SYMBOL(tasklet_kill);
963
964/*
965 * tasklet_hrtimer
966 */
967
/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}
981
982/*
983 * Helper function which calls the hrtimer callback from
984 * tasklet/softirq context
985 */
986static void __tasklet_hrtimer_trampoline(unsigned long data)
987{
988 struct tasklet_hrtimer *ttimer = (void *)data;
989 enum hrtimer_restart restart;
990
991 restart = ttimer->function(&ttimer->timer);
992 if (restart != HRTIMER_NORESTART)
993 hrtimer_restart(&ttimer->timer);
994}
995
996/**
997 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
998 * @ttimer: tasklet_hrtimer which is initialized
999 * @function: hrtimer callback function which gets called from softirq context
1000 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
1001 * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
1002 */
1003void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
1004 enum hrtimer_restart (*function)(struct hrtimer *),
1005 clockid_t which_clock, enum hrtimer_mode mode)
1006{
1007 hrtimer_init(&ttimer->timer, which_clock, mode);
1008 ttimer->timer.function = __hrtimer_tasklet_trampoline;
1009 tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
1010 (unsigned long)ttimer);
1011 ttimer->function = function;
1012}
1013EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
1014
1015/*
1016 * Remote softirq bits
1017 */
1018
/* Per-cpu, per-softirq lists of remote work items awaiting local run */
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);
1021
/* Queue @cp on this cpu's list for @softirq; raise it on first insert */
static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

	list_add_tail(&cp->list, head);

	/* Trigger the softirq only if the list was previously empty. */
	if (head->next == &cp->list)
		raise_softirq_irqoff(softirq);
}
1032
1033#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * smp-call callback on the target cpu: requeue the work item on the
 * local per-softirq list and raise the softirq stashed in cp->priv.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}
1046
/*
 * Try to fire @softirq on @cpu via a single-function smp call.
 * Returns 0 on success, 1 when @cpu is offline (caller then queues
 * the work locally).
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;	/* smuggle the softirq nr to the callback */

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}
	return 1;
}
1060#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* No generic SMP helpers: always report failure so work runs locally */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
1065#endif
1066
1067/**
1068 * __send_remote_softirq - try to schedule softirq work on a remote cpu
1069 * @cp: private SMP call function data area
1070 * @cpu: the remote cpu
1071 * @this_cpu: the currently executing cpu
1072 * @softirq: the softirq for the work
1073 *
1074 * Attempt to schedule softirq work on a remote cpu. If this cannot be
1075 * done, the work is instead queued up on the local cpu.
1076 *
1077 * Interrupts must be disabled.
1078 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* Target is us, is offline, or no SMP helpers: queue locally */
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
1084EXPORT_SYMBOL(__send_remote_softirq);
1085
1086/**
1087 * send_remote_softirq - try to schedule softirq work on a remote cpu
1088 * @cp: private SMP call function data area
1089 * @cpu: the remote cpu
1090 * @softirq: the softirq for the work
1091 *
1092 * Like __send_remote_softirq except that disabling interrupts and
1093 * computing the current cpu is done for the caller.
1094 */
/* Irq-safe wrapper around __send_remote_softirq(); see kernel-doc above */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	__send_remote_softirq(cp, cpu, smp_processor_id(), softirq);
	local_irq_restore(irqflags);
}
1105EXPORT_SYMBOL(send_remote_softirq);
1106
/* CPU hotplug callback for the remote-softirq work lists */
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}
1135
1136static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
1137 .notifier_call = remote_softirq_cpu_notify,
1138};
1139
/*
 * Boot-time setup: point each per-cpu tasklet tail at its own head,
 * init the remote-softirq work lists, register the hotplug notifier
 * and open the two tasklet softirq vectors.
 */
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
1160
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
/* Wait (busy-spin, or sleep on RT) until @t finishes its current run */
void tasklet_unlock_wait(struct tasklet_struct *t)
{
	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
		/*
		 * Hack for now to avoid this busy-loop:
		 */
#ifdef CONFIG_PREEMPT_RT_FULL
		msleep(1);
#else
		barrier();
#endif
	}
}
EXPORT_SYMBOL(tasklet_unlock_wait);
#endif
1177
/*
 * Per-cpu ksoftirqd thread main loop: sleep until softirqs are
 * pending, then drain them via ksoftirqd_do_softirq(). A nonzero
 * return from the latter means our cpu went offline, so park in
 * wait_to_die until kthread_stop().
 */
static int run_ksoftirqd(void * __bind_cpu)
{
	ksoftirqd_set_sched_params();

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending())
			schedule_preempt_disabled();

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preemption disabled across each do_softirq pass */
			if (ksoftirqd_do_softirq((long) __bind_cpu))
				goto wait_to_die;
			sched_preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
			rcu_note_context_switch((long)__bind_cpu);
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	ksoftirqd_clr_sched_params();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
1217
1218#ifdef CONFIG_HOTPLUG_CPU
1219/*
1220 * tasklet_kill_immediate is called to remove a tasklet which can already be
1221 * scheduled for execution on @cpu.
1222 *
1223 * Unlike tasklet_kill, this function removes the tasklet
1224 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
1225 *
1226 * When this function is called, @cpu must be in the CPU_DEAD state.
1227 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	/* Caller contract: @cpu must already be dead and @t not running. */
	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	/* Not queued anywhere — nothing to unlink. */
	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			/* Unlink @t from the singly-linked list. */
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	/* SCHED bit set but not found on @cpu's list: state corrupted. */
	BUG();
}
1250
1251static void takeover_tasklets(unsigned int cpu)
1252{
1253 /* CPU is dead, so no lock needed. */
1254 local_irq_disable();
1255
1256 /* Find end, append list for that CPU. */
1257 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
1258 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
1259 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
1260 per_cpu(tasklet_vec, cpu).head = NULL;
1261 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
1262 }
1263 raise_softirq_irqoff(TASKLET_SOFTIRQ);
1264
1265 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
1266 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
1267 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
1268 per_cpu(tasklet_hi_vec, cpu).head = NULL;
1269 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
1270 }
1271 raise_softirq_irqoff(HI_SOFTIRQ);
1272
1273 local_irq_enable();
1274}
1275#endif /* CONFIG_HOTPLUG_CPU */
1276
1277static int __cpuinit cpu_callback(struct notifier_block *nfb,
1278 unsigned long action,
1279 void *hcpu)
1280{
1281 int hotcpu = (unsigned long)hcpu;
1282 struct task_struct *p;
1283
1284 switch (action & ~CPU_TASKS_FROZEN) {
1285 case CPU_UP_PREPARE:
1286 p = kthread_create_on_node(run_ksoftirqd,
1287 (unsigned long)hcpu+1,
1288 cpu_to_node(hotcpu),
1289 "ksoftirqd/%d", hotcpu);
1290 if (IS_ERR(p)) {
1291 printk("ksoftirqd for %i failed\n", hotcpu);
1292 return notifier_from_errno(PTR_ERR(p));
1293 }
1294 kthread_bind(p, hotcpu);
1295 per_cpu(ksoftirqd, hotcpu) = p;
1296 break;
1297 case CPU_ONLINE:
1298 wake_up_process(per_cpu(ksoftirqd, hotcpu));
1299 break;
1300#ifdef CONFIG_HOTPLUG_CPU
1301 case CPU_UP_CANCELED:
1302 if (!per_cpu(ksoftirqd, hotcpu))
1303 break;
1304 /* Unbind so it can run. Fall thru. */
1305 kthread_bind(per_cpu(ksoftirqd, hotcpu),
1306 cpumask_any(cpu_online_mask));
1307 case CPU_POST_DEAD: {
1308 static const struct sched_param param = {
1309 .sched_priority = MAX_RT_PRIO-1
1310 };
1311
1312 p = per_cpu(ksoftirqd, hotcpu);
1313 per_cpu(ksoftirqd, hotcpu) = NULL;
1314 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
1315 kthread_stop(p);
1316 takeover_tasklets(hotcpu);
1317 break;
1318 }
1319#endif /* CONFIG_HOTPLUG_CPU */
1320 }
1321 return NOTIFY_OK;
1322}
1323
/* Hotplug notifier managing ksoftirqd thread lifecycle (see cpu_callback). */
static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};
1327
/*
 * Boot-time setup: invoke the hotplug callback by hand for the boot CPU
 * (create + wake its ksoftirqd), then register the notifier so all
 * later CPUs are handled automatically.
 */
static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	/* Thread creation failure this early is fatal. */
	BUG_ON(err != NOTIFY_OK);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);
1339
1340/*
1341 * [ These __weak aliases are kept in a separate compilation unit, so that
1342 * GCC does not inline them incorrectly. ]
1343 */
1344
1345int __init __weak early_irq_init(void)
1346{
1347 return 0;
1348}
1349
#ifdef CONFIG_GENERIC_HARDIRQS
/* Default IRQ count probe; architectures may override this __weak stub. */
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

/* Default no-op arch IRQ init hook; overridable __weak stub. */
int __init __weak arch_early_irq_init(void)
{
	return 0;
}
#endif