/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 *  Copyright (C) 2006 Esben Nielsen
 *
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 *  See Documentation/rt-mutex-design.txt for details.
 */
19#include <linux/spinlock.h>
20#include <linux/export.h>
21#include <linux/sched.h>
22#include <linux/timer.h>
23
24#include "rtmutex_common.h"
25
26/*
27 * lock->owner state tracking:
28 *
29 * lock->owner holds the task_struct pointer of the owner. Bit 0
30 * is used to keep track of the "lock has waiters" state.
31 *
32 * owner bit0
33 * NULL 0 lock is free (fast acquire possible)
34 * NULL 1 lock is free and has waiters and the top waiter
35 * is going to take the lock*
36 * taskpointer 0 lock is held (fast release possible)
37 * taskpointer 1 lock is held and has waiters**
38 *
39 * The fast atomic compare exchange based acquire and release is only
40 * possible when bit 0 of lock->owner is 0.
41 *
42 * (*) It also can be a transitional state when grabbing the lock
43 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
44 * we need to set the bit0 before looking at the lock, and the owner may be
45 * NULL in this small time, hence this can be a transitional state.
46 *
47 * (**) There is a small time when bit 0 is set but there are no
48 * waiters. This can happen when grabbing the lock in the slow path.
49 * To prevent a cmpxchg of the owner releasing the lock, we need to
50 * set this bit before looking at the lock.
51 */
52
53static void
54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
55{
56 unsigned long val = (unsigned long)owner;
57
58 if (rt_mutex_has_waiters(lock))
59 val |= RT_MUTEX_HAS_WAITERS;
60
61 lock->owner = (struct task_struct *)val;
62}
63
64static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
65{
66 lock->owner = (struct task_struct *)
67 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
68}
69
/*
 * Clear the "has waiters" bit when the wait list turned out to be
 * empty after a slow path operation left the transient bit set.
 */
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		clear_rt_mutex_waiters(lock);
}
75
76static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
77{
78 return waiter && waiter != PI_WAKEUP_INPROGRESS &&
79 waiter != PI_REQUEUE_INPROGRESS;
80}
81
82/*
83 * We can speed up the acquire/release, if the architecture
84 * supports cmpxchg and if there's no debugging state to be set up
85 */
86#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
87# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
88static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
89{
90 unsigned long owner, *p = (unsigned long *) &lock->owner;
91
92 do {
93 owner = *p;
94 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
95}
96
97/*
98 * Safe fastpath aware unlock:
99 * 1) Clear the waiters bit
100 * 2) Drop lock->wait_lock
101 * 3) Try to unlock the lock with cmpxchg
102 */
103static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
104 __releases(lock->wait_lock)
105{
106 struct task_struct *owner = rt_mutex_owner(lock);
107
108 clear_rt_mutex_waiters(lock);
109 raw_spin_unlock(&lock->wait_lock);
110 /*
111 * If a new waiter comes in between the unlock and the cmpxchg
112 * we have two situations:
113 *
114 * unlock(wait_lock);
115 * lock(wait_lock);
116 * cmpxchg(p, owner, 0) == owner
117 * mark_rt_mutex_waiters(lock);
118 * acquire(lock);
119 * or:
120 *
121 * unlock(wait_lock);
122 * lock(wait_lock);
123 * mark_rt_mutex_waiters(lock);
124 *
125 * cmpxchg(p, owner, 0) != owner
126 * enqueue_waiter();
127 * unlock(wait_lock);
128 * lock(wait_lock);
129 * wake waiter();
130 * unlock(wait_lock);
131 * lock(wait_lock);
132 * acquire(lock);
133 */
134 return rt_mutex_cmpxchg(lock, owner, NULL);
135}
136
137#else
138# define rt_mutex_cmpxchg(l,c,n) (0)
139static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
140{
141 lock->owner = (struct task_struct *)
142 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
143}
144
145/*
146 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
147 */
148static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
149 __releases(lock->wait_lock)
150{
151 lock->owner = NULL;
152 raw_spin_unlock(&lock->wait_lock);
153 return true;
154}
155#endif
156
/* Add @waiter to @lock's priority-sorted wait list. */
static inline void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	plist_add(&waiter->list_entry, &lock->wait_list);
}

/* Remove @waiter from @lock's wait list. */
static inline void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	plist_del(&waiter->list_entry, &lock->wait_list);
}

/*
 * Add @waiter to @task's pi_waiters list, mirroring the priority the
 * waiter has on the lock wait list.  Callers hold @task->pi_lock.
 */
static inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	waiter->pi_list_entry.prio = waiter->list_entry.prio;
	plist_add(&waiter->pi_list_entry, &task->pi_waiters);
}

/* Remove @waiter from @task's pi_waiters list.  Callers hold @task->pi_lock. */
static inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	plist_del(&waiter->pi_list_entry, &task->pi_waiters);
}
181
/*
 * Lazily initialize @lock's wait list the first time it is used,
 * detected via a still-NULL node_list.prev pointer.
 */
static inline void init_lists(struct rt_mutex *lock)
{
	if (unlikely(!lock->wait_list.node_list.prev))
		plist_head_init(&lock->wait_list);
}
187
188/*
189 * Calculate task priority from the waiter list priority
190 *
191 * Return task->normal_prio when the waiter list is empty or when
192 * the waiter is not allowed to do priority boosting
193 */
194int rt_mutex_getprio(struct task_struct *task)
195{
196 if (likely(!task_has_pi_waiters(task)))
197 return task->normal_prio;
198
199 return min(task_top_pi_waiter(task)->pi_list_entry.prio,
200 task->normal_prio);
201}
202
203/*
204 * Called by sched_setscheduler() to check whether the priority change
205 * is overruled by a possible priority boosting.
206 */
207int rt_mutex_check_prio(struct task_struct *task, int newprio)
208{
209 if (!task_has_pi_waiters(task))
210 return 0;
211
212 return task_top_pi_waiter(task)->pi_list_entry.prio <= newprio;
213}
214
215/*
216 * Adjust the priority of a task, after its pi_waiters got modified.
217 *
218 * This can be both boosting and unboosting. task->pi_lock must be held.
219 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	/* Only call into the scheduler when the effective prio changes. */
	if (task->prio != prio)
		rt_mutex_setprio(task, prio);
}
227
228/*
229 * Adjust task priority (undo boosting). Called from the exit path of
230 * rt_mutex_slowunlock() and rt_mutex_slowlock().
231 *
232 * (Note: We do this outside of the protection of lock->wait_lock to
233 * allow the lock to be taken while or before we readjust the priority
234 * of task. We do not use the spin_xx_mutex() variants here as we are
235 * outside of the debug path.)
236 */
static void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	/* pi_lock serializes against concurrent pi_waiters updates. */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
245
246static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
247{
248 if (waiter->savestate)
249 wake_up_lock_sleeper(waiter->task);
250 else
251 wake_up_process(waiter->task);
252}
253
254/*
255 * Deadlock detection is conditional:
256 *
257 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
258 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
259 *
260 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
261 * conducted independent of the detect argument.
262 *
263 * If the waiter argument is NULL this indicates the deboost path and
264 * deadlock detection is disabled independent of the detect argument
265 * and the config settings.
266 */
/* Wrapper so the deadlock-detection policy comments stay in this file. */
static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
					  enum rtmutex_chainwalk chwalk)
{
	/*
	 * This is just a wrapper function for the following call,
	 * because debug_rt_mutex_detect_deadlock() smells like a magic
	 * debug feature and I wanted to keep the cond function in the
	 * main source file along with the comments instead of having
	 * two of the same in the headers.
	 */
	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
279
/*
 * Max number of times we'll walk the boosting chain:
 * bounds the iterations of rt_mutex_adjust_prio_chain().
 */
int max_lock_depth = 1024;
284
285static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
286{
287 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
288}
289
290/*
291 * Adjust the priority chain. Also used for deadlock detection.
292 * Decreases task's usage by one - may thus free the task.
293 * Returns 0 or -EDEADLK.
294 *
295 * Chain walk basics and protection scope
296 *
297 * [R] refcount on task
298 * [P] task->pi_lock held
299 * [L] rtmutex->wait_lock held
300 *
301 * Step Description Protected by
302 * function arguments:
303 * @task [R]
304 * @orig_lock if != NULL @top_task is blocked on it
305 * @next_lock Unprotected. Cannot be
306 * dereferenced. Only used for
307 * comparison.
308 * @orig_waiter if != NULL @top_task is blocked on it
309 * @top_task current, or in case of proxy
310 * locking protected by calling
311 * code
312 * again:
313 * loop_sanity_check();
314 * retry:
315 * [1] lock(task->pi_lock); [R] acquire [P]
316 * [2] waiter = task->pi_blocked_on; [P]
317 * [3] check_exit_conditions_1(); [P]
318 * [4] lock = waiter->lock; [P]
319 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
320 * unlock(task->pi_lock); release [P]
321 * goto retry;
322 * }
323 * [6] check_exit_conditions_2(); [P] + [L]
324 * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
325 * [8] unlock(task->pi_lock); release [P]
326 * put_task_struct(task); release [R]
327 * [9] check_exit_conditions_3(); [L]
328 * [10] task = owner(lock); [L]
329 * get_task_struct(task); [L] acquire [R]
330 * lock(task->pi_lock); [L] acquire [P]
331 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
332 * [12] check_exit_conditions_4(); [P] + [L]
333 * [13] unlock(task->pi_lock); release [P]
334 * unlock(lock->wait_lock); release [L]
335 * goto again;
336 */
/*
 * Walk and adjust the PI chain starting at @task, following the
 * step-by-step protection scheme documented above.  Consumes the
 * caller's reference on @task (put_task_struct() on every exit path).
 * Returns 0 or -EDEADLK.
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      enum rtmutex_chainwalk chwalk,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex *lock;
	bool detect_deadlock;
	unsigned long flags;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/*
	 * [2] Get the waiter on which @task is blocked on.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!rt_mutex_real_waiter(waiter))
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If its
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (waiter->list_entry.prio == task->prio) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = rt_mutex_owner(lock);
		get_task_struct(task);
		raw_spin_lock_irqsave(&task->pi_lock, flags);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		raw_spin_unlock(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter list. */
	rt_mutex_dequeue(lock, waiter);
	waiter->list_entry.prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the dead lock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		struct rt_mutex_waiter *lock_top_waiter;

		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		lock_top_waiter = rt_mutex_top_waiter(lock);
		if (prerequeue_top_waiter != lock_top_waiter)
			rt_mutex_wake_waiter(lock_top_waiter);
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner tasks pi waiters list with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top prority waiter. Replace waiter in
		 * the owner tasks pi waiters list with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	raw_spin_unlock(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}
671
672
673#define STEAL_NORMAL 0
674#define STEAL_LATERAL 1
675
676/*
677 * Note that RT tasks are excluded from lateral-steals to prevent the
678 * introduction of an unbounded latency
679 */
680static inline int lock_is_stealable(struct task_struct *task,
681 struct task_struct *pendowner, int mode)
682{
683 if (mode == STEAL_NORMAL || rt_task(task)) {
684 if (task->prio >= pendowner->prio)
685 return 0;
686 } else if (task->prio > pendowner->prio)
687 return 0;
688 return 1;
689}
690
691/*
692 * Try to take an rt-mutex
693 *
694 * Must be called with lock->wait_lock held.
695 *
696 * @lock: The lock to be acquired.
697 * @task: The task which wants to acquire the lock
698 * @waiter: The waiter that is queued to the lock's wait list if the
699 * callsite called task_blocked_on_lock(), otherwise NULL
700 */
/* Returns 1 when @task now owns @lock, 0 otherwise.  See comment above. */
static int
__try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
		       struct rt_mutex_waiter *waiter, int mode)
{
	unsigned long flags;

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter list. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters list.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			struct task_struct *pown = rt_mutex_top_waiter(lock)->task;

			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (task != pown && !lock_is_stealable(task, pown, mode))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance.@task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters list.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters list.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}
818
/* Normal (non-lateral-steal) variant of __try_to_take_rt_mutex(). */
static inline int
try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
}
825
826/*
827 * Task blocks on lock.
828 *
829 * Prepare waiter and propagate pi chain
830 *
831 * This must be called with lock->wait_lock held.
832 */
/*
 * Returns 0 on success, -EDEADLK on self-deadlock, -EAGAIN when a
 * futex requeue-PI wakeup is already in progress for @task.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;
	unsigned long flags;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/*
	 * In the case of futex requeue PI, this will be a proxy
	 * lock. The task will wake unaware that it is enqueueed on
	 * this lock. Avoid blocking on two locks and corrupting
	 * pi_blocked_on via the PI_WAKEUP_INPROGRESS
	 * flag. futex_wait_requeue_pi() sets this when it wakes up
	 * before requeue (due to a signal or timeout). Do not enqueue
	 * the task if PI_WAKEUP_INPROGRESS is set.
	 */
	if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return -EAGAIN;
	}

	BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));

	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	plist_node_init(&waiter->list_entry, task->prio);
	plist_node_init(&waiter->pi_list_entry, task->prio);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* No owner: nothing to boost, no chain to walk. */
	if (!owner)
		return 0;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);
	if (waiter == rt_mutex_top_waiter(lock)) {
		/* New top waiter: swap it into owner's pi_waiters and boost. */
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (rt_mutex_real_waiter(owner->pi_blocked_on))
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock(&lock->wait_lock);

	return res;
}
932
933/*
934 * Wake up the next waiter on the lock.
935 *
936 * Remove the top waiter from the current tasks pi waiter list and
937 * wake it up.
938 *
939 * Called with lock->wait_lock held.
940 */
/* Caller is the (current) owner releasing @lock and holds lock->wait_lock. */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * It's safe to dereference waiter as it cannot go away as
	 * long as we hold lock->wait_lock. The waiter task needs to
	 * acquire it in order to dequeue the waiter.
	 */
	rt_mutex_wake_waiter(waiter);
}
977
978/*
979 * Remove a waiter from a lock and give up
980 *
981 * Must be called with lock->wait_lock held and
982 * have just failed to try_to_take_rt_mutex().
983 */
/*
 * Dequeue @waiter (owned by current); when it was the top waiter of an
 * owned lock, fix up the owner's pi_waiters and, if the owner is itself
 * blocked, walk the PI chain to deboost it.
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	if (rt_mutex_real_waiter(owner->pi_blocked_on))
		next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);

	WARN_ON(!plist_node_empty(&waiter->pi_list_entry));

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock(&lock->wait_lock);
}
1038
1039/*
1040 * Recheck the pi chain, in case we got a priority setting
1041 *
1042 * Called from sched_setscheduler
1043 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/* Nothing to do when not really blocked or the prio already matches. */
	if (!rt_mutex_real_waiter(waiter) ||
	    waiter->list_entry.prio == task->prio) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}

	next_lock = waiter->lock;
	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}
1066
1067#ifdef CONFIG_PREEMPT_RT_FULL
1068/*
1069 * preemptible spin_lock functions:
1070 */
1071static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
1072 void (*slowfn)(struct rt_mutex *lock))
1073{
1074 might_sleep();
1075
1076 if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
1077 rt_mutex_deadlock_account_lock(lock, current);
1078 else
1079 slowfn(lock);
1080}
1081
1082static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
1083 void (*slowfn)(struct rt_mutex *lock))
1084{
1085 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
1086 rt_mutex_deadlock_account_unlock(current);
1087 else
1088 slowfn(lock);
1089}
1090
1091#ifdef CONFIG_SMP
/*
 * Note that owner is a speculative pointer and dereferencing relies
 * on rcu_read_lock() and the check against the lock owner.
 *
 * Spin while @owner still holds @lock and is running on a CPU.
 * Returns 0 when ownership changed (worth retrying the lock) and 1
 * when the owner scheduled out (the caller should block too).
 */
static int adaptive_wait(struct rt_mutex *lock,
			 struct task_struct *owner)
{
	int res = 0;

	rcu_read_lock();
	for (;;) {
		/* Owner changed: stop spinning, the lock may be free now */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that owner->on_cpu is dereferenced _after_
		 * checking the above to be valid.
		 */
		barrier();
		if (!owner->on_cpu) {
			/* Owner is off CPU: spinning would be pointless */
			res = 1;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
1119#else
/* UP: the owner can never be running on another CPU; always block */
static int adaptive_wait(struct rt_mutex *lock,
			 struct task_struct *orig_owner)
{
	return 1;
}
1125#endif
1126
/* Shorthands: the task ->pi_lock is always taken with interrupts disabled */
# define pi_lock(lock)		raw_spin_lock_irq(lock)
# define pi_unlock(lock)	raw_spin_unlock_irq(lock)
1129
/*
 * Slow path lock function spin_lock style: this variant is very
 * careful not to miss any non-lock wakeups.
 *
 * We store the current state under p->pi_lock in p->saved_state and
 * the try_to_wake_up() code handles this accordingly.
 */
static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
{
	struct task_struct *lock_owner, *self = current;
	struct rt_mutex_waiter waiter, *top_waiter;
	int ret;

	/* 'true': spinlock-style waiter (rt_mutex_slowlock() passes false) */
	rt_mutex_init_waiter(&waiter, true);

	raw_spin_lock(&lock->wait_lock);
	init_lists(lock);

	/* The lock may have been released since the fast path failed */
	if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
		raw_spin_unlock(&lock->wait_lock);
		return;
	}

	/* Recursive spin_lock() would self-deadlock on RT */
	BUG_ON(rt_mutex_owner(lock) == self);

	/*
	 * We save whatever state the task is in and we'll restore it
	 * after acquiring the lock taking real wakeups into account
	 * as well. We are serialized via pi_lock against wakeups. See
	 * try_to_wake_up().
	 */
	pi_lock(&self->pi_lock);
	self->saved_state = self->state;
	__set_current_state(TASK_UNINTERRUPTIBLE);
	pi_unlock(&self->pi_lock);

	/* Enqueue ourselves as a waiter; must succeed for a spinlock */
	ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
	BUG_ON(ret);

	for (;;) {
		/* Try to acquire the lock again. */
		if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
			break;

		top_waiter = rt_mutex_top_waiter(lock);
		lock_owner = rt_mutex_owner(lock);

		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(&waiter);

		/*
		 * Only the top waiter spins on the owner via
		 * adaptive_wait(); everybody else schedules out.
		 */
		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
			schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);

		/* A real wakeup may have set ->state; go back to sleep state */
		pi_lock(&self->pi_lock);
		__set_current_state(TASK_UNINTERRUPTIBLE);
		pi_unlock(&self->pi_lock);
	}

	/*
	 * Restore the task state to current->saved_state. We set it
	 * to the original state above and the try_to_wake_up() code
	 * has possibly updated it when a real (non-rtmutex) wakeup
	 * happened while we were blocked. Clear saved_state so
	 * try_to_wakeup() does not get confused.
	 */
	pi_lock(&self->pi_lock);
	__set_current_state(self->saved_state);
	self->saved_state = TASK_RUNNING;
	pi_unlock(&self->pi_lock);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);

	BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
	BUG_ON(!plist_node_empty(&waiter.list_entry));

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_free_waiter(&waiter);
}
1216
/*
 * Slow path to release a rt_mutex spin_lock style
 *
 * Called with lock->wait_lock held; drops it before returning.
 */
static void __sched __rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/* No waiters queued: just clear the owner and we are done */
	if (!rt_mutex_has_waiters(lock)) {
		lock->owner = NULL;
		raw_spin_unlock(&lock->wait_lock);
		return;
	}

	wakeup_next_waiter(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Undo pi boosting when necessary */
	rt_mutex_adjust_prio(current);
}
1239
/* Slow path unlock: take ->wait_lock; __rt_spin_lock_slowunlock() drops it */
static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
	raw_spin_lock(&lock->wait_lock);
	__rt_spin_lock_slowunlock(lock);
}
1245
1246static void noinline __sched rt_spin_lock_slowunlock_hirq(struct rt_mutex *lock)
1247{
1248 int ret;
1249
1250 do {
1251 ret = raw_spin_trylock(&lock->wait_lock);
1252 } while (!ret);
1253
1254 __rt_spin_lock_slowunlock(lock);
1255}
1256
/* PREEMPT_RT sleeping spin_lock: take the rtmutex, then annotate lockdep */
void __lockfunc rt_spin_lock(spinlock_t *lock)
{
	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock);
1263
/* Bare rtmutex variant of rt_spin_lock(), without lockdep annotation */
void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
{
	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
}
EXPORT_SYMBOL(__rt_spin_lock);
1269
1270#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Nesting-aware variant: @subclass is forwarded to lockdep */
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
1277#endif
1278
/* Release a PREEMPT_RT sleeping spin_lock: lockdep first, then the rtmutex */
void __lockfunc rt_spin_unlock(spinlock_t *lock)
{
	/* NOTE: we always pass in '1' for nested, for simplicity */
	spin_release(&lock->dep_map, 1, _RET_IP_);
	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(rt_spin_unlock);
1286
/*
 * Unlock after a successful trylock in hard interrupt context: uses the
 * spinning _hirq slow path, as blocking on ->wait_lock is not possible here.
 */
void __lockfunc rt_spin_unlock_after_trylock_in_irq(spinlock_t *lock)
{
	/* NOTE: we always pass in '1' for nested, for simplicity */
	spin_release(&lock->dep_map, 1, _RET_IP_);
	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_hirq);
}
1293
/* Bare rtmutex variant of rt_spin_unlock(), without lockdep annotation */
void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
{
	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(__rt_spin_unlock);
1299
/*
 * Wait for the lock to get unlocked: instead of polling for an unlock
 * (like raw spinlocks do), we lock and unlock, to force the kernel to
 * schedule if there's contention:
 */
void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
{
	/* Queue behind the current owner, then release immediately */
	spin_lock(lock);
	spin_unlock(lock);
}
EXPORT_SYMBOL(rt_spin_unlock_wait);
1311
/* Try to take the lock; annotate with lockdep only on success */
int __lockfunc rt_spin_trylock(spinlock_t *lock)
{
	int ret = rt_mutex_trylock(&lock->lock);

	if (ret)
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock);
1321
/*
 * Trylock with bottom halves disabled. On success BHs stay disabled and
 * migration is disabled; on failure BHs are re-enabled before returning.
 */
int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
{
	int ret;

	local_bh_disable();
	ret = rt_mutex_trylock(&lock->lock);
	if (ret) {
		migrate_disable();
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	} else
		local_bh_enable();
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_bh);
1336
/*
 * Trylock matching the spin_trylock_irqsave() API shape. Interrupts are
 * not actually disabled here; *flags is only zeroed to satisfy the API —
 * NOTE(review): presumably no caller restores irq state from it; confirm.
 */
int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
{
	int ret;

	*flags = 0;
	migrate_disable();
	ret = rt_mutex_trylock(&lock->lock);
	if (ret)
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	else
		migrate_enable();
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_irqsave);
1351
/*
 * atomic_dec_and_lock() for RT spinlocks.
 *
 * Returns 1 when the counter dropped to zero: the lock is then held and
 * migration is disabled, and the caller must undo both. Returns 0
 * otherwise, with the lock released and migration re-enabled.
 */
int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;
	migrate_disable();
	rt_spin_lock(lock);
	if (atomic_dec_and_test(atomic))
		return 1;
	rt_spin_unlock(lock);
	migrate_enable();
	return 0;
}
EXPORT_SYMBOL(atomic_dec_and_spin_lock);
1366
/* Set up lockdep state for an RT spinlock; no-op without DEBUG_LOCK_ALLOC */
void
__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
}
EXPORT_SYMBOL(__rt_spin_lock_init);
1379
1380#endif /* PREEMPT_RT_FULL */
1381
/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock: the rt_mutex to take
 * @state: the state the task should block in (TASK_INTERRUPTIBLE
 * or TASK_UNINTERRUPTIBLE)
 * @timeout: the pre-initialized and started timer, or NULL for none
 * @waiter: the pre-initialized rt_mutex_waiter
 *
 * lock->wait_lock must be held by the caller.
 *
 * Returns 0 on success, -EINTR on a signal or -ETIMEDOUT when the timer
 * fired (both only in TASK_INTERRUPTIBLE state).
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			/* timeout->task is cleared once the timer has fired */
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		/* Drop wait_lock while sleeping; the waiter stays enqueued */
		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	return ret;
}
1430
/*
 * Handle the fallout of a failed lock attempt.
 *
 * @res: return value of the blocking operation
 * @detect_deadlock: non-zero when the caller asked for deadlock detection
 * @w: the failed waiter, used for the debug report
 */
static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}
1450
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  enum rtmutex_chainwalk chwalk)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	/* 'false': sleeping-lock waiter (rt_spin_lock_slowlock() passes true) */
	rt_mutex_init_waiter(&waiter, false);

	raw_spin_lock(&lock->wait_lock);
	init_lists(lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		/* Already expired: flag the timeout for the wait loop */
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	set_current_state(TASK_RUNNING);

	/* On -EINTR/-ETIMEDOUT/-EDEADLK take ourselves off the wait list */
	if (unlikely(ret)) {
		if (rt_mutex_has_waiters(lock))
			remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
1511
/*
 * Slow path try-lock function: returns 1 on success, 0 on contention.
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and
	 * try to acquire the lock.
	 */
	if (!raw_spin_trylock(&lock->wait_lock))
		return 0;
	init_lists(lock);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}
1547
/*
 * Slow path to release a rt-mutex: wake the top waiter (if any) and undo
 * our own pi boosting.
 */
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
	raw_spin_lock(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 * rtmutex_lock(foo->lock); <- fast path
	 * free = atomic_dec_and_test(foo->refcnt);
	 * rtmutex_unlock(foo->lock); <- fast path
	 * if (free)
	 * kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 * owner = rt_mutex_owner(lock);
	 * clear_rt_mutex_waiters(lock);
	 * raw_spin_unlock(&lock->wait_lock);
	 * if (cmpxchg(&lock->owner, owner, 0) == owner)
	 * return;
	 * goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 * lock->owner = NULL;
	 * raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock(&lock->wait_lock);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 */
	wakeup_next_waiter(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Undo pi boosting if necessary: */
	rt_mutex_adjust_prio(current);
}
1610
1611/*
1612 * debug aware fast / slowpath lock,trylock,unlock
1613 *
1614 * The atomic acquire/release ops are compiled away, when either the
1615 * architecture does not support cmpxchg or when debugging is enabled.
1616 */
1617static inline int
1618rt_mutex_fastlock(struct rt_mutex *lock, int state,
1619 int (*slowfn)(struct rt_mutex *lock, int state,
1620 struct hrtimer_sleeper *timeout,
1621 enum rtmutex_chainwalk chwalk))
1622{
1623 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
1624 rt_mutex_deadlock_account_lock(lock, current);
1625 return 0;
1626 } else
1627 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
1628}
1629
1630static inline int
1631rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
1632 struct hrtimer_sleeper *timeout,
1633 enum rtmutex_chainwalk chwalk,
1634 int (*slowfn)(struct rt_mutex *lock, int state,
1635 struct hrtimer_sleeper *timeout,
1636 enum rtmutex_chainwalk chwalk))
1637{
1638 if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
1639 likely(rt_mutex_cmpxchg(lock, NULL, current))) {
1640 rt_mutex_deadlock_account_lock(lock, current);
1641 return 0;
1642 } else
1643 return slowfn(lock, state, timeout, chwalk);
1644}
1645
1646static inline int
1647rt_mutex_fasttrylock(struct rt_mutex *lock,
1648 int (*slowfn)(struct rt_mutex *lock))
1649{
1650 if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
1651 rt_mutex_deadlock_account_lock(lock, current);
1652 return 1;
1653 }
1654 return slowfn(lock);
1655}
1656
1657static inline void
1658rt_mutex_fastunlock(struct rt_mutex *lock,
1659 void (*slowfn)(struct rt_mutex *lock))
1660{
1661 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
1662 rt_mutex_deadlock_account_unlock(current);
1663 else
1664 slowfn(lock);
1665}
1666
/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	/* Uninterruptible wait: the slow path cannot fail in this state */
	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
1679
/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns:
 * 0 on success
 * -EINTR when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	might_sleep();

	/* No timeout; minimal chainwalk (see rt_mutex_fastlock()) */
	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1696
/*
 * Futex variant with full deadlock detection: always walks the full
 * PI chain (RT_MUTEX_FULL_CHAINWALK), bypassing the cmpxchg fast path.
 */
int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
			      struct hrtimer_sleeper *timeout)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_FULL_CHAINWALK, rt_mutex_slowlock);
}
1708
/**
 * rt_mutex_lock_killable - lock a rt_mutex killable
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns:
 * 0 on success
 * -EINTR when interrupted by a signal
 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
 */
int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
{
	might_sleep();

	/* TASK_KILLABLE: only fatal signals interrupt the wait */
	return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
1726
/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
 * the timeout structure is provided
 * by the caller
 *
 * @lock: the rt_mutex to be locked
 * @timeout: timeout structure or NULL (no timeout)
 *
 * Returns:
 * 0 on success
 * -EINTR when interrupted by a signal
 * -ETIMEDOUT when the timeout expired
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
	might_sleep();

	/* Minimal chainwalk — contrast rt_mutex_timed_futex_lock() */
	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_MIN_CHAINWALK, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1749
/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	/* cmpxchg fast path; rt_mutex_slowtrylock() handles the rest */
	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1762
/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	/* Slow path wakes the top waiter and undoes pi boosting */
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1773
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	/* Poison the debug magic so later use trips the debug checks */
	lock->magic = NULL;
#endif
}
EXPORT_SYMBOL(rt_mutex_destroy);
1790
/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	/* NULL owner with bit0 clear == free, fast acquire possible */
	lock->owner = NULL;
	plist_head_init(&lock->wait_list);

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL(__rt_mutex_init);
1808
/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 * proxy owner
 *
 * @lock: the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	rt_mutex_init(lock);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	/* Make @proxy_owner the owner without it ever calling lock() */
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
1827
/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock: the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	/* Counterpart to rt_mutex_init_proxy_locked(): drop the owner */
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}
1843
/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock: the rt_mutex to take
 * @waiter: the pre-initialized rt_mutex_waiter
 * @task: the task to prepare
 *
 * Returns:
 * 0 - task blocked on lock
 * 1 - acquired the lock for task, caller should wake it up
 * <0 - error (-EAGAIN on the PREEMPT_RT requeue race, see below)
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	/* Lock is free: take it on @task's behalf, caller wakes it */
	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 1;
	}

#ifdef CONFIG_PREEMPT_RT_FULL
	/*
	 * In PREEMPT_RT there's an added race.
	 * If the task, that we are about to requeue, times out,
	 * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
	 * to skip this task. But right after the task sets
	 * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
	 * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
	 * This will replace the PI_WAKEUP_INPROGRESS with the actual
	 * lock that it blocks on. We *must not* place this task
	 * on this proxy lock in that case.
	 *
	 * To prevent this race, we first take the task's pi_lock
	 * and check if it has updated its pi_blocked_on. If it has,
	 * we assume that it woke up and we return -EAGAIN.
	 * Otherwise, we set the task's pi_blocked_on to
	 * PI_REQUEUE_INPROGRESS, so that if the task is waking up
	 * it will know that we are in the process of requeuing it.
	 */
	raw_spin_lock_irq(&task->pi_lock);
	if (task->pi_blocked_on) {
		raw_spin_unlock_irq(&task->pi_lock);
		raw_spin_unlock(&lock->wait_lock);
		return -EAGAIN;
	}
	task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
	raw_spin_unlock_irq(&task->pi_lock);
#endif

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	/* Failed to block: make sure @waiter is not left enqueued */
	if (ret && rt_mutex_has_waiters(lock))
		remove_waiter(lock, waiter);

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}
1922
1923/**
1924 * rt_mutex_next_owner - return the next owner of the lock
1925 *
1926 * @lock: the rt lock query
1927 *
1928 * Returns the next owner of the lock or NULL
1929 *
1930 * Caller has to serialize against other accessors to the lock
1931 * itself.
1932 *
1933 * Special API call for PI-futex support
1934 */
1935struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1936{
1937 if (!rt_mutex_has_waiters(lock))
1938 return NULL;
1939
1940 return rt_mutex_top_waiter(lock)->task;
1941}
1942
/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock: the rt_mutex we were woken on
 * @to: the timeout, null if none. hrtimer should already have
 * been started.
 * @waiter: the pre-initialized rt_mutex_waiter
 *
 * Complete the lock acquisition started our behalf by another thread
 * (see rt_mutex_start_proxy_lock()).
 *
 * Returns:
 * 0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	/* Wait-wake-try-to-take loop; drops/retakes wait_lock internally */
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	set_current_state(TASK_RUNNING);

	/* On failure, take ourselves off the wait list */
	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}