blob: 2f0372976459eb771c830442efe94d24c70ba489 [file] [log] [blame]
xjb04a4022021-11-25 15:01:52 +08001// SPDX-License-Identifier: GPL-2.0
2/*
3 * Ldisc rw semaphore
4 *
5 * The ldisc semaphore is semantically a rw_semaphore but which enforces
6 * an alternate policy, namely:
7 * 1) Supports lock wait timeouts
8 * 2) Write waiter has priority
9 * 3) Downgrading is not supported
10 *
11 * Implementation notes:
12 * 1) Upper half of semaphore count is a wait count (differs from rwsem
13 * in that rwsem normalizes the upper half to the wait bias)
14 * 2) Lacks overflow checking
15 *
16 * The generic counting was copied and modified from include/asm-generic/rwsem.h
17 * by Paul Mackerras <paulus@samba.org>.
18 *
19 * The scheduling policy was copied and modified from lib/rwsem.c
20 * Written by David Howells (dhowells@redhat.com).
21 *
22 * This implementation incorporates the write lock stealing work of
23 * Michel Lespinasse <walken@google.com>.
24 *
25 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com>
26 */
27
28#include <linux/list.h>
29#include <linux/spinlock.h>
30#include <linux/atomic.h>
31#include <linux/tty.h>
32#include <linux/sched.h>
33#include <linux/sched/debug.h>
34#include <linux/sched/task.h>
35
36
/*
 * Lockdep annotation helpers that take the ld_semaphore itself and forward
 * its dep_map.  When CONFIG_DEBUG_LOCK_ALLOC is off they all expand to an
 * empty statement.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __acq(sem, sub, trylock, rd, chk, nest, ip)			\
	lock_acquire(&(sem)->dep_map, sub, trylock, rd, chk, nest, ip)
# define __rel(sem, nest, ip)						\
	lock_release(&(sem)->dep_map, nest, ip)
/* write-side acquire: 4th lock_acquire() argument is 0, no nest lock */
# define lockdep_acquire(sem, sub, trylock, ip)				\
	__acq(sem, sub, trylock, 0, 1, NULL, ip)
/* same as lockdep_acquire() but with a caller-supplied nest lock */
# define lockdep_acquire_nest(sem, sub, trylock, nest, ip)		\
	__acq(sem, sub, trylock, 0, 1, nest, ip)
/* read-side acquire: 4th lock_acquire() argument is 1 */
# define lockdep_acquire_read(sem, sub, trylock, ip)			\
	__acq(sem, sub, trylock, 1, 1, NULL, ip)
# define lockdep_release(sem, nest, ip)					\
	__rel(sem, nest, ip)
#else
# define lockdep_acquire(sem, sub, trylock, ip)			do { } while (0)
# define lockdep_acquire_nest(sem, sub, trylock, nest, ip)	do { } while (0)
# define lockdep_acquire_read(sem, sub, trylock, ip)		do { } while (0)
# define lockdep_release(sem, nest, ip)				do { } while (0)
#endif
52
/*
 * lock_stat(sem, event) expands to lock_<event>(&sem->dep_map, _RET_IP_)
 * when CONFIG_LOCK_STAT is enabled (this file uses the events "contended"
 * and "acquired"); otherwise it is an empty statement.
 */
#ifdef CONFIG_LOCK_STAT
# define lock_stat(sem, event)	lock_##event(&(sem)->dep_map, _RET_IP_)
#else
# define lock_stat(sem, event)	do { } while (0)
#endif
58
59
/*
 * Layout of sem->count: the low half (selected by LDSEM_ACTIVE_MASK) counts
 * active lock holders, the upper half is a (negative) wait count.
 */
#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

#define LDSEM_UNLOCKED		0L
/* one active holder */
#define LDSEM_ACTIVE_BIAS	1L
/* one waiter: -1 in the upper half, low half untouched */
#define LDSEM_WAIT_BIAS		(-(LDSEM_ACTIVE_MASK + 1L))
/* a reader contributes only an active count */
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS
/* a writer contributes an active count and a wait count */
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)
71
72struct ldsem_waiter {
73 struct list_head list;
74 struct task_struct *task;
75};
76
77/*
78 * Initialize an ldsem:
79 */
80void __init_ldsem(struct ld_semaphore *sem, const char *name,
81 struct lock_class_key *key)
82{
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 /*
85 * Make sure we are not reinitializing a held semaphore:
86 */
87 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
88 lockdep_init_map(&sem->dep_map, name, key, 0);
89#endif
90 atomic_long_set(&sem->count, LDSEM_UNLOCKED);
91 sem->wait_readers = 0;
92 raw_spin_lock_init(&sem->wait_lock);
93 INIT_LIST_HEAD(&sem->read_wait);
94 INIT_LIST_HEAD(&sem->write_wait);
95}
96
97static void __ldsem_wake_readers(struct ld_semaphore *sem)
98{
99 struct ldsem_waiter *waiter, *next;
100 struct task_struct *tsk;
101 long adjust, count;
102
103 /*
104 * Try to grant read locks to all readers on the read wait list.
105 * Note the 'active part' of the count is incremented by
106 * the number of readers before waking any processes up.
107 */
108 adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
109 count = atomic_long_add_return(adjust, &sem->count);
110 do {
111 if (count > 0)
112 break;
113 if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
114 return;
115 } while (1);
116
117 list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
118 tsk = waiter->task;
119 smp_store_release(&waiter->task, NULL);
120 wake_up_process(tsk);
121 put_task_struct(tsk);
122 }
123 INIT_LIST_HEAD(&sem->read_wait);
124 sem->wait_readers = 0;
125}
126
127static inline int writer_trylock(struct ld_semaphore *sem)
128{
129 /*
130 * Only wake this writer if the active part of the count can be
131 * transitioned from 0 -> 1
132 */
133 long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
134 do {
135 if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
136 return 1;
137 if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
138 return 0;
139 } while (1);
140}
141
142static void __ldsem_wake_writer(struct ld_semaphore *sem)
143{
144 struct ldsem_waiter *waiter;
145
146 waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list);
147 wake_up_process(waiter->task);
148}
149
150/*
151 * handle the lock release when processes blocked on it that can now run
152 * - if we come here from up_xxxx(), then:
153 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
154 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
155 * - the spinlock must be held by the caller
156 * - woken process blocks are discarded from the list after having task zeroed
157 */
158static void __ldsem_wake(struct ld_semaphore *sem)
159{
160 if (!list_empty(&sem->write_wait))
161 __ldsem_wake_writer(sem);
162 else if (!list_empty(&sem->read_wait))
163 __ldsem_wake_readers(sem);
164}
165
166static void ldsem_wake(struct ld_semaphore *sem)
167{
168 unsigned long flags;
169
170 raw_spin_lock_irqsave(&sem->wait_lock, flags);
171 __ldsem_wake(sem);
172 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
173}
174
175/*
176 * wait for the read lock to be granted
177 */
178static struct ld_semaphore __sched *
179down_read_failed(struct ld_semaphore *sem, long count, long timeout)
180{
181 struct ldsem_waiter waiter;
182 long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;
183
184 /* set up my own style of waitqueue */
185 raw_spin_lock_irq(&sem->wait_lock);
186
187 /*
188 * Try to reverse the lock attempt but if the count has changed
189 * so that reversing fails, check if there are are no waiters,
190 * and early-out if not
191 */
192 do {
193 if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
194 count += adjust;
195 break;
196 }
197 if (count > 0) {
198 raw_spin_unlock_irq(&sem->wait_lock);
199 return sem;
200 }
201 } while (1);
202
203 list_add_tail(&waiter.list, &sem->read_wait);
204 sem->wait_readers++;
205
206 waiter.task = current;
207 get_task_struct(current);
208
209 /* if there are no active locks, wake the new lock owner(s) */
210 if ((count & LDSEM_ACTIVE_MASK) == 0)
211 __ldsem_wake(sem);
212
213 raw_spin_unlock_irq(&sem->wait_lock);
214
215 /* wait to be given the lock */
216 for (;;) {
217 set_current_state(TASK_UNINTERRUPTIBLE);
218
219 if (!smp_load_acquire(&waiter.task))
220 break;
221 if (!timeout)
222 break;
223 timeout = schedule_timeout(timeout);
224 }
225
226 __set_current_state(TASK_RUNNING);
227
228 if (!timeout) {
229 /*
230 * Lock timed out but check if this task was just
231 * granted lock ownership - if so, pretend there
232 * was no timeout; otherwise, cleanup lock wait.
233 */
234 raw_spin_lock_irq(&sem->wait_lock);
235 if (waiter.task) {
236 atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
237 list_del(&waiter.list);
238 raw_spin_unlock_irq(&sem->wait_lock);
239 put_task_struct(waiter.task);
240 return NULL;
241 }
242 raw_spin_unlock_irq(&sem->wait_lock);
243 }
244
245 return sem;
246}
247
248/*
249 * wait for the write lock to be granted
250 */
251static struct ld_semaphore __sched *
252down_write_failed(struct ld_semaphore *sem, long count, long timeout)
253{
254 struct ldsem_waiter waiter;
255 long adjust = -LDSEM_ACTIVE_BIAS;
256 int locked = 0;
257
258 /* set up my own style of waitqueue */
259 raw_spin_lock_irq(&sem->wait_lock);
260
261 /*
262 * Try to reverse the lock attempt but if the count has changed
263 * so that reversing fails, check if the lock is now owned,
264 * and early-out if so.
265 */
266 do {
267 if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
268 break;
269 if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
270 raw_spin_unlock_irq(&sem->wait_lock);
271 return sem;
272 }
273 } while (1);
274
275 list_add_tail(&waiter.list, &sem->write_wait);
276
277 waiter.task = current;
278
279 set_current_state(TASK_UNINTERRUPTIBLE);
280 for (;;) {
281 if (!timeout)
282 break;
283 raw_spin_unlock_irq(&sem->wait_lock);
284 timeout = schedule_timeout(timeout);
285 raw_spin_lock_irq(&sem->wait_lock);
286 set_current_state(TASK_UNINTERRUPTIBLE);
287 locked = writer_trylock(sem);
288 if (locked)
289 break;
290 }
291
292 if (!locked)
293 atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
294 list_del(&waiter.list);
295
296 /*
297 * In case of timeout, wake up every reader who gave the right of way
298 * to writer. Prevent separation readers into two groups:
299 * one that helds semaphore and another that sleeps.
300 * (in case of no contention with a writer)
301 */
302 if (!locked && list_empty(&sem->write_wait))
303 __ldsem_wake_readers(sem);
304
305 raw_spin_unlock_irq(&sem->wait_lock);
306
307 __set_current_state(TASK_RUNNING);
308
309 /* lock wait may have timed out */
310 if (!locked)
311 return NULL;
312 return sem;
313}
314
315
316
317static int __ldsem_down_read_nested(struct ld_semaphore *sem,
318 int subclass, long timeout)
319{
320 long count;
321
322 lockdep_acquire_read(sem, subclass, 0, _RET_IP_);
323
324 count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
325 if (count <= 0) {
326 lock_stat(sem, contended);
327 if (!down_read_failed(sem, count, timeout)) {
328 lockdep_release(sem, 1, _RET_IP_);
329 return 0;
330 }
331 }
332 lock_stat(sem, acquired);
333 return 1;
334}
335
336static int __ldsem_down_write_nested(struct ld_semaphore *sem,
337 int subclass, long timeout)
338{
339 long count;
340
341 lockdep_acquire(sem, subclass, 0, _RET_IP_);
342
343 count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
344 if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
345 lock_stat(sem, contended);
346 if (!down_write_failed(sem, count, timeout)) {
347 lockdep_release(sem, 1, _RET_IP_);
348 return 0;
349 }
350 }
351 lock_stat(sem, acquired);
352 return 1;
353}
354
355
356/*
357 * lock for reading -- returns 1 if successful, 0 if timed out
358 */
359int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout)
360{
361 might_sleep();
362 return __ldsem_down_read_nested(sem, 0, timeout);
363}
364
365/*
366 * trylock for reading -- returns 1 if successful, 0 if contention
367 */
368int ldsem_down_read_trylock(struct ld_semaphore *sem)
369{
370 long count = atomic_long_read(&sem->count);
371
372 while (count >= 0) {
373 if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) {
374 lockdep_acquire_read(sem, 0, 1, _RET_IP_);
375 lock_stat(sem, acquired);
376 return 1;
377 }
378 }
379 return 0;
380}
381
382/*
383 * lock for writing -- returns 1 if successful, 0 if timed out
384 */
385int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout)
386{
387 might_sleep();
388 return __ldsem_down_write_nested(sem, 0, timeout);
389}
390
391/*
392 * trylock for writing -- returns 1 if successful, 0 if contention
393 */
394int ldsem_down_write_trylock(struct ld_semaphore *sem)
395{
396 long count = atomic_long_read(&sem->count);
397
398 while ((count & LDSEM_ACTIVE_MASK) == 0) {
399 if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) {
400 lockdep_acquire(sem, 0, 1, _RET_IP_);
401 lock_stat(sem, acquired);
402 return 1;
403 }
404 }
405 return 0;
406}
407
408/*
409 * release a read lock
410 */
411void ldsem_up_read(struct ld_semaphore *sem)
412{
413 long count;
414
415 lockdep_release(sem, 1, _RET_IP_);
416
417 count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count);
418 if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
419 ldsem_wake(sem);
420}
421
422/*
423 * release a write lock
424 */
425void ldsem_up_write(struct ld_semaphore *sem)
426{
427 long count;
428
429 lockdep_release(sem, 1, _RET_IP_);
430
431 count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count);
432 if (count < 0)
433 ldsem_wake(sem);
434}
435
436
437#ifdef CONFIG_DEBUG_LOCK_ALLOC
438
/*
 * Lock @sem for reading with an explicit lockdep @subclass, waiting at
 * most @timeout.  May sleep.
 *
 * Returns 1 if successful, 0 if timed out.
 */
int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout)
{
	might_sleep();

	return __ldsem_down_read_nested(sem, subclass, timeout);
}
444
/*
 * Lock @sem for writing with an explicit lockdep @subclass, waiting at
 * most @timeout.  May sleep.
 *
 * Returns 1 if successful, 0 if timed out.
 */
int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
			    long timeout)
{
	might_sleep();

	return __ldsem_down_write_nested(sem, subclass, timeout);
}
451
452#endif