Blame - ap/libc/glibc/glibc-2.23/nptl/sem_waitcommon.c - T106_DC

blob: 3ba73ab0b7a67e26c93e431bf5ebf99c3ea0f97f [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame]	1	/* sem_waitcommon -- wait on a semaphore, shared code.
				2	Copyright (C) 2003-2016 Free Software Foundation, Inc.
				3	This file is part of the GNU C Library.
				4	Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003.
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library; if not, see
				18	<http://www.gnu.org/licenses/>. */
				19
				20	#include <kernel-features.h>
				21	#include <errno.h>
				22	#include <sysdep.h>
				23	#include <futex-internal.h>
				24	#include <internaltypes.h>
				25	#include <semaphore.h>
				26	#include <sys/time.h>
				27
				28	#include <pthreadP.h>
				29	#include <shlib-compat.h>
				30	#include <atomic.h>
				31
				32
				33	/* The semaphore provides two main operations: sem_post adds a token to the
				34	semaphore; sem_wait grabs a token from the semaphore, potentially waiting
				35	until there is a token available. A sem_wait needs to synchronize with
				36	the sem_post that provided the token, so that whatever lead to the sem_post
				37	happens before the code after sem_wait.
				38
				39	Conceptually, available tokens can simply be counted; let's call that the
				40	value of the semaphore. However, we also want to know whether there might
				41	be a sem_wait that is blocked on the value because it was zero (using a
				42	futex with the value being the futex variable); if there is no blocked
				43	sem_wait, sem_post does not need to execute a futex_wake call. Therefore,
				44	we also need to count the number of potentially blocked sem_wait calls
				45	(which we call nwaiters).
				46
				47	What makes this tricky is that POSIX requires that a semaphore can be
				48	destroyed as soon as the last remaining sem_wait has returned, and no
				49	other sem_wait or sem_post calls are executing concurrently. However, the
				50	sem_post call whose token was consumed by the last sem_wait is considered
				51	to have finished once it provided the token to the sem_wait.
				52	Thus, sem_post must not access the semaphore struct anymore after it has
				53	made a token available; IOW, it needs to be able to atomically provide
				54	a token and check whether any blocked sem_wait calls might exist.
				55
				56	This is straightforward to do if the architecture provides 64b atomics
				57	because we can just put both the value and nwaiters into one variable that
				58	we access atomically: This is the data field, the value is in the
				59	least-significant 32 bits, and nwaiters in the other bits. When sem_post
				60	makes a value available, it can atomically check nwaiters.
				61
				62	If we have only 32b atomics available, we cannot put both nwaiters and
				63	value into one 32b value because then we might have too few bits for both
				64	of those counters. Therefore, we need to use two distinct fields.
				65
				66	To allow sem_post to atomically make a token available and check for
				67	blocked sem_wait calls, we use one bit in value to indicate whether
				68	nwaiters is nonzero. That allows sem_post to use basically the same
				69	algorithm as with 64b atomics, but requires sem_wait to update the bit; it
				70	can't do this atomically with another access to nwaiters, but it can compute
				71	a conservative value for the bit because it's benign if the bit is set
				72	even if nwaiters is zero (all we get is an unnecessary futex wake call by
				73	sem_post).
				74	Specifically, sem_wait will unset the bit speculatively if it believes that
				75	there is no other concurrently executing sem_wait. If it misspeculated,
				76	it will have to clean up by waking any other sem_wait call (i.e., what
				77	sem_post would do otherwise). This does not conflict with the destruction
				78	requirement because the semaphore must not be destructed while any sem_wait
				79	is still executing. */
				80
				81	#if !__HAVE_64B_ATOMICS
				82	static void
				83	__sem_wait_32_finish (struct new_sem *sem);
				84	#endif
				85
				86	static void
				87	__sem_wait_cleanup (void *arg)
				88	{
				89	struct new_sem sem = (struct new_sem ) arg;
				90
				91	#if __HAVE_64B_ATOMICS
				92	/* Stop being registered as a waiter. See below for MO. */
				93	atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
				94	#else
				95	__sem_wait_32_finish (sem);
				96	#endif
				97	}
				98
				99	/* Wait until at least one token is available, possibly with a timeout.
				100	This is in a separate function in order to make sure gcc
				101	puts the call site into an exception region, and thus the
				102	cleanups get properly run. TODO still necessary? Other futex_wait
				103	users don't seem to need it. */
				104	static int
				105	__attribute__ ((noinline))
				106	do_futex_wait (struct new_sem sem, const struct timespec abstime)
				107	{
				108	int err;
				109
				110	#if __HAVE_64B_ATOMICS
				111	err = futex_abstimed_wait_cancelable (
				112	(unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0, abstime,
				113	sem->private);
				114	#else
				115	err = futex_abstimed_wait_cancelable (&sem->value, SEM_NWAITERS_MASK,
				116	abstime, sem->private);
				117	#endif
				118
				119	return err;
				120	}
				121
				122	/* Fast path: Try to grab a token without blocking. */
				123	static int
				124	__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
				125	{
				126	/* We need acquire MO if we actually grab a token, so that this
				127	synchronizes with all token providers (i.e., the RMW operation we read
				128	from or all those before it in modification order; also see sem_post).
				129	We do not need to guarantee any ordering if we observed that there is
				130	no token (POSIX leaves it unspecified whether functions that fail
				131	synchronize memory); thus, relaxed MO is sufficient for the initial load
				132	and the failure path of the CAS. If the weak CAS fails and we need a
				133	definitive result, retry. */
				134	#if __HAVE_64B_ATOMICS
				135	uint64_t d = atomic_load_relaxed (&sem->data);
				136	do
				137	{
				138	if ((d & SEM_VALUE_MASK) == 0)
				139	break;
				140	if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1))
				141	return 0;
				142	}
				143	while (definitive_result);
				144	return -1;
				145	#else
				146	unsigned int v = atomic_load_relaxed (&sem->value);
				147	do
				148	{
				149	if ((v >> SEM_VALUE_SHIFT) == 0)
				150	break;
				151	if (atomic_compare_exchange_weak_acquire (&sem->value,
				152	&v, v - (1 << SEM_VALUE_SHIFT)))
				153	return 0;
				154	}
				155	while (definitive_result);
				156	return -1;
				157	#endif
				158	}
				159
				160	/* Slow path that blocks. */
				161	static int
				162	__attribute__ ((noinline))
				163	__new_sem_wait_slow (struct new_sem sem, const struct timespec abstime)
				164	{
				165	int err = 0;
				166
				167	#if __HAVE_64B_ATOMICS
				168	/* Add a waiter. Relaxed MO is sufficient because we can rely on the
				169	ordering provided by the RMW operations we use. */
				170	uint64_t d = atomic_fetch_add_relaxed (&sem->data,
				171	(uint64_t) 1 << SEM_NWAITERS_SHIFT);
				172
				173	pthread_cleanup_push (__sem_wait_cleanup, sem);
				174
				175	/* Wait for a token to be available. Retry until we can grab one. */
				176	for (;;)
				177	{
				178	/* If there is no token available, sleep until there is. */
				179	if ((d & SEM_VALUE_MASK) == 0)
				180	{
				181	err = do_futex_wait (sem, abstime);
				182	/* A futex return value of 0 or EAGAIN is due to a real or spurious
				183	wake-up, or due to a change in the number of tokens. We retry in
				184	these cases.
				185	If we timed out, forward this to the caller.
				186	EINTR is returned if we are interrupted by a signal; we
				187	forward this to the caller. (See futex_wait and related
				188	documentation. Before Linux 2.6.22, EINTR was also returned on
				189	spurious wake-ups; we only support more recent Linux versions,
				190	so do not need to consider this here.) */
				191	if (err == ETIMEDOUT \|\| err == EINTR)
				192	{
				193	__set_errno (err);
				194	err = -1;
				195	/* Stop being registered as a waiter. */
				196	atomic_fetch_add_relaxed (&sem->data,
				197	-((uint64_t) 1 << SEM_NWAITERS_SHIFT));
				198	break;
				199	}
				200	/* Relaxed MO is sufficient; see below. */
				201	d = atomic_load_relaxed (&sem->data);
				202	}
				203	else
				204	{
				205	/* Try to grab both a token and stop being a waiter. We need
				206	acquire MO so this synchronizes with all token providers (i.e.,
				207	the RMW operation we read from or all those before it in
				208	modification order; also see sem_post). On the failure path,
				209	relaxed MO is sufficient because we only eventually need the
				210	up-to-date value; the futex_wait or the CAS perform the real
				211	work. */
				212	if (atomic_compare_exchange_weak_acquire (&sem->data,
				213	&d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
				214	{
				215	err = 0;
				216	break;
				217	}
				218	}
				219	}
				220
				221	pthread_cleanup_pop (0);
				222	#else
				223	/* The main difference to the 64b-atomics implementation is that we need to
				224	access value and nwaiters in separate steps, and that the nwaiters bit
				225	in the value can temporarily not be set even if nwaiters is nonzero.
				226	We work around incorrectly unsetting the nwaiters bit by letting sem_wait
				227	set the bit again and waking the number of waiters that could grab a
				228	token. There are two additional properties we need to ensure:
				229	(1) We make sure that whenever unsetting the bit, we see the increment of
				230	nwaiters by the other thread that set the bit. IOW, we will notice if
				231	we make a mistake.
				232	(2) When setting the nwaiters bit, we make sure that we see the unsetting
				233	of the bit by another waiter that happened before us. This avoids having
				234	to blindly set the bit whenever we need to block on it. We set/unset
				235	the bit while having incremented nwaiters (i.e., are a registered
				236	waiter), and the problematic case only happens when one waiter indeed
				237	followed another (i.e., nwaiters was never larger than 1); thus, this
				238	works similarly as with a critical section using nwaiters (see the MOs
				239	and related comments below).
				240
				241	An alternative approach would be to unset the bit after decrementing
				242	nwaiters; however, that would result in needing Dekker-like
				243	synchronization and thus full memory barriers. We also would not be able
				244	to prevent misspeculation, so this alternative scheme does not seem
				245	beneficial. */
				246	unsigned int v;
				247
				248	/* Add a waiter. We need acquire MO so this synchronizes with the release
				249	MO we use when decrementing nwaiters below; it ensures that if another
				250	waiter unset the bit before us, we see that and set it again. Also see
				251	property (2) above. */
				252	atomic_fetch_add_acquire (&sem->nwaiters, 1);
				253
				254	pthread_cleanup_push (__sem_wait_cleanup, sem);
				255
				256	/* Wait for a token to be available. Retry until we can grab one. */
				257	/* We do not need any ordering wrt. to this load's reads-from, so relaxed
				258	MO is sufficient. The acquire MO above ensures that in the problematic
				259	case, we do see the unsetting of the bit by another waiter. */
				260	v = atomic_load_relaxed (&sem->value);
				261	do
				262	{
				263	do
				264	{
				265	/* We are about to block, so make sure that the nwaiters bit is
				266	set. We need release MO on the CAS to ensure that when another
				267	waiter unsets the nwaiters bit, it will also observe that we
				268	incremented nwaiters in the meantime (also see the unsetting of
				269	the bit below). Relaxed MO on CAS failure is sufficient (see
				270	above). */
				271	do
				272	{
				273	if ((v & SEM_NWAITERS_MASK) != 0)
				274	break;
				275	}
				276	while (!atomic_compare_exchange_weak_release (&sem->value,
				277	&v, v \| SEM_NWAITERS_MASK));
				278	/* If there is no token, wait. */
				279	if ((v >> SEM_VALUE_SHIFT) == 0)
				280	{
				281	/* See __HAVE_64B_ATOMICS variant. */
				282	err = do_futex_wait(sem, abstime);
				283	if (err == ETIMEDOUT \|\| err == EINTR)
				284	{
				285	__set_errno (err);
				286	err = -1;
				287	goto error;
				288	}
				289	err = 0;
				290	/* We blocked, so there might be a token now. Relaxed MO is
				291	sufficient (see above). */
				292	v = atomic_load_relaxed (&sem->value);
				293	}
				294	}
				295	/* If there is no token, we must not try to grab one. */
				296	while ((v >> SEM_VALUE_SHIFT) == 0);
				297	}
				298	/* Try to grab a token. We need acquire MO so this synchronizes with
				299	all token providers (i.e., the RMW operation we read from or all those
				300	before it in modification order; also see sem_post). */
				301	while (!atomic_compare_exchange_weak_acquire (&sem->value,
				302	&v, v - (1 << SEM_VALUE_SHIFT)));
				303
				304	error:
				305	pthread_cleanup_pop (0);
				306
				307	__sem_wait_32_finish (sem);
				308	#endif
				309
				310	return err;
				311	}
				312
				313	/* Stop being a registered waiter (non-64b-atomics code only). */
				314	#if !__HAVE_64B_ATOMICS
				315	static void
				316	__sem_wait_32_finish (struct new_sem *sem)
				317	{
				318	/* The nwaiters bit is still set, try to unset it now if this seems
				319	necessary. We do this before decrementing nwaiters so that the unsetting
				320	is visible to other waiters entering after us. Relaxed MO is sufficient
				321	because we are just speculating here; a stronger MO would not prevent
				322	misspeculation. */
				323	unsigned int wguess = atomic_load_relaxed (&sem->nwaiters);
				324	if (wguess == 1)
				325	/* We might be the last waiter, so unset. This needs acquire MO so that
				326	it syncronizes with the release MO when setting the bit above; if we
				327	overwrite someone else that set the bit, we'll read in the following
				328	decrement of nwaiters at least from that release sequence, so we'll
				329	see if the other waiter is still active or if another writer entered
				330	in the meantime (i.e., using the check below). */
				331	atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK);
				332
				333	/* Now stop being a waiter, and see whether our guess was correct.
				334	This needs release MO so that it synchronizes with the acquire MO when
				335	a waiter increments nwaiters; this makes sure that newer writers see that
				336	we reset the waiters_present bit. */
				337	unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1);
				338	if (wfinal > 1 && wguess == 1)
				339	{
				340	/* We guessed wrong, and so need to clean up after the mistake and
				341	unblock any waiters that could have not been woken. There is no
				342	additional ordering that we need to set up, so relaxed MO is
				343	sufficient. */
				344	unsigned int v = atomic_fetch_or_relaxed (&sem->value,
				345	SEM_NWAITERS_MASK);
				346	/* If there are available tokens, then wake as many waiters. If there
				347	aren't any, then there is no need to wake anyone because there is
				348	none to grab for another waiter. If tokens become available
				349	subsequently, then the respective sem_post calls will do the wake-up
				350	due to us having set the nwaiters bit again. */
				351	v >>= SEM_VALUE_SHIFT;
				352	if (v > 0)
				353	futex_wake (&sem->value, v, sem->private);
				354	}
				355	}
				356	#endif