/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <dl-tls.h>
#include <tls.h>
#include <list.h>
#include <lowlevellock.h>
#include <futex-internal.h>
#include <kernel-features.h>
#include <stack-aliasing.h>


#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function that gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
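/* (IA-64 is the one Linux architecture that needs this: it pairs the
   ordinary memory stack with a separate register backing store.)  */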

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare the function that gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif


/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK	4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
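
/* Note: with TLS_TCB_AT_TP the thread descriptor itself is the TCB, so
   it already is what the thread pointer designates.  With TLS_DTV_AT_TP
   the TCB (and hence the thread pointer) lies TLS_PRE_TCB_SIZE bytes
   above the descriptor, which is what the adjustment accounts for.  */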

/* Cache handling for stacks which are not yet freed.  */

/* Maximum size of the cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Mutex protecting the stack cache lists and the size accounting.  */
static int stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of queued stack frames.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* We need to record what list operations we are going to do so that,
   in case of an asynchronous interruption due to a fork() call, we
   can correct for the work.  */
static uintptr_t in_flight_stack;
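/* The value is the address of the list element being manipulated, with
   bit 0 set when the pending operation is an addition and clear when it
   is a removal; __reclaim_stacks decodes this in the fork child to
   finish the interrupted list update.  */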

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif


/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)
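/* (A thread's TID is positive while the thread is alive; the kernel
   clears the field via the CLONE_CHILD_CLEARTID mechanism when the
   thread exits, which is what makes a cached stack reusable.)  */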


static void
stack_list_del (list_t *elem)
{
  in_flight_stack = (uintptr_t) elem;

  atomic_write_barrier ();

  list_del (elem);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


static void
stack_list_add (list_t *elem, list_t *list)
{
  in_flight_stack = (uintptr_t) elem | 1;

  atomic_write_barrier ();

  list_add (elem, list);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


/* We create a double linked list of all cache entries.  Double linked
   because this allows removing entries from the end.  */


/* Get a stack frame from the cache.  We have to match by size since
   some blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not too excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock, LLL_PRIVATE);

      return NULL;
    }

  /* Don't allow setxid until cloned.  */
  result->setxid_futex = -1;

  /* Dequeue the entry.  */
  stack_list_del (&result->list);

  /* And add to the list of stacks in use.  */
  stack_list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

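  /* The DTV stores its length in dtv[-1].counter and the generation
     counter in dtv[0]; the per-module slots start at dtv[1].  The loop
     below walks those slots and frees any dynamically allocated TLS
     blocks left behind by the previous user of this descriptor.  */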
  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (! dtv[1 + cnt].pointer.is_static
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      free (dtv[1 + cnt].pointer.val);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}


/* Free stacks until cache size is lower than LIMIT.  */
void
__free_stacks (size_t limit)
{
  /* We reduce the size of the cache.  Remove the last entries until
     the size is below the limit.  */
  list_t *entry;
  list_t *prev;

  /* Search from the end of the list.  */
  list_for_each_prev_safe (entry, prev, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr))
        {
          /* Unlink the block.  */
          stack_list_del (entry);

          /* Account for the freed memory.  */
          stack_cache_actsize -= curr->stackblock_size;

          /* Free the memory associated with the ELF TLS.  */
          _dl_deallocate_tls (TLS_TPADJ (curr), false);

          /* Remove this block.  This should never fail.  If it does
             something is really wrong.  */
          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
            abort ();

          /* Maybe we have freed enough.  */
          if (stack_cache_actsize <= limit)
            break;
        }
    }
}


/* Add a stack frame which is not used anymore to the stack cache.
   Must be called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  stack_list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
    __free_stacks (stack_cache_maxsize);
}


static int
internal_function
change_stack_perm (struct pthread *pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                   , size_t pagemask
#endif
                   )
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  void *stack = (pd->stackblock
                 + (((((pd->stackblock_size - pd->guardsize) / 2)
                      & pagemask) + pd->guardsize) & pagemask));
  size_t len = pd->stackblock + pd->stackblock_size - stack;
#elif _STACK_GROWS_DOWN
  void *stack = pd->stackblock + pd->guardsize;
  size_t len = pd->stackblock_size - pd->guardsize;
#elif _STACK_GROWS_UP
  void *stack = pd->stackblock;
  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
#else
# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
#endif
  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    return errno;

  return 0;
}


/* Returns a usable stack for a new thread either by allocating a
   new stack or reusing a cached stack of sufficient size.
   ATTR must be non-NULL and point to a valid pthread_attr.
   PDP must be non-NULL.  */
static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;

  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  if (attr->stacksize != 0)
    size = attr->stacksize;
  else
    {
      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
      size = __default_pthread_attr.stacksize;
      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
    }

  /* Get memory for the stack.  */
  if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
    {
      uintptr_t adj;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* The user provided stack memory needs to be cleared.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) attr->stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
      /* The thread must know when private futexes are supported.  */
      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
      SETUP_THREAD_SYSINFO (pd);
#endif

      /* The process ID is also the same as that of the caller.  */
      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

      /* Don't allow setxid until cloned.  */
      pd->setxid_futex = -1;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        {
          /* Something went wrong.  */
          assert (errno == ENOMEM);
          return errno;
        }


      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock, LLL_PRIVATE);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock, LLL_PRIVATE);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and
         possibly the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (__builtin_expect (size < ((guardsize + __static_tls_size
                                     + MINIMAL_REST_STACK + pagesize_m1)
                                    & ~pagesize_m1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);

          if (__glibc_unlikely (mem == MAP_FAILED))
            return errno;

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even if coloring would be better
             relative to higher alignment values it makes no sense to
             do it since the mmap() interface does not allow us to
             specify any alignment for the returned memory block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif
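          /* (For instance, with 4096-byte pages and a hypothetical
             COLORING_INCREMENT of 128, successive threads would place
             their descriptors 128, 256, ... bytes below the top of the
             mapping, wrapping at the page size, so the hot top-of-stack
             data of different threads lands in different cache sets.)  */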

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                     - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
          /* The thread must know when private futexes are supported.  */
          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                    header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
          SETUP_THREAD_SYSINFO (pd);
#endif

          /* Don't allow setxid until cloned.  */
          pd->setxid_futex = -1;

          /* The process ID is also the same as that of the caller.  */
          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return errno;
            }


          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock, LLL_PRIVATE);

          /* And add to the list of stacks in use.  */
          stack_list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock, LLL_PRIVATE);


          /* There might have been a race.  Another thread might have
             caused the stacks to get exec permission while this new
             stack was prepared.  Detect if this was possible and
             change the permission if necessary.  */
          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
                                && (prot & PROT_EXEC) == 0, 0))
            {
              int err = change_stack_perm (pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                                           , ~pagesize_m1
#endif
                                           );
              if (err != 0)
                {
                  /* Free the stack memory we just allocated.  */
                  (void) munmap (mem, size);

                  return err;
                }
            }


          /* Note that all of the stack and the thread descriptor are
             zeroed.  This means we do not have to initialize fields
             with initial value zero.  This is specifically true for
             the 'tid' field which is always set back to zero once the
             stack is not used anymore and for the 'guardsize' field
             which will be read next.  */
        }

      /* Create or resize the guard area if necessary.  */
      if (__glibc_unlikely (guardsize > pd->guardsize))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
          char *guard = mem;
#elif _STACK_GROWS_UP
          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
            mprot_error:
              lll_lock (stack_cache_lock, LLL_PRIVATE);

              /* Remove the thread from the list.  */
              stack_list_del (&pd->list);

              lll_unlock (stack_cache_lock, LLL_PRIVATE);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we better do not use it
                 anymore.  Uh, and we ignore possible errors.  There
                 is nothing we could do.  */
              (void) munmap (mem, size);

              return errno;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_DOWN
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_UP
          if (mprotect ((char *) pd - pd->guardsize,
                        pd->guardsize - guardsize, prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
      /* The pthread_getattr_np() calls need to get passed the size
         requested in the attribute, regardless of how large the
         actually used guardsize is.  */
      pd->reported_guardsize = guardsize;
    }

  /* Initialize the lock.  We have to do this unconditionally since the
     stillborn thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;

  /* The robust mutex lists also need to be initialized
     unconditionally because the cleanup for the previous stack owner
     might have happened in the kernel.  */
  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
                                  - offsetof (pthread_mutex_t,
                                              __data.__list.__next));
  pd->robust_head.list_op_pending = NULL;
#ifdef __PTHREAD_MUTEX_HAVE_PREV
  pd->robust_prev = &pd->robust_head;
#endif
  pd->robust_head.list = &pd->robust_head;
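  /* (The futex_offset computed above is part of the kernel's robust
     futex ABI: on thread death the kernel walks the __data.__list.__next
     links and adds this offset to each entry to find the mutex's futex
     word at __data.__lock.)  */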

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if _STACK_GROWS_DOWN
  void *stacktop;

# if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
# elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
# endif

# ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
# else
  *stack = stacktop;
# endif
#else
  *stack = pd->stackblock;
  assert (*stack > 0);
#endif

  return 0;
}


void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Remove the thread from the list of threads with user defined
     stacks.  */
  stack_list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__glibc_likely (! pd->user_stack))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


int
internal_function
__make_stacks_executable (void **stack_endp)
{
  /* First the main thread's stack.  */
  int err = _dl_make_stack_executable (stack_endp);
  if (err != 0)
    return err;

#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                               , pagemask
#endif
                               );
      if (err != 0)
        break;
    }

  /* Also change the permission for the currently unused stacks.  This
     might be wasted time but better spend it here than adding a check
     in the fast path.  */
  if (err == 0)
    list_for_each (runp, &stack_cache)
      {
        err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                                 , pagemask
#endif
                                 );
        if (err != 0)
          break;
      }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return err;
}


/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking is necessary.  The calling thread is the only one
     running.  But we have to be aware that we might have interrupted
     a list operation.  */

  if (in_flight_stack != 0)
    {
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this case we
             only need to check the beginning of these lists to see if the
             pointers at the head of the list are inconsistent.  */
          list_t *l = NULL;

          if (stack_used.next->prev != &stack_used)
            l = &stack_used;
          else if (stack_cache.next->prev != &stack_cache)
            l = &stack_cache;

          if (l != NULL)
            {
              assert (l->next->prev == elem);
              elem->next = l->next;
              elem->prev = l;
              l->next = elem;
            }
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Reset the PIDs in any cached stacks.  */
  list_for_each (runp, &stack_cache)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      curp->pid = self->pid;
    }

  /* Add the stack of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list and
     add it to the list of running threads.  Which of the two lists it
     goes on is decided by the user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}


#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif


#ifdef SIGSETXID
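/* Protocol for a thread's setxid_futex field: -1 means the thread has
   not been cloned yet, -2 that a setxid operation is waiting for the
   clone to complete, 0 that the thread must not exit because a setxid
   signal may still be delivered to it, and 1 that the field has been
   released.  */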
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}


static void
internal_function
setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  do
    {
      ch = t->cancelhandling;
      if ((ch & SETXID_BITMASK) == 0)
        return;
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch & ~SETXID_BITMASK, ch));

  /* Release the futex just in case.  */
  t->setxid_futex = 1;
  futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
}


static int
internal_function
setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
{
  if ((t->cancelhandling & SETXID_BITMASK) == 0)
    return 0;

  int val;
  INTERNAL_SYSCALL_DECL (err);
  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
                          t->tid, SIGSETXID);

  /* If this failed, the thread must not have started yet or it has
     already exited.  */
  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
    {
      atomic_increment (&cmdp->cntr);
      return 1;
    }
  else
    return 0;
}

/* Check for consistency across set*id system call results.  The abort
   should not happen as long as all privilege changes happen through
   the glibc wrappers.  ERROR must be 0 (no error) or an errno
   code.  */
void
attribute_hidden
__nptl_setxid_error (struct xid_command *cmdp, int error)
{
  do
    {
      int olderror = cmdp->error;
      if (olderror == error)
        break;
      if (olderror != -1)
        /* Mismatch between current and previous results.  */
        abort ();
    }
  while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
}

int
attribute_hidden
__nptl_setxid (struct xid_command *cmdp)
{
  int signalled;
  int result;
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  __xidcmd = cmdp;
  cmdp->cntr = 0;
  cmdp->error = -1;

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Iterate until we don't succeed in signalling anyone.  That means
     we have gotten all running threads, and their children will be
     automatically correct once started.  */
  do
    {
      signalled = 0;

      list_for_each (runp, &stack_used)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      list_for_each (runp, &__stack_user)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      int cur = cmdp->cntr;
      while (cur != 0)
        {
          futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
                             FUTEX_PRIVATE);
          cur = cmdp->cntr;
        }
    }
  while (signalled != 0);

  /* Clean up flags, so that no thread blocks during exit waiting
     for a signal which will never come.  */
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  /* This must be last, otherwise the current thread might not have
     the permission to send the SIGSETXID signal to the other threads.  */
  INTERNAL_SYSCALL_DECL (err);
  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
  int error = 0;
  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
    {
      error = INTERNAL_SYSCALL_ERRNO (result, err);
      __set_errno (error);
      result = -1;
    }
  __nptl_setxid_error (cmdp, error);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
  return result;
}
#endif  /* SIGSETXID.  */


static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
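  /* (With TLS_TCB_AT_TP the static TLS blocks live below the thread
     pointer, hence the subtraction; with TLS_DTV_AT_TP they live above
     the TCB, past the pre-TCB area, hence the addition.)  */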

  /* We cannot delay the initialization of the Static TLS area, since
     it can be accessed with LE or IE, but since the DTV is only used
     by GD and LD, we can delay its update to avoid a race.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


void
attribute_hidden
__wait_lookup_done (void)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}