/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <dl-tls.h>
#include <tls.h>
#include <list.h>
#include <lowlevellock.h>
#include <futex-internal.h>
#include <kernel-features.h>
#include <stack-aliasing.h>


#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare the function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif


/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif

/* Cache handling for not-yet freed stacks.  */

/* Maximum size in bytes of the stack cache.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Mutex protecting the stack cache and the lists below.  */
static int stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of stacks queued for reuse.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* We need to record what list operations we are going to do so that,
   in case of an asynchronous interruption due to a fork() call, we
   can correct for the work.  */
static uintptr_t in_flight_stack;

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif


/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)

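/* Remove ELEM from the list it is on.  The operation is recorded in
   IN_FLIGHT_STACK (low bit clear, meaning a delete) so that
   __reclaim_stacks can replay it should a fork() interrupt us; the
   write barriers order the record against the actual list update.  */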
static void
stack_list_del (list_t *elem)
{
  in_flight_stack = (uintptr_t) elem;

  atomic_write_barrier ();

  list_del (elem);

  atomic_write_barrier ();

  in_flight_stack = 0;
}

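/* Add ELEM to LIST.  Setting the low bit of IN_FLIGHT_STACK marks this
   as an add operation, so __reclaim_stacks can finish or undo it after
   a fork().  */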
static void
stack_list_add (list_t *elem, list_t *list)
{
  in_flight_stack = (uintptr_t) elem | 1;

  atomic_write_barrier ();

  list_add (elem, list);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */


/* Get a stack from the cache.  We have to match by size since some
   blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock, LLL_PRIVATE);

      return NULL;
    }

  /* Don't allow setxid until cloned.  */
  result->setxid_futex = -1;

  /* Dequeue the entry.  */
  stack_list_del (&result->list);

  /* And add to the list of stacks in use.  */
  stack_list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (! dtv[1 + cnt].pointer.is_static
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      free (dtv[1 + cnt].pointer.val);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}



/* Free stacks until cache size is lower than LIMIT.  */
void
__free_stacks (size_t limit)
{
  /* We reduce the size of the cache.  Remove the last entries until
     the size is below the limit.  */
  list_t *entry;
  list_t *prev;

  /* Search from the end of the list.  */
  list_for_each_prev_safe (entry, prev, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr))
        {
          /* Unlink the block.  */
          stack_list_del (entry);

          /* Account for the freed memory.  */
          stack_cache_actsize -= curr->stackblock_size;

          /* Free the memory associated with the ELF TLS.  */
          _dl_deallocate_tls (TLS_TPADJ (curr), false);

          /* Remove this block.  This should never fail.  If it does
             something is really wrong.  */
          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
            abort ();

          /* Maybe we have freed enough.  */
          if (stack_cache_actsize <= limit)
            break;
        }
    }
}


/* Add a stack which is not used anymore to the cache.  Must be
   called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  stack_list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
    __free_stacks (stack_cache_maxsize);
}


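/* Make the usable part of PD's stack (excluding the guard area)
   readable, writable and executable.  Returns zero or an errno
   value.  */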
static int
internal_function
change_stack_perm (struct pthread *pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                   , size_t pagemask
#endif
                   )
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  void *stack = (pd->stackblock
                 + (((((pd->stackblock_size - pd->guardsize) / 2)
                      & pagemask) + pd->guardsize) & pagemask));
  size_t len = pd->stackblock + pd->stackblock_size - stack;
#elif _STACK_GROWS_DOWN
  void *stack = pd->stackblock + pd->guardsize;
  size_t len = pd->stackblock_size - pd->guardsize;
#elif _STACK_GROWS_UP
  void *stack = pd->stackblock;
  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
#else
# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
#endif
  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    return errno;

  return 0;
}


/* Returns a usable stack for a new thread either by allocating a
   new stack or reusing a cached stack of sufficient size.
   ATTR must be non-NULL and point to a valid pthread_attr.
   PDP must be non-NULL.  */
static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;
  void *stacktop;

  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  if (attr->stacksize != 0)
    size = attr->stacksize;
  else
    {
      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
      size = __default_pthread_attr.stacksize;
      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
    }

  /* Get memory for the stack.  */
  if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
    {
      uintptr_t adj;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* The user provided stack memory needs to be cleared.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) attr->stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
      /* The thread must know when private futexes are supported.  */
      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
      SETUP_THREAD_SYSINFO (pd);
#endif

      /* The process ID is also the same as that of the caller.  */
      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

      /* Don't allow setxid until cloned.  */
      pd->setxid_futex = -1;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        {
          /* Something went wrong.  */
          assert (errno == ENOMEM);
          return errno;
        }


      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock, LLL_PRIVATE);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock, LLL_PRIVATE);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and,
         if necessary, the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (__builtin_expect (size < ((guardsize + __static_tls_size
                                     + MINIMAL_REST_STACK + pagesize_m1)
                                    & ~pagesize_m1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);

          if (__glibc_unlikely (mem == MAP_FAILED))
            return errno;

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even if coloring would be better
             relative to higher alignment values it makes no sense to
             do it since the mmap() interface does not allow us to
             specify any alignment for the returned memory block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                     - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array.  This address will not change for the lifetime of
             this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
          /* The thread must know when private futexes are supported.  */
          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                    header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
          SETUP_THREAD_SYSINFO (pd);
#endif

          /* Don't allow setxid until cloned.  */
          pd->setxid_futex = -1;

          /* The process ID is also the same as that of the caller.  */
          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return errno;
            }


          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock, LLL_PRIVATE);

          /* And add to the list of stacks in use.  */
          stack_list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock, LLL_PRIVATE);


          /* There might have been a race.  Another thread might have
             caused the stacks to get exec permission while this new
             stack was prepared.  Detect if this was possible and
             change the permission if necessary.  */
          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
                                && (prot & PROT_EXEC) == 0, 0))
            {
              int err = change_stack_perm (pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                                           , ~pagesize_m1
#endif
                                           );
              if (err != 0)
                {
                  /* Free the stack memory we just allocated.  */
                  (void) munmap (mem, size);

                  return err;
                }
            }


          /* Note that all of the stack and the thread descriptor is
             zeroed.  This means we do not have to initialize fields
             with initial value zero.  This is specifically true for
             the 'tid' field which is always set back to zero once the
             stack is not used anymore and for the 'guardsize' field
             which will be read next.  */
        }

      /* Create or resize the guard area if necessary.  */
      if (__glibc_unlikely (guardsize > pd->guardsize))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
          char *guard = mem;
#elif _STACK_GROWS_UP
          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
            mprot_error:
              lll_lock (stack_cache_lock, LLL_PRIVATE);

              /* Remove the thread from the list.  */
              stack_list_del (&pd->list);

              lll_unlock (stack_cache_lock, LLL_PRIVATE);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we had better not use it
                 anymore.  We also ignore possible errors; there is
                 nothing we could do.  */
              (void) munmap (mem, size);

              return errno;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_DOWN
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_UP
          if (mprotect ((char *) pd - pd->guardsize,
                        pd->guardsize - guardsize, prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
      /* The pthread_getattr_np() calls need to be given the guard size
         requested in the attribute, regardless of how large the guard
         area actually in use is.  */
      pd->reported_guardsize = guardsize;
    }

  /* Initialize the lock.  We have to do this unconditionally since the
     stillborn thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;

  /* The robust mutex lists also need to be initialized
     unconditionally because the cleanup for the previous stack owner
     might have happened in the kernel.  */
  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
                                  - offsetof (pthread_mutex_t,
                                              __data.__list.__next));
  pd->robust_head.list_op_pending = NULL;
#ifdef __PTHREAD_MUTEX_HAVE_PREV
  pd->robust_prev = &pd->robust_head;
#endif
  pd->robust_head.list = &pd->robust_head;

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
#elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
#endif

#ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
#elif _STACK_GROWS_DOWN
  *stack = stacktop;
#elif _STACK_GROWS_UP
  *stack = pd->stackblock;
  assert (*stack > 0);
#endif

  return 0;
}


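/* Release the stack of PD after the thread has terminated.  Stacks we
   allocated ourselves are queued in the cache for reuse; for a
   user-provided stack only the associated TLS memory is freed.  */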
void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Remove the thread from the list of threads whose stacks are
     in use.  */
  stack_list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__glibc_likely (! pd->user_stack))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


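/* Make the stack of every thread executable: first the main thread's
   stack, then all stacks currently in use and finally the cached
   ones.  Typically needed when a newly loaded object requires an
   executable stack.  */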
int
internal_function
__make_stacks_executable (void **stack_endp)
{
  /* First the main thread's stack.  */
  int err = _dl_make_stack_executable (stack_endp);
  if (err != 0)
    return err;

#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                               , pagemask
#endif
                               );
      if (err != 0)
        break;
    }

  /* Also change the permission for the currently unused stacks.  This
     might be wasted time but better spend it here than adding a check
     in the fast path.  */
  if (err == 0)
    list_for_each (runp, &stack_cache)
      {
        err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                                 , pagemask
#endif
                                 );
        if (err != 0)
          break;
      }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return err;
}


/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller is the only stack in use.  But
     we have to be aware that we might have interrupted a list
     operation.  */

  if (in_flight_stack != 0)
    {
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this case we
             only need to check the beginning of these lists to see if the
             pointers at the head of the list are inconsistent.  */
          list_t *l = NULL;

          if (stack_used.next->prev != &stack_used)
            l = &stack_used;
          else if (stack_cache.next->prev != &stack_cache)
            l = &stack_cache;

          if (l != NULL)
            {
              assert (l->next->prev == elem);
              elem->next = l->next;
              elem->prev = l;
              l->next = elem;
            }
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* The PID field must be initialized for the new process.  */
          curp->pid = self->pid;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Reset the PIDs in any cached stacks.  */
  list_for_each (runp, &stack_cache)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      curp->pid = self->pid;
    }

  /* Add the stack of all running threads to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list and
     add it to the list of running threads.  Which of the two lists it
     goes on is decided by the user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}


#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif


#ifdef SIGSETXID
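/* Mark T as participating in a set*id operation: wait for the thread
   to finish being cloned, then set SETXID_BITMASK in its cancelhandling
   word so it cannot exit before the SIGSETXID handler has run.
   Threads which are already exiting are left alone.  */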
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}


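/* Clear the SETXID flag in T's cancelhandling word again and wake any
   waiter on its setxid futex.  */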
static void
internal_function
setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  do
    {
      ch = t->cancelhandling;
      if ((ch & SETXID_BITMASK) == 0)
        return;
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch & ~SETXID_BITMASK, ch));

  /* Release the futex just in case.  */
  t->setxid_futex = 1;
  futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
}


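/* Send SIGSETXID to T if it is still marked for the set*id operation.
   Returns 1 and increments the reply counter in CMDP if the signal
   was sent, 0 otherwise.  */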
static int
internal_function
setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
{
  if ((t->cancelhandling & SETXID_BITMASK) == 0)
    return 0;

  int val;
  INTERNAL_SYSCALL_DECL (err);
  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
                          t->tid, SIGSETXID);

  /* If this failed, the thread must not have started yet or it has
     already exited.  */
  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
    {
      atomic_increment (&cmdp->cntr);
      return 1;
    }
  else
    return 0;
}

/* Check for consistency across set*id system call results.  The abort
   should not happen as long as all privilege changes happen through
   the glibc wrappers.  ERROR must be 0 (no error) or an errno
   code.  */
void
attribute_hidden
__nptl_setxid_error (struct xid_command *cmdp, int error)
{
  do
    {
      int olderror = cmdp->error;
      if (olderror == error)
        break;
      if (olderror != -1)
        /* Mismatch between current and previous results.  */
        abort ();
    }
  while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
}

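/* Apply the set*id operation described by CMDP to the whole process:
   mark every other thread, signal them with SIGSETXID until no more
   can be reached, wait for their handlers to report back, and finally
   perform the system call in the calling thread.  Returns the syscall
   result, or -1 with errno set on failure.  */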
int
attribute_hidden
__nptl_setxid (struct xid_command *cmdp)
{
  int signalled;
  int result;
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  __xidcmd = cmdp;
  cmdp->cntr = 0;
  cmdp->error = -1;

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Iterate until we don't succeed in signalling anyone.  That means
     we have gotten all running threads, and their children will be
     automatically correct once started.  */
  do
    {
      signalled = 0;

      list_for_each (runp, &stack_used)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      list_for_each (runp, &__stack_user)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      int cur = cmdp->cntr;
      while (cur != 0)
        {
          futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
                             FUTEX_PRIVATE);
          cur = cmdp->cntr;
        }
    }
  while (signalled != 0);

  /* Clean up flags, so that no thread blocks during exit waiting
     for a signal which will never come.  */
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  /* This must be last, otherwise the current thread might not have
     permission to send the SIGSETXID signal to the other threads.  */
  INTERNAL_SYSCALL_DECL (err);
  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
  int error = 0;
  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
    {
      error = INTERNAL_SYSCALL_ERRNO (result, err);
      __set_errno (error);
      result = -1;
    }
  __nptl_setxid_error (cmdp, error);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
  return result;
}
#endif  /* SIGSETXID.  */


static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* We cannot delay the initialization of the Static TLS area, since
     it can be accessed with LE or IE, but since the DTV is only used
     by GD and LD, we can delay its update to avoid a race.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

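/* Initialize the static TLS block of MAP in every existing thread,
   e.g. when a newly loaded object has been given space in the static
   TLS area.  */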
void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


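/* Wait until no thread is using the global scope (GSCOPE) anymore so
   the dynamic linker can safely modify the scope lists.  */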
void
attribute_hidden
__wait_lookup_done (void)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}