1/* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20#include <assert.h>
21#include <errno.h>
22#include <signal.h>
23#include <stdint.h>
24#include <string.h>
25#include <unistd.h>
26#include <sys/mman.h>
27#include <sys/param.h>
28#include <tls.h>
29#include <lowlevellock.h>
30#include <link.h>
31#include <bits/kernel-features.h>
32
33
34#ifndef NEED_SEPARATE_REGISTER_STACK
35
36/* Most architectures have exactly one stack pointer. Some have more. */
37# define STACK_VARIABLES void *stackaddr = NULL
38
39/* How to pass the values to the 'create_thread' function. */
40# define STACK_VARIABLES_ARGS stackaddr
41
42/* How to declare the function that receives these parameters. */
43# define STACK_VARIABLES_PARMS void *stackaddr
44
45/* How to declare allocate_stack. */
46# define ALLOCATE_STACK_PARMS void **stack
47
48/* This is how the function is called. We do it this way to allow
49 other variants of the function to have more parameters. */
50# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
51
52#else
53
54/* We need two stacks. The kernel will place them but we have to tell
55 the kernel about the size of the reserved address space. */
56# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
57
58/* How to pass the values to the 'create_thread' function. */
59# define STACK_VARIABLES_ARGS stackaddr, stacksize
60
61/* How to declare the function that receives these parameters. */
62# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
63
64/* How to declare allocate_stack. */
65# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
66
67/* This is how the function is called. We do it this way to allow
68 other variants of the function to have more parameters. */
69# define ALLOCATE_STACK(attr, pd) \
70 allocate_stack (attr, pd, &stackaddr, &stacksize)
71
72#endif
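/* A caller (pthread_create) is expected to use these macros roughly as
   follows (a sketch, not the exact code):

       STACK_VARIABLES;
       int err = ALLOCATE_STACK (iattr, &pd);
       ...
       create_thread (pd, iattr, STACK_VARIABLES_ARGS);

   so the extra register-stack parameters show up only where needed.  */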
73
74
75/* Default alignment of stack. */
76#ifndef STACK_ALIGN
77# define STACK_ALIGN __alignof__ (long double)
78#endif
79
80/* Default value for minimal stack size after allocating thread
81 descriptor and guard. */
82#ifndef MINIMAL_REST_STACK
83# define MINIMAL_REST_STACK 4096
84#endif
85
86
87/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
88 a stack. Use it when possible. */
89#ifndef MAP_STACK
90# define MAP_STACK 0
91#endif
92
93/* This yields the pointer that TLS support code calls the thread pointer. */
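/* With TLS_TCB_AT_TP the thread descriptor itself is the TCB, so no
   adjustment is needed.  With TLS_DTV_AT_TP the descriptor is placed
   TLS_PRE_TCB_SIZE bytes before the TCB, so the pointer handed to the
   TLS code has to be advanced past it.  */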
94#if defined(TLS_TCB_AT_TP)
95# define TLS_TPADJ(pd) (pd)
96#elif defined(TLS_DTV_AT_TP)
97# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
98#endif
99
100/* Cache handling for not-yet-freed stacks. */
101
102/* Maximum size in bytes of the cache. */
103//static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
104static size_t stack_cache_maxsize = 256 * 1024; /* 40 MiB is too large for embedded devices, so use 256 KiB instead. */
105static size_t stack_cache_actsize;
106
107/* Mutex protecting this variable. */
108static int stack_cache_lock = LLL_LOCK_INITIALIZER;
109
110/* List of queued stack frames. */
111static LIST_HEAD (stack_cache);
112
113/* List of the stacks in use. */
114static LIST_HEAD (stack_used);
115
116/* We need to record what list operations we are going to do so that,
117 in case of an asynchronous interruption due to a fork() call, we
118 can correct for the work. */
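/* The low bit of in_flight_stack records whether the interrupted
   operation was an addition (bit set) or a removal; the remaining bits
   hold the address of the list element involved.  See stack_list_add,
   stack_list_del and __reclaim_stacks.  */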
119static uintptr_t in_flight_stack;
120
121/* List of the threads with user provided stacks in use. No need to
122 initialize this, since it's done in __pthread_initialize_minimal. */
123list_t __stack_user __attribute__ ((nocommon));
124hidden_data_def (__stack_user)
125
126#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
127/* Number of threads created. */
128static unsigned int nptl_ncreated;
129#endif
130
131
132/* Check whether the stack is still used or not. */
133#define FREE_P(descr) ((descr)->tid <= 0)
134
135
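/* Remove ELEM from the list it is currently on.  The element is
   published in in_flight_stack, bracketed by write barriers, so that a
   child created by fork() in the middle of the operation can repair the
   list in __reclaim_stacks.  */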
136static void
137stack_list_del (list_t *elem)
138{
139 in_flight_stack = (uintptr_t) elem;
140
141 atomic_write_barrier ();
142
143 list_del (elem);
144
145 atomic_write_barrier ();
146
147 in_flight_stack = 0;
148}
149
150
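/* Add ELEM to LIST.  The pending operation is recorded in
   in_flight_stack with the low bit set to mark it as an addition, so
   that __reclaim_stacks can replay or discard it after a fork().  */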
151static void
152stack_list_add (list_t *elem, list_t *list)
153{
154 in_flight_stack = (uintptr_t) elem | 1;
155
156 atomic_write_barrier ();
157
158 list_add (elem, list);
159
160 atomic_write_barrier ();
161
162 in_flight_stack = 0;
163}
164
165
166/* We create a doubly-linked list of all cache entries. Doubly linked
167 because this allows removing entries from the end. */
168
169
170/* Get a stack frame from the cache. We have to match by size since
171 some blocks might be too small or far too large. */
172static struct pthread *
173get_cached_stack (size_t *sizep, void **memp)
174{
175 size_t size = *sizep;
176 struct pthread *result = NULL;
177 list_t *entry;
178
179 lll_lock (stack_cache_lock, LLL_PRIVATE);
180
181 /* Search the cache for a matching entry. We search for the
182 smallest stack which has at least the required size. Note that
183 in normal situations the size of all allocated stacks is the
184 same. At the very least there are only a few different sizes.
185 Therefore this loop will exit early most of the time with an
186 exact match. */
187 list_for_each (entry, &stack_cache)
188 {
189 struct pthread *curr;
190
191 curr = list_entry (entry, struct pthread, list);
192 if (FREE_P (curr) && curr->stackblock_size >= size)
193 {
194 if (curr->stackblock_size == size)
195 {
196 result = curr;
197 break;
198 }
199
200 if (result == NULL
201 || result->stackblock_size > curr->stackblock_size)
202 result = curr;
203 }
204 }
205
206 if (__builtin_expect (result == NULL, 0)
207 /* Make sure the size difference is not excessive. In that
208 case we do not use the block. */
209 || __builtin_expect (result->stackblock_size > 4 * size, 0))
210 {
211 /* Release the lock. */
212 lll_unlock (stack_cache_lock, LLL_PRIVATE);
213
214 return NULL;
215 }
216
217 /* Dequeue the entry. */
218 stack_list_del (&result->list);
219
220 /* And add to the list of stacks in use. */
221 stack_list_add (&result->list, &stack_used);
222
223 /* And decrease the cache size. */
224 stack_cache_actsize -= result->stackblock_size;
225
226 /* Release the lock early. */
227 lll_unlock (stack_cache_lock, LLL_PRIVATE);
228
229 /* Report size and location of the stack to the caller. */
230 *sizep = result->stackblock_size;
231 *memp = result->stackblock;
232
233 /* Cancellation handling is back to the default. */
234 result->cancelhandling = 0;
235 result->cleanup = NULL;
236
237 /* No pending event. */
238 result->nextevent = NULL;
239
240 /* Clear the DTV. */
241 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
242 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
243
244 /* Re-initialize the TLS. */
245 _dl_allocate_tls_init (TLS_TPADJ (result));
246
247 return result;
248}
249
250
251/* Free stacks until cache size is lower than LIMIT. */
252void
253__free_stacks (size_t limit)
254{
255 /* We reduce the size of the cache. Remove the last entries until
256 the size is below the limit. */
257 list_t *entry;
258 list_t *prev;
259
260 /* Search from the end of the list. */
261 list_for_each_prev_safe (entry, prev, &stack_cache)
262 {
263 struct pthread *curr;
264
265 curr = list_entry (entry, struct pthread, list);
266 if (FREE_P (curr))
267 {
268 /* Unlink the block. */
269 stack_list_del (entry);
270
271 /* Account for the freed memory. */
272 stack_cache_actsize -= curr->stackblock_size;
273
274 /* Free the memory associated with the ELF TLS. */
275 _dl_deallocate_tls (TLS_TPADJ (curr), false);
276
277 /* Remove this block. This should never fail. If it does
278 something is really wrong. */
279 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
280 abort ();
281
282 /* Maybe we have freed enough. */
283 if (stack_cache_actsize <= limit)
284 break;
285 }
286 }
287}
288
289
290/* Add a stack frame which is not used anymore to the cache. Must be
291 called with the cache lock held. */
292static inline void
293__attribute ((always_inline))
294queue_stack (struct pthread *stack)
295{
296 /* We unconditionally add the stack to the list. The memory may
297 still be in use but it will not be reused until the kernel marks
298 the stack as not used anymore. */
299 stack_list_add (&stack->list, &stack_cache);
300
301 stack_cache_actsize += stack->stackblock_size;
302 if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
303 __free_stacks (stack_cache_maxsize);
304}
305
306
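/* Make a thread's stack executable by adding PROT_EXEC to its
   protection.  The guard pages are left untouched; on targets that need
   a separate register backing store only the relevant part of the block
   is changed.  */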
307static int
308internal_function
309change_stack_perm (struct pthread *pd
310#ifdef NEED_SEPARATE_REGISTER_STACK
311 , size_t pagemask
312#endif
313 )
314{
315#ifdef NEED_SEPARATE_REGISTER_STACK
316 void *stack = (pd->stackblock
317 + (((((pd->stackblock_size - pd->guardsize) / 2)
318 & pagemask) + pd->guardsize) & pagemask));
319 size_t len = pd->stackblock + pd->stackblock_size - stack;
320#elif defined _STACK_GROWS_DOWN
321 void *stack = pd->stackblock + pd->guardsize;
322 size_t len = pd->stackblock_size - pd->guardsize;
323#elif defined _STACK_GROWS_UP
324 void *stack = pd->stackblock;
325 size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
326#else
327# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
328#endif
329 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
330 return errno;
331
332 return 0;
333}
334
335
336static int
337allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
338 ALLOCATE_STACK_PARMS)
339{
340 struct pthread *pd;
341 size_t size;
342 size_t pagesize_m1 = __getpagesize () - 1;
343 void *stacktop;
344
345 assert (attr != NULL);
346 assert (powerof2 (pagesize_m1 + 1));
347 assert (TCB_ALIGNMENT >= STACK_ALIGN);
348
349 /* Get the stack size from the attribute if it is set. Otherwise we
350 use the default we determined at start time. */
351 size = attr->stacksize ?: __default_stacksize;
352
353 /* Get memory for the stack. */
354 if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
355 {
356 uintptr_t adj;
357
358 /* If the user also specified the size of the stack make sure it
359 is large enough. */
360 if (attr->stacksize != 0
361 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
362 return EINVAL;
363
364 /* Adjust stack size for alignment of the TLS block. */
365#if defined(TLS_TCB_AT_TP)
366 adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
367 & __static_tls_align_m1;
368 assert (size > adj + TLS_TCB_SIZE);
369#elif defined(TLS_DTV_AT_TP)
370 adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
371 & __static_tls_align_m1;
372 assert (size > adj);
373#endif
374
375 /* The user provided some memory. Let's hope it matches the
376 size... We do not allocate guard pages if the user provided
377 the stack. It is the user's responsibility to do this if it
378 is wanted. */
379#if defined(TLS_TCB_AT_TP)
380 pd = (struct pthread *) ((uintptr_t) attr->stackaddr
381 - TLS_TCB_SIZE - adj);
382#elif defined(TLS_DTV_AT_TP)
383 pd = (struct pthread *) (((uintptr_t) attr->stackaddr
384 - __static_tls_size - adj)
385 - TLS_PRE_TCB_SIZE);
386#endif
387
388 /* The user provided stack memory needs to be cleared. */
389 memset (pd, '\0', sizeof (struct pthread));
390
391 /* The first TSD block is included in the TCB. */
392 pd->specific[0] = pd->specific_1stblock;
393
394 /* Remember the stack-related values. */
395 pd->stackblock = (char *) attr->stackaddr - size;
396 pd->stackblock_size = size;
397
398 /* This is a user-provided stack. It will not be queued in the
399 stack cache nor will the memory (except the TLS memory) be freed. */
400 pd->user_stack = true;
401
402 /* This is at least the second thread. */
403 pd->header.multiple_threads = 1;
404#ifndef TLS_MULTIPLE_THREADS_IN_TCB
405 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
406#endif
407
408#ifndef __ASSUME_PRIVATE_FUTEX
409 /* The thread must know when private futexes are supported. */
410 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
411 header.private_futex);
412#endif
413
414#ifdef NEED_DL_SYSINFO
415 /* Copy the sysinfo value from the parent. */
416 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
417#endif
418
419 /* The process ID is also the same as that of the caller. */
420 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
421
422 /* Allocate the DTV for this thread. */
423 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
424 {
425 /* Something went wrong. */
426 assert (errno == ENOMEM);
427 return EAGAIN;
428 }
429
430
431 /* Prepare to modify global data. */
432 lll_lock (stack_cache_lock, LLL_PRIVATE);
433
434 /* And add to the list of stacks in use. */
435 list_add (&pd->list, &__stack_user);
436
437 lll_unlock (stack_cache_lock, LLL_PRIVATE);
438 }
439 else
440 {
441 /* Allocate some anonymous memory. If possible use the cache. */
442 size_t guardsize;
443 size_t reqsize;
444 void *mem = 0;
445 const int prot = (PROT_READ | PROT_WRITE);
446
447#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
448 /* Add one more page for stack coloring. Don't do it for stacks
449 with 16 times pagesize or larger. This might just cause
450 unnecessary misalignment. */
451 if (size <= 16 * pagesize_m1)
452 size += pagesize_m1 + 1;
453#endif
454
455 /* Adjust the stack size for alignment. */
456 size &= ~__static_tls_align_m1;
457 assert (size != 0);
458
459 /* Make sure the size of the stack is enough for the guard and
460 possibly the thread descriptor. */
461 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
462 if (__builtin_expect (size < ((guardsize + __static_tls_size
463 + MINIMAL_REST_STACK + pagesize_m1)
464 & ~pagesize_m1),
465 0))
466 /* The stack is too small (or the guard too large). */
467 return EINVAL;
468
469 /* Try to get a stack from the cache. */
470 reqsize = size;
471 pd = get_cached_stack (&size, &mem);
472 if (pd == NULL)
473 {
474 /* To avoid aliasing effects on a larger scale than pages we
475 adjust the allocated stack size if necessary. This way
476 allocations directly following each other will not have
477 aliasing problems. */
478#if defined MULTI_PAGE_ALIASING && MULTI_PAGE_ALIASING != 0
479 if ((size % MULTI_PAGE_ALIASING) == 0)
480 size += pagesize_m1 + 1;
481#endif
482
483 mem = mmap (NULL, size, prot,
484 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
485
486 if (__builtin_expect (mem == MAP_FAILED, 0))
487 {
488 if (errno == ENOMEM)
489 __set_errno (EAGAIN);
490
491 return errno;
492 }
493
494 /* SIZE is guaranteed to be greater than zero.
495 So we can never get a null pointer back from mmap. */
496 assert (mem != NULL);
497
498#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
499 /* Atomically increment NCREATED. */
500 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
501
502 /* We choose the offset for coloring by incrementing it for
503 every new thread by a fixed amount. The offset is used
504 modulo the page size. Even if coloring would be better
505 relative to higher alignment values it makes no sense to
506 do it since the mmap() interface does not allow us to
507 specify any alignment for the returned memory block. */
508 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
509
510 /* Make sure the coloring offset does not disturb the alignment
511 of the TCB and static TLS block. */
512 if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
513 coloring = (((coloring + __static_tls_align_m1)
514 & ~(__static_tls_align_m1))
515 & ~pagesize_m1);
516#else
517 /* Unless specified we do not make any adjustments. */
518# define coloring 0
519#endif
520
521 /* Place the thread descriptor at the end of the stack. */
522#if defined(TLS_TCB_AT_TP)
523 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
524#elif defined(TLS_DTV_AT_TP)
525 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
526 - __static_tls_size)
527 & ~__static_tls_align_m1)
528 - TLS_PRE_TCB_SIZE);
529#endif
530
531 /* Remember the stack-related values. */
532 pd->stackblock = mem;
533 pd->stackblock_size = size;
534
535 /* We allocated the first block of the thread-specific data array.
536 This address will not change for the lifetime of this
537 descriptor. */
538 pd->specific[0] = pd->specific_1stblock;
539
540 /* This is at least the second thread. */
541 pd->header.multiple_threads = 1;
542#ifndef TLS_MULTIPLE_THREADS_IN_TCB
543 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
544#endif
545
546#ifndef __ASSUME_PRIVATE_FUTEX
547 /* The thread must know when private futexes are supported. */
548 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
549 header.private_futex);
550#endif
551
552#ifdef NEED_DL_SYSINFO
553 /* Copy the sysinfo value from the parent. */
554 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
555#endif
556
557 /* The process ID is also the same as that of the caller. */
558 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
559
560 /* Allocate the DTV for this thread. */
561 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
562 {
563 /* Something went wrong. */
564 assert (errno == ENOMEM);
565
566 /* Free the stack memory we just allocated. */
567 (void) munmap (mem, size);
568
569 return EAGAIN;
570 }
571
572
573 /* Prepare to modify global data. */
574 lll_lock (stack_cache_lock, LLL_PRIVATE);
575
576 /* And add to the list of stacks in use. */
577 stack_list_add (&pd->list, &stack_used);
578
579 lll_unlock (stack_cache_lock, LLL_PRIVATE);
580
581
582 /* Note that all of the stack and the thread descriptor is
583 zeroed. This means we do not have to initialize fields
584 with initial value zero. This is specifically true for
585 the 'tid' field which is always set back to zero once the
586 stack is not used anymore and for the 'guardsize' field
587 which will be read next. */
588 }
589
590 /* Create or resize the guard area if necessary. */
591 if (__builtin_expect (guardsize > pd->guardsize, 0))
592 {
593#ifdef NEED_SEPARATE_REGISTER_STACK
594 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
595#elif defined _STACK_GROWS_DOWN
596 char *guard = mem;
597#elif defined _STACK_GROWS_UP
598 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
599#endif
600 if (mprotect (guard, guardsize, PROT_NONE) != 0)
601 {
602 int err;
603 mprot_error:
604 err = errno;
605
606 lll_lock (stack_cache_lock, LLL_PRIVATE);
607
608 /* Remove the thread from the list. */
609 stack_list_del (&pd->list);
610
611 lll_unlock (stack_cache_lock, LLL_PRIVATE);
612
613 /* Get rid of the TLS block we allocated. */
614 _dl_deallocate_tls (TLS_TPADJ (pd), false);
615
616 /* Free the stack memory regardless of whether the size
617 of the cache is over the limit or not. If this piece
618 of memory caused problems we better do not use it
619 anymore. Uh, and we ignore possible errors. There
620 is nothing we could do. */
621 (void) munmap (mem, size);
622
623 return err;
624 }
625
626 pd->guardsize = guardsize;
627 }
628 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
629 0))
630 {
631 /* The old guard area is too large. */
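	  /* This can happen when the descriptor was taken from the stack
	     cache and the cached stack had a bigger guard; hand the pages
	     that are no longer needed as guard back to the usable stack by
	     restoring read/write protection on them.  */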
632
633#ifdef NEED_SEPARATE_REGISTER_STACK
634 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
635 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
636
637 if (oldguard < guard
638 && mprotect (oldguard, guard - oldguard, prot) != 0)
639 goto mprot_error;
640
641 if (mprotect (guard + guardsize,
642 oldguard + pd->guardsize - guard - guardsize,
643 prot) != 0)
644 goto mprot_error;
645#elif defined _STACK_GROWS_DOWN
646 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
647 prot) != 0)
648 goto mprot_error;
649#elif defined _STACK_GROWS_UP
650 if (mprotect ((char *) pd - pd->guardsize,
651 pd->guardsize - guardsize, prot) != 0)
652 goto mprot_error;
653#endif
654
655 pd->guardsize = guardsize;
656 }
657 /* The pthread_getattr_np() calls need to be given the size
658 requested in the attribute, regardless of how large the
659 guard size actually in use is. */
660 pd->reported_guardsize = guardsize;
661 }
662
663 /* Initialize the lock. We have to do this unconditionally since the
664 stillborn thread could be canceled while the lock is taken. */
665 pd->lock = LLL_LOCK_INITIALIZER;
666
667 /* The robust mutex lists also need to be initialized
668 unconditionally because the cleanup for the previous stack owner
669 might have happened in the kernel. */
670 pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
671 - offsetof (pthread_mutex_t,
672 __data.__list.__next));
673 pd->robust_head.list_op_pending = NULL;
674#ifdef __PTHREAD_MUTEX_HAVE_PREV
675 pd->robust_prev = &pd->robust_head;
676#endif
677 pd->robust_head.list = &pd->robust_head;
678
679 /* We place the thread descriptor at the end of the stack. */
680 *pdp = pd;
681
682#if defined(TLS_TCB_AT_TP)
683 /* The stack begins before the TCB and the static TLS block. */
684 stacktop = ((char *) (pd + 1) - __static_tls_size);
685#elif defined(TLS_DTV_AT_TP)
686 stacktop = (char *) (pd - 1);
687#endif
688
689#ifdef NEED_SEPARATE_REGISTER_STACK
690 *stack = pd->stackblock;
691 *stacksize = stacktop - *stack;
692#elif defined _STACK_GROWS_DOWN
693 *stack = stacktop;
694#elif defined _STACK_GROWS_UP
695 *stack = pd->stackblock;
696 assert (*stack > 0);
697#endif
698
699 return 0;
700}
701
702
703void
704internal_function
705__deallocate_stack (struct pthread *pd)
706{
707 lll_lock (stack_cache_lock, LLL_PRIVATE);
708
709 /* Remove the thread from the list of threads with user defined
710 stacks. */
711 stack_list_del (&pd->list);
712
713 /* Not much to do. Just free the mmap()ed memory. Note that we do
714 not reset the 'used' flag in the 'tid' field. This is done by
715 the kernel. If no thread has been created yet this field is
716 still zero. */
717 if (__builtin_expect (! pd->user_stack, 1))
718 (void) queue_stack (pd);
719 else
720 /* Free the memory associated with the ELF TLS. */
721 _dl_deallocate_tls (TLS_TPADJ (pd), false);
722
723 lll_unlock (stack_cache_lock, LLL_PRIVATE);
724}
725
726
727int
728internal_function
729__make_stacks_executable (void **stack_endp)
730{
731 /* First the main thread's stack. */
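  /* Changing the protection of the main thread's stack is not
     supported here; the function reports EPERM right away, so the loop
     over the thread stacks below is never reached.  */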
732 int err = EPERM;
733 if (err != 0)
734 return err;
735
736#ifdef NEED_SEPARATE_REGISTER_STACK
737 const size_t pagemask = ~(__getpagesize () - 1);
738#endif
739
740 lll_lock (stack_cache_lock, LLL_PRIVATE);
741
742 list_t *runp;
743 list_for_each (runp, &stack_used)
744 {
745 err = change_stack_perm (list_entry (runp, struct pthread, list)
746#ifdef NEED_SEPARATE_REGISTER_STACK
747 , pagemask
748#endif
749 );
750 if (err != 0)
751 break;
752 }
753
754 /* Also change the permission for the currently unused stacks. This
755 might be wasted time, but it is better spent here than adding a check
756 in the fast path. */
757 if (err == 0)
758 list_for_each (runp, &stack_cache)
759 {
760 err = change_stack_perm (list_entry (runp, struct pthread, list)
761#ifdef NEED_SEPARATE_REGISTER_STACK
762 , pagemask
763#endif
764 );
765 if (err != 0)
766 break;
767 }
768
769 lll_unlock (stack_cache_lock, LLL_PRIVATE);
770
771 return err;
772}
773
774
775/* In case of a fork() call the memory allocation in the child will be
776 the same but only one thread is running. All stacks except that of
777 the one running thread are not used anymore. We have to recycle
778 them. */
779void
780__reclaim_stacks (void)
781{
782 struct pthread *self = (struct pthread *) THREAD_SELF;
783
784 /* No locking necessary. The caller is the only stack in use. But
785 we have to be aware that we might have interrupted a list
786 operation. */
787
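  /* If fork() interrupted a list operation, in_flight_stack is
     non-zero: bit 0 says whether it was an addition or a removal and
     the other bits identify the list element.  Complete or undo that
     operation before walking the lists.  */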
788 if (in_flight_stack != 0)
789 {
790 bool add_p = in_flight_stack & 1;
791 list_t *elem = (list_t *) (in_flight_stack & ~UINTMAX_C (1));
792
793 if (add_p)
794 {
795 /* We always add at the beginning of the list. So in this
796 case we only need to check the beginning of these lists. */
797 int check_list (list_t *l)
798 {
799 if (l->next->prev != l)
800 {
801 assert (l->next->prev == elem);
802
803 elem->next = l->next;
804 elem->prev = l;
805 l->next = elem;
806
807 return 1;
808 }
809
810 return 0;
811 }
812
813 if (check_list (&stack_used) == 0)
814 (void) check_list (&stack_cache);
815 }
816 else
817 {
818 /* We can simply always replay the delete operation. */
819 elem->next->prev = elem->prev;
820 elem->prev->next = elem->next;
821 }
822 }
823
824 /* Mark all stacks except the still running one as free. */
825 list_t *runp;
826 list_for_each (runp, &stack_used)
827 {
828 struct pthread *curp = list_entry (runp, struct pthread, list);
829 if (curp != self)
830 {
831 /* This marks the stack as free. */
832 curp->tid = 0;
833
834 /* The PID field must be initialized for the new process. */
835 curp->pid = self->pid;
836
837 /* Account for the size of the stack. */
838 stack_cache_actsize += curp->stackblock_size;
839
840 if (curp->specific_used)
841 {
842 /* Clear the thread-specific data. */
843 memset (curp->specific_1stblock, '\0',
844 sizeof (curp->specific_1stblock));
845
846 curp->specific_used = false;
847
848 size_t cnt;
849 for (cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
850 if (curp->specific[cnt] != NULL)
851 {
852 memset (curp->specific[cnt], '\0',
853 sizeof (curp->specific_1stblock));
854
855 /* We have allocated the block which we do not
856 free here so re-set the bit. */
857 curp->specific_used = true;
858 }
859 }
860 }
861 }
862
863 /* Reset the PIDs in any cached stacks. */
864 list_for_each (runp, &stack_cache)
865 {
866 struct pthread *curp = list_entry (runp, struct pthread, list);
867 curp->pid = self->pid;
868 }
869
870 /* Add the stack of all running threads to the cache. */
871 list_splice (&stack_used, &stack_cache);
872
873 /* Remove the entry for the current thread from the cache list
874 and add it to the list of running threads. Which of the two
875 lists it is added to is decided by the user_stack flag. */
876 stack_list_del (&self->list);
877
878 /* Re-initialize the lists for all the threads. */
879 INIT_LIST_HEAD (&stack_used);
880 INIT_LIST_HEAD (&__stack_user);
881
882 if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
883 list_add (&self->list, &__stack_user);
884 else
885 list_add (&self->list, &stack_used);
886
887 /* There is one thread running. */
888 __nptl_nthreads = 1;
889
890 in_flight_stack = 0;
891
892 /* Initialize the lock. */
893 stack_cache_lock = LLL_LOCK_INITIALIZER;
894}
895
896
897#if HP_TIMING_AVAIL
898# undef __find_thread_by_id
899/* Find a thread given the thread ID. */
900attribute_hidden
901struct pthread *
902__find_thread_by_id (pid_t tid)
903{
904 struct pthread *result = NULL;
905
906 lll_lock (stack_cache_lock, LLL_PRIVATE);
907
908 /* Iterate over the list with system-allocated threads first. */
909 list_t *runp;
910 list_for_each (runp, &stack_used)
911 {
912 struct pthread *curp;
913
914 curp = list_entry (runp, struct pthread, list);
915
916 if (curp->tid == tid)
917 {
918 result = curp;
919 goto out;
920 }
921 }
922
923 /* Now the list with threads using user-allocated stacks. */
924 list_for_each (runp, &__stack_user)
925 {
926 struct pthread *curp;
927
928 curp = list_entry (runp, struct pthread, list);
929
930 if (curp->tid == tid)
931 {
932 result = curp;
933 goto out;
934 }
935 }
936
937 out:
938 lll_unlock (stack_cache_lock, LLL_PRIVATE);
939
940 return result;
941}
942#endif
943
944
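/* Mark T as having a setxid request pending by setting the SETXID bit
   in its cancelhandling word, unless the thread is already exiting.
   Clearing setxid_futex keeps the thread from exiting before the
   request has been handled.  */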
945static void
946internal_function
947setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
948{
949 int ch;
950
951 /* Don't let the thread exit before the setxid handler runs. */
952 t->setxid_futex = 0;
953
954 do
955 {
956 ch = t->cancelhandling;
957
958 /* If the thread is exiting right now, ignore it. */
959 if ((ch & EXITING_BITMASK) != 0)
960 return;
961 }
962 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
963 ch | SETXID_BITMASK, ch));
964}
965
966
967static void
968internal_function
969setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
970{
971 int ch;
972
973 do
974 {
975 ch = t->cancelhandling;
976 if ((ch & SETXID_BITMASK) == 0)
977 return;
978 }
979 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
980 ch & ~SETXID_BITMASK, ch));
981
982 /* Release the futex just in case. */
983 t->setxid_futex = 1;
984 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
985}
986
987
988static int
989internal_function
990setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
991{
992 if ((t->cancelhandling & SETXID_BITMASK) == 0)
993 return 0;
994
995 int val;
996 INTERNAL_SYSCALL_DECL (err);
997#if defined (__ASSUME_TGKILL) && __ASSUME_TGKILL
998 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
999 t->tid, SIGSETXID);
1000#else
1001# ifdef __NR_tgkill
1002 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
1003 t->tid, SIGSETXID);
1004 if (INTERNAL_SYSCALL_ERROR_P (val, err)
1005 && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
1006# endif
1007 val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
1008#endif
1009
1010 /* If this failed, the thread must not have started yet or it has already exited. */
1011 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1012 {
1013 atomic_increment (&cmdp->cntr);
1014 return 1;
1015 }
1016 else
1017 return 0;
1018}
1019
1020
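/* Perform a process-wide identity change (setuid, setgid, ...): mark
   every other thread, send it the internal SIGSETXID signal so that it
   performs the same syscall from its signal handler, wait until all of
   them have done so, and finally perform the syscall in the calling
   thread.  */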
1021int
1022attribute_hidden
1023__nptl_setxid (struct xid_command *cmdp)
1024{
1025 int signalled;
1026 int result;
1027 lll_lock (stack_cache_lock, LLL_PRIVATE);
1028
1029 __xidcmd = cmdp;
1030 cmdp->cntr = 0;
1031
1032 struct pthread *self = THREAD_SELF;
1033
1034 /* Iterate over the list with system-allocated threads first. */
1035 list_t *runp;
1036 list_for_each (runp, &stack_used)
1037 {
1038 struct pthread *t = list_entry (runp, struct pthread, list);
1039 if (t == self)
1040 continue;
1041
1042 setxid_mark_thread (cmdp, t);
1043 }
1044
1045 /* Now the list with threads using user-allocated stacks. */
1046 list_for_each (runp, &__stack_user)
1047 {
1048 struct pthread *t = list_entry (runp, struct pthread, list);
1049 if (t == self)
1050 continue;
1051
1052 setxid_mark_thread (cmdp, t);
1053 }
1054
1055 /* Iterate until we don't succeed in signalling anyone. That means
1056 we have gotten all running threads, and their children will be
1057 automatically correct once started. */
1058 do
1059 {
1060 signalled = 0;
1061
1062 list_for_each (runp, &stack_used)
1063 {
1064 struct pthread *t = list_entry (runp, struct pthread, list);
1065 if (t == self)
1066 continue;
1067
1068 signalled += setxid_signal_thread (cmdp, t);
1069 }
1070
1071 list_for_each (runp, &__stack_user)
1072 {
1073 struct pthread *t = list_entry (runp, struct pthread, list);
1074 if (t == self)
1075 continue;
1076
1077 signalled += setxid_signal_thread (cmdp, t);
1078 }
1079
1080 int cur = cmdp->cntr;
1081 while (cur != 0)
1082 {
1083 lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1084 cur = cmdp->cntr;
1085 }
1086 }
1087 while (signalled != 0);
1088
1089 /* Clean up flags, so that no thread blocks during exit waiting
1090 for a signal which will never come. */
1091 list_for_each (runp, &stack_used)
1092 {
1093 struct pthread *t = list_entry (runp, struct pthread, list);
1094 if (t == self)
1095 continue;
1096
1097 setxid_unmark_thread (cmdp, t);
1098 }
1099
1100 list_for_each (runp, &__stack_user)
1101 {
1102 struct pthread *t = list_entry (runp, struct pthread, list);
1103 if (t == self)
1104 continue;
1105
1106 setxid_unmark_thread (cmdp, t);
1107 }
1108
1109 /* This must be last, otherwise the current thread might not have
1110 permission to send the SIGSETXID signal to the other threads. */
1111 INTERNAL_SYSCALL_DECL (err);
1112 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1113 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1114 if (INTERNAL_SYSCALL_ERROR_P (result, err))
1115 {
1116 __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
1117 result = -1;
1118 }
1119
1120 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1121 return result;
1122}
1123
1124static inline void __attribute__((always_inline))
1125init_one_static_tls (struct pthread *curp, struct link_map *map)
1126{
1127 dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
1128# if defined(TLS_TCB_AT_TP)
1129 void *dest = (char *) curp - map->l_tls_offset;
1130# elif defined(TLS_DTV_AT_TP)
1131 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1132# else
1133# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1134# endif
1135
1136 /* Fill in the DTV slot so that a later LD/GD access will find it. */
1137 dtv[map->l_tls_modid].pointer.val = dest;
1138 dtv[map->l_tls_modid].pointer.is_static = true;
1139
1140 /* Initialize the memory. */
1141 memset (mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1142 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1143}
1144
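/* Called when a newly loaded object needs static TLS: set up that
   object's TLS block in every thread that already exists.  Threads
   created later get it through _dl_allocate_tls_init.  */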
1145void
1146attribute_hidden
1147__pthread_init_static_tls (struct link_map *map)
1148{
1149 lll_lock (stack_cache_lock, LLL_PRIVATE);
1150
1151 /* Iterate over the list with system-allocated threads first. */
1152 list_t *runp;
1153 list_for_each (runp, &stack_used)
1154 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1155
1156 /* Now the list with threads using user-allocated stacks. */
1157 list_for_each (runp, &__stack_user)
1158 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1159
1160 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1161}
1162
1163
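/* Wait until no other thread still uses the global scope (GSCOPE) for a
   symbol lookup.  The dynamic linker calls this before modifying a
   search scope so that concurrent lookups never see a half-updated
   list.  */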
1164void
1165attribute_hidden
1166__wait_lookup_done (void)
1167{
1168 lll_lock (stack_cache_lock, LLL_PRIVATE);
1169
1170 struct pthread *self = THREAD_SELF;
1171
1172 /* Iterate over the list with system-allocated threads first. */
1173 list_t *runp;
1174 list_for_each (runp, &stack_used)
1175 {
1176 struct pthread *t = list_entry (runp, struct pthread, list);
1177 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1178 continue;
1179
1180 int *const gscope_flagp = &t->header.gscope_flag;
1181
1182 /* We have to wait until this thread is done with the global
1183 scope. First tell the thread that we are waiting and
1184 possibly have to be woken. */
1185 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1186 THREAD_GSCOPE_FLAG_WAIT,
1187 THREAD_GSCOPE_FLAG_USED))
1188 continue;
1189
1190 do
1191 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1192 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1193 }
1194
1195 /* Now the list with threads using user-allocated stacks. */
1196 list_for_each (runp, &__stack_user)
1197 {
1198 struct pthread *t = list_entry (runp, struct pthread, list);
1199 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1200 continue;
1201
1202 int *const gscope_flagp = &t->header.gscope_flag;
1203
1204 /* We have to wait until this thread is done with the global
1205 scope. First tell the thread that we are waiting and
1206 possibly have to be woken. */
1207 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1208 THREAD_GSCOPE_FLAG_WAIT,
1209 THREAD_GSCOPE_FLAG_USED))
1210 continue;
1211
1212 do
1213 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1214 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1215 }
1216
1217 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1218}