1/* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20#include <assert.h>
21#include <errno.h>
22#include <signal.h>
23#include <stdint.h>
24#include <string.h>
25#include <unistd.h>
26#include <sys/mman.h>
27#include <sys/param.h>
28#include <tls.h>
29#include <lowlevellock.h>
30#include <link.h>
31#include <bits/kernel-features.h>
32
33
34#ifndef NEED_SEPARATE_REGISTER_STACK
35
36/* Most architectures have exactly one stack pointer. Some have more. */
37# define STACK_VARIABLES void *stackaddr = NULL
38
39/* How to pass the values to the 'create_thread' function. */
40# define STACK_VARIABLES_ARGS stackaddr
41
42/* How to declare the function that receives these parameters. */
43# define STACK_VARIABLES_PARMS void *stackaddr
44
45/* How to declare allocate_stack. */
46# define ALLOCATE_STACK_PARMS void **stack
47
48/* This is how the function is called. We do it this way to allow
49 other variants of the function to have more parameters. */
50# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
51
52#else
53
54/* We need two stacks. The kernel will place them but we have to tell
55 the kernel about the size of the reserved address space. */
56# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
57
58/* How to pass the values to the 'create_thread' function. */
59# define STACK_VARIABLES_ARGS stackaddr, stacksize
60
61/* How to declare the function that receives these parameters. */
62# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
63
64/* How to declare allocate_stack. */
65# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
66
67/* This is how the function is called. We do it this way to allow
68 other variants of the function to have more parameters. */
69# define ALLOCATE_STACK(attr, pd) \
70 allocate_stack (attr, pd, &stackaddr, &stacksize)
71
72#endif
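/* A caller (pthread_create) is expected to use these macros roughly as
   follows (a sketch, not the exact code):

       STACK_VARIABLES;
       int err = ALLOCATE_STACK (iattr, &pd);
       ...
       create_thread (pd, iattr, STACK_VARIABLES_ARGS);

   so the extra register-stack parameters show up only where needed.  */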
73
74
75/* Default alignment of stack. */
76#ifndef STACK_ALIGN
77# define STACK_ALIGN __alignof__ (long double)
78#endif
79
80/* Default value for minimal stack size after allocating thread
81 descriptor and guard. */
82#ifndef MINIMAL_REST_STACK
83# define MINIMAL_REST_STACK 4096
84#endif
85
86
87/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
88 a stack. Use it when possible. */
89#ifndef MAP_STACK
90# define MAP_STACK 0
91#endif
92
93/* This yields the pointer that TLS support code calls the thread pointer. */
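/* With TLS_TCB_AT_TP the thread descriptor itself is the TCB, so no
   adjustment is needed.  With TLS_DTV_AT_TP the descriptor is placed
   TLS_PRE_TCB_SIZE bytes before the TCB, so the pointer handed to the
   TLS code has to be advanced past it.  */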
94#if defined(TLS_TCB_AT_TP)
95# define TLS_TPADJ(pd) (pd)
96#elif defined(TLS_DTV_AT_TP)
97# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
98#endif
99
100/* Cache handling for not-yet-freed stacks. */
101
102/* Maximum size in bytes of the cache. */
103//static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
104static size_t stack_cache_maxsize = 256 * 1024; /* 40 MiB is too large for embedded devices, so use 256 KiB instead. */
105static size_t stack_cache_actsize;
106
107/* Mutex protecting this variable. */
108static int stack_cache_lock = LLL_LOCK_INITIALIZER;
109
110/* List of queued stack frames. */
111static LIST_HEAD (stack_cache);
112
113/* List of the stacks in use. */
114static LIST_HEAD (stack_used);
115
116/* We need to record what list operations we are going to do so that,
117 in case of an asynchronous interruption due to a fork() call, we
118 can correct for the work. */
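/* The low bit of in_flight_stack records whether the interrupted
   operation was an addition (bit set) or a removal; the remaining bits
   hold the address of the list element involved.  See stack_list_add,
   stack_list_del and __reclaim_stacks.  */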
119static uintptr_t in_flight_stack;
120
121/* List of the threads with user provided stacks in use. No need to
122 initialize this, since it's done in __pthread_initialize_minimal. */
123list_t __stack_user __attribute__ ((nocommon));
124hidden_data_def (__stack_user)
125
126#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
127/* Number of threads created. */
128static unsigned int nptl_ncreated;
129#endif
130
131
132/* Check whether the stack is still used or not. */
133#define FREE_P(descr) ((descr)->tid <= 0)
134
135
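/* Remove ELEM from the list it is currently on.  The element is
   published in in_flight_stack, bracketed by write barriers, so that a
   child created by fork() in the middle of the operation can repair the
   list in __reclaim_stacks.  */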
136static void
137stack_list_del (list_t *elem)
138{
139 in_flight_stack = (uintptr_t) elem;
140
141 atomic_write_barrier ();
142
143 list_del (elem);
144
145 atomic_write_barrier ();
146
147 in_flight_stack = 0;
148}
149
150
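/* Add ELEM to LIST.  The pending operation is recorded in
   in_flight_stack with the low bit set to mark it as an addition, so
   that __reclaim_stacks can replay or discard it after a fork().  */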
151static void
152stack_list_add (list_t *elem, list_t *list)
153{
154 in_flight_stack = (uintptr_t) elem | 1;
155
156 atomic_write_barrier ();
157
158 list_add (elem, list);
159
160 atomic_write_barrier ();
161
162 in_flight_stack = 0;
163}
164
165
166/* We create a doubly-linked list of all cache entries. Doubly linked
167 because this allows removing entries from the end. */
168
169
170/* Get a stack frame from the cache. We have to match by size since
171 some blocks might be too small or far too large. */
172static struct pthread *
173get_cached_stack (size_t *sizep, void **memp)
174{
175 size_t size = *sizep;
176 struct pthread *result = NULL;
177 list_t *entry;
178
179 lll_lock (stack_cache_lock, LLL_PRIVATE);
180
181 /* Search the cache for a matching entry. We search for the
182 smallest stack which has at least the required size. Note that
183 in normal situations the size of all allocated stacks is the
184 same. At the very least there are only a few different sizes.
185 Therefore this loop will exit early most of the time with an
186 exact match. */
187 list_for_each (entry, &stack_cache)
188 {
189 struct pthread *curr;
190
191 curr = list_entry (entry, struct pthread, list);
192 if (FREE_P (curr) && curr->stackblock_size >= size)
193 {
194 if (curr->stackblock_size == size)
195 {
196 result = curr;
197 break;
198 }
199
200 if (result == NULL
201 || result->stackblock_size > curr->stackblock_size)
202 result = curr;
203 }
204 }
205
206 if (__builtin_expect (result == NULL, 0)
207 /* Make sure the size difference is not excessive. In that
208 case we do not use the block. */
209 || __builtin_expect (result->stackblock_size > 4 * size, 0))
210 {
211 /* Release the lock. */
212 lll_unlock (stack_cache_lock, LLL_PRIVATE);
213
214 return NULL;
215 }
216
217 /* Dequeue the entry. */
218 stack_list_del (&result->list);
219
220 /* And add to the list of stacks in use. */
221 stack_list_add (&result->list, &stack_used);
222
223 /* And decrease the cache size. */
224 stack_cache_actsize -= result->stackblock_size;
225
226 /* Release the lock early. */
227 lll_unlock (stack_cache_lock, LLL_PRIVATE);
228
229 /* Report size and location of the stack to the caller. */
230 *sizep = result->stackblock_size;
231 *memp = result->stackblock;
232
233 /* Cancellation handling is back to the default. */
234 result->cancelhandling = 0;
235 result->cleanup = NULL;
236
237 /* No pending event. */
238 result->nextevent = NULL;
239
240 /* Clear the DTV. */
241 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
242 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
243
244 /* Re-initialize the TLS. */
245 _dl_allocate_tls_init (TLS_TPADJ (result));
246
247 return result;
248}
249
250
251/* Free stacks until cache size is lower than LIMIT. */
252void
253__free_stacks (size_t limit)
254{
255 /* We reduce the size of the cache. Remove the last entries until
256 the size is below the limit. */
257 list_t *entry;
258 list_t *prev;
259
260 /* Search from the end of the list. */
261 list_for_each_prev_safe (entry, prev, &stack_cache)
262 {
263 struct pthread *curr;
264
265 curr = list_entry (entry, struct pthread, list);
266 if (FREE_P (curr))
267 {
268 /* Unlink the block. */
269 stack_list_del (entry);
270
271 /* Account for the freed memory. */
272 stack_cache_actsize -= curr->stackblock_size;
273
274 /* Free the memory associated with the ELF TLS. */
275 _dl_deallocate_tls (TLS_TPADJ (curr), false);
276
277 /* Remove this block. This should never fail. If it does
278 something is really wrong. */
279 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
280 abort ();
281
282 /* Maybe we have freed enough. */
283 if (stack_cache_actsize <= limit)
284 break;
285 }
286 }
287}
288
289
290/* Add a stack frame which is not used anymore to the cache. Must be
291 called with the cache lock held. */
292static inline void
293__attribute ((always_inline))
294queue_stack (struct pthread *stack)
295{
296 /* We unconditionally add the stack to the list. The memory may
297 still be in use but it will not be reused until the kernel marks
298 the stack as not used anymore. */
299 stack_list_add (&stack->list, &stack_cache);
300
301 stack_cache_actsize += stack->stackblock_size;
302 if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
303 __free_stacks (stack_cache_maxsize);
304}
305
306
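/* Make a thread's stack executable by adding PROT_EXEC to its
   protection.  The guard pages are left untouched; on targets that need
   a separate register backing store only the relevant part of the block
   is changed.  */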
307static int
308internal_function
309change_stack_perm (struct pthread *pd
310#ifdef NEED_SEPARATE_REGISTER_STACK
311 , size_t pagemask
312#endif
313 )
314{
315#ifdef NEED_SEPARATE_REGISTER_STACK
316 void *stack = (pd->stackblock
317 + (((((pd->stackblock_size - pd->guardsize) / 2)
318 & pagemask) + pd->guardsize) & pagemask));
319 size_t len = pd->stackblock + pd->stackblock_size - stack;
320#elif defined _STACK_GROWS_DOWN
321 void *stack = pd->stackblock + pd->guardsize;
322 size_t len = pd->stackblock_size - pd->guardsize;
323#elif defined _STACK_GROWS_UP
324 void *stack = pd->stackblock;
325 size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
326#else
327# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
328#endif
329 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
330 return errno;
331
332 return 0;
333}
334
335
336static int
337allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
338 ALLOCATE_STACK_PARMS)
339{
340 struct pthread *pd;
341 size_t size;
342 size_t pagesize_m1 = __getpagesize () - 1;
343 void *stacktop;
344
345 assert (attr != NULL);
346 assert (powerof2 (pagesize_m1 + 1));
347 assert (TCB_ALIGNMENT >= STACK_ALIGN);
348
349 /* Get the stack size from the attribute if it is set. Otherwise we
350 use the default we determined at start time. */
351 size = attr->stacksize ?: __default_stacksize;
352
353 /* Get memory for the stack. */
354 if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
355 {
356 uintptr_t adj;
357
358 /* If the user also specified the size of the stack make sure it
359 is large enough. */
360 if (attr->stacksize != 0
361 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
362 return EINVAL;
363
364 /* Adjust stack size for alignment of the TLS block. */
365#if defined(TLS_TCB_AT_TP)
366 adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
367 & __static_tls_align_m1;
368 assert (size > adj + TLS_TCB_SIZE);
369#elif defined(TLS_DTV_AT_TP)
370 adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
371 & __static_tls_align_m1;
372 assert (size > adj);
373#endif
374
375 /* The user provided some memory. Let's hope it matches the
376 size... We do not allocate guard pages if the user provided
377 the stack. It is the user's responsibility to do this if it
378 is wanted. */
379#if defined(TLS_TCB_AT_TP)
380 pd = (struct pthread *) ((uintptr_t) attr->stackaddr
381 - TLS_TCB_SIZE - adj);
382#elif defined(TLS_DTV_AT_TP)
383 pd = (struct pthread *) (((uintptr_t) attr->stackaddr
384 - __static_tls_size - adj)
385 - TLS_PRE_TCB_SIZE);
386#endif
387
388 /* The user provided stack memory needs to be cleared. */
389 memset (pd, '\0', sizeof (struct pthread));
390
391 /* The first TSD block is included in the TCB. */
392 pd->specific[0] = pd->specific_1stblock;
393
394 /* Remember the stack-related values. */
395 pd->stackblock = (char *) attr->stackaddr - size;
396 pd->stackblock_size = size;
397
398 /* This is a user-provided stack. It will not be queued in the
399 stack cache nor will the memory (except the TLS memory) be freed. */
400 pd->user_stack = true;
401
402 /* This is at least the second thread. */
403 pd->header.multiple_threads = 1;
404#ifndef TLS_MULTIPLE_THREADS_IN_TCB
405 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
406#endif
407
408#ifndef __ASSUME_PRIVATE_FUTEX
409 /* The thread must know when private futexes are supported. */
410 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
411 header.private_futex);
412#endif
413
414#ifdef NEED_DL_SYSINFO
415 /* Copy the sysinfo value from the parent. */
416 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
417#endif
418
419 /* The process ID is also the same as that of the caller. */
420 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
421
422 /* Allocate the DTV for this thread. */
423 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
424 {
425 /* Something went wrong. */
426 assert (errno == ENOMEM);
427 return EAGAIN;
428 }
429
430
431 /* Prepare to modify global data. */
432 lll_lock (stack_cache_lock, LLL_PRIVATE);
433
434 /* And add to the list of stacks in use. */
435 list_add (&pd->list, &__stack_user);
436
437 lll_unlock (stack_cache_lock, LLL_PRIVATE);
438 }
439 else
440 {
441 /* Allocate some anonymous memory. If possible use the cache. */
442 size_t guardsize;
443 size_t reqsize;
444 void *mem = 0;
445 const int prot = (PROT_READ | PROT_WRITE);
446
447#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
448 /* Add one more page for stack coloring. Don't do it for stacks
449 with 16 times pagesize or larger. This might just cause
450 unnecessary misalignment. */
451 if (size <= 16 * pagesize_m1)
452 size += pagesize_m1 + 1;
453#endif
454
455 /* Adjust the stack size for alignment. */
456 size &= ~__static_tls_align_m1;
457 assert (size != 0);
458
459 /* Make sure the size of the stack is enough for the guard and
460 possibly the thread descriptor. */
461 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
462 if (__builtin_expect (size < ((guardsize + __static_tls_size
463 + MINIMAL_REST_STACK + pagesize_m1)
464 & ~pagesize_m1),
465 0))
466 /* The stack is too small (or the guard too large). */
467 return EINVAL;
468
469 /* Try to get a stack from the cache. */
470 reqsize = size;
471 pd = get_cached_stack (&size, &mem);
472 if (pd == NULL)
473 {
474 /* To avoid aliasing effects on a larger scale than pages we
475 adjust the allocated stack size if necessary. This way
476 allocations directly following each other will not have
477 aliasing problems. */
478#if defined MULTI_PAGE_ALIASING && MULTI_PAGE_ALIASING != 0
479 if ((size % MULTI_PAGE_ALIASING) == 0)
480 size += pagesize_m1 + 1;
481#endif
482
483 mem = mmap (NULL, size, prot,
484 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
485
486 if (__builtin_expect (mem == MAP_FAILED, 0))
487 {
488 if (errno == ENOMEM)
489 __set_errno (EAGAIN);
490
491 return errno;
492 }
493
494 /* SIZE is guaranteed to be greater than zero.
495 So we can never get a null pointer back from mmap. */
496 assert (mem != NULL);
497
498#if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
499 /* Atomically increment NCREATED. */
500 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
501
502 /* We choose the offset for coloring by incrementing it for
503 every new thread by a fixed amount. The offset is used
504 modulo the page size. Even if coloring would be better
505 relative to higher alignment values it makes no sense to
506 do it since the mmap() interface does not allow us to
507 specify any alignment for the returned memory block. */
508 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
509
510 /* Make sure the coloring offset does not disturb the alignment
511 of the TCB and static TLS block. */
512 if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
513 coloring = (((coloring + __static_tls_align_m1)
514 & ~(__static_tls_align_m1))
515 & ~pagesize_m1);
516#else
517 /* Unless specified we do not make any adjustments. */
518# define coloring 0
519#endif
520
521 /* Place the thread descriptor at the end of the stack. */
522#if defined(TLS_TCB_AT_TP)
523 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
524#elif defined(TLS_DTV_AT_TP)
525 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
526 - __static_tls_size)
527 & ~__static_tls_align_m1)
528 - TLS_PRE_TCB_SIZE);
529#endif
530
531 /* Remember the stack-related values. */
532 pd->stackblock = mem;
533 pd->stackblock_size = size;
534
535 /* We allocated the first block of the thread-specific data array.
536 This address will not change for the lifetime of this
537 descriptor. */
538 pd->specific[0] = pd->specific_1stblock;
539
540 /* This is at least the second thread. */
541 pd->header.multiple_threads = 1;
542#ifndef TLS_MULTIPLE_THREADS_IN_TCB
543 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
544#endif
545
546#ifndef __ASSUME_PRIVATE_FUTEX
547 /* The thread must know when private futexes are supported. */
548 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
549 header.private_futex);
550#endif
551
552#ifdef NEED_DL_SYSINFO
553 /* Copy the sysinfo value from the parent. */
554 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
555#endif
556
557 /* The process ID is also the same as that of the caller. */
558 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
559
560 /* Allocate the DTV for this thread. */
561 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
562 {
563 /* Something went wrong. */
564 assert (errno == ENOMEM);
565
566 /* Free the stack memory we just allocated. */
567 (void) munmap (mem, size);
568
569 return EAGAIN;
570 }
571
572
573 /* Prepare to modify global data. */
574 lll_lock (stack_cache_lock, LLL_PRIVATE);
575
576 /* And add to the list of stacks in use. */
577 stack_list_add (&pd->list, &stack_used);
578
579 lll_unlock (stack_cache_lock, LLL_PRIVATE);
580
581
582 /* Note that all of the stack and the thread descriptor is
583 zeroed. This means we do not have to initialize fields
584 with initial value zero. This is specifically true for
585 the 'tid' field which is always set back to zero once the
586 stack is not used anymore and for the 'guardsize' field
587 which will be read next. */
588 }
589
590 /* Create or resize the guard area if necessary. */
591 if (__builtin_expect (guardsize > pd->guardsize, 0))
592 {
593#ifdef NEED_SEPARATE_REGISTER_STACK
594 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
595#elif defined _STACK_GROWS_DOWN
596 char *guard = mem;
597#elif defined _STACK_GROWS_UP
598 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
599#endif
600 if (mprotect (guard, guardsize, PROT_NONE) != 0)
601 {
602 int err;
603 mprot_error:
604 err = errno;
605
606 lll_lock (stack_cache_lock, LLL_PRIVATE);
607
608 /* Remove the thread from the list. */
609 stack_list_del (&pd->list);
610
611 lll_unlock (stack_cache_lock, LLL_PRIVATE);
612
613 /* Get rid of the TLS block we allocated. */
614 _dl_deallocate_tls (TLS_TPADJ (pd), false);
615
616 /* Free the stack memory regardless of whether the size
617 of the cache is over the limit or not. If this piece
618 of memory caused problems we better do not use it
619 anymore. Uh, and we ignore possible errors. There
620 is nothing we could do. */
621 (void) munmap (mem, size);
622
623 return err;
624 }
625
626 pd->guardsize = guardsize;
627 }
628 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
629 0))
630 {
631 /* The old guard area is too large. */
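	  /* This can happen when the descriptor was taken from the stack
	     cache and the cached stack had a bigger guard; hand the pages
	     that are no longer needed as guard back to the usable stack by
	     restoring read/write protection on them.  */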
632
633#ifdef NEED_SEPARATE_REGISTER_STACK
634 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
635 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
636
637 if (oldguard < guard
638 && mprotect (oldguard, guard - oldguard, prot) != 0)
639 goto mprot_error;
640
641 if (mprotect (guard + guardsize,
642 oldguard + pd->guardsize - guard - guardsize,
643 prot) != 0)
644 goto mprot_error;
645#elif defined _STACK_GROWS_DOWN
646 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
647 prot) != 0)
648 goto mprot_error;
649#elif defined _STACK_GROWS_UP
650 if (mprotect ((char *) pd - pd->guardsize,
651 pd->guardsize - guardsize, prot) != 0)
652 goto mprot_error;
653#endif
654
655 pd->guardsize = guardsize;
656 }
657 /* The pthread_getattr_np() calls need to be given the size
658 requested in the attribute, regardless of how large the
659 guard size actually in use is. */
660 pd->reported_guardsize = guardsize;
661 }
662
663 /* Initialize the lock. We have to do this unconditionally since the
664 stillborn thread could be canceled while the lock is taken. */
665 pd->lock = LLL_LOCK_INITIALIZER;
666
667 /* The robust mutex lists also need to be initialized
668 unconditionally because the cleanup for the previous stack owner
669 might have happened in the kernel. */
670 pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
671 - offsetof (pthread_mutex_t,
672 __data.__list.__next));
673 pd->robust_head.list_op_pending = NULL;
674#ifdef __PTHREAD_MUTEX_HAVE_PREV
675 pd->robust_prev = &pd->robust_head;
676#endif
677 pd->robust_head.list = &pd->robust_head;
678
679 /* We place the thread descriptor at the end of the stack. */
680 *pdp = pd;
681
682#if defined(TLS_TCB_AT_TP)
683 /* The stack begins before the TCB and the static TLS block. */
684 stacktop = ((char *) (pd + 1) - __static_tls_size);
685#elif defined(TLS_DTV_AT_TP)
686 stacktop = (char *) (pd - 1);
687#endif
688
689#ifdef NEED_SEPARATE_REGISTER_STACK
690 *stack = pd->stackblock;
691 *stacksize = stacktop - *stack;
692#elif defined _STACK_GROWS_DOWN
693 *stack = stacktop;
694#elif defined _STACK_GROWS_UP
695 *stack = pd->stackblock;
696 assert (*stack > 0);
697#endif
698
699 return 0;
700}
701
702
703void
704internal_function
705__deallocate_stack (struct pthread *pd)
706{
707 lll_lock (stack_cache_lock, LLL_PRIVATE);
708
709 /* Remove the thread from the list of threads with user defined
710 stacks. */
711 stack_list_del (&pd->list);
712
713 /* Not much to do. Just free the mmap()ed memory. Note that we do
714 not reset the 'used' flag in the 'tid' field. This is done by
715 the kernel. If no thread has been created yet this field is
716 still zero. */
717 if (__builtin_expect (! pd->user_stack, 1))
718 (void) queue_stack (pd);
719 else
720 /* Free the memory associated with the ELF TLS. */
721 _dl_deallocate_tls (TLS_TPADJ (pd), false);
722
723 lll_unlock (stack_cache_lock, LLL_PRIVATE);
724}
725
726
727int
728internal_function
729__make_stacks_executable (void **stack_endp)
730{
731 /* First the main thread's stack. */
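  /* Changing the protection of the main thread's stack is not
     supported here; the function reports EPERM right away, so the loop
     over the thread stacks below is never reached.  */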
732 int err = EPERM;
733 if (err != 0)
734 return err;
735
736#ifdef NEED_SEPARATE_REGISTER_STACK
737 const size_t pagemask = ~(__getpagesize () - 1);
738#endif
739
740 lll_lock (stack_cache_lock, LLL_PRIVATE);
741
742 list_t *runp;
743 list_for_each (runp, &stack_used)
744 {
745 err = change_stack_perm (list_entry (runp, struct pthread, list)
746#ifdef NEED_SEPARATE_REGISTER_STACK
747 , pagemask
748#endif
749 );
750 if (err != 0)
751 break;
752 }
753
754 /* Also change the permission for the currently unused stacks. This
755 might be wasted time, but it is better spent here than adding a check
756 in the fast path. */
757 if (err == 0)
758 list_for_each (runp, &stack_cache)
759 {
760 err = change_stack_perm (list_entry (runp, struct pthread, list)
761#ifdef NEED_SEPARATE_REGISTER_STACK
762 , pagemask
763#endif
764 );
765 if (err != 0)
766 break;
767 }
768
769 lll_unlock (stack_cache_lock, LLL_PRIVATE);
770
771 return err;
772}
773
774
775/* In case of a fork() call the memory allocation in the child will be
776 the same but only one thread is running. All stacks except that of
777 the one running thread are not used anymore. We have to recycle
778 them. */
779void
780__reclaim_stacks (void)
781{
782 struct pthread *self = (struct pthread *) THREAD_SELF;
783
784 /* No locking necessary. The caller is the only stack in use. But
785 we have to be aware that we might have interrupted a list
786 operation. */
787
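  /* If fork() interrupted a list operation, in_flight_stack is
     non-zero: bit 0 says whether it was an addition or a removal and
     the other bits identify the list element.  Complete or undo that
     operation before walking the lists.  */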
788 if (in_flight_stack != 0)
789 {
790 bool add_p = in_flight_stack & 1;
791 list_t *elem = (list_t *) (in_flight_stack & ~UINTMAX_C (1));
792
793 if (add_p)
794 {
795 /* We always add at the beginning of the list. So in this
796 case we only need to check the beginning of these lists. */
797 int check_list (list_t *l)
798 {
799 if (l->next->prev != l)
800 {
801 assert (l->next->prev == elem);
802
803 elem->next = l->next;
804 elem->prev = l;
805 l->next = elem;
806
807 return 1;
808 }
809
810 return 0;
811 }
812
813 if (check_list (&stack_used) == 0)
814 (void) check_list (&stack_cache);
815 }
816 else
817 {
818 /* We can simply always replay the delete operation. */
819 elem->next->prev = elem->prev;
820 elem->prev->next = elem->next;
821 }
822 }
823
824 /* Mark all stacks except the still running one as free. */
825 list_t *runp;
826 list_for_each (runp, &stack_used)
827 {
828 struct pthread *curp = list_entry (runp, struct pthread, list);
829 if (curp != self)
830 {
831 /* This marks the stack as free. */
832 curp->tid = 0;
833
834 /* The PID field must be initialized for the new process. */
835 curp->pid = self->pid;
836
837 /* Account for the size of the stack. */
838 stack_cache_actsize += curp->stackblock_size;
839
840 if (curp->specific_used)
841 {
842 /* Clear the thread-specific data. */
843 memset (curp->specific_1stblock, '\0',
844 sizeof (curp->specific_1stblock));
845
846 curp->specific_used = false;
847
848 size_t cnt;
849 for (cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
850 if (curp->specific[cnt] != NULL)
851 {
852 memset (curp->specific[cnt], '\0',
853 sizeof (curp->specific_1stblock));
854
855 /* We have allocated the block which we do not
856 free here so re-set the bit. */
857 curp->specific_used = true;
858 }
859 }
860 }
861 }
862
863 /* Reset the PIDs in any cached stacks. */
864 list_for_each (runp, &stack_cache)
865 {
866 struct pthread *curp = list_entry (runp, struct pthread, list);
867 curp->pid = self->pid;
868 }
869
870 /* Add the stack of all running threads to the cache. */
871 list_splice (&stack_used, &stack_cache);
872
873 /* Remove the entry for the current thread from the cache list
874 and add it to the list of running threads. Which of the two
875 lists it is added to is decided by the user_stack flag. */
876 stack_list_del (&self->list);
877
878 /* Re-initialize the lists for all the threads. */
879 INIT_LIST_HEAD (&stack_used);
880 INIT_LIST_HEAD (&__stack_user);
881
882 if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
883 list_add (&self->list, &__stack_user);
884 else
885 list_add (&self->list, &stack_used);
886
887 /* There is one thread running. */
888 __nptl_nthreads = 1;
889
890 in_flight_stack = 0;
891
892 /* Initialize the lock. */
893 stack_cache_lock = LLL_LOCK_INITIALIZER;
894}
895
896
897#if HP_TIMING_AVAIL
898# undef __find_thread_by_id
899/* Find a thread given the thread ID. */
900attribute_hidden
901struct pthread *
902__find_thread_by_id (pid_t tid)
903{
904 struct pthread *result = NULL;
905
906 lll_lock (stack_cache_lock, LLL_PRIVATE);
907
908 /* Iterate over the list with system-allocated threads first. */
909 list_t *runp;
910 list_for_each (runp, &stack_used)
911 {
912 struct pthread *curp;
913
914 curp = list_entry (runp, struct pthread, list);
915
916 if (curp->tid == tid)
917 {
918 result = curp;
919 goto out;
920 }
921 }
922
923 /* Now the list with threads using user-allocated stacks. */
924 list_for_each (runp, &__stack_user)
925 {
926 struct pthread *curp;
927
928 curp = list_entry (runp, struct pthread, list);
929
930 if (curp->tid == tid)
931 {
932 result = curp;
933 goto out;
934 }
935 }
936
937 out:
938 lll_unlock (stack_cache_lock, LLL_PRIVATE);
939
940 return result;
941}
942#endif
943
944
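/* Mark T as having a setxid request pending by setting the SETXID bit
   in its cancelhandling word, unless the thread is already exiting.
   Clearing setxid_futex keeps the thread from exiting before the
   request has been handled.  */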
945static void
946internal_function
947setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
948{
949 int ch;
950
951 /* Don't let the thread exit before the setxid handler runs. */
952 t->setxid_futex = 0;
953
954 do
955 {
956 ch = t->cancelhandling;
957
958 /* If the thread is exiting right now, ignore it. */
959 if ((ch & EXITING_BITMASK) != 0)
960 return;
961 }
962 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
963 ch | SETXID_BITMASK, ch));
964}
965
966
967static void
968internal_function
969setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
970{
971 int ch;
972
973 do
974 {
975 ch = t->cancelhandling;
976 if ((ch & SETXID_BITMASK) == 0)
977 return;
978 }
979 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
980 ch & ~SETXID_BITMASK, ch));
981
982 /* Release the futex just in case. */
983 t->setxid_futex = 1;
984 lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
985}
986
987
988static int
989internal_function
990setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
991{
992 if ((t->cancelhandling & SETXID_BITMASK) == 0)
993 return 0;
994
995 int val;
996 INTERNAL_SYSCALL_DECL (err);
997#if defined (__ASSUME_TGKILL) && __ASSUME_TGKILL
998 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
999 t->tid, SIGSETXID);
1000#else
1001# ifdef __NR_tgkill
1002 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
1003 t->tid, SIGSETXID);
1004 if (INTERNAL_SYSCALL_ERROR_P (val, err)
1005 && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
1006# endif
1007 val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
1008#endif
1009
1010 /* If this failed, the thread must not have started yet or it has already exited. */
1011 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1012 {
1013 atomic_increment (&cmdp->cntr);
1014 return 1;
1015 }
1016 else
1017 return 0;
1018}
1019
1020
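/* Perform a process-wide identity change (setuid, setgid, ...): mark
   every other thread, send it the internal SIGSETXID signal so that it
   performs the same syscall from its signal handler, wait until all of
   them have done so, and finally perform the syscall in the calling
   thread.  */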
1021int
1022attribute_hidden
1023__nptl_setxid (struct xid_command *cmdp)
1024{
1025 int signalled;
1026 int result;
1027 lll_lock (stack_cache_lock, LLL_PRIVATE);
1028
1029 __xidcmd = cmdp;
1030 cmdp->cntr = 0;
1031
1032 struct pthread *self = THREAD_SELF;
1033
1034 /* Iterate over the list with system-allocated threads first. */
1035 list_t *runp;
1036 list_for_each (runp, &stack_used)
1037 {
1038 struct pthread *t = list_entry (runp, struct pthread, list);
1039 if (t == self)
1040 continue;
1041
1042 setxid_mark_thread (cmdp, t);
1043 }
1044
1045 /* Now the list with threads using user-allocated stacks. */
1046 list_for_each (runp, &__stack_user)
1047 {
1048 struct pthread *t = list_entry (runp, struct pthread, list);
1049 if (t == self)
1050 continue;
1051
1052 setxid_mark_thread (cmdp, t);
1053 }
1054
1055 /* Iterate until we don't succeed in signalling anyone. That means
1056 we have gotten all running threads, and their children will be
1057 automatically correct once started. */
1058 do
1059 {
1060 signalled = 0;
1061
1062 list_for_each (runp, &stack_used)
1063 {
1064 struct pthread *t = list_entry (runp, struct pthread, list);
1065 if (t == self)
1066 continue;
1067
1068 signalled += setxid_signal_thread (cmdp, t);
1069 }
1070
1071 list_for_each (runp, &__stack_user)
1072 {
1073 struct pthread *t = list_entry (runp, struct pthread, list);
1074 if (t == self)
1075 continue;
1076
1077 signalled += setxid_signal_thread (cmdp, t);
1078 }
1079
1080 int cur = cmdp->cntr;
1081 while (cur != 0)
1082 {
1083 lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1084 cur = cmdp->cntr;
1085 }
1086 }
1087 while (signalled != 0);
1088
1089 /* Clean up flags, so that no thread blocks during exit waiting
1090 for a signal which will never come. */
1091 list_for_each (runp, &stack_used)
1092 {
1093 struct pthread *t = list_entry (runp, struct pthread, list);
1094 if (t == self)
1095 continue;
1096
1097 setxid_unmark_thread (cmdp, t);
1098 }
1099
1100 list_for_each (runp, &__stack_user)
1101 {
1102 struct pthread *t = list_entry (runp, struct pthread, list);
1103 if (t == self)
1104 continue;
1105
1106 setxid_unmark_thread (cmdp, t);
1107 }
1108
1109 /* This must be last, otherwise the current thread might not have
1110 permission to send the SIGSETXID signal to the other threads. */
1111 INTERNAL_SYSCALL_DECL (err);
1112 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1113 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1114 if (INTERNAL_SYSCALL_ERROR_P (result, err))
1115 {
1116 __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
1117 result = -1;
1118 }
1119
1120 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1121 return result;
1122}
1123
1124static inline void __attribute__((always_inline))
1125init_one_static_tls (struct pthread *curp, struct link_map *map)
1126{
1127 dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
1128# if defined(TLS_TCB_AT_TP)
1129 void *dest = (char *) curp - map->l_tls_offset;
1130# elif defined(TLS_DTV_AT_TP)
1131 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1132# else
1133# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1134# endif
1135
1136 /* Fill in the DTV slot so that a later LD/GD access will find it. */
1137 dtv[map->l_tls_modid].pointer.val = dest;
1138 dtv[map->l_tls_modid].pointer.is_static = true;
1139
1140 /* Initialize the memory. */
1141 memset (mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1142 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1143}
1144
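/* Called when a newly loaded object needs static TLS: set up that
   object's TLS block in every thread that already exists.  Threads
   created later get it through _dl_allocate_tls_init.  */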
1145void
1146attribute_hidden
1147__pthread_init_static_tls (struct link_map *map)
1148{
1149 lll_lock (stack_cache_lock, LLL_PRIVATE);
1150
1151 /* Iterate over the list with system-allocated threads first. */
1152 list_t *runp;
1153 list_for_each (runp, &stack_used)
1154 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1155
1156 /* Now the list with threads using user-allocated stacks. */
1157 list_for_each (runp, &__stack_user)
1158 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1159
1160 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1161}
1162
1163
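/* Wait until no other thread still uses the global scope (GSCOPE) for a
   symbol lookup.  The dynamic linker calls this before modifying a
   search scope so that concurrent lookups never see a half-updated
   list.  */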
1164void
1165attribute_hidden
1166__wait_lookup_done (void)
1167{
1168 lll_lock (stack_cache_lock, LLL_PRIVATE);
1169
1170 struct pthread *self = THREAD_SELF;
1171
1172 /* Iterate over the list with system-allocated threads first. */
1173 list_t *runp;
1174 list_for_each (runp, &stack_used)
1175 {
1176 struct pthread *t = list_entry (runp, struct pthread, list);
1177 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1178 continue;
1179
1180 int *const gscope_flagp = &t->header.gscope_flag;
1181
1182 /* We have to wait until this thread is done with the global
1183 scope. First tell the thread that we are waiting and
1184 possibly have to be woken. */
1185 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1186 THREAD_GSCOPE_FLAG_WAIT,
1187 THREAD_GSCOPE_FLAG_USED))
1188 continue;
1189
1190 do
1191 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1192 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1193 }
1194
1195 /* Now the list with threads using user-allocated stacks. */
1196 list_for_each (runp, &__stack_user)
1197 {
1198 struct pthread *t = list_entry (runp, struct pthread, list);
1199 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1200 continue;
1201
1202 int *const gscope_flagp = &t->header.gscope_flag;
1203
1204 /* We have to wait until this thread is done with the global
1205 scope. First tell the thread that we are waiting and
1206 possibly have to be woken. */
1207 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1208 THREAD_GSCOPE_FLAG_WAIT,
1209 THREAD_GSCOPE_FLAG_USED))
1210 continue;
1211
1212 do
1213 lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1214 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1215 }
1216
1217 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1218}