/* Thread-local storage handling in the ELF dynamic linker.
2 AArch64 version.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
20
21#include <sysdep.h>
22#include <tls.h>
23#include "tlsdesc.h"
24
/* Number of Q-register pairs (q0/q1 ... q30/q31) handled by the
   save/restore macros below.  Each pair occupies 32 bytes of stack.  */
#define NSAVEDQREGPAIRS	16

/* Spill q0-q31 to the stack.  The first store pre-decrements SP by the
   size of the whole save area and the CFA offset is adjusted to match;
   every further pair is written to its own 32-byte slot above the new
   SP.  */
#define SAVE_Q_REGISTERS					\
	stp	q0,  q1,  [sp, #-32*NSAVEDQREGPAIRS]!;		\
	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);		\
	stp	q2,  q3,  [sp, #32*1];				\
	stp	q4,  q5,  [sp, #32*2];				\
	stp	q6,  q7,  [sp, #32*3];				\
	stp	q8,  q9,  [sp, #32*4];				\
	stp	q10, q11, [sp, #32*5];				\
	stp	q12, q13, [sp, #32*6];				\
	stp	q14, q15, [sp, #32*7];				\
	stp	q16, q17, [sp, #32*8];				\
	stp	q18, q19, [sp, #32*9];				\
	stp	q20, q21, [sp, #32*10];				\
	stp	q22, q23, [sp, #32*11];				\
	stp	q24, q25, [sp, #32*12];				\
	stp	q26, q27, [sp, #32*13];				\
	stp	q28, q29, [sp, #32*14];				\
	stp	q30, q31, [sp, #32*15];

/* Reload q0-q31 from the area written by SAVE_Q_REGISTERS.  q0/q1 are
   loaded last because that load post-increments SP and thereby releases
   the save area; the CFA offset is adjusted back afterwards.  */
#define RESTORE_Q_REGISTERS					\
	ldp	q2,  q3,  [sp, #32*1];				\
	ldp	q4,  q5,  [sp, #32*2];				\
	ldp	q6,  q7,  [sp, #32*3];				\
	ldp	q8,  q9,  [sp, #32*4];				\
	ldp	q10, q11, [sp, #32*5];				\
	ldp	q12, q13, [sp, #32*6];				\
	ldp	q14, q15, [sp, #32*7];				\
	ldp	q16, q17, [sp, #32*8];				\
	ldp	q18, q19, [sp, #32*9];				\
	ldp	q20, q21, [sp, #32*10];				\
	ldp	q22, q23, [sp, #32*11];				\
	ldp	q24, q25, [sp, #32*12];				\
	ldp	q26, q27, [sp, #32*13];				\
	ldp	q28, q29, [sp, #32*14];				\
	ldp	q30, q31, [sp, #32*15];				\
	ldp	q0,  q1,  [sp], #32*NSAVEDQREGPAIRS;		\
	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
63
64 .text
65
66 /* Compute the thread pointer offset for symbols in the static
67 TLS block. The offset is the same for all threads.
68 Prototype:
69 _dl_tlsdesc_return (tlsdesc *) ;
70 */
71 .hidden _dl_tlsdesc_return
72 .global _dl_tlsdesc_return
73 .type _dl_tlsdesc_return,%function
74 cfi_startproc
75 .align 2
76_dl_tlsdesc_return:
77 ldr x0, [x0, #8]
78 RET
79 cfi_endproc
80 .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
81
82 /* Same as _dl_tlsdesc_return but with synchronization for
83 lazy relocation.
84 Prototype:
85 _dl_tlsdesc_return_lazy (tlsdesc *) ;
86 */
87 .hidden _dl_tlsdesc_return_lazy
88 .global _dl_tlsdesc_return_lazy
89 .type _dl_tlsdesc_return_lazy,%function
90 cfi_startproc
91 .align 2
92_dl_tlsdesc_return_lazy:
93 /* The ldar here happens after the load from [x0] at the call site
94 (that is generated by the compiler as part of the TLS access ABI),
95 so it reads the same value (this function is the final value of
96 td->entry) and thus it synchronizes with the release store to
97 td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
98 from [x0,#8] here happens after the initialization of td->arg. */
99 ldar xzr, [x0]
100 ldr x0, [x0, #8]
101 RET
102 cfi_endproc
103 .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
104
105 /* Handler for undefined weak TLS symbols.
106 Prototype:
107 _dl_tlsdesc_undefweak (tlsdesc *);
108
109 The second word of the descriptor contains the addend.
110 Return the addend minus the thread pointer. This ensures
111 that when the caller adds on the thread pointer it gets back
112 the addend. */
113
114 .hidden _dl_tlsdesc_undefweak
115 .global _dl_tlsdesc_undefweak
116 .type _dl_tlsdesc_undefweak,%function
117 cfi_startproc
118 .align 2
119_dl_tlsdesc_undefweak:
120 str x1, [sp, #-16]!
121 cfi_adjust_cfa_offset (16)
122 /* The ldar here happens after the load from [x0] at the call site
123 (that is generated by the compiler as part of the TLS access ABI),
124 so it reads the same value (this function is the final value of
125 td->entry) and thus it synchronizes with the release store to
126 td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
127 from [x0,#8] here happens after the initialization of td->arg. */
128 ldar xzr, [x0]
129 ldr x0, [x0, #8]
130 mrs x1, tpidr_el0
131 sub x0, x0, x1
132 ldr x1, [sp], #16
133 cfi_adjust_cfa_offset (-16)
134 RET
135 cfi_endproc
136 .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
137
#ifdef SHARED
	/* Handler for dynamic TLS symbols.
	   Prototype:
	   _dl_tlsdesc_dynamic (tlsdesc *) ;

	   The second word of the descriptor points to a
	   tlsdesc_dynamic_arg structure.

	   Returns the offset between the thread pointer and the
	   object referenced by the argument.

	   ptrdiff_t
	   __attribute__ ((__regparm__ (1)))
	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
	   {
	     struct tlsdesc_dynamic_arg *td = tdp->arg;
	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
	     if (__builtin_expect (td->gen_count <= dtv[0].counter
		&& (dtv[td->tlsinfo.ti_module].pointer.val
		    != TLS_DTV_UNALLOCATED),
		1))
	       return dtv[td->tlsinfo.ti_module].pointer.val
		+ td->tlsinfo.ti_offset
		- __thread_pointer;

	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
	   }

	   Only x0 (the result) is changed as seen by the caller: every
	   other general register written here, and on the slow path
	   x5-x18 and q0-q31 as well, is saved and restored.  */

	.hidden _dl_tlsdesc_dynamic
	.global _dl_tlsdesc_dynamic
	.type _dl_tlsdesc_dynamic,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_dynamic:
# define NSAVEXREGPAIRS 2
	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	mov	x29, sp

	/* Save just enough registers to support the fast path; if we
	   fall into the slow path we will save additional registers.  */

	stp	 x1,  x2, [sp, #32+16*0]
	stp	 x3,  x4, [sp, #32+16*1]

	mrs	x4, tpidr_el0		/* x4 = thread pointer.  */
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#8] here happens after the initialization of td->arg.  */
	ldar	xzr, [x0]
	ldr	x1, [x0,#8]		/* x1 = td (tdp->arg).  */
	ldr	x0, [x4]		/* x0 = dtv, read at the thread pointer.  */
	ldr	x3, [x1,#16]		/* x3 = word at td+16 (gen_count in the
					   C sketch).  */
	ldr	x2, [x0]		/* x2 = dtv[0] word (counter).  */
	cmp	x3, x2
	b.hi	2f			/* gen_count > counter: slow path.  */
	ldr	x2, [x1]		/* x2 = word at td+0 (ti_module).  */
	add	x0, x0, x2, lsl #4	/* x0 = &dtv[module], 16-byte entries.  */
	ldr	x0, [x0]		/* x0 = dtv[module].pointer.val.  */
	cmn	x0, #0x1		/* Equal to -1, i.e. the
					   TLS_DTV_UNALLOCATED test?  */
	b.eq	2f			/* Not allocated yet: slow path.  */
	ldr	x1, [x1,#8]		/* x1 = word at td+8 (ti_offset).  */
	add	x0, x0, x1
	sub	x0, x0, x4		/* Result relative to thread pointer.  */
1:
	/* Common exit, also reached from the slow path with SP back at
	   the base of the frame set up on entry.  */
	ldp	 x1,  x2, [sp, #32+16*0]
	ldp	 x3,  x4, [sp, #32+16*1]

	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	RET
2:
	/* This is the slow path.  We need to call __tls_get_addr() which
	   means we need to save and restore all the registers that the
	   callee may trash.  */

	/* This label is only reached by a branch from inside the frame
	   established on entry, but the CFI state is tracked linearly and
	   at this point still reflects the epilogue above (frame already
	   popped).  Re-apply the frame's CFA adjustment so the unwind
	   information is correct for the whole slow path, including
	   across the call below.  */
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
# undef NSAVEXREGPAIRS

	/* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	stp	 x7,  x8, [sp, #16*1]
	stp	 x9, x10, [sp, #16*2]
	stp	x11, x12, [sp, #16*3]
	stp	x13, x14, [sp, #16*4]
	stp	x15, x16, [sp, #16*5]
	stp	x17, x18, [sp, #16*6]

	SAVE_Q_REGISTERS

	/* x1 still holds td on both branches into this path; it is passed
	   as the argument (&td->tlsinfo in the C sketch).  */
	mov	x0, x1
	bl	__tls_get_addr

	mrs	x1, tpidr_el0
	sub	x0, x0, x1		/* Result relative to thread pointer.  */

	RESTORE_Q_REGISTERS

	ldp	 x7,  x8, [sp, #16*1]
	ldp	 x9, x10, [sp, #16*2]
	ldp	x11, x12, [sp, #16*3]
	ldp	x13, x14, [sp, #16*4]
	ldp	x15, x16, [sp, #16*5]
	ldp	x17, x18, [sp, #16*6]
	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	b	1b
	cfi_endproc
	.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
253
254 /* This function is a wrapper for a lazy resolver for TLS_DESC
255 RELA relocations.
256 When the actual resolver returns, it will have adjusted the
257 TLS descriptor such that we can tail-call it for it to return
258 the TP offset of the symbol. */
259
260 .hidden _dl_tlsdesc_resolve_rela
261 .global _dl_tlsdesc_resolve_rela
262 .type _dl_tlsdesc_resolve_rela,%function
263 cfi_startproc
264 .align 2
265_dl_tlsdesc_resolve_rela:
266#define NSAVEXREGPAIRS 9
267 stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
268 cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
269 mov x29, sp
270 stp x1, x4, [sp, #32+16*0]
271 stp x5, x6, [sp, #32+16*1]
272 stp x7, x8, [sp, #32+16*2]
273 stp x9, x10, [sp, #32+16*3]
274 stp x11, x12, [sp, #32+16*4]
275 stp x13, x14, [sp, #32+16*5]
276 stp x15, x16, [sp, #32+16*6]
277 stp x17, x18, [sp, #32+16*7]
278 str x0, [sp, #32+16*8]
279
280 SAVE_Q_REGISTERS
281
282 ldr x1, [x3, #8]
283 bl _dl_tlsdesc_resolve_rela_fixup
284
285 RESTORE_Q_REGISTERS
286
287 ldr x0, [sp, #32+16*8]
288 ldr x1, [x0]
289 blr x1
290
291 ldp x1, x4, [sp, #32+16*0]
292 ldp x5, x6, [sp, #32+16*1]
293 ldp x7, x8, [sp, #32+16*2]
294 ldp x9, x10, [sp, #32+16*3]
295 ldp x11, x12, [sp, #32+16*4]
296 ldp x13, x14, [sp, #32+16*5]
297 ldp x15, x16, [sp, #32+16*6]
298 ldp x17, x18, [sp, #32+16*7]
299 ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
300 cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
301 ldp x2, x3, [sp], #16
302 cfi_adjust_cfa_offset (-16)
303 RET
304#undef NSAVEXREGPAIRS
305 cfi_endproc
306 .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
307
308 /* This function is a placeholder for lazy resolving of TLS
309 relocations. Once some thread starts resolving a TLS
310 relocation, it sets up the TLS descriptor to use this
311 resolver, such that other threads that would attempt to
312 resolve it concurrently may skip the call to the original lazy
313 resolver and go straight to a condition wait.
314
315 When the actual resolver returns, it will have adjusted the
316 TLS descriptor such that we can tail-call it for it to return
317 the TP offset of the symbol. */
318
319 .hidden _dl_tlsdesc_resolve_hold
320 .global _dl_tlsdesc_resolve_hold
321 .type _dl_tlsdesc_resolve_hold,%function
322 cfi_startproc
323 .align 2
324_dl_tlsdesc_resolve_hold:
325#define NSAVEXREGPAIRS 10
3261:
327 stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
328 cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
329 mov x29, sp
330 stp x1, x2, [sp, #32+16*0]
331 stp x3, x4, [sp, #32+16*1]
332 stp x5, x6, [sp, #32+16*2]
333 stp x7, x8, [sp, #32+16*3]
334 stp x9, x10, [sp, #32+16*4]
335 stp x11, x12, [sp, #32+16*5]
336 stp x13, x14, [sp, #32+16*6]
337 stp x15, x16, [sp, #32+16*7]
338 stp x17, x18, [sp, #32+16*8]
339 str x0, [sp, #32+16*9]
340
341 SAVE_Q_REGISTERS
342
343 adr x1, 1b
344 bl _dl_tlsdesc_resolve_hold_fixup
345
346 RESTORE_Q_REGISTERS
347
348 ldr x0, [sp, #32+16*9]
349 ldr x1, [x0]
350 blr x1
351
352 ldp x1, x2, [sp, #32+16*0]
353 ldp x3, x4, [sp, #32+16*1]
354 ldp x5, x6, [sp, #32+16*2]
355 ldp x7, x8, [sp, #32+16*3]
356 ldp x9, x10, [sp, #32+16*4]
357 ldp x11, x12, [sp, #32+16*5]
358 ldp x13, x14, [sp, #32+16*6]
359 ldp x15, x16, [sp, #32+16*7]
360 ldp x17, x18, [sp, #32+16*8]
361 ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
362 cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
363 RET
364 cfi_endproc
365 .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
366#undef NSAVEXREGPAIRS