/* Thread-local storage handling in the ELF dynamic linker.
   AArch64 version.
   Copyright (C) 2011-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 | 20 |  | 
 | 21 | #include <sysdep.h> | 
 | 22 | #include <tls.h> | 
 | 23 | #include "tlsdesc.h" | 
 | 24 |  | 
 | 25 | #define NSAVEDQREGPAIRS	16 | 
 | 26 | #define SAVE_Q_REGISTERS				\ | 
 | 27 | 	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\ | 
 | 28 | 	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\ | 
 | 29 | 	stp	 q2,  q3, [sp, #32*1];			\ | 
 | 30 | 	stp	 q4,  q5, [sp, #32*2];			\ | 
 | 31 | 	stp	 q6,  q7, [sp, #32*3];			\ | 
 | 32 | 	stp	 q8,  q9, [sp, #32*4];			\ | 
 | 33 | 	stp	q10, q11, [sp, #32*5];			\ | 
 | 34 | 	stp	q12, q13, [sp, #32*6];			\ | 
 | 35 | 	stp	q14, q15, [sp, #32*7];			\ | 
 | 36 | 	stp	q16, q17, [sp, #32*8];			\ | 
 | 37 | 	stp	q18, q19, [sp, #32*9];			\ | 
 | 38 | 	stp	q20, q21, [sp, #32*10];			\ | 
 | 39 | 	stp	q22, q23, [sp, #32*11];			\ | 
 | 40 | 	stp	q24, q25, [sp, #32*12];			\ | 
 | 41 | 	stp	q26, q27, [sp, #32*13];			\ | 
 | 42 | 	stp	q28, q29, [sp, #32*14];			\ | 
 | 43 | 	stp	q30, q31, [sp, #32*15]; | 
 | 44 |  | 
 | 45 | #define RESTORE_Q_REGISTERS				\ | 
 | 46 | 	ldp	 q2,  q3, [sp, #32*1];			\ | 
 | 47 | 	ldp	 q4,  q5, [sp, #32*2];			\ | 
 | 48 | 	ldp	 q6,  q7, [sp, #32*3];			\ | 
 | 49 | 	ldp	 q8,  q9, [sp, #32*4];			\ | 
 | 50 | 	ldp	q10, q11, [sp, #32*5];			\ | 
 | 51 | 	ldp	q12, q13, [sp, #32*6];			\ | 
 | 52 | 	ldp	q14, q15, [sp, #32*7];			\ | 
 | 53 | 	ldp	q16, q17, [sp, #32*8];			\ | 
 | 54 | 	ldp	q18, q19, [sp, #32*9];			\ | 
 | 55 | 	ldp	q20, q21, [sp, #32*10];			\ | 
 | 56 | 	ldp	q22, q23, [sp, #32*11];			\ | 
 | 57 | 	ldp	q24, q25, [sp, #32*12];			\ | 
 | 58 | 	ldp	q26, q27, [sp, #32*13];			\ | 
 | 59 | 	ldp	q28, q29, [sp, #32*14];			\ | 
 | 60 | 	ldp	q30, q31, [sp, #32*15];			\ | 
 | 61 | 	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\ | 
 | 62 | 	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS); | 
 | 63 |  | 
 | 64 | 	.text | 
 | 65 |  | 
	/* _dl_tlsdesc_return (tlsdesc *)

	   Resolver for symbols located in the static TLS block, whose
	   offset from the thread pointer is identical in every thread.

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = second word of the descriptor (loaded from offset 8).
	   No other register is modified and the stack is not touched.  */
	.global	_dl_tlsdesc_return
	.hidden	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,%function
	cfi_startproc
	.p2align 2
_dl_tlsdesc_return:
	ldr	x0, [x0, #8]		/* x0 = descriptor word 1.  */
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 | 81 |  | 
	/* _dl_tlsdesc_return_lazy (tlsdesc *)

	   Variant of _dl_tlsdesc_return used with lazy relocation: it
	   returns the same value but first performs an acquire load of
	   the descriptor's first word.

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = second word of the descriptor (loaded from offset 8).  */
	.global	_dl_tlsdesc_return_lazy
	.hidden	_dl_tlsdesc_return_lazy
	.type	_dl_tlsdesc_return_lazy,%function
	cfi_startproc
	.p2align 2
_dl_tlsdesc_return_lazy:
	/* The compiler-generated call sequence (TLS access ABI) has
	   already loaded td->entry from [x0] before branching here.  This
	   ldar comes later, so it observes the same value: this function
	   is the final value of td->entry.  The acquire load therefore
	   synchronizes with the release store to td->entry done by
	   _dl_tlsdesc_resolve_rela_fixup, which guarantees that the load
	   from [x0,#8] below sees the initialized td->arg.  The loaded
	   value itself is discarded (destination is xzr).  */
	ldar	xzr, [x0]
	ldr	x0, [x0, #8]		/* x0 = td->arg.  */
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
 | 104 |  | 
	/* _dl_tlsdesc_undefweak (tlsdesc *)

	   Resolver for undefined weak TLS symbols.  The second word of
	   the descriptor holds the addend; the function returns that
	   addend minus the thread pointer, so that once the caller adds
	   the thread pointer back it ends up with just the addend.

	   In:   x0 = address of the TLS descriptor.
	   Out:  x0 = addend - tpidr_el0.
	   x1 is used as scratch and is saved/restored on the stack.  */

	.global	_dl_tlsdesc_undefweak
	.hidden	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,%function
	cfi_startproc
	.p2align 2
_dl_tlsdesc_undefweak:
	/* Keep the caller's x1: only x0 may change across this call.  */
	str	x1, [sp, #-16]!
	cfi_adjust_cfa_offset (16)

	mrs	x1, tpidr_el0		/* x1 = thread pointer.  */

	/* The call site has already loaded td->entry from [x0] (part of
	   the compiler-generated TLS access sequence).  This later ldar
	   reads the same value, since this function is the final value
	   of td->entry, and so synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup.  That ordering
	   ensures the load from [x0,#8] happens after td->arg has been
	   initialized.  */
	ldar	xzr, [x0]
	ldr	x0, [x0, #8]		/* x0 = addend.  */
	sub	x0, x0, x1		/* x0 = addend - thread pointer.  */

	ldr	x1, [sp], #16
	cfi_adjust_cfa_offset (-16)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 | 137 |  | 
 | 138 | #ifdef SHARED | 
 | 139 | 	/* Handler for dynamic TLS symbols. | 
 | 140 | 	   Prototype: | 
 | 141 | 	   _dl_tlsdesc_dynamic (tlsdesc *) ; | 
 | 142 |  | 
 | 143 | 	   The second word of the descriptor points to a | 
 | 144 | 	   tlsdesc_dynamic_arg structure. | 
 | 145 |  | 
 | 146 | 	   Returns the offset between the thread pointer and the | 
 | 147 | 	   object referenced by the argument. | 
 | 148 |  | 
 | 149 | 	   ptrdiff_t | 
 | 150 | 	   __attribute__ ((__regparm__ (1))) | 
 | 151 | 	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp) | 
 | 152 | 	   { | 
 | 153 | 	     struct tlsdesc_dynamic_arg *td = tdp->arg; | 
 | 154 | 	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); | 
 | 155 | 	     if (__builtin_expect (td->gen_count <= dtv[0].counter | 
 | 156 | 		&& (dtv[td->tlsinfo.ti_module].pointer.val | 
 | 157 | 		    != TLS_DTV_UNALLOCATED), | 
 | 158 | 		1)) | 
 | 159 | 	       return dtv[td->tlsinfo.ti_module].pointer.val | 
 | 160 | 		+ td->tlsinfo.ti_offset | 
 | 161 | 		- __thread_pointer; | 
 | 162 |  | 
 | 163 | 	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; | 
 | 164 | 	   } | 
 | 165 | 	 */ | 
 | 166 |  | 
 | 167 | 	.hidden _dl_tlsdesc_dynamic | 
 | 168 | 	.global	_dl_tlsdesc_dynamic | 
 | 169 | 	.type	_dl_tlsdesc_dynamic,%function | 
 | 170 | 	cfi_startproc | 
 | 171 | 	.align 2 | 
 | 172 | _dl_tlsdesc_dynamic: | 
 | 173 | # define NSAVEXREGPAIRS 2 | 
 | 174 | 	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]! | 
 | 175 | 	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) | 
 | 176 | 	mov	x29, sp | 
 | 177 |  | 
 | 178 | 	/* Save just enough registers to support fast path, if we fall | 
 | 179 | 	   into slow path we will save additional registers.  */ | 
 | 180 |  | 
 | 181 | 	stp	x1,  x2, [sp, #32+16*0] | 
 | 182 | 	stp	x3,  x4, [sp, #32+16*1] | 
 | 183 |  | 
 | 184 | 	mrs	x4, tpidr_el0 | 
 | 185 | 	/* The ldar here happens after the load from [x0] at the call site | 
 | 186 | 	   (that is generated by the compiler as part of the TLS access ABI), | 
 | 187 | 	   so it reads the same value (this function is the final value of | 
 | 188 | 	   td->entry) and thus it synchronizes with the release store to | 
 | 189 | 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load | 
 | 190 | 	   from [x0,#8] here happens after the initialization of td->arg.  */ | 
 | 191 | 	ldar	xzr, [x0] | 
 | 192 | 	ldr	x1, [x0,#8] | 
 | 193 | 	ldr	x0, [x4] | 
 | 194 | 	ldr	x3, [x1,#16] | 
 | 195 | 	ldr	x2, [x0] | 
 | 196 | 	cmp	x3, x2 | 
 | 197 | 	b.hi	2f | 
 | 198 | 	ldr	x2, [x1] | 
 | 199 | 	add	x0, x0, x2, lsl #4 | 
 | 200 | 	ldr	x0, [x0] | 
 | 201 | 	cmn	x0, #0x1 | 
 | 202 | 	b.eq	2f | 
 | 203 | 	ldr	x1, [x1,#8] | 
 | 204 | 	add	x0, x0, x1 | 
 | 205 | 	sub	x0, x0, x4 | 
 | 206 | 1: | 
 | 207 | 	ldp	 x1,  x2, [sp, #32+16*0] | 
 | 208 | 	ldp	 x3,  x4, [sp, #32+16*1] | 
 | 209 |  | 
 | 210 | 	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) | 
 | 211 | 	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) | 
 | 212 | # undef NSAVEXREGPAIRS | 
 | 213 | 	RET | 
 | 214 | 2: | 
 | 215 | 	/* This is the slow path. We need to call __tls_get_addr() which | 
 | 216 | 	   means we need to save and restore all the register that the | 
 | 217 | 	   callee will trash.  */ | 
 | 218 |  | 
 | 219 | 	/* Save the remaining registers that we must treat as caller save.  */ | 
 | 220 | # define NSAVEXREGPAIRS 7 | 
 | 221 | 	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]! | 
 | 222 | 	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS) | 
 | 223 | 	stp	 x7,  x8, [sp, #16*1] | 
 | 224 | 	stp	 x9, x10, [sp, #16*2] | 
 | 225 | 	stp	x11, x12, [sp, #16*3] | 
 | 226 | 	stp	x13, x14, [sp, #16*4] | 
 | 227 | 	stp	x15, x16, [sp, #16*5] | 
 | 228 | 	stp	x17, x18, [sp, #16*6] | 
 | 229 |  | 
 | 230 | 	SAVE_Q_REGISTERS | 
 | 231 |  | 
 | 232 | 	mov	x0, x1 | 
 | 233 | 	bl	__tls_get_addr | 
 | 234 |  | 
 | 235 | 	mrs	x1, tpidr_el0 | 
 | 236 | 	sub	x0, x0, x1 | 
 | 237 |  | 
 | 238 | 	RESTORE_Q_REGISTERS | 
 | 239 |  | 
 | 240 | 	ldp	 x7,  x8, [sp, #16*1] | 
 | 241 | 	ldp	 x9, x10, [sp, #16*2] | 
 | 242 | 	ldp	x11, x12, [sp, #16*3] | 
 | 243 | 	ldp	x13, x14, [sp, #16*4] | 
 | 244 | 	ldp	x15, x16, [sp, #16*5] | 
 | 245 | 	ldp	x17, x18, [sp, #16*6] | 
 | 246 | 	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS | 
 | 247 | 	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS) | 
 | 248 | 	b	1b | 
 | 249 | 	cfi_endproc | 
 | 250 | 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic | 
 | 251 | # undef NSAVEXREGPAIRS | 
 | 252 | #endif | 
 | 253 |  | 
	/* This function is a wrapper for a lazy resolver for TLS_DESC
	   RELA relocations.
	   When the actual resolver returns, it will have adjusted the
	   TLS descriptor such that we can call its new entry point for it
	   to return the TP offset of the symbol.

	   In:   x0 = address of the TLS descriptor.
	         x3 = base address from which the second argument of
	              _dl_tlsdesc_resolve_rela_fixup is loaded (offset 8).
	         The caller's x2 and x3 are already on the stack at [sp]
	         and [sp,#8]; they are popped here before returning
	         (presumably pushed by the TLSDESC PLT stub -- confirm
	         against the linker-generated code).
	   Out:  x0 = value returned by the descriptor's updated entry.
	   All other registers touched here are saved and restored.  */

	.hidden _dl_tlsdesc_resolve_rela
	.global	_dl_tlsdesc_resolve_rela
	.type	_dl_tlsdesc_resolve_rela,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_resolve_rela:
#define	NSAVEXREGPAIRS 9
	/* On entry the stack already holds the 16 bytes with x2 and x3
	   that are popped at the end, so the CFA is sp + 16 from the very
	   first instruction.  */
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (x2, 0)
	cfi_rel_offset (x3, 8)
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	/* Tell the unwinder where the frame record is: x30 is clobbered
	   by the calls below.  */
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	stp	 x1,  x4, [sp, #32+16*0]
	stp	 x5,  x6, [sp, #32+16*1]
	stp	 x7,  x8, [sp, #32+16*2]
	stp	 x9, x10, [sp, #32+16*3]
	stp	x11, x12, [sp, #32+16*4]
	stp	x13, x14, [sp, #32+16*5]
	stp	x15, x16, [sp, #32+16*6]
	stp	x17, x18, [sp, #32+16*7]
	str	x0,       [sp, #32+16*8]	/* Keep the descriptor address.  */

	SAVE_Q_REGISTERS

	/* x0 is still the descriptor address (first argument); the second
	   argument is loaded from [x3,#8].  */
	ldr	x1, [x3, #8]
	bl	_dl_tlsdesc_resolve_rela_fixup

	RESTORE_Q_REGISTERS

	/* Call the entry point that is now stored in the descriptor, with
	   the descriptor address as its argument.  */
	ldr	x0, [sp, #32+16*8]
	ldr	x1, [x0]
	blr	x1

	ldp	 x1,  x4, [sp, #32+16*0]
	ldp	 x5,  x6, [sp, #32+16*1]
	ldp	 x7,  x8, [sp, #32+16*2]
	ldp	 x9, x10, [sp, #32+16*3]
	ldp	x11, x12, [sp, #32+16*4]
	ldp	x13, x14, [sp, #32+16*5]
	ldp	x15, x16, [sp, #32+16*6]
	ldp	x17, x18, [sp, #32+16*7]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
	/* Pop the x2/x3 pair that was on the stack at entry.  */
	ldp	x2, x3, [sp], #16
	cfi_adjust_cfa_offset (-16)
	cfi_restore (x2)
	cfi_restore (x3)
	RET
#undef NSAVEXREGPAIRS
	cfi_endproc
	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
 | 307 |  | 
	/* This function is a placeholder for lazy resolving of TLS
	relocations.  Once some thread starts resolving a TLS
	relocation, it sets up the TLS descriptor to use this
	resolver, such that other threads that would attempt to
	resolve it concurrently may skip the call to the original lazy
	resolver and go straight to a condition wait.

	When the actual resolver returns, it will have adjusted the
	TLS descriptor such that we can call its new entry point for it
	to return the TP offset of the symbol.

	In:   x0 = address of the TLS descriptor.
	Out:  x0 = value returned by the descriptor's updated entry.
	All other registers touched here are saved and restored.  */

	.hidden _dl_tlsdesc_resolve_hold
	.global	_dl_tlsdesc_resolve_hold
	.type	_dl_tlsdesc_resolve_hold,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_resolve_hold:
#define	NSAVEXREGPAIRS 10
1:
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	/* Tell the unwinder where the frame record is: x30 is clobbered
	   by the calls below.  */
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	stp	 x1,  x2, [sp, #32+16*0]
	stp	 x3,  x4, [sp, #32+16*1]
	stp	 x5,  x6, [sp, #32+16*2]
	stp	 x7,  x8, [sp, #32+16*3]
	stp	 x9, x10, [sp, #32+16*4]
	stp	x11, x12, [sp, #32+16*5]
	stp	x13, x14, [sp, #32+16*6]
	stp	x15, x16, [sp, #32+16*7]
	stp	x17, x18, [sp, #32+16*8]
	str	x0,       [sp, #32+16*9]	/* Keep the descriptor address.  */

	SAVE_Q_REGISTERS

	/* x0 is still the descriptor address (first argument); the second
	   argument is the address of this function itself (label 1 sits
	   at its first instruction).  */
	adr	x1, 1b
	bl	_dl_tlsdesc_resolve_hold_fixup

	RESTORE_Q_REGISTERS

	/* Call the entry point that is now stored in the descriptor, with
	   the descriptor address as its argument.  */
	ldr	x0, [sp, #32+16*9]
	ldr	x1, [x0]
	blr	x1

	ldp	 x1,  x2, [sp, #32+16*0]
	ldp	 x3,  x4, [sp, #32+16*1]
	ldp	 x5,  x6, [sp, #32+16*2]
	ldp	 x7,  x8, [sp, #32+16*3]
	ldp	 x9, x10, [sp, #32+16*4]
	ldp	x11, x12, [sp, #32+16*5]
	ldp	x13, x14, [sp, #32+16*6]
	ldp	x15, x16, [sp, #32+16*7]
	ldp	x17, x18, [sp, #32+16*8]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
#undef NSAVEXREGPAIRS