lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame^] | 1 | /* Thread-local storage handling in the ELF dynamic linker. |
| 2 | AArch64 version. |
| 3 | Copyright (C) 2011-2015 Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of the GNU C Library. |
| 6 | |
| 7 | The GNU C Library is free software; you can redistribute it and/or |
| 8 | modify it under the terms of the GNU Lesser General Public |
| 9 | License as published by the Free Software Foundation; either |
| 10 | version 2.1 of the License, or (at your option) any later version. |
| 11 | |
| 12 | The GNU C Library is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | Lesser General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU Lesser General Public |
| 18 | License along with the GNU C Library; if not, see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | #include <sysdep.h> |
| 22 | #include <tls.h> |
| 23 | #include "tlsdesc.h" |
| 24 | |
/* SAVE_Q_REGISTERS pushes q0-q31 onto the stack as NSAVEDQREGPAIRS
   pairs of 32 bytes each.  The first stp uses pre-indexed writeback to
   lower sp by 32*NSAVEDQREGPAIRS bytes (the CFA offset is adjusted to
   match); the remaining pairs are stored at fixed offsets from the
   new sp.  */
| 25 | #define NSAVEDQREGPAIRS 16 |
| 26 | #define SAVE_Q_REGISTERS \ |
| 27 | stp q0, q1, [sp, #-32*NSAVEDQREGPAIRS]!; \ |
| 28 | cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS); \ |
| 29 | stp q2, q3, [sp, #32*1]; \ |
| 30 | stp q4, q5, [sp, #32*2]; \ |
| 31 | stp q6, q7, [sp, #32*3]; \ |
| 32 | stp q8, q9, [sp, #32*4]; \ |
| 33 | stp q10, q11, [sp, #32*5]; \ |
| 34 | stp q12, q13, [sp, #32*6]; \ |
| 35 | stp q14, q15, [sp, #32*7]; \ |
| 36 | stp q16, q17, [sp, #32*8]; \ |
| 37 | stp q18, q19, [sp, #32*9]; \ |
| 38 | stp q20, q21, [sp, #32*10]; \ |
| 39 | stp q22, q23, [sp, #32*11]; \ |
| 40 | stp q24, q25, [sp, #32*12]; \ |
| 41 | stp q26, q27, [sp, #32*13]; \ |
| 42 | stp q28, q29, [sp, #32*14]; \ |
| 43 | stp q30, q31, [sp, #32*15]; |
| 44 | |
/* RESTORE_Q_REGISTERS reloads q0-q31 from the same offsets that
   SAVE_Q_REGISTERS stored them at.  q0/q1 are loaded last because that
   ldp uses post-indexed writeback to release the 32*NSAVEDQREGPAIRS
   bytes of stack; the CFA offset is adjusted back accordingly.  */
| 45 | #define RESTORE_Q_REGISTERS \ |
| 46 | ldp q2, q3, [sp, #32*1]; \ |
| 47 | ldp q4, q5, [sp, #32*2]; \ |
| 48 | ldp q6, q7, [sp, #32*3]; \ |
| 49 | ldp q8, q9, [sp, #32*4]; \ |
| 50 | ldp q10, q11, [sp, #32*5]; \ |
| 51 | ldp q12, q13, [sp, #32*6]; \ |
| 52 | ldp q14, q15, [sp, #32*7]; \ |
| 53 | ldp q16, q17, [sp, #32*8]; \ |
| 54 | ldp q18, q19, [sp, #32*9]; \ |
| 55 | ldp q20, q21, [sp, #32*10]; \ |
| 56 | ldp q22, q23, [sp, #32*11]; \ |
| 57 | ldp q24, q25, [sp, #32*12]; \ |
| 58 | ldp q26, q27, [sp, #32*13]; \ |
| 59 | ldp q28, q29, [sp, #32*14]; \ |
| 60 | ldp q30, q31, [sp, #32*15]; \ |
| 61 | ldp q0, q1, [sp], #32*NSAVEDQREGPAIRS; \ |
| 62 | cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS); |
| 63 | |
| 64 | .text |
| 65 | |
| 66 | /* Compute the thread pointer offset for symbols in the static |
| 67 | TLS block. The offset is the same for all threads. |
| 68 | Prototype: |
| 69 | _dl_tlsdesc_return (tlsdesc *) ; |
| 70 | */ |
| 71 | .hidden _dl_tlsdesc_return |
| 72 | .global _dl_tlsdesc_return |
| 73 | .type _dl_tlsdesc_return,%function |
| 74 | cfi_startproc |
| 75 | .align 2 |
| 76 | _dl_tlsdesc_return: |
/* x0 points at the descriptor on entry; return its second word
   (offset 8) in x0.  No other register is written.  */
| 77 | ldr x0, [x0, #8] |
| 78 | RET |
| 79 | cfi_endproc |
| 80 | .size _dl_tlsdesc_return, .-_dl_tlsdesc_return |
| 81 | |
| 82 | /* Same as _dl_tlsdesc_return but with synchronization for |
| 83 | lazy relocation. |
| 84 | Prototype: |
| 85 | _dl_tlsdesc_return_lazy (tlsdesc *) ; |
| 86 | */ |
| 87 | .hidden _dl_tlsdesc_return_lazy |
| 88 | .global _dl_tlsdesc_return_lazy |
| 89 | .type _dl_tlsdesc_return_lazy,%function |
| 90 | cfi_startproc |
| 91 | .align 2 |
| 92 | _dl_tlsdesc_return_lazy: |
| 93 | /* The ldar here happens after the load from [x0] at the call site |
| 94 | (that is generated by the compiler as part of the TLS access ABI), |
| 95 | so it reads the same value (this function is the final value of |
| 96 | td->entry) and thus it synchronizes with the release store to |
| 97 | td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load |
| 98 | from [x0,#8] here happens after the initialization of td->arg. */ |
/* The destination is xzr, so the loaded value itself is discarded;
   the load-acquire is issued only for its ordering effect.  */
| 99 | ldar xzr, [x0] |
/* Return the descriptor's second word (offset 8) in x0.  */
| 100 | ldr x0, [x0, #8] |
| 101 | RET |
| 102 | cfi_endproc |
| 103 | .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy |
| 104 | |
| 105 | /* Handler for undefined weak TLS symbols. |
| 106 | Prototype: |
| 107 | _dl_tlsdesc_undefweak (tlsdesc *); |
| 108 | |
| 109 | The second word of the descriptor contains the addend. |
| 110 | Return the addend minus the thread pointer. This ensures |
| 111 | that when the caller adds on the thread pointer it gets back |
| 112 | the addend. */ |
| 113 | |
| 114 | .hidden _dl_tlsdesc_undefweak |
| 115 | .global _dl_tlsdesc_undefweak |
| 116 | .type _dl_tlsdesc_undefweak,%function |
| 117 | cfi_startproc |
| 118 | .align 2 |
| 119 | _dl_tlsdesc_undefweak: |
/* x1 is used as a scratch register below, so the incoming value is
   pushed first.  16 bytes of stack are reserved for this single
   8-byte register.  */
| 120 | str x1, [sp, #-16]! |
| 121 | cfi_adjust_cfa_offset (16) |
| 122 | /* The ldar here happens after the load from [x0] at the call site |
| 123 | (that is generated by the compiler as part of the TLS access ABI), |
| 124 | so it reads the same value (this function is the final value of |
| 125 | td->entry) and thus it synchronizes with the release store to |
| 126 | td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load |
| 127 | from [x0,#8] here happens after the initialization of td->arg. */ |
| 128 | ldar xzr, [x0] |
/* x0 = second word of the descriptor, x1 = thread pointer
   (tpidr_el0); the result is their difference x0 - x1.  */
| 129 | ldr x0, [x0, #8] |
| 130 | mrs x1, tpidr_el0 |
| 131 | sub x0, x0, x1 |
/* Pop the saved x1 and release the 16 bytes reserved on entry.  */
| 132 | ldr x1, [sp], #16 |
| 133 | cfi_adjust_cfa_offset (-16) |
| 134 | RET |
| 135 | cfi_endproc |
| 136 | .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak |
| 137 | |
| 138 | #ifdef SHARED |
| 139 | /* Handler for dynamic TLS symbols. |
| 140 | Prototype: |
| 141 | _dl_tlsdesc_dynamic (tlsdesc *) ; |
| 142 | |
| 143 | The second word of the descriptor points to a |
| 144 | tlsdesc_dynamic_arg structure. |
| 145 | |
| 146 | Returns the offset between the thread pointer and the |
| 147 | object referenced by the argument. |
| 148 | |
| 149 | ptrdiff_t |
| 150 | __attribute__ ((__regparm__ (1))) |
| 151 | _dl_tlsdesc_dynamic (struct tlsdesc *tdp) |
| 152 | { |
| 153 | struct tlsdesc_dynamic_arg *td = tdp->arg; |
| 154 | dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); |
| 155 | if (__builtin_expect (td->gen_count <= dtv[0].counter |
| 156 | && (dtv[td->tlsinfo.ti_module].pointer.val |
| 157 | != TLS_DTV_UNALLOCATED), |
| 158 | 1)) |
| 159 | return dtv[td->tlsinfo.ti_module].pointer.val |
| 160 | + td->tlsinfo.ti_offset |
| 161 | - __thread_pointer; |
| 162 | |
| 163 | return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; |
| 164 | } |
| 165 | */ |
| 166 | |
| 167 | .hidden _dl_tlsdesc_dynamic |
| 168 | .global _dl_tlsdesc_dynamic |
| 169 | .type _dl_tlsdesc_dynamic,%function |
| 170 | cfi_startproc |
| 171 | .align 2 |
| 172 | _dl_tlsdesc_dynamic: |
| 173 | # define NSAVEXREGPAIRS 2 |
/* Fast-path frame: a 32-byte area whose first 16 bytes hold x29/x30,
   followed by NSAVEXREGPAIRS 16-byte slots (used for x1-x4 below).
   x29 is then pointed at the new frame.  */
| 174 | stp x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]! |
| 175 | cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) |
| 176 | mov x29, sp |
| 177 | |
| 178 | /* Save just enough registers to support fast path, if we fall |
| 179 | into slow path we will save additional registers. */ |
| 180 | |
| 181 | stp x1, x2, [sp, #32+16*0] |
| 182 | stp x3, x4, [sp, #32+16*1] |
| 183 | |
/* x4 = thread pointer; it stays live until the final subtraction on
   the fast path.  */
| 184 | mrs x4, tpidr_el0 |
| 185 | /* The ldar here happens after the load from [x0] at the call site |
| 186 | (that is generated by the compiler as part of the TLS access ABI), |
| 187 | so it reads the same value (this function is the final value of |
| 188 | td->entry) and thus it synchronizes with the release store to |
| 189 | td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load |
| 190 | from [x0,#8] here happens after the initialization of td->arg. */ |
| 191 | ldar xzr, [x0] |
/* x1 = second word of the descriptor (tdp->arg, i.e. td, in the C
   sketch of this function's header comment).  */
| 192 | ldr x1, [x0,#8] |
/* x0 = word stored at the thread pointer, used below as the dtv
   base.  */
| 193 | ldr x0, [x4] |
/* Compare the word at [x1,#16] with the first word of the dtv and
   take the slow path when the former is higher (unsigned).
   Presumably this is td->gen_count versus dtv[0].counter; the
   offsets are hard-coded here -- confirm against the structure
   layouts.  */
| 194 | ldr x3, [x1,#16] |
| 195 | ldr x2, [x0] |
| 196 | cmp x3, x2 |
| 197 | b.hi 2f |
/* x2 = first word of the argument (presumably tlsinfo.ti_module).
   It is scaled by 16 to index the dtv, and the first word of that
   entry is loaded into x0.  */
| 198 | ldr x2, [x1] |
| 199 | add x0, x0, x2, lsl #4 |
| 200 | ldr x0, [x0] |
/* cmn x0, #1 sets Z exactly when x0 == -1; that value (presumably
   TLS_DTV_UNALLOCATED) sends us to the slow path.  */
| 201 | cmn x0, #0x1 |
| 202 | b.eq 2f |
/* Result = entry value + word at [x1,#8] (presumably
   tlsinfo.ti_offset) - thread pointer.  */
| 203 | ldr x1, [x1,#8] |
| 204 | add x0, x0, x1 |
| 205 | sub x0, x0, x4 |
/* Common exit, also reached from the slow path: restore x1-x4 and
   x29/x30, release the frame, and return with the offset in x0.  */
| 206 | 1: |
| 207 | ldp x1, x2, [sp, #32+16*0] |
| 208 | ldp x3, x4, [sp, #32+16*1] |
| 209 | |
| 210 | ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) |
| 211 | cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) |
| 212 | # undef NSAVEXREGPAIRS |
| 213 | RET |
/* NOTE(review): the CFA adjustment just above brings the tracked
   offset back to its entry value, yet code at label 2 runs with the
   fast-path frame still on the stack.  Confirm whether the unwind
   information for the slow path needs a remember/restore-state
   pair.  */
| 214 | 2: |
| 215 | /* This is the slow path. We need to call __tls_get_addr() which |
| 216 | means we need to save and restore all the registers that the |
| 217 | callee will trash. */ |
| 218 | |
| 219 | /* Save the remaining registers that we must treat as caller save. */ |
| 220 | # define NSAVEXREGPAIRS 7 |
/* x5-x18 go into 7 additional 16-byte slots below the fast-path
   frame; x1-x4 and x29/x30 are already saved there.  */
| 221 | stp x5, x6, [sp, #-16*NSAVEXREGPAIRS]! |
| 222 | cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS) |
| 223 | stp x7, x8, [sp, #16*1] |
| 224 | stp x9, x10, [sp, #16*2] |
| 225 | stp x11, x12, [sp, #16*3] |
| 226 | stp x13, x14, [sp, #16*4] |
| 227 | stp x15, x16, [sp, #16*5] |
| 228 | stp x17, x18, [sp, #16*6] |
| 229 | |
| 230 | SAVE_Q_REGISTERS |
| 231 | |
/* x1 still holds the argument pointer loaded from the descriptor;
   pass it as the sole argument (&td->tlsinfo in the C sketch, which
   implies tlsinfo sits at offset 0 -- confirm).  */
| 232 | mov x0, x1 |
| 233 | bl __tls_get_addr |
| 234 | |
/* Convert the returned address into an offset from the thread
   pointer.  x1 is free to use: it is reloaded at label 1.  */
| 235 | mrs x1, tpidr_el0 |
| 236 | sub x0, x0, x1 |
| 237 | |
| 238 | RESTORE_Q_REGISTERS |
| 239 | |
/* x5/x6 are reloaded last because that ldp also pops the slow-path
   save area with post-indexed writeback.  */
| 240 | ldp x7, x8, [sp, #16*1] |
| 241 | ldp x9, x10, [sp, #16*2] |
| 242 | ldp x11, x12, [sp, #16*3] |
| 243 | ldp x13, x14, [sp, #16*4] |
| 244 | ldp x15, x16, [sp, #16*5] |
| 245 | ldp x17, x18, [sp, #16*6] |
| 246 | ldp x5, x6, [sp], #16*NSAVEXREGPAIRS |
| 247 | cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS) |
/* Rejoin the fast-path epilogue to restore x1-x4, x29 and x30.  */
| 248 | b 1b |
| 249 | cfi_endproc |
| 250 | .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic |
| 251 | # undef NSAVEXREGPAIRS |
| 252 | #endif |
| 253 | |
| 254 | /* This function is a wrapper for a lazy resolver for TLS_DESC |
| 255 | RELA relocations. |
| 256 | When the actual resolver returns, it will have adjusted the |
| 257 | TLS descriptor such that we can tail-call it for it to return |
| 258 | the TP offset of the symbol. */ |
| 259 | |
| 260 | .hidden _dl_tlsdesc_resolve_rela |
| 261 | .global _dl_tlsdesc_resolve_rela |
| 262 | .type _dl_tlsdesc_resolve_rela,%function |
| 263 | cfi_startproc |
| 264 | .align 2 |
| 265 | _dl_tlsdesc_resolve_rela: |
| 266 | #define NSAVEXREGPAIRS 9 |
/* Frame: a 32-byte area whose first 16 bytes hold x29/x30, followed
   by NSAVEXREGPAIRS 16-byte slots.  Eight slots hold x1 and x4-x18;
   the ninth holds x0 (the descriptor pointer).
   x2 and x3 are not stored here; they are reloaded from just above
   this frame on exit.  NOTE(review): presumably the code that
   branches here pushed them -- that code is not visible in this
   file.  */
| 267 | stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! |
| 268 | cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) |
| 269 | mov x29, sp |
| 270 | stp x1, x4, [sp, #32+16*0] |
| 271 | stp x5, x6, [sp, #32+16*1] |
| 272 | stp x7, x8, [sp, #32+16*2] |
| 273 | stp x9, x10, [sp, #32+16*3] |
| 274 | stp x11, x12, [sp, #32+16*4] |
| 275 | stp x13, x14, [sp, #32+16*5] |
| 276 | stp x15, x16, [sp, #32+16*6] |
| 277 | stp x17, x18, [sp, #32+16*7] |
| 278 | str x0, [sp, #32+16*8] |
| 279 | |
| 280 | SAVE_Q_REGISTERS |
| 281 | |
/* x0 (the descriptor pointer) is passed through unchanged as the
   first argument.  The second argument is the word at offset 8 from
   x3.  NOTE(review): x3 is set up before entry; what it points at
   cannot be determined from this file.  */
| 282 | ldr x1, [x3, #8] |
| 283 | bl _dl_tlsdesc_resolve_rela_fixup |
| 284 | |
| 285 | RESTORE_Q_REGISTERS |
| 286 | |
/* Reload the descriptor pointer, fetch its first word (the entry
   function, which the fixup is expected to have replaced) and call
   it.  Its result is left in x0 as this function's return value.  */
| 287 | ldr x0, [sp, #32+16*8] |
| 288 | ldr x1, [x0] |
| 289 | blr x1 |
| 290 | |
/* Restore everything saved on entry except x0, then pop the frame.  */
| 291 | ldp x1, x4, [sp, #32+16*0] |
| 292 | ldp x5, x6, [sp, #32+16*1] |
| 293 | ldp x7, x8, [sp, #32+16*2] |
| 294 | ldp x9, x10, [sp, #32+16*3] |
| 295 | ldp x11, x12, [sp, #32+16*4] |
| 296 | ldp x13, x14, [sp, #32+16*5] |
| 297 | ldp x15, x16, [sp, #32+16*6] |
| 298 | ldp x17, x18, [sp, #32+16*7] |
| 299 | ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) |
| 300 | cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) |
/* Pop the x2/x3 pair that sits above this function's own frame.
   NOTE(review): no matching positive CFA adjustment exists in this
   function, so the tracked offset ends 16 below its entry value --
   confirm this is intended.  */
| 301 | ldp x2, x3, [sp], #16 |
| 302 | cfi_adjust_cfa_offset (-16) |
| 303 | RET |
| 304 | #undef NSAVEXREGPAIRS |
| 305 | cfi_endproc |
| 306 | .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela |
| 307 | |
| 308 | /* This function is a placeholder for lazy resolving of TLS |
| 309 | relocations. Once some thread starts resolving a TLS |
| 310 | relocation, it sets up the TLS descriptor to use this |
| 311 | resolver, such that other threads that would attempt to |
| 312 | resolve it concurrently may skip the call to the original lazy |
| 313 | resolver and go straight to a condition wait. |
| 314 | |
| 315 | When the actual resolver returns, it will have adjusted the |
| 316 | TLS descriptor such that we can tail-call it for it to return |
| 317 | the TP offset of the symbol. */ |
| 318 | |
| 319 | .hidden _dl_tlsdesc_resolve_hold |
| 320 | .global _dl_tlsdesc_resolve_hold |
| 321 | .type _dl_tlsdesc_resolve_hold,%function |
| 322 | cfi_startproc |
| 323 | .align 2 |
| 324 | _dl_tlsdesc_resolve_hold: |
| 325 | #define NSAVEXREGPAIRS 10 |
/* Label 1 marks this function's first instruction; its address is
   passed to the fixup routine below.  */
| 326 | 1: |
/* Frame: a 32-byte area whose first 16 bytes hold x29/x30, followed
   by NSAVEXREGPAIRS 16-byte slots.  Nine slots hold x1-x18; the
   tenth holds x0 (the descriptor pointer).  */
| 327 | stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]! |
| 328 | cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS) |
| 329 | mov x29, sp |
| 330 | stp x1, x2, [sp, #32+16*0] |
| 331 | stp x3, x4, [sp, #32+16*1] |
| 332 | stp x5, x6, [sp, #32+16*2] |
| 333 | stp x7, x8, [sp, #32+16*3] |
| 334 | stp x9, x10, [sp, #32+16*4] |
| 335 | stp x11, x12, [sp, #32+16*5] |
| 336 | stp x13, x14, [sp, #32+16*6] |
| 337 | stp x15, x16, [sp, #32+16*7] |
| 338 | stp x17, x18, [sp, #32+16*8] |
| 339 | str x0, [sp, #32+16*9] |
| 340 | |
| 341 | SAVE_Q_REGISTERS |
| 342 | |
/* First argument: x0, the descriptor pointer, passed through
   unchanged.  Second argument: the address of label 1 above, i.e.
   this function's own entry address.  */
| 343 | adr x1, 1b |
| 344 | bl _dl_tlsdesc_resolve_hold_fixup |
| 345 | |
| 346 | RESTORE_Q_REGISTERS |
| 347 | |
/* Reload the descriptor pointer, fetch its first word (the entry
   function) and call it.  Its result is left in x0 as this
   function's return value.  */
| 348 | ldr x0, [sp, #32+16*9] |
| 349 | ldr x1, [x0] |
| 350 | blr x1 |
| 351 | |
/* Restore everything saved on entry except x0, then pop the frame.  */
| 352 | ldp x1, x2, [sp, #32+16*0] |
| 353 | ldp x3, x4, [sp, #32+16*1] |
| 354 | ldp x5, x6, [sp, #32+16*2] |
| 355 | ldp x7, x8, [sp, #32+16*3] |
| 356 | ldp x9, x10, [sp, #32+16*4] |
| 357 | ldp x11, x12, [sp, #32+16*5] |
| 358 | ldp x13, x14, [sp, #32+16*6] |
| 359 | ldp x15, x16, [sp, #32+16*7] |
| 360 | ldp x17, x18, [sp, #32+16*8] |
| 361 | ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS) |
| 362 | cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS) |
| 363 | RET |
| 364 | cfi_endproc |
| 365 | .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold |
| 366 | #undef NSAVEXREGPAIRS |