lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* PLT trampolines. ia64 version. |
| 2 | Copyright (C) 2005-2015 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <http://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | #include <sysdep.h> |
| 20 | #undef ret |
| 21 | |
| 22 | /* |
| 23 | This code is used in dl-runtime.c to call the `_dl_fixup' function |
| 24 | and then redirect to the address it returns. `_dl_fixup()' takes two |
| 25 | arguments, however _dl_profile_fixup() takes five. |
| 26 | |
| 27 | The ABI specifies that we will never see more than 8 input |
| 28 | registers to a function call, thus it is safe to simply allocate |
| 29 | those, and simpler than playing stack games. */ |
| 30 | |
| 31 | /* Frame size used to save and restore the 8 incoming fp registers (f8 to f15), 16 bytes each. */ |
| 32 | #define RESOLVE_FRAME_SIZE (16*8) |
| 33 | |
| 34 | ENTRY(_dl_runtime_resolve) /* Lazy PLT resolver: calls _dl_fixup with out0 = r16 and out1 = r15 * 24, then branches to the address it returns with the incoming registers restored. */ |
| 35 | { .mmi |
| 36 | .prologue |
| 37 | .save ar.pfs, r40 |
| 38 | alloc loc0 = ar.pfs, 8, 6, 2, 0 /* 8 inputs, 6 locals, 2 outputs. loc0 keeps the previous ar.pfs */ |
| 39 | /* Use the 16 byte scratch area. r2 will start at the f8 slot (new sp + 16) and |
| 40 | r3 will start at the f9 slot (new sp + 32), so the f15 slot lands in the 16 bytes at the old sp. */ |
| 41 | adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12 |
| 42 | adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12 |
| 43 | } |
| 44 | { .mii |
| 45 | .fframe RESOLVE_FRAME_SIZE |
| 46 | adds r12 = -RESOLVE_FRAME_SIZE, r12 /* allocate the frame */ |
| 47 | .save rp, loc1 |
| 48 | mov loc1 = b0 /* save the return address */ |
| 49 | .body |
| 50 | mov loc2 = r8 /* preserve struct value register */ |
| 51 | ;; |
| 52 | } |
| 53 | { .mii |
| 54 | mov loc3 = r9 /* preserve language specific register */ |
| 55 | mov loc4 = r10 /* preserve language specific register */ |
| 56 | mov loc5 = r11 /* preserve language specific register */ |
| 57 | } |
| 58 | { .mmi |
| 59 | stf.spill [r2] = f8, 32 /* r2 steps through the f8, f10, f12, f14 slots */ |
| 60 | stf.spill [r3] = f9, 32 /* r3 steps through the f9, f11, f13, f15 slots */ |
| 61 | mov out0 = r16 /* first argument to _dl_fixup, taken from r16 */ |
| 62 | ;; |
| 63 | } |
| 64 | { .mmi |
| 65 | stf.spill [r2] = f10, 32 |
| 66 | stf.spill [r3] = f11, 32 |
| 67 | shl out1 = r15, 4 /* out1 = r15 * 16 */ |
| 68 | ;; |
| 69 | } |
| 70 | { .mmi |
| 71 | stf.spill [r2] = f12, 32 |
| 72 | stf.spill [r3] = f13, 32 |
| 73 | /* Relocation record is 24 bytes: out1 = r15 * 16 + r15 * 8. */ |
| 74 | shladd out1 = r15, 3, out1 |
| 75 | ;; |
| 76 | } |
| 77 | { .mmb |
| 78 | stf.spill [r2] = f14 |
| 79 | stf.spill [r3] = f15 |
| 80 | br.call.sptk.many b0 = _dl_fixup /* ret0 and ret1 are consumed below */ |
| 81 | } |
| 82 | { .mii |
| 83 | /* Skip the 16 byte scratch area to reach the saved f8 and f9. */ |
| 84 | adds r2 = 16, r12 |
| 85 | adds r3 = 32, r12 |
| 86 | mov b6 = ret0 /* address used by the final branch */ |
| 87 | ;; |
| 88 | } |
| 89 | { .mmi |
| 90 | ldf.fill f8 = [r2], 32 |
| 91 | ldf.fill f9 = [r3], 32 |
| 92 | mov b0 = loc1 /* restore the return address */ |
| 93 | ;; |
| 94 | } |
| 95 | { .mmi |
| 96 | ldf.fill f10 = [r2], 32 |
| 97 | ldf.fill f11 = [r3], 32 |
| 98 | mov gp = ret1 /* gp value returned by _dl_fixup */ |
| 99 | ;; |
| 100 | } |
| 101 | { .mmi |
| 102 | ldf.fill f12 = [r2], 32 |
| 103 | ldf.fill f13 = [r3], 32 |
| 104 | mov ar.pfs = loc0 /* restore the ar.pfs value saved on entry */ |
| 105 | ;; |
| 106 | } |
| 107 | { .mmi |
| 108 | ldf.fill f14 = [r2], 32 |
| 109 | ldf.fill f15 = [r3], 32 |
| 110 | .restore sp /* pop the unwind frame state */ |
| 111 | adds r12 = RESOLVE_FRAME_SIZE, r12 /* release the frame */ |
| 112 | ;; |
| 113 | } |
| 114 | { .mii |
| 115 | mov r9 = loc3 /* restore language specific register */ |
| 116 | mov r10 = loc4 /* restore language specific register */ |
| 117 | mov r11 = loc5 /* restore language specific register */ |
| 118 | } |
| 119 | { .mii |
| 120 | mov r8 = loc2 /* restore struct value register */ |
| 121 | ;; |
| 122 | } |
| 123 | /* An alloc is needed for the break system call to work. |
| 124 | We don't care about the old value of the pfs register. */ |
| 125 | { .mmb |
| 126 | .prologue |
| 127 | .body |
| 128 | alloc r2 = ar.pfs, 0, 0, 8, 0 /* no inputs or locals, 8 outputs. The value written to r2 is not used */ |
| 129 | br.sptk.many b6 /* jump to the resolved address. b0 already holds the original return address */ |
| 130 | ;; |
| 131 | } |
| 132 | END(_dl_runtime_resolve) |
| 133 | |
| 134 | |
| 135 | /* The fourth argument to _dl_profile_fixup and the third one to |
| 136 | _dl_call_pltexit are a pointer to La_ia64_regs: |
| 137 | |
| 138 | 8byte r8 |
| 139 | 8byte r9 |
| 140 | 8byte r10 |
| 141 | 8byte r11 |
| 142 | 8byte in0 |
| 143 | 8byte in1 |
| 144 | 8byte in2 |
| 145 | 8byte in3 |
| 146 | 8byte in4 |
| 147 | 8byte in5 |
| 148 | 8byte in6 |
| 149 | 8byte in7 |
| 150 | 16byte f8 |
| 151 | 16byte f9 |
| 152 | 16byte f10 |
| 153 | 16byte f11 |
| 154 | 16byte f12 |
| 155 | 16byte f13 |
| 156 | 16byte f14 |
| 157 | 16byte f15 |
| 158 | 8byte ar.unat |
| 159 | 8byte sp |
| 160 | |
| 161 | The fifth argument to _dl_profile_fixup is a pointer to long int. |
| 162 | The fourth argument to _dl_call_pltexit is a pointer to |
| 163 | La_ia64_retval: |
| 164 | |
| 165 | 8byte r8 |
| 166 | 8byte r9 |
| 167 | 8byte r10 |
| 168 | 8byte r11 |
| 169 | 16byte f8 |
| 170 | 16byte f9 |
| 171 | 16byte f10 |
| 172 | 16byte f11 |
| 173 | 16byte f12 |
| 174 | 16byte f13 |
| 175 | 16byte f14 |
| 176 | 16byte f15 |
| 177 | |
| 178 | Since the stack has to be 16-byte aligned, the stack is allocated in |
| 179 | 16-byte increments. Before calling _dl_profile_fixup, the stack will |
| 180 | look like |
| 181 | |
| 182 | psp new frame_size |
| 183 | +16 La_ia64_regs |
| 184 | sp scratch |
| 185 | |
| 186 | */ |
| 187 | |
| 188 | #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16) /* La_ia64_regs (r8 to r11, in0 to in7, f8 to f15, ar.unat and sp) plus the 16 byte scratch area */ |
| 189 | #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16) /* adds room for La_ia64_retval (r8 to r11, f8 to f15) */ |
| 190 | |
| 191 | #ifndef PROF |
| 192 | ENTRY(_dl_runtime_profile) /* Profiling variant of the resolver: fills a La_ia64_regs block, calls _dl_profile_fixup, and unless the returned frame size is -1 also calls the target itself followed by _dl_call_pltexit. */ |
| 193 | { .mii |
| 194 | .prologue |
| 195 | .save ar.pfs, r40 |
| 196 | alloc loc0 = ar.pfs, 8, 12, 8, 0 /* 8 inputs, 12 locals, 8 outputs. loc0 keeps the previous ar.pfs */ |
| 197 | .vframe loc10 |
| 198 | mov loc10 = r12 /* loc10 = stack pointer on entry (psp) */ |
| 199 | .save rp, loc1 |
| 200 | mov loc1 = b0 /* save the return address */ |
| 201 | } |
| 202 | { .mii |
| 203 | .save ar.unat, r17 |
| 204 | mov r17 = ar.unat /* entry value of ar.unat, written back before the call */ |
| 205 | .save ar.lc, loc6 |
| 206 | mov loc6 = ar.lc /* ar.lc is overwritten by the copy loop below */ |
| 207 | mov loc11 = gp /* entry gp, needed again for _dl_call_pltexit */ |
| 208 | } |
| 209 | { .mii |
| 210 | .body |
| 211 | /* There is a 16 byte scratch area. r2 will start at r8 and |
| 212 | r3 will start at r9 for La_ia64_regs. */ |
| 213 | adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12 |
| 214 | adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12 |
| 215 | adds r12 = -PLTENTER_FRAME_SIZE, r12 /* allocate the PLTENTER frame */ |
| 216 | ;; |
| 217 | } |
| 218 | { .mmi |
| 219 | st8 [r2] = r8, 16; |
| 220 | st8 [r3] = r9, 16; |
| 221 | mov out2 = b0 /* return address, needed by _dl_profile_fixup */ |
| 222 | ;; |
| 223 | } |
| 224 | { .mmi |
| 225 | st8 [r2] = r10, 16; |
| 226 | st8 [r3] = r11, 16; |
| 227 | adds out3 = 16, r12 /* pointer to La_ia64_regs */ |
| 228 | ;; |
| 229 | } |
| 230 | { .mmi |
| 231 | .mem.offset 0, 0 |
| 232 | st8.spill [r2] = in0, 16 |
| 233 | .mem.offset 8, 0 |
| 234 | st8.spill [r3] = in1, 16 |
| 235 | mov out4 = loc10 /* pointer to new frame size, which is written at psp */ |
| 236 | ;; |
| 237 | } |
| 238 | { .mmi |
| 239 | .mem.offset 0, 0 |
| 240 | st8.spill [r2] = in2, 16 |
| 241 | .mem.offset 8, 0 |
| 242 | st8.spill [r3] = in3, 16 |
| 243 | mov loc2 = r8 /* preserve struct value register */ |
| 244 | ;; |
| 245 | } |
| 246 | { .mmi |
| 247 | .mem.offset 0, 0 |
| 248 | st8.spill [r2] = in4, 16 |
| 249 | .mem.offset 8, 0 |
| 250 | st8.spill [r3] = in5, 16 |
| 251 | mov loc3 = r9 /* preserve language specific register */ |
| 252 | ;; |
| 253 | } |
| 254 | { .mmi |
| 255 | .mem.offset 0, 0 |
| 256 | st8 [r2] = in6, 16 /* NOTE(review): plain st8 here and for in7, unlike the st8.spill used for in0 to in5 - confirm this is intended */ |
| 257 | .mem.offset 8, 0 |
| 258 | st8 [r3] = in7, 24 /* adjust for f9 */ |
| 259 | mov loc4 = r10 /* preserve language specific register */ |
| 260 | ;; |
| 261 | } |
| 262 | { .mii |
| 263 | mov r18 = ar.unat /* save it in La_ia64_regs */ |
| 264 | mov loc7 = out3 /* save it for _dl_call_pltexit */ |
| 265 | mov loc5 = r11 /* preserve language specific register */ |
| 266 | } |
| 267 | { .mmi |
| 268 | stf.spill [r2] = f8, 32 |
| 269 | stf.spill [r3] = f9, 32 |
| 270 | mov out0 = r16 /* needed by _dl_profile_fixup */ |
| 271 | ;; |
| 272 | } |
| 273 | { .mii |
| 274 | mov ar.unat = r17 /* restore it for function call */ |
| 275 | mov loc8 = r16 /* save it for _dl_call_pltexit */ |
| 276 | nop.i 0x0 |
| 277 | } |
| 278 | { .mmi |
| 279 | stf.spill [r2] = f10, 32 |
| 280 | stf.spill [r3] = f11, 32 |
| 281 | shl out1 = r15, 4 /* out1 = r15 * 16 */ |
| 282 | ;; |
| 283 | } |
| 284 | { .mmi |
| 285 | stf.spill [r2] = f12, 32 |
| 286 | stf.spill [r3] = f13, 32 |
| 287 | /* Relocation record is 24 bytes: out1 = r15 * 16 + r15 * 8. */ |
| 288 | shladd out1 = r15, 3, out1 |
| 289 | ;; |
| 290 | } |
| 291 | { .mmi |
| 292 | stf.spill [r2] = f14, 32 |
| 293 | stf.spill [r3] = f15, 24 /* 24 leaves r3 at the sp slot, 8 bytes past the ar.unat slot that r2 now points at */ |
| 294 | mov loc9 = out1 /* save it for _dl_call_pltexit */ |
| 295 | ;; |
| 296 | } |
| 297 | { .mmb |
| 298 | st8 [r2] = r18 /* store ar.unat */ |
| 299 | st8 [r3] = loc10 /* store sp */ |
| 300 | br.call.sptk.many b0 = _dl_profile_fixup /* ret0 and ret1 are consumed below */ |
| 301 | } |
| 302 | { .mii |
| 303 | /* Skip the 16byte scratch area, 4 language specific GRs and |
| 304 | 8 incoming GRs to restore incoming fp registers. */ |
| 305 | adds r2 = (4*8 + 8*8 + 16), r12 |
| 306 | adds r3 = (4*8 + 8*8 + 32), r12 |
| 307 | mov b6 = ret0 /* address used to reach the resolved function */ |
| 308 | ;; |
| 309 | } |
| 310 | { .mmi |
| 311 | ldf.fill f8 = [r2], 32 |
| 312 | ldf.fill f9 = [r3], 32 |
| 313 | mov gp = ret1 /* gp value returned by _dl_profile_fixup */ |
| 314 | ;; |
| 315 | } |
| 316 | { .mmi |
| 317 | ldf.fill f10 = [r2], 32 |
| 318 | ldf.fill f11 = [r3], 32 |
| 319 | mov r8 = loc2 /* restore struct value register */ |
| 320 | ;; |
| 321 | } |
| 322 | { .mmi |
| 323 | ldf.fill f12 = [r2], 32 |
| 324 | ldf.fill f13 = [r3], 32 |
| 325 | mov r9 = loc3 /* restore language specific register */ |
| 326 | ;; |
| 327 | } |
| 328 | { .mmi |
| 329 | ldf.fill f14 = [r2], 32 |
| 330 | ldf.fill f15 = [r3], 32 |
| 331 | mov r10 = loc4 /* restore language specific register */ |
| 332 | ;; |
| 333 | } |
| 334 | { .mii |
| 335 | ld8 r15 = [loc10] /* load the new frame size */ |
| 336 | mov r11 = loc5 /* restore language specific register */ |
| 337 | ;; |
| 338 | cmp.eq p6, p7 = -1, r15 /* p6: frame size is -1, p7: any other value */ |
| 339 | ;; |
| 340 | } |
| 341 | { .mii |
| 342 | (p7) cmp.eq p8, p9 = 0, r15 /* p8: nothing to copy, p9: arguments on the stack must be copied */ |
| 343 | (p6) mov b0 = loc1 /* the p6 path restores b0, ar.lc and ar.pfs and skips the pltexit handling */ |
| 344 | (p6) mov ar.lc = loc6 |
| 345 | } |
| 346 | { .mib |
| 347 | nop.m 0x0 |
| 348 | (p6) mov ar.pfs = loc0 |
| 349 | (p6) br.cond.dptk.many .Lresolved /* p7 is clear on this path, so .Lresolved falls through to the final branch */ |
| 350 | ;; |
| 351 | } |
| 352 | |
| 353 | /* At this point, the stack looks like |
| 354 | |
| 355 | +psp free |
| 356 | +16 La_ia64_regs |
| 357 | sp scratch |
| 358 | |
| 359 | We need to keep the current stack and call the resolved |
| 360 | function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE |
| 361 | + 16 (scratch area) to sp + 16 (scratch area). Since stack |
| 362 | has to be 16 byte aligned, we round r15 up to a multiple of 16 bytes. */ |
| 363 | |
| 364 | { .mbb |
| 365 | (p9) adds r15 = 15, r15 /* first half of rounding up to a multiple of 16 */ |
| 366 | (p8) br.cond.dptk.many .Lno_new_frame |
| 367 | nop.b 0x0 |
| 368 | ;; |
| 369 | } |
| 370 | { .mmi |
| 371 | and r15 = -16, r15 /* second half of the rounding */ |
| 372 | ;; |
| 373 | /* We don't copy the 16 byte scratch area. Prepare r16/r17 as |
| 374 | destination. */ |
| 375 | sub r16 = r12, r15 |
| 376 | sub r17 = r12, r15 |
| 377 | ;; |
| 378 | } |
| 379 | { .mii |
| 380 | adds r16 = 16, r16 |
| 381 | adds r17 = 24, r17 |
| 382 | sub r12 = r12, r15 /* Adjust stack */ |
| 383 | ;; |
| 384 | } |
| 385 | { .mii |
| 386 | nop.m 0x0 |
| 387 | shr r15 = r15, 4 /* number of 16 byte chunks to copy */ |
| 388 | ;; |
| 389 | adds r15 = -1, r15 /* the loop counter holds the iteration count minus one */ |
| 390 | ;; |
| 391 | } |
| 392 | { .mii |
| 393 | /* Skip the 16 byte scratch area. Prepare r2/r3 as source. */ |
| 394 | adds r2 = 16, loc10 |
| 395 | adds r3 = 24, loc10 |
| 396 | mov ar.lc = r15 |
| 397 | ;; |
| 398 | } |
| 399 | .Lcopy: /* copies 16 bytes per iteration, 8 through each pointer pair */ |
| 400 | { .mmi |
| 401 | ld8 r18 = [r2], 16 |
| 402 | ld8 r19 = [r3], 16 |
| 403 | nop.i 0x0 |
| 404 | ;; |
| 405 | } |
| 406 | { .mmb |
| 407 | st8 [r16] = r18, 16 |
| 408 | st8 [r17] = r19, 16 |
| 409 | br.cloop.sptk.few .Lcopy |
| 410 | } |
| 411 | .Lno_new_frame: /* forward the 8 incoming register arguments to the resolved function */ |
| 412 | { .mii |
| 413 | mov out0 = in0 |
| 414 | mov out1 = in1 |
| 415 | mov out2 = in2 |
| 416 | } |
| 417 | { .mii |
| 418 | mov out3 = in3 |
| 419 | mov out4 = in4 |
| 420 | mov out5 = in5 |
| 421 | } |
| 422 | { .mib |
| 423 | mov out6 = in6 |
| 424 | mov out7 = in7 |
| 425 | /* Call the resolved function */ |
| 426 | br.call.sptk.many b0 = b6 |
| 427 | } |
| 428 | { .mii |
| 429 | /* Prepare stack for _dl_call_pltexit. Loc10 has the original |
| 430 | stack pointer. */ |
| 431 | adds r12 = -PLTEXIT_FRAME_SIZE, loc10 |
| 432 | adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10 |
| 433 | adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10 |
| 434 | ;; |
| 435 | } |
| 436 | { .mmi |
| 437 | /* Store all possible return values into the La_ia64_retval buffer. */ |
| 438 | st8 [r2] = r8, 16 |
| 439 | st8 [r3] = r9, 16 |
| 440 | mov out0 = loc8 /* the r16 value saved on entry */ |
| 441 | ;; |
| 442 | } |
| 443 | { .mmi |
| 444 | st8 [r2] = r10, 16 |
| 445 | st8 [r3] = r11, 24 /* adjust for f9 */ |
| 446 | mov out1 = loc9 /* the relocation offset computed before _dl_profile_fixup */ |
| 447 | ;; |
| 448 | } |
| 449 | { .mmi |
| 450 | stf.spill [r2] = f8, 32 |
| 451 | stf.spill [r3] = f9, 32 |
| 452 | mov out2 = loc7 /* Pointer to La_ia64_regs */ |
| 453 | ;; |
| 454 | } |
| 455 | { .mmi |
| 456 | stf.spill [r2] = f10, 32 |
| 457 | stf.spill [r3] = f11, 32 |
| 458 | adds out3 = 16, r12 /* Pointer to La_ia64_retval */ |
| 459 | ;; |
| 460 | } |
| 461 | { .mmi |
| 462 | stf.spill [r2] = f12, 32 |
| 463 | stf.spill [r3] = f13, 32 |
| 464 | /* We need to restore gp for _dl_call_pltexit. */ |
| 465 | mov gp = loc11 |
| 466 | ;; |
| 467 | } |
| 468 | { .mmb |
| 469 | stf.spill [r2] = f14 |
| 470 | stf.spill [r3] = f15 |
| 471 | br.call.sptk.many b0 = _dl_call_pltexit |
| 472 | } |
| 473 | { .mmi |
| 474 | /* Load all the non-floating and floating return values. Skip |
| 475 | the 16byte scratch area. */ |
| 476 | adds r2 = 16, r12 |
| 477 | adds r3 = 24, r12 |
| 478 | nop.i 0x0 |
| 479 | ;; |
| 480 | } |
| 481 | { .mmi |
| 482 | ld8 r8 = [r2], 16 |
| 483 | ld8 r9 = [r3], 16 |
| 484 | nop.i 0x0 |
| 485 | ;; |
| 486 | } |
| 487 | { .mmi |
| 488 | ld8 r10 = [r2], 16 |
| 489 | ld8 r11 = [r3], 24 /* adjust for f9 */ |
| 490 | nop.i 0x0 |
| 491 | ;; |
| 492 | } |
| 493 | { .mmi |
| 494 | ldf.fill f8 = [r2], 32 |
| 495 | ldf.fill f9 = [r3], 32 |
| 496 | mov ar.lc = loc6 /* restore ar.lc */ |
| 497 | ;; |
| 498 | } |
| 499 | { .mmi |
| 500 | ldf.fill f10 = [r2], 32 |
| 501 | ldf.fill f11 = [r3], 32 |
| 502 | mov ar.pfs = loc0 /* restore the ar.pfs value saved on entry */ |
| 503 | ;; |
| 504 | } |
| 505 | { .mmi |
| 506 | ldf.fill f12 = [r2], 32 |
| 507 | ldf.fill f13 = [r3], 32 |
| 508 | mov b0 = loc1 /* restore the return address */ |
| 509 | ;; |
| 510 | } |
| 511 | { .mmi |
| 512 | ldf.fill f14 = [r2] |
| 513 | ldf.fill f15 = [r3] |
| 514 | /* We know that the previous stack pointer, loc10, isn't 0. |
| 515 | We use it to set p7 so that the br.ret at .Lresolved is taken. */ |
| 516 | cmp.ne p7, p0 = 0, loc10 |
| 517 | ;; |
| 518 | } |
| 519 | .Lresolved: |
| 520 | { .mmb |
| 521 | .restore sp |
| 522 | mov r12 = loc10 /* back to the stack pointer seen on entry */ |
| 523 | (p7) br.ret.sptk.many b0 /* pltexit path: the target has already been called, so return */ |
| 524 | ;; |
| 525 | } |
| 526 | /* An alloc is needed for the break system call to work. We |
| 527 | don't care about the old value of the pfs register. After |
| 528 | this alloc, we can't use any rotating registers. Otherwise |
| 529 | assembler won't be happy. This has to be at the end. */ |
| 530 | { .mmb |
| 531 | .prologue |
| 532 | .body |
| 533 | alloc r2 = ar.pfs, 0, 0, 8, 0 /* no inputs or locals, 8 outputs. The value written to r2 is not used */ |
| 534 | br.sptk.many b6 /* frame size was -1: jump straight to the resolved function */ |
| 535 | ;; |
| 536 | } |
| 537 | END(_dl_runtime_profile) |
| 538 | #endif |