b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | /* |
| 3 | * trampoline.S: Jump start slave processors on sparc64. |
| 4 | * |
| 5 | * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) |
| 6 | */ |
| 7 | |
| 8 | |
| 9 | #include <asm/head.h> |
| 10 | #include <asm/asi.h> |
| 11 | #include <asm/lsu.h> |
| 12 | #include <asm/dcr.h> |
| 13 | #include <asm/dcu.h> |
| 14 | #include <asm/pstate.h> |
| 15 | #include <asm/page.h> |
| 16 | #include <asm/pgtable.h> |
| 17 | #include <asm/spitfire.h> |
| 18 | #include <asm/processor.h> |
| 19 | #include <asm/thread_info.h> |
| 20 | #include <asm/mmu.h> |
| 21 | #include <asm/hypervisor.h> |
| 22 | #include <asm/cpudata.h> |
| 23 | |
| 24 | .data /* the string constants below are emitted into .data */ |
| 25 | .align 8 |
| 26 | call_method: /* service-name string; its address is stored in slot 0x00 of the TLB-lock OBP argument buffers built below */ |
| 27 | .asciz "call-method" |
| 28 | .align 8 |
| 29 | itlb_load: /* method-name string stored in slot 0x18 of the I-MMU lock request */ |
| 30 | .asciz "SUNW,itlb-load" |
| 31 | .align 8 |
| 32 | dtlb_load: /* method-name string stored in slot 0x18 of the D-MMU lock request */ |
| 33 | .asciz "SUNW,dtlb-load" |
| 34 | |
| 35 | #define TRAMP_STACK_SIZE 1024 /* bytes reserved for the temporary startup stack */ |
| 36 | .align 16 |
| 37 | tramp_stack: /* %sp is pointed near the end of this area in startup_continue and again in after_lock_tlb */ |
| 38 | .skip TRAMP_STACK_SIZE |
| 39 | |
| 40 | .align 8 |
| 41 | .globl sparc64_cpu_startup, sparc64_cpu_startup_end |
| 42 | sparc64_cpu_startup: /* entry point: select the CPU-specific setup path, defaulting to spitfire_startup */ |
| 43 | BRANCH_IF_SUN4V(g1, niagara_startup) /* macro defined outside this file; g1 looks like a scratch register — confirm in its definition */ |
| 44 | BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) |
| 45 | BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) |
| 46 | |
| 47 | ba,pt %xcc, spitfire_startup /* taken when none of the checks above branched away */ |
| 48 | nop /* branch delay slot */ |
| 49 | |
| 50 | cheetah_plus_startup: /* goes straight to cheetah_generic_startup, skipping the DCR/DCU writes in cheetah_startup */ |
| 51 | /* Preserve the DCU and DCR register settings chosen by OBP. */ |
| 52 | ba,pt %xcc, cheetah_generic_startup |
| 53 | nop /* branch delay slot */ |
| 54 | |
| 55 | cheetah_startup: /* program the DCR and DCU values explicitly, then fall into cheetah_generic_startup */ |
| 56 | mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1 |
| 57 | wr %g1, %asr18 /* write the DCR_* flag set to %asr18 */ |
| 58 | |
| 59 | sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 /* %uhi and %ulo together select bits 63:32 of the mask */ |
| 60 | or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5 |
| 61 | sllx %g5, 32, %g5 /* move those bits up into the high word */ |
| 62 | or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 /* add the low-word flags */ |
| 63 | stxa %g5, [%g0] ASI_DCU_CONTROL_REG |
| 64 | membar #Sync |
| 65 | /* fallthru into cheetah_generic_startup */ |
| 66 | |
| 67 | cheetah_generic_startup: /* zero the TSB extension registers (the stored value is %g0) */ |
| 68 | mov TSB_EXTENSION_P, %g3 |
| 69 | stxa %g0, [%g3] ASI_DMMU |
| 70 | stxa %g0, [%g3] ASI_IMMU |
| 71 | membar #Sync |
| 72 | |
| 73 | mov TSB_EXTENSION_S, %g3 /* this one is cleared through ASI_DMMU only */ |
| 74 | stxa %g0, [%g3] ASI_DMMU |
| 75 | membar #Sync |
| 76 | |
| 77 | mov TSB_EXTENSION_N, %g3 |
| 78 | stxa %g0, [%g3] ASI_DMMU |
| 79 | stxa %g0, [%g3] ASI_IMMU |
| 80 | membar #Sync |
| 81 | /* fallthru into niagara_startup */ |
| 82 | |
| 83 | niagara_startup: /* reached from the sun4v check and by fall-through from cheetah_generic_startup */ |
| 84 | /* Disable STICK_INT interrupts. */ |
| 85 | sethi %hi(0x80000000), %g5 |
| 86 | sllx %g5, 32, %g5 /* %g5 = 1 << 63 */ |
| 87 | wr %g5, %asr25 |
| 88 | |
| 89 | ba,pt %xcc, startup_continue /* jump over spitfire_startup */ |
| 90 | nop /* branch delay slot */ |
| 91 | |
| 92 | spitfire_startup: /* write the LSU control flags, then fall through into startup_continue */ |
| 93 | mov (LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1 |
| 94 | stxa %g1, [%g0] ASI_LSU_CONTROL |
| 95 | membar #Sync |
| 96 | |
| 97 | startup_continue: /* common path: prepare to lock the kernel image mappings into the TLBs */ |
| 98 | mov %o0, %l0 /* keep the incoming %o0; it is dereferenced later with ldx [%l0] */ |
| 99 | BRANCH_IF_SUN4V(g1, niagara_lock_tlb) /* sun4v uses hypervisor traps instead of the OBP calls below */ |
| 100 | |
| 101 | sethi %hi(0x80000000), %g2 |
| 102 | sllx %g2, 32, %g2 /* %g2 = 1 << 63 */ |
| 103 | wr %g2, 0, %tick_cmpr /* NOTE(review): presumably the interrupt-disable bit of %tick_cmpr — confirm */ |
| 104 | |
| 105 | /* Call OBP by hand to lock KERNBASE into i/d tlbs. |
| 106 | * We lock 'num_kernel_image_mappings' consecutive entries. |
| 107 | */ |
| 108 | sethi %hi(prom_entry_lock), %g2 |
| 109 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 /* atomically fetch the lock byte and set it to 0xff */ |
| 110 | brnz,pn %g1, 1b /* old value non-zero: the lock was already held, retry */ |
| 111 | nop |
| 112 | |
| 113 | /* Get onto temporary stack which will be in the locked |
| 114 | * kernel image. |
| 115 | */ |
| 116 | sethi %hi(tramp_stack), %g1 |
| 117 | or %g1, %lo(tramp_stack), %g1 |
| 118 | add %g1, TRAMP_STACK_SIZE, %g1 /* %g1 = end of tramp_stack */ |
| 119 | sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp |
| 120 | flushw |
| 121 | |
| 122 | /* Setup the loop variables: |
| 123 | * %l3: VADDR base |
| 124 | * %l4: TTE base |
| 125 | * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings' |
| 126 | * %l6: Number of TTE entries to map |
| 127 | * %l7: Highest TTE entry number, we count down |
| 128 | */ |
| 129 | sethi %hi(KERNBASE), %l3 /* only %hi is loaded, so the low 10 bits of %l3 are zero */ |
| 130 | sethi %hi(kern_locked_tte_data), %l4 |
| 131 | ldx [%l4 + %lo(kern_locked_tte_data)], %l4 |
| 132 | clr %l5 |
| 133 | sethi %hi(num_kernel_image_mappings), %l6 |
| 134 | lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 |
| 135 | |
| 136 | mov 15, %l7 /* value kept when the Cheetah check branches to 2f */ |
| 137 | BRANCH_IF_ANY_CHEETAH(g1,g5,2f) |
| 138 | |
| 139 | mov 63, %l7 /* otherwise the highest entry number is 63 */ |
| 140 | 2: |
| 141 | |
| 142 | 3: /* loop body: one I-MMU and one D-MMU lock request per 4MB mapping */ |
| 143 | /* Lock into I-MMU: build the argument buffer at %sp + 2047 + 128, then call OBP */ |
| 144 | sethi %hi(call_method), %g2 |
| 145 | or %g2, %lo(call_method), %g2 |
| 146 | stx %g2, [%sp + 2047 + 128 + 0x00] /* slot 0x00: address of "call-method" */ |
| 147 | mov 5, %g2 |
| 148 | stx %g2, [%sp + 2047 + 128 + 0x08] /* slot 0x08: 5, matching the five values in slots 0x18-0x38 */ |
| 149 | mov 1, %g2 |
| 150 | stx %g2, [%sp + 2047 + 128 + 0x10] /* slot 0x10: 1 — presumably the return-value count; confirm against the OBP client interface */ |
| 151 | sethi %hi(itlb_load), %g2 |
| 152 | or %g2, %lo(itlb_load), %g2 |
| 153 | stx %g2, [%sp + 2047 + 128 + 0x18] /* slot 0x18: address of "SUNW,itlb-load" */ |
| 154 | sethi %hi(prom_mmu_ihandle_cache), %g2 |
| 155 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 |
| 156 | stx %g2, [%sp + 2047 + 128 + 0x20] /* slot 0x20: 32-bit value loaded from prom_mmu_ihandle_cache */ |
| 157 | |
| 158 | /* Each TTE maps 4MB, convert index to offset. */ |
| 159 | sllx %l5, 22, %g1 |
| 160 | |
| 161 | add %l3, %g1, %g2 |
| 162 | stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR |
| 163 | add %l4, %g1, %g2 |
| 164 | stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE |
| 165 | |
| 166 | /* TTE index is highest minus loop index. */ |
| 167 | sub %l7, %l5, %g2 |
| 168 | stx %g2, [%sp + 2047 + 128 + 0x38] /* slot 0x38: TLB entry number */ |
| 169 | |
| 170 | sethi %hi(p1275buf), %g2 |
| 171 | or %g2, %lo(p1275buf), %g2 |
| 172 | ldx [%g2 + 0x08], %o1 /* call target is the 64-bit word at p1275buf + 8 */ |
| 173 | call %o1 |
| 174 | add %sp, (2047 + 128), %o0 /* delay slot: %o0 = address of the argument buffer */ |
| 175 | |
| 176 | /* Lock into D-MMU: same buffer layout, with "SUNW,dtlb-load" as the method name */ |
| 177 | sethi %hi(call_method), %g2 |
| 178 | or %g2, %lo(call_method), %g2 |
| 179 | stx %g2, [%sp + 2047 + 128 + 0x00] |
| 180 | mov 5, %g2 |
| 181 | stx %g2, [%sp + 2047 + 128 + 0x08] |
| 182 | mov 1, %g2 |
| 183 | stx %g2, [%sp + 2047 + 128 + 0x10] |
| 184 | sethi %hi(dtlb_load), %g2 |
| 185 | or %g2, %lo(dtlb_load), %g2 |
| 186 | stx %g2, [%sp + 2047 + 128 + 0x18] |
| 187 | sethi %hi(prom_mmu_ihandle_cache), %g2 |
| 188 | lduw [%g2 + %lo(prom_mmu_ihandle_cache)], %g2 |
| 189 | stx %g2, [%sp + 2047 + 128 + 0x20] |
| 190 | |
| 191 | /* Each TTE maps 4MB, convert index to offset. */ |
| 192 | sllx %l5, 22, %g1 |
| 193 | |
| 194 | add %l3, %g1, %g2 |
| 195 | stx %g2, [%sp + 2047 + 128 + 0x28] ! VADDR |
| 196 | add %l4, %g1, %g2 |
| 197 | stx %g2, [%sp + 2047 + 128 + 0x30] ! TTE |
| 198 | |
| 199 | /* TTE index is highest minus loop index. */ |
| 200 | sub %l7, %l5, %g2 |
| 201 | stx %g2, [%sp + 2047 + 128 + 0x38] |
| 202 | |
| 203 | sethi %hi(p1275buf), %g2 |
| 204 | or %g2, %lo(p1275buf), %g2 |
| 205 | ldx [%g2 + 0x08], %o1 |
| 206 | call %o1 |
| 207 | add %sp, (2047 + 128), %o0 /* delay slot: %o0 = address of the argument buffer */ |
| 208 | |
| 209 | add %l5, 1, %l5 |
| 210 | cmp %l5, %l6 |
| 211 | bne,pt %xcc, 3b /* repeat until %l5 == %l6; the body always runs at least once */ |
| 212 | nop |
| 213 | |
| 214 | sethi %hi(prom_entry_lock), %g2 |
| 215 | stb %g0, [%g2 + %lo(prom_entry_lock)] /* release the lock taken in startup_continue */ |
| 216 | |
| 217 | ba,pt %xcc, after_lock_tlb /* skip the sun4v variant below */ |
| 218 | nop |
| 219 | |
| 220 | niagara_lock_tlb: /* sun4v variant: the same mappings, requested through hypervisor fast traps */ |
| 221 | sethi %hi(KERNBASE), %l3 /* %l3: VADDR base */ |
| 222 | sethi %hi(kern_locked_tte_data), %l4 |
| 223 | ldx [%l4 + %lo(kern_locked_tte_data)], %l4 /* %l4: TTE base */ |
| 224 | clr %l5 /* %l5: loop index, starts at 0 */ |
| 225 | sethi %hi(num_kernel_image_mappings), %l6 |
| 226 | lduw [%l6 + %lo(num_kernel_image_mappings)], %l6 /* %l6: number of mappings */ |
| 227 | |
| 228 | 1: |
| 229 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 /* %o5 = requested hypervisor function, going by the constant's name */ |
| 230 | sllx %l5, 22, %g2 /* index * 4MB */ |
| 231 | add %l3, %g2, %o0 /* %o0 = virtual address of this mapping */ |
| 232 | clr %o1 |
| 233 | add %l4, %g2, %o2 /* %o2 = TTE for this mapping */ |
| 234 | mov HV_MMU_IMMU, %o3 /* first request uses the HV_MMU_IMMU flag */ |
| 235 | ta HV_FAST_TRAP |
| 236 | |
| 237 | mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 /* all argument registers are set up again for the second request */ |
| 238 | sllx %l5, 22, %g2 |
| 239 | add %l3, %g2, %o0 |
| 240 | clr %o1 |
| 241 | add %l4, %g2, %o2 |
| 242 | mov HV_MMU_DMMU, %o3 /* second request uses the HV_MMU_DMMU flag */ |
| 243 | ta HV_FAST_TRAP /* NOTE(review): no result of either trap is inspected here */ |
| 244 | |
| 245 | add %l5, 1, %l5 |
| 246 | cmp %l5, %l6 |
| 247 | bne,pt %xcc, 1b /* repeat until %l5 == %l6 */ |
| 248 | nop /* after the loop, execution falls through into after_lock_tlb */ |
| 249 | |
| 250 | after_lock_tlb: /* both TLB-lock paths rejoin here */ |
| 251 | wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate /* %pstate = exactly these two flags */ |
| 252 | wr %g0, 0, %fprs /* %fprs = 0 */ |
| 253 | |
| 254 | wr %g0, ASI_P, %asi |
| 255 | |
| 256 | mov PRIMARY_CONTEXT, %g7 |
| 257 | |
| 258 | 661: stxa %g0, [%g7] ASI_DMMU /* zero the primary context register */ |
| 259 | .section .sun4v_1insn_patch, "ax" /* records the address of 661 and an ASI_MMU form of the store; presumably patched in on sun4v — confirm */ |
| 260 | .word 661b |
| 261 | stxa %g0, [%g7] ASI_MMU |
| 262 | .previous |
| 263 | |
| 264 | membar #Sync |
| 265 | mov SECONDARY_CONTEXT, %g7 |
| 266 | |
| 267 | 661: stxa %g0, [%g7] ASI_DMMU /* zero the secondary context register, with the same kind of patch record */ |
| 268 | .section .sun4v_1insn_patch, "ax" |
| 269 | .word 661b |
| 270 | stxa %g0, [%g7] ASI_MMU |
| 271 | .previous |
| 272 | |
| 273 | membar #Sync |
| 274 | |
| 275 | /* Everything we do here, until we properly take over the |
| 276 | * trap table, must be done with extreme care. We cannot |
| 277 | * make any references to %g6 (current thread pointer), |
| 278 | * %g4 (current task pointer), or %g5 (base of current cpu's |
| 279 | * per-cpu area) until we properly take over the trap table |
| 280 | * from the firmware and hypervisor. |
| 281 | * |
| 282 | * Get onto temporary stack which is in the locked kernel image. |
| 283 | */ |
| 284 | sethi %hi(tramp_stack), %g1 |
| 285 | or %g1, %lo(tramp_stack), %g1 |
| 286 | add %g1, TRAMP_STACK_SIZE, %g1 /* %g1 = end of tramp_stack */ |
| 287 | sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp |
| 288 | mov 0, %fp /* clear the frame pointer */ |
| 289 | |
| 290 | /* Put garbage in these registers to trap any access to them. */ |
| 291 | set 0xdeadbeef, %g4 |
| 292 | set 0xdeadbeef, %g5 |
| 293 | set 0xdeadbeef, %g6 |
| 294 | |
| 295 | call init_irqwork_curcpu |
| 296 | nop /* call delay slot */ |
| 297 | |
| 298 | sethi %hi(tlb_type), %g3 |
| 299 | lduw [%g3 + %lo(tlb_type)], %g2 |
| 300 | cmp %g2, 3 /* NOTE(review): 3 is presumably the hypervisor TLB type, given the sun4v_* call it guards — confirm */ |
| 301 | bne,pt %icc, 1f /* any other type skips the mondo-queue registration */ |
| 302 | nop |
| 303 | |
| 304 | call hard_smp_processor_id |
| 305 | nop |
| 306 | |
| 307 | call sun4v_register_mondo_queues /* %o0 is not written between the two calls, so it presumably still holds the id returned above */ |
| 308 | nop |
| 309 | |
| 310 | 1: call init_cur_cpu_trap |
| 311 | ldx [%l0], %o0 /* delay slot: argument = 64-bit value at the address saved in %l0 */ |
| 312 | |
| 313 | /* Start using proper page size encodings in ctx register. */ |
| 314 | sethi %hi(sparc64_kern_pri_context), %g3 |
| 315 | ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 /* %g2 = value of sparc64_kern_pri_context */ |
| 316 | mov PRIMARY_CONTEXT, %g1 |
| 317 | |
| 318 | 661: stxa %g2, [%g1] ASI_DMMU /* write it to the primary context register */ |
| 319 | .section .sun4v_1insn_patch, "ax" /* patch record: address of 661 plus an ASI_MMU form of the store */ |
| 320 | .word 661b |
| 321 | stxa %g2, [%g1] ASI_MMU |
| 322 | .previous |
| 323 | |
| 324 | membar #Sync |
| 325 | |
| 326 | wrpr %g0, 0, %wstate /* %wstate = 0 */ |
| 327 | |
| 328 | sethi %hi(prom_entry_lock), %g2 |
| 329 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 /* take prom_entry_lock again before the OBP call that follows */ |
| 330 | brnz,pn %g1, 1b /* old value non-zero: the lock was held, retry */ |
| 331 | nop |
| 332 | |
| 333 | /* As a hack, put &init_thread_union into %g6. |
| 334 | * prom_world() loads from here to restore the %asi |
| 335 | * register. |
| 336 | */ |
| 337 | sethi %hi(init_thread_union), %g6 |
| 338 | or %g6, %lo(init_thread_union), %g6 |
| 339 | |
| 340 | sethi %hi(is_sun4v), %o0 |
| 341 | lduw [%o0 + %lo(is_sun4v)], %o0 |
| 342 | brz,pt %o0, 2f /* is_sun4v == 0: use the one-argument request at 2: */ |
| 343 | nop |
| 344 | |
| 345 | TRAP_LOAD_TRAP_BLOCK(%g2, %g3) /* macro defined elsewhere; presumably leaves this CPU's trap block address in %g2 — confirm */ |
| 346 | add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 |
| 347 | stxa %g2, [%g0] ASI_SCRATCHPAD /* store that address at offset 0 of ASI_SCRATCHPAD */ |
| 348 | |
| 349 | /* Compute physical address: |
| 350 | * |
| 351 | * paddr = kern_base + (mmfsa_vaddr - KERNBASE) |
| 352 | */ |
| 353 | sethi %hi(KERNBASE), %g3 |
| 354 | sub %g2, %g3, %g2 |
| 355 | sethi %hi(kern_base), %g3 |
| 356 | ldx [%g3 + %lo(kern_base)], %g3 |
| 357 | add %g2, %g3, %o1 /* %o1 = paddr */ |
| 358 | sethi %hi(sparc64_ttable_tl0), %o0 /* only %hi is used, so the low 10 bits of the address are taken to be zero */ |
| 359 | |
| 360 | set prom_set_trap_table_name, %g2 |
| 361 | stx %g2, [%sp + 2047 + 128 + 0x00] /* slot 0x00: service name */ |
| 362 | mov 2, %g2 |
| 363 | stx %g2, [%sp + 2047 + 128 + 0x08] /* slot 0x08: 2, matching the two values in slots 0x18 and 0x20 */ |
| 364 | mov 0, %g2 |
| 365 | stx %g2, [%sp + 2047 + 128 + 0x10] /* slot 0x10: 0 */ |
| 366 | stx %o0, [%sp + 2047 + 128 + 0x18] /* slot 0x18: trap table address */ |
| 367 | stx %o1, [%sp + 2047 + 128 + 0x20] /* slot 0x20: paddr computed above */ |
| 368 | sethi %hi(p1275buf), %g2 |
| 369 | or %g2, %lo(p1275buf), %g2 |
| 370 | ldx [%g2 + 0x08], %o1 /* call target is the 64-bit word at p1275buf + 8 */ |
| 371 | call %o1 |
| 372 | add %sp, (2047 + 128), %o0 /* delay slot: %o0 = address of the argument buffer */ |
| 373 | |
| 374 | ba,pt %xcc, 3f /* skip the non-sun4v variant */ |
| 375 | nop |
| 376 | |
| 377 | 2: sethi %hi(sparc64_ttable_tl0), %o0 /* non-sun4v: the trap table address is the only argument */ |
| 378 | set prom_set_trap_table_name, %g2 |
| 379 | stx %g2, [%sp + 2047 + 128 + 0x00] |
| 380 | mov 1, %g2 |
| 381 | stx %g2, [%sp + 2047 + 128 + 0x08] /* slot 0x08: 1, matching the single value in slot 0x18 */ |
| 382 | mov 0, %g2 |
| 383 | stx %g2, [%sp + 2047 + 128 + 0x10] |
| 384 | stx %o0, [%sp + 2047 + 128 + 0x18] |
| 385 | sethi %hi(p1275buf), %g2 |
| 386 | or %g2, %lo(p1275buf), %g2 |
| 387 | ldx [%g2 + 0x08], %o1 |
| 388 | call %o1 |
| 389 | add %sp, (2047 + 128), %o0 /* delay slot: %o0 = address of the argument buffer */ |
| 390 | |
| 391 | 3: sethi %hi(prom_entry_lock), %g2 |
| 392 | stb %g0, [%g2 + %lo(prom_entry_lock)] /* release prom_entry_lock */ |
| 393 | |
| 394 | ldx [%l0], %g6 /* %g6 = 64-bit value at the address saved in %l0 */ |
| 395 | ldx [%g6 + TI_TASK], %g4 /* %g4 = 64-bit field at offset TI_TASK from %g6 */ |
| 396 | |
| 397 | mov 1, %g5 |
| 398 | sllx %g5, THREAD_SHIFT, %g5 /* %g5 = 1 << THREAD_SHIFT */ |
| 399 | sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 |
| 400 | add %g6, %g5, %sp /* %sp = %g6 + (1 << THREAD_SHIFT) - (STACKFRAME_SZ + STACK_BIAS) */ |
| 401 | |
| 402 | rdpr %pstate, %o1 |
| 403 | or %o1, PSTATE_IE, %o1 |
| 404 | wrpr %o1, 0, %pstate /* set PSTATE_IE, leaving the other %pstate bits unchanged */ |
| 405 | |
| 406 | call smp_callin |
| 407 | nop |
| 408 | |
| 409 | call cpu_panic /* reached only if smp_callin returns */ |
| 410 | nop |
| 411 | 1: b,a,pt %xcc, 1b /* branch-to-self: spin here if cpu_panic returns */ |
| 412 | |
| 413 | .align 8 |
| 414 | sparc64_cpu_startup_end: /* end marker, exported by the .globl next to sparc64_cpu_startup */ |