| b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * Copyright (C) 2015 - ARM Ltd |
| 4 | * Author: Marc Zyngier <marc.zyngier@arm.com> |
| 5 | */ |
| 6 | |
| 7 | #include <linux/arm-smccc.h> |
| 8 | #include <linux/kvm_host.h> |
| 9 | #include <linux/types.h> |
| 10 | #include <linux/jump_label.h> |
| 11 | #include <uapi/linux/psci.h> |
| 12 | |
| 13 | #include <kvm/arm_psci.h> |
| 14 | |
| 15 | #include <asm/arch_gicv3.h> |
| 16 | #include <asm/cpufeature.h> |
| 17 | #include <asm/extable.h> |
| 18 | #include <asm/kprobes.h> |
| 19 | #include <asm/kvm_asm.h> |
| 20 | #include <asm/kvm_emulate.h> |
| 21 | #include <asm/kvm_host.h> |
| 22 | #include <asm/kvm_hyp.h> |
| 23 | #include <asm/kvm_mmu.h> |
| 24 | #include <asm/fpsimd.h> |
| 25 | #include <asm/debug-monitors.h> |
| 26 | #include <asm/processor.h> |
| 27 | #include <asm/thread_info.h> |
| 28 | #include <asm/vectors.h> |
| 29 | |
/*
 * Start and end markers of the exception table that
 * kvm_unexpected_el2_exception() walks when looking for a fixup whose
 * instruction address matches ELR_EL2.
 */
extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;
| 32 | |
| 33 | /* Check whether the FP regs were dirtied while in the host-side run loop: */ |
| 34 | static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) |
| 35 | { |
| 36 | /* |
| 37 | * When the system doesn't support FP/SIMD, we cannot rely on |
| 38 | * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an |
| 39 | * abort on the very first access to FP and thus we should never |
| 40 | * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always |
| 41 | * trap the accesses. |
| 42 | */ |
| 43 | if (!system_supports_fpsimd() || |
| 44 | vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) |
| 45 | vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | |
| 46 | KVM_ARM64_FP_HOST); |
| 47 | |
| 48 | return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); |
| 49 | } |
| 50 | |
| 51 | /* Save the 32-bit only FPSIMD system register state */ |
| 52 | static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) |
| 53 | { |
| 54 | if (!vcpu_el1_is_32bit(vcpu)) |
| 55 | return; |
| 56 | |
| 57 | vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); |
| 58 | } |
| 59 | |
| 60 | static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) |
| 61 | { |
| 62 | /* |
| 63 | * We are about to set CPTR_EL2.TFP to trap all floating point |
| 64 | * register accesses to EL2, however, the ARM ARM clearly states that |
| 65 | * traps are only taken to EL2 if the operation would not otherwise |
| 66 | * trap to EL1. Therefore, always make sure that for 32-bit guests, |
| 67 | * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. |
| 68 | * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to |
| 69 | * it will cause an exception. |
| 70 | */ |
| 71 | if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { |
| 72 | write_sysreg(1 << 30, fpexc32_el2); |
| 73 | isb(); |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) |
| 78 | { |
| 79 | /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ |
| 80 | write_sysreg(1 << 15, hstr_el2); |
| 81 | |
| 82 | /* |
| 83 | * Make sure we trap PMU access from EL0 to EL2. Also sanitize |
| 84 | * PMSELR_EL0 to make sure it never contains the cycle |
| 85 | * counter, which could make a PMXEVCNTR_EL0 access UNDEF at |
| 86 | * EL1 instead of being trapped to EL2. |
| 87 | */ |
| 88 | write_sysreg(0, pmselr_el0); |
| 89 | write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); |
| 90 | write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); |
| 91 | } |
| 92 | |
/*
 * Clear the HSTR_EL2 and PMUSERENR_EL0 values that
 * __activate_traps_common() installed.
 */
static void __hyp_text __deactivate_traps_common(void)
{
	write_sysreg(0, hstr_el2);
	write_sysreg(0, pmuserenr_el0);
}
| 98 | |
| 99 | static void activate_traps_vhe(struct kvm_vcpu *vcpu) |
| 100 | { |
| 101 | u64 val; |
| 102 | |
| 103 | val = read_sysreg(cpacr_el1); |
| 104 | val |= CPACR_EL1_TTA; |
| 105 | val &= ~CPACR_EL1_ZEN; |
| 106 | |
| 107 | /* |
| 108 | * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to |
| 109 | * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, |
| 110 | * except for some missing controls, such as TAM. |
| 111 | * In this case, CPTR_EL2.TAM has the same position with or without |
| 112 | * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM |
| 113 | * shift value for trapping the AMU accesses. |
| 114 | */ |
| 115 | |
| 116 | val |= CPTR_EL2_TAM; |
| 117 | |
| 118 | if (update_fp_enabled(vcpu)) { |
| 119 | if (vcpu_has_sve(vcpu)) |
| 120 | val |= CPACR_EL1_ZEN; |
| 121 | } else { |
| 122 | val &= ~CPACR_EL1_FPEN; |
| 123 | __activate_traps_fpsimd32(vcpu); |
| 124 | } |
| 125 | |
| 126 | write_sysreg(val, cpacr_el1); |
| 127 | |
| 128 | write_sysreg(kvm_get_hyp_vector(), vbar_el1); |
| 129 | } |
| 130 | NOKPROBE_SYMBOL(activate_traps_vhe); |
| 131 | |
| 132 | static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) |
| 133 | { |
| 134 | u64 val; |
| 135 | |
| 136 | __activate_traps_common(vcpu); |
| 137 | |
| 138 | val = CPTR_EL2_DEFAULT; |
| 139 | val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; |
| 140 | if (!update_fp_enabled(vcpu)) { |
| 141 | val |= CPTR_EL2_TFP; |
| 142 | __activate_traps_fpsimd32(vcpu); |
| 143 | } |
| 144 | |
| 145 | write_sysreg(val, cptr_el2); |
| 146 | } |
| 147 | |
| 148 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) |
| 149 | { |
| 150 | u64 hcr = vcpu->arch.hcr_el2; |
| 151 | |
| 152 | if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) |
| 153 | hcr |= HCR_TVM; |
| 154 | |
| 155 | write_sysreg(hcr, hcr_el2); |
| 156 | |
| 157 | if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) |
| 158 | write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); |
| 159 | |
| 160 | if (has_vhe()) |
| 161 | activate_traps_vhe(vcpu); |
| 162 | else |
| 163 | __activate_traps_nvhe(vcpu); |
| 164 | } |
| 165 | |
| 166 | static void deactivate_traps_vhe(void) |
| 167 | { |
| 168 | const char *host_vectors = vectors; |
| 169 | write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); |
| 170 | |
| 171 | /* |
| 172 | * ARM erratum 1165522 requires the actual execution of the above |
| 173 | * before we can switch to the EL2/EL0 translation regime used by |
| 174 | * the host. |
| 175 | */ |
| 176 | asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); |
| 177 | |
| 178 | write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); |
| 179 | |
| 180 | if (!arm64_kernel_unmapped_at_el0()) |
| 181 | host_vectors = __this_cpu_read(this_cpu_vector); |
| 182 | write_sysreg(host_vectors, vbar_el1); |
| 183 | } |
| 184 | NOKPROBE_SYMBOL(deactivate_traps_vhe); |
| 185 | |
| 186 | static void __hyp_text __deactivate_traps_nvhe(void) |
| 187 | { |
| 188 | u64 mdcr_el2 = read_sysreg(mdcr_el2); |
| 189 | |
| 190 | __deactivate_traps_common(); |
| 191 | |
| 192 | mdcr_el2 &= MDCR_EL2_HPMN_MASK; |
| 193 | mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; |
| 194 | |
| 195 | write_sysreg(mdcr_el2, mdcr_el2); |
| 196 | write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); |
| 197 | write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); |
| 198 | } |
| 199 | |
| 200 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) |
| 201 | { |
| 202 | /* |
| 203 | * If we pended a virtual abort, preserve it until it gets |
| 204 | * cleared. See D1.14.3 (Virtual Interrupts) for details, but |
| 205 | * the crucial bit is "On taking a vSError interrupt, |
| 206 | * HCR_EL2.VSE is cleared to 0." |
| 207 | */ |
| 208 | if (vcpu->arch.hcr_el2 & HCR_VSE) { |
| 209 | vcpu->arch.hcr_el2 &= ~HCR_VSE; |
| 210 | vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; |
| 211 | } |
| 212 | |
| 213 | if (has_vhe()) |
| 214 | deactivate_traps_vhe(); |
| 215 | else |
| 216 | __deactivate_traps_nvhe(); |
| 217 | } |
| 218 | |
/*
 * Non-static entry point that applies the common trap configuration
 * (__activate_traps_common()) for @vcpu. Judging by the name this is the
 * VHE vcpu-load hook; the caller is not visible from this file.
 */
void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
{
	__activate_traps_common(vcpu);
}
| 223 | |
| 224 | void deactivate_traps_vhe_put(void) |
| 225 | { |
| 226 | u64 mdcr_el2 = read_sysreg(mdcr_el2); |
| 227 | |
| 228 | mdcr_el2 &= MDCR_EL2_HPMN_MASK | |
| 229 | MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | |
| 230 | MDCR_EL2_TPMS; |
| 231 | |
| 232 | write_sysreg(mdcr_el2, mdcr_el2); |
| 233 | |
| 234 | __deactivate_traps_common(); |
| 235 | } |
| 236 | |
/*
 * Load the stage-2 translation context of @kvm via __load_guest_stage2().
 * Callers in this file invoke it before __activate_traps().
 */
static void __hyp_text __activate_vm(struct kvm *kvm)
{
	__load_guest_stage2(kvm);
}
| 241 | |
/*
 * Zero VTTBR_EL2 on the way back to the host. @vcpu is unused here.
 * __hyp_call_panic_nvhe() treats a non-zero VTTBR_EL2 as "guest state is
 * still loaded".
 */
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
{
	write_sysreg(0, vttbr_el2);
}
| 246 | |
| 247 | /* Save VGICv3 state on non-VHE systems */ |
| 248 | static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) |
| 249 | { |
| 250 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 251 | __vgic_v3_save_state(vcpu); |
| 252 | __vgic_v3_deactivate_traps(vcpu); |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | /* Restore VGICv3 state on non_VEH systems */ |
| 257 | static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) |
| 258 | { |
| 259 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 260 | __vgic_v3_activate_traps(vcpu); |
| 261 | __vgic_v3_restore_state(vcpu); |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) |
| 266 | { |
| 267 | u64 par, tmp; |
| 268 | |
| 269 | /* |
| 270 | * Resolve the IPA the hard way using the guest VA. |
| 271 | * |
| 272 | * Stage-1 translation already validated the memory access |
| 273 | * rights. As such, we can use the EL1 translation regime, and |
| 274 | * don't have to distinguish between EL0 and EL1 access. |
| 275 | * |
| 276 | * We do need to save/restore PAR_EL1 though, as we haven't |
| 277 | * saved the guest context yet, and we may return early... |
| 278 | */ |
| 279 | par = read_sysreg(par_el1); |
| 280 | if (!__kvm_at("s1e1r", far)) |
| 281 | tmp = read_sysreg(par_el1); |
| 282 | else |
| 283 | tmp = SYS_PAR_EL1_F; /* back to the guest */ |
| 284 | write_sysreg(par, par_el1); |
| 285 | |
| 286 | if (unlikely(tmp & SYS_PAR_EL1_F)) |
| 287 | return false; /* Translation failed, back to guest */ |
| 288 | |
| 289 | /* Convert PAR to HPFAR format */ |
| 290 | *hpfar = PAR_TO_HPFAR(tmp); |
| 291 | return true; |
| 292 | } |
| 293 | |
| 294 | static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) |
| 295 | { |
| 296 | u8 ec; |
| 297 | u64 esr; |
| 298 | u64 hpfar, far; |
| 299 | |
| 300 | esr = vcpu->arch.fault.esr_el2; |
| 301 | ec = ESR_ELx_EC(esr); |
| 302 | |
| 303 | if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) |
| 304 | return true; |
| 305 | |
| 306 | far = read_sysreg_el2(SYS_FAR); |
| 307 | |
| 308 | /* |
| 309 | * The HPFAR can be invalid if the stage 2 fault did not |
| 310 | * happen during a stage 1 page table walk (the ESR_EL2.S1PTW |
| 311 | * bit is clear) and one of the two following cases are true: |
| 312 | * 1. The fault was due to a permission fault |
| 313 | * 2. The processor carries errata 834220 |
| 314 | * |
| 315 | * Therefore, for all non S1PTW faults where we either have a |
| 316 | * permission fault or the errata workaround is enabled, we |
| 317 | * resolve the IPA using the AT instruction. |
| 318 | */ |
| 319 | if (!(esr & ESR_ELx_S1PTW) && |
| 320 | (cpus_have_const_cap(ARM64_WORKAROUND_834220) || |
| 321 | (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { |
| 322 | if (!__translate_far_to_hpfar(far, &hpfar)) |
| 323 | return false; |
| 324 | } else { |
| 325 | hpfar = read_sysreg(hpfar_el2); |
| 326 | } |
| 327 | |
| 328 | vcpu->arch.fault.far_el2 = far; |
| 329 | vcpu->arch.fault.hpfar_el2 = hpfar; |
| 330 | return true; |
| 331 | } |
| 332 | |
| 333 | /* Check for an FPSIMD/SVE trap and handle as appropriate */ |
| 334 | static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) |
| 335 | { |
| 336 | bool vhe, sve_guest, sve_host; |
| 337 | u8 hsr_ec; |
| 338 | |
| 339 | if (!system_supports_fpsimd()) |
| 340 | return false; |
| 341 | |
| 342 | if (system_supports_sve()) { |
| 343 | sve_guest = vcpu_has_sve(vcpu); |
| 344 | sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; |
| 345 | vhe = true; |
| 346 | } else { |
| 347 | sve_guest = false; |
| 348 | sve_host = false; |
| 349 | vhe = has_vhe(); |
| 350 | } |
| 351 | |
| 352 | hsr_ec = kvm_vcpu_trap_get_class(vcpu); |
| 353 | if (hsr_ec != ESR_ELx_EC_FP_ASIMD && |
| 354 | hsr_ec != ESR_ELx_EC_SVE) |
| 355 | return false; |
| 356 | |
| 357 | /* Don't handle SVE traps for non-SVE vcpus here: */ |
| 358 | if (!sve_guest) |
| 359 | if (hsr_ec != ESR_ELx_EC_FP_ASIMD) |
| 360 | return false; |
| 361 | |
| 362 | /* Valid trap. Switch the context: */ |
| 363 | |
| 364 | if (vhe) { |
| 365 | u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; |
| 366 | |
| 367 | if (sve_guest) |
| 368 | reg |= CPACR_EL1_ZEN; |
| 369 | |
| 370 | write_sysreg(reg, cpacr_el1); |
| 371 | } else { |
| 372 | write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, |
| 373 | cptr_el2); |
| 374 | } |
| 375 | |
| 376 | isb(); |
| 377 | |
| 378 | if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { |
| 379 | /* |
| 380 | * In the SVE case, VHE is assumed: it is enforced by |
| 381 | * Kconfig and kvm_arch_init(). |
| 382 | */ |
| 383 | if (sve_host) { |
| 384 | struct thread_struct *thread = container_of( |
| 385 | vcpu->arch.host_fpsimd_state, |
| 386 | struct thread_struct, uw.fpsimd_state); |
| 387 | |
| 388 | sve_save_state(sve_pffr(thread), |
| 389 | &vcpu->arch.host_fpsimd_state->fpsr); |
| 390 | } else { |
| 391 | __fpsimd_save_state(vcpu->arch.host_fpsimd_state); |
| 392 | } |
| 393 | |
| 394 | vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; |
| 395 | } |
| 396 | |
| 397 | if (sve_guest) { |
| 398 | sve_load_state(vcpu_sve_pffr(vcpu), |
| 399 | &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, |
| 400 | sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); |
| 401 | write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); |
| 402 | } else { |
| 403 | __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); |
| 404 | } |
| 405 | |
| 406 | /* Skip restoring fpexc32 for AArch64 guests */ |
| 407 | if (!(read_sysreg(hcr_el2) & HCR_RW)) |
| 408 | write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], |
| 409 | fpexc32_el2); |
| 410 | |
| 411 | vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; |
| 412 | |
| 413 | return true; |
| 414 | } |
| 415 | |
| 416 | static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) |
| 417 | { |
| 418 | u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); |
| 419 | int rt = kvm_vcpu_sys_get_rt(vcpu); |
| 420 | u64 val = vcpu_get_reg(vcpu, rt); |
| 421 | |
| 422 | /* |
| 423 | * The normal sysreg handling code expects to see the traps, |
| 424 | * let's not do anything here. |
| 425 | */ |
| 426 | if (vcpu->arch.hcr_el2 & HCR_TVM) |
| 427 | return false; |
| 428 | |
| 429 | switch (sysreg) { |
| 430 | case SYS_SCTLR_EL1: |
| 431 | write_sysreg_el1(val, SYS_SCTLR); |
| 432 | break; |
| 433 | case SYS_TTBR0_EL1: |
| 434 | write_sysreg_el1(val, SYS_TTBR0); |
| 435 | break; |
| 436 | case SYS_TTBR1_EL1: |
| 437 | write_sysreg_el1(val, SYS_TTBR1); |
| 438 | break; |
| 439 | case SYS_TCR_EL1: |
| 440 | write_sysreg_el1(val, SYS_TCR); |
| 441 | break; |
| 442 | case SYS_ESR_EL1: |
| 443 | write_sysreg_el1(val, SYS_ESR); |
| 444 | break; |
| 445 | case SYS_FAR_EL1: |
| 446 | write_sysreg_el1(val, SYS_FAR); |
| 447 | break; |
| 448 | case SYS_AFSR0_EL1: |
| 449 | write_sysreg_el1(val, SYS_AFSR0); |
| 450 | break; |
| 451 | case SYS_AFSR1_EL1: |
| 452 | write_sysreg_el1(val, SYS_AFSR1); |
| 453 | break; |
| 454 | case SYS_MAIR_EL1: |
| 455 | write_sysreg_el1(val, SYS_MAIR); |
| 456 | break; |
| 457 | case SYS_AMAIR_EL1: |
| 458 | write_sysreg_el1(val, SYS_AMAIR); |
| 459 | break; |
| 460 | case SYS_CONTEXTIDR_EL1: |
| 461 | write_sysreg_el1(val, SYS_CONTEXTIDR); |
| 462 | break; |
| 463 | default: |
| 464 | return false; |
| 465 | } |
| 466 | |
| 467 | __kvm_skip_instr(vcpu); |
| 468 | return true; |
| 469 | } |
| 470 | |
| 471 | /* |
| 472 | * Return true when we were able to fixup the guest exit and should return to |
| 473 | * the guest, false when we should restore the host state and return to the |
| 474 | * main run loop. |
| 475 | */ |
| 476 | static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) |
| 477 | { |
| 478 | if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) |
| 479 | vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); |
| 480 | |
| 481 | /* |
| 482 | * We're using the raw exception code in order to only process |
| 483 | * the trap if no SError is pending. We will come back to the |
| 484 | * same PC once the SError has been injected, and replay the |
| 485 | * trapping instruction. |
| 486 | */ |
| 487 | if (*exit_code != ARM_EXCEPTION_TRAP) |
| 488 | goto exit; |
| 489 | |
| 490 | if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && |
| 491 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && |
| 492 | handle_tx2_tvm(vcpu)) |
| 493 | return true; |
| 494 | |
| 495 | /* |
| 496 | * We trap the first access to the FP/SIMD to save the host context |
| 497 | * and restore the guest context lazily. |
| 498 | * If FP/SIMD is not implemented, handle the trap and inject an |
| 499 | * undefined instruction exception to the guest. |
| 500 | * Similarly for trapped SVE accesses. |
| 501 | */ |
| 502 | if (__hyp_handle_fpsimd(vcpu)) |
| 503 | return true; |
| 504 | |
| 505 | if (!__populate_fault_info(vcpu)) |
| 506 | return true; |
| 507 | |
| 508 | if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { |
| 509 | bool valid; |
| 510 | |
| 511 | valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && |
| 512 | kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && |
| 513 | kvm_vcpu_dabt_isvalid(vcpu) && |
| 514 | !kvm_vcpu_dabt_isextabt(vcpu) && |
| 515 | !kvm_vcpu_abt_iss1tw(vcpu); |
| 516 | |
| 517 | if (valid) { |
| 518 | int ret = __vgic_v2_perform_cpuif_access(vcpu); |
| 519 | |
| 520 | if (ret == 1) |
| 521 | return true; |
| 522 | |
| 523 | /* Promote an illegal access to an SError.*/ |
| 524 | if (ret == -1) |
| 525 | *exit_code = ARM_EXCEPTION_EL1_SERROR; |
| 526 | |
| 527 | goto exit; |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) && |
| 532 | (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || |
| 533 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { |
| 534 | int ret = __vgic_v3_perform_cpuif_access(vcpu); |
| 535 | |
| 536 | if (ret == 1) |
| 537 | return true; |
| 538 | } |
| 539 | |
| 540 | exit: |
| 541 | /* Return to the host kernel and handle the exit */ |
| 542 | return false; |
| 543 | } |
| 544 | |
| 545 | static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) |
| 546 | { |
| 547 | if (!cpus_have_const_cap(ARM64_SSBD)) |
| 548 | return false; |
| 549 | |
| 550 | return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); |
| 551 | } |
| 552 | |
| 553 | static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) |
| 554 | { |
| 555 | #ifdef CONFIG_ARM64_SSBD |
| 556 | /* |
| 557 | * The host runs with the workaround always present. If the |
| 558 | * guest wants it disabled, so be it... |
| 559 | */ |
| 560 | if (__needs_ssbd_off(vcpu) && |
| 561 | __hyp_this_cpu_read(arm64_ssbd_callback_required)) |
| 562 | arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); |
| 563 | #endif |
| 564 | } |
| 565 | |
| 566 | static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) |
| 567 | { |
| 568 | #ifdef CONFIG_ARM64_SSBD |
| 569 | /* |
| 570 | * If the guest has disabled the workaround, bring it back on. |
| 571 | */ |
| 572 | if (__needs_ssbd_off(vcpu) && |
| 573 | __hyp_this_cpu_read(arm64_ssbd_callback_required)) |
| 574 | arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); |
| 575 | #endif |
| 576 | } |
| 577 | |
| 578 | /** |
| 579 | * Disable host events, enable guest events |
| 580 | */ |
| 581 | static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) |
| 582 | { |
| 583 | struct kvm_host_data *host; |
| 584 | struct kvm_pmu_events *pmu; |
| 585 | |
| 586 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); |
| 587 | pmu = &host->pmu_events; |
| 588 | |
| 589 | if (pmu->events_host) |
| 590 | write_sysreg(pmu->events_host, pmcntenclr_el0); |
| 591 | |
| 592 | if (pmu->events_guest) |
| 593 | write_sysreg(pmu->events_guest, pmcntenset_el0); |
| 594 | |
| 595 | return (pmu->events_host || pmu->events_guest); |
| 596 | } |
| 597 | |
| 598 | /** |
| 599 | * Disable guest events, enable host events |
| 600 | */ |
| 601 | static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) |
| 602 | { |
| 603 | struct kvm_host_data *host; |
| 604 | struct kvm_pmu_events *pmu; |
| 605 | |
| 606 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); |
| 607 | pmu = &host->pmu_events; |
| 608 | |
| 609 | if (pmu->events_guest) |
| 610 | write_sysreg(pmu->events_guest, pmcntenclr_el0); |
| 611 | |
| 612 | if (pmu->events_host) |
| 613 | write_sysreg(pmu->events_host, pmcntenset_el0); |
| 614 | } |
| 615 | |
| 616 | /* Switch to the guest for VHE systems running in EL2 */ |
| 617 | int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) |
| 618 | { |
| 619 | struct kvm_cpu_context *host_ctxt; |
| 620 | struct kvm_cpu_context *guest_ctxt; |
| 621 | u64 exit_code; |
| 622 | |
| 623 | host_ctxt = vcpu->arch.host_cpu_context; |
| 624 | host_ctxt->__hyp_running_vcpu = vcpu; |
| 625 | guest_ctxt = &vcpu->arch.ctxt; |
| 626 | |
| 627 | sysreg_save_host_state_vhe(host_ctxt); |
| 628 | |
| 629 | /* |
| 630 | * ARM erratum 1165522 requires us to configure both stage 1 and |
| 631 | * stage 2 translation for the guest context before we clear |
| 632 | * HCR_EL2.TGE. |
| 633 | * |
| 634 | * We have already configured the guest's stage 1 translation in |
| 635 | * kvm_vcpu_load_sysregs above. We must now call __activate_vm |
| 636 | * before __activate_traps, because __activate_vm configures |
| 637 | * stage 2 translation, and __activate_traps clear HCR_EL2.TGE |
| 638 | * (among other things). |
| 639 | */ |
| 640 | __activate_vm(vcpu->kvm); |
| 641 | __activate_traps(vcpu); |
| 642 | |
| 643 | sysreg_restore_guest_state_vhe(guest_ctxt); |
| 644 | __debug_switch_to_guest(vcpu); |
| 645 | |
| 646 | __set_guest_arch_workaround_state(vcpu); |
| 647 | |
| 648 | do { |
| 649 | /* Jump in the fire! */ |
| 650 | exit_code = __guest_enter(vcpu, host_ctxt); |
| 651 | |
| 652 | /* And we're baaack! */ |
| 653 | } while (fixup_guest_exit(vcpu, &exit_code)); |
| 654 | |
| 655 | __set_host_arch_workaround_state(vcpu); |
| 656 | |
| 657 | sysreg_save_guest_state_vhe(guest_ctxt); |
| 658 | |
| 659 | __deactivate_traps(vcpu); |
| 660 | |
| 661 | sysreg_restore_host_state_vhe(host_ctxt); |
| 662 | |
| 663 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) |
| 664 | __fpsimd_save_fpexc32(vcpu); |
| 665 | |
| 666 | __debug_switch_to_host(vcpu); |
| 667 | |
| 668 | return exit_code; |
| 669 | } |
| 670 | NOKPROBE_SYMBOL(kvm_vcpu_run_vhe); |
| 671 | |
| 672 | /* Switch to the guest for legacy non-VHE systems */ |
| 673 | int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) |
| 674 | { |
| 675 | struct kvm_cpu_context *host_ctxt; |
| 676 | struct kvm_cpu_context *guest_ctxt; |
| 677 | bool pmu_switch_needed; |
| 678 | u64 exit_code; |
| 679 | |
| 680 | /* |
| 681 | * Having IRQs masked via PMR when entering the guest means the GIC |
| 682 | * will not signal the CPU of interrupts of lower priority, and the |
| 683 | * only way to get out will be via guest exceptions. |
| 684 | * Naturally, we want to avoid this. |
| 685 | */ |
| 686 | if (system_uses_irq_prio_masking()) { |
| 687 | gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); |
| 688 | dsb(sy); |
| 689 | } |
| 690 | |
| 691 | vcpu = kern_hyp_va(vcpu); |
| 692 | |
| 693 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); |
| 694 | host_ctxt->__hyp_running_vcpu = vcpu; |
| 695 | guest_ctxt = &vcpu->arch.ctxt; |
| 696 | |
| 697 | pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); |
| 698 | |
| 699 | __sysreg_save_state_nvhe(host_ctxt); |
| 700 | |
| 701 | /* |
| 702 | * We must flush and disable the SPE buffer for nVHE, as |
| 703 | * the translation regime(EL1&0) is going to be loaded with |
| 704 | * that of the guest. And we must do this before we change the |
| 705 | * translation regime to EL2 (via MDCR_EL2_EPB == 0) and |
| 706 | * before we load guest Stage1. |
| 707 | */ |
| 708 | __debug_save_host_buffers_nvhe(vcpu); |
| 709 | |
| 710 | __activate_vm(kern_hyp_va(vcpu->kvm)); |
| 711 | __activate_traps(vcpu); |
| 712 | |
| 713 | __hyp_vgic_restore_state(vcpu); |
| 714 | __timer_enable_traps(vcpu); |
| 715 | |
| 716 | /* |
| 717 | * We must restore the 32-bit state before the sysregs, thanks |
| 718 | * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). |
| 719 | */ |
| 720 | __sysreg32_restore_state(vcpu); |
| 721 | __sysreg_restore_state_nvhe(guest_ctxt); |
| 722 | __debug_switch_to_guest(vcpu); |
| 723 | |
| 724 | __set_guest_arch_workaround_state(vcpu); |
| 725 | |
| 726 | do { |
| 727 | /* Jump in the fire! */ |
| 728 | exit_code = __guest_enter(vcpu, host_ctxt); |
| 729 | |
| 730 | /* And we're baaack! */ |
| 731 | } while (fixup_guest_exit(vcpu, &exit_code)); |
| 732 | |
| 733 | __set_host_arch_workaround_state(vcpu); |
| 734 | |
| 735 | __sysreg_save_state_nvhe(guest_ctxt); |
| 736 | __sysreg32_save_state(vcpu); |
| 737 | __timer_disable_traps(vcpu); |
| 738 | __hyp_vgic_save_state(vcpu); |
| 739 | |
| 740 | __deactivate_traps(vcpu); |
| 741 | __deactivate_vm(vcpu); |
| 742 | |
| 743 | __sysreg_restore_state_nvhe(host_ctxt); |
| 744 | |
| 745 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) |
| 746 | __fpsimd_save_fpexc32(vcpu); |
| 747 | |
| 748 | __debug_switch_to_host(vcpu); |
| 749 | |
| 750 | /* |
| 751 | * This must come after restoring the host sysregs, since a non-VHE |
| 752 | * system may enable SPE here and make use of the TTBRs. |
| 753 | */ |
| 754 | __debug_restore_host_buffers_nvhe(vcpu); |
| 755 | |
| 756 | if (pmu_switch_needed) |
| 757 | __pmu_switch_to_host(host_ctxt); |
| 758 | |
| 759 | /* Returning to host will clear PSR.I, remask PMR if needed */ |
| 760 | if (system_uses_irq_prio_masking()) |
| 761 | gic_write_pmr(GIC_PRIO_IRQOFF); |
| 762 | |
| 763 | return exit_code; |
| 764 | } |
| 765 | |
/*
 * printf-style format used by both hyp panic paths. It consumes seven
 * arguments, in order: SPSR, ELR, ESR, FAR, HPFAR, PAR and the vcpu pointer.
 */
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
| 767 | |
| 768 | static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, |
| 769 | struct kvm_cpu_context *__host_ctxt) |
| 770 | { |
| 771 | struct kvm_vcpu *vcpu; |
| 772 | unsigned long str_va; |
| 773 | |
| 774 | vcpu = __host_ctxt->__hyp_running_vcpu; |
| 775 | |
| 776 | if (read_sysreg(vttbr_el2)) { |
| 777 | __timer_disable_traps(vcpu); |
| 778 | __deactivate_traps(vcpu); |
| 779 | __deactivate_vm(vcpu); |
| 780 | __sysreg_restore_state_nvhe(__host_ctxt); |
| 781 | } |
| 782 | |
| 783 | /* |
| 784 | * Force the panic string to be loaded from the literal pool, |
| 785 | * making sure it is a kernel address and not a PC-relative |
| 786 | * reference. |
| 787 | */ |
| 788 | asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); |
| 789 | |
| 790 | __hyp_do_panic(str_va, |
| 791 | spsr, elr, |
| 792 | read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), |
| 793 | read_sysreg(hpfar_el2), par, vcpu); |
| 794 | } |
| 795 | |
| 796 | static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, |
| 797 | struct kvm_cpu_context *host_ctxt) |
| 798 | { |
| 799 | struct kvm_vcpu *vcpu; |
| 800 | vcpu = host_ctxt->__hyp_running_vcpu; |
| 801 | |
| 802 | __deactivate_traps(vcpu); |
| 803 | sysreg_restore_host_state_vhe(host_ctxt); |
| 804 | |
| 805 | panic(__hyp_panic_string, |
| 806 | spsr, elr, |
| 807 | read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), |
| 808 | read_sysreg(hpfar_el2), par, vcpu); |
| 809 | } |
| 810 | NOKPROBE_SYMBOL(__hyp_call_panic_vhe); |
| 811 | |
| 812 | void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) |
| 813 | { |
| 814 | u64 spsr = read_sysreg_el2(SYS_SPSR); |
| 815 | u64 elr = read_sysreg_el2(SYS_ELR); |
| 816 | u64 par = read_sysreg(par_el1); |
| 817 | |
| 818 | if (!has_vhe()) |
| 819 | __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); |
| 820 | else |
| 821 | __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); |
| 822 | |
| 823 | unreachable(); |
| 824 | } |
| 825 | |
| 826 | asmlinkage void __hyp_text kvm_unexpected_el2_exception(void) |
| 827 | { |
| 828 | unsigned long addr, fixup; |
| 829 | struct kvm_cpu_context *host_ctxt; |
| 830 | struct exception_table_entry *entry, *end; |
| 831 | unsigned long elr_el2 = read_sysreg(elr_el2); |
| 832 | |
| 833 | entry = hyp_symbol_addr(__start___kvm_ex_table); |
| 834 | end = hyp_symbol_addr(__stop___kvm_ex_table); |
| 835 | host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; |
| 836 | |
| 837 | while (entry < end) { |
| 838 | addr = (unsigned long)&entry->insn + entry->insn; |
| 839 | fixup = (unsigned long)&entry->fixup + entry->fixup; |
| 840 | |
| 841 | if (addr != elr_el2) { |
| 842 | entry++; |
| 843 | continue; |
| 844 | } |
| 845 | |
| 846 | write_sysreg(fixup, elr_el2); |
| 847 | return; |
| 848 | } |
| 849 | |
| 850 | hyp_panic(host_ctxt); |
| 851 | } |