| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 | 
|  | 2 | /* | 
|  | 3 | * kvm nested virtualization support for s390x | 
|  | 4 | * | 
|  | 5 | * Copyright IBM Corp. 2016, 2018 | 
|  | 6 | * | 
|  | 7 | *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> | 
|  | 8 | */ | 
|  | 9 | #include <linux/vmalloc.h> | 
|  | 10 | #include <linux/kvm_host.h> | 
|  | 11 | #include <linux/bug.h> | 
|  | 12 | #include <linux/list.h> | 
|  | 13 | #include <linux/bitmap.h> | 
|  | 14 | #include <linux/sched/signal.h> | 
|  | 15 |  | 
|  | 16 | #include <asm/gmap.h> | 
|  | 17 | #include <asm/mmu_context.h> | 
|  | 18 | #include <asm/sclp.h> | 
|  | 19 | #include <asm/nmi.h> | 
|  | 20 | #include <asm/dis.h> | 
|  | 21 | #include "kvm-s390.h" | 
|  | 22 | #include "gaccess.h" | 
|  | 23 |  | 
|  | 24 | struct vsie_page { | 
|  | 25 | struct kvm_s390_sie_block scb_s;	/* 0x0000 */ | 
|  | 26 | /* | 
|  | 27 | * the backup info for machine check. ensure it's at | 
|  | 28 | * the same offset as that in struct sie_page! | 
|  | 29 | */ | 
|  | 30 | struct mcck_volatile_info mcck_info;    /* 0x0200 */ | 
|  | 31 | /* | 
|  | 32 | * The pinned original scb. Be aware that other VCPUs can modify | 
|  | 33 | * it while we read from it. Values that are used for conditions or | 
|  | 34 | * are reused conditionally, should be accessed via READ_ONCE. | 
|  | 35 | */ | 
|  | 36 | struct kvm_s390_sie_block *scb_o;	/* 0x0218 */ | 
|  | 37 | /* the shadow gmap in use by the vsie_page */ | 
|  | 38 | struct gmap *gmap;			/* 0x0220 */ | 
|  | 39 | /* address of the last reported fault to guest2 */ | 
|  | 40 | unsigned long fault_addr;		/* 0x0228 */ | 
|  | 41 | /* calculated guest addresses of satellite control blocks */ | 
|  | 42 | gpa_t sca_gpa;				/* 0x0230 */ | 
|  | 43 | gpa_t itdba_gpa;			/* 0x0238 */ | 
|  | 44 | gpa_t gvrd_gpa;				/* 0x0240 */ | 
|  | 45 | gpa_t riccbd_gpa;			/* 0x0248 */ | 
|  | 46 | gpa_t sdnx_gpa;				/* 0x0250 */ | 
|  | 47 | __u8 reserved[0x0700 - 0x0258];		/* 0x0258 */ | 
|  | 48 | struct kvm_s390_crypto_cb crycb;	/* 0x0700 */ | 
|  | 49 | __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */ | 
|  | 50 | }; | 
|  | 51 |  | 
|  | 52 | /* trigger a validity icpt for the given scb */ | 
|  | 53 | static int set_validity_icpt(struct kvm_s390_sie_block *scb, | 
|  | 54 | __u16 reason_code) | 
|  | 55 | { | 
|  | 56 | scb->ipa = 0x1000; | 
|  | 57 | scb->ipb = ((__u32) reason_code) << 16; | 
|  | 58 | scb->icptcode = ICPT_VALIDITY; | 
|  | 59 | return 1; | 
|  | 60 | } | 
|  | 61 |  | 
|  | 62 | /* mark the prefix as unmapped, this will block the VSIE */ | 
|  | 63 | static void prefix_unmapped(struct vsie_page *vsie_page) | 
|  | 64 | { | 
|  | 65 | atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20); | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | /* mark the prefix as unmapped and wait until the VSIE has been left */ | 
|  | 69 | static void prefix_unmapped_sync(struct vsie_page *vsie_page) | 
|  | 70 | { | 
|  | 71 | prefix_unmapped(vsie_page); | 
|  | 72 | if (vsie_page->scb_s.prog0c & PROG_IN_SIE) | 
|  | 73 | atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags); | 
|  | 74 | while (vsie_page->scb_s.prog0c & PROG_IN_SIE) | 
|  | 75 | cpu_relax(); | 
|  | 76 | } | 
|  | 77 |  | 
|  | 78 | /* mark the prefix as mapped, this will allow the VSIE to run */ | 
|  | 79 | static void prefix_mapped(struct vsie_page *vsie_page) | 
|  | 80 | { | 
|  | 81 | atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20); | 
|  | 82 | } | 
|  | 83 |  | 
|  | 84 | /* test if the prefix is mapped into the gmap shadow */ | 
|  | 85 | static int prefix_is_mapped(struct vsie_page *vsie_page) | 
|  | 86 | { | 
|  | 87 | return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST); | 
|  | 88 | } | 
|  | 89 |  | 
|  | 90 | /* copy the updated intervention request bits into the shadow scb */ | 
|  | 91 | static void update_intervention_requests(struct vsie_page *vsie_page) | 
|  | 92 | { | 
|  | 93 | const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT; | 
|  | 94 | int cpuflags; | 
|  | 95 |  | 
|  | 96 | cpuflags = atomic_read(&vsie_page->scb_o->cpuflags); | 
|  | 97 | atomic_andnot(bits, &vsie_page->scb_s.cpuflags); | 
|  | 98 | atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags); | 
|  | 99 | } | 
|  | 100 |  | 
|  | 101 | /* shadow (filter and validate) the cpuflags  */ | 
|  | 102 | static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 103 | { | 
|  | 104 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 105 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 106 | int newflags, cpuflags = atomic_read(&scb_o->cpuflags); | 
|  | 107 |  | 
|  | 108 | /* we don't allow ESA/390 guests */ | 
|  | 109 | if (!(cpuflags & CPUSTAT_ZARCH)) | 
|  | 110 | return set_validity_icpt(scb_s, 0x0001U); | 
|  | 111 |  | 
|  | 112 | if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS)) | 
|  | 113 | return set_validity_icpt(scb_s, 0x0001U); | 
|  | 114 | else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR)) | 
|  | 115 | return set_validity_icpt(scb_s, 0x0007U); | 
|  | 116 |  | 
|  | 117 | /* intervention requests will be set later */ | 
|  | 118 | newflags = CPUSTAT_ZARCH; | 
|  | 119 | if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8)) | 
|  | 120 | newflags |= CPUSTAT_GED; | 
|  | 121 | if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) { | 
|  | 122 | if (cpuflags & CPUSTAT_GED) | 
|  | 123 | return set_validity_icpt(scb_s, 0x0001U); | 
|  | 124 | newflags |= CPUSTAT_GED2; | 
|  | 125 | } | 
|  | 126 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE)) | 
|  | 127 | newflags |= cpuflags & CPUSTAT_P; | 
|  | 128 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS)) | 
|  | 129 | newflags |= cpuflags & CPUSTAT_SM; | 
|  | 130 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) | 
|  | 131 | newflags |= cpuflags & CPUSTAT_IBS; | 
|  | 132 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS)) | 
|  | 133 | newflags |= cpuflags & CPUSTAT_KSS; | 
|  | 134 |  | 
|  | 135 | atomic_set(&scb_s->cpuflags, newflags); | 
|  | 136 | return 0; | 
|  | 137 | } | 
|  | 138 |  | 
|  | 139 | /* | 
|  | 140 | * Create a shadow copy of the crycb block and setup key wrapping, if | 
|  | 141 | * requested for guest 3 and enabled for guest 2. | 
|  | 142 | * | 
|  | 143 | * We only accept format-1 (no AP in g2), but convert it into format-2 | 
|  | 144 | * There is nothing to do for format-0. | 
|  | 145 | * | 
|  | 146 | * Returns: - 0 if shadowed or nothing to do | 
|  | 147 | *          - > 0 if control has to be given to guest 2 | 
|  | 148 | */ | 
|  | 149 | static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 150 | { | 
|  | 151 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 152 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 153 | const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd); | 
|  | 154 | const u32 crycb_addr = crycbd_o & 0x7ffffff8U; | 
|  | 155 | unsigned long *b1, *b2; | 
|  | 156 | u8 ecb3_flags; | 
|  | 157 |  | 
|  | 158 | scb_s->crycbd = 0; | 
|  | 159 | if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1)) | 
|  | 160 | return 0; | 
|  | 161 | /* format-1 is supported with message-security-assist extension 3 */ | 
|  | 162 | if (!test_kvm_facility(vcpu->kvm, 76)) | 
|  | 163 | return 0; | 
|  | 164 | /* we may only allow it if enabled for guest 2 */ | 
|  | 165 | ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 & | 
|  | 166 | (ECB3_AES | ECB3_DEA); | 
|  | 167 | if (!ecb3_flags) | 
|  | 168 | return 0; | 
|  | 169 |  | 
|  | 170 | if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK)) | 
|  | 171 | return set_validity_icpt(scb_s, 0x003CU); | 
|  | 172 | else if (!crycb_addr) | 
|  | 173 | return set_validity_icpt(scb_s, 0x0039U); | 
|  | 174 |  | 
|  | 175 | /* copy only the wrapping keys */ | 
|  | 176 | if (read_guest_real(vcpu, crycb_addr + 72, | 
|  | 177 | vsie_page->crycb.dea_wrapping_key_mask, 56)) | 
|  | 178 | return set_validity_icpt(scb_s, 0x0035U); | 
|  | 179 |  | 
|  | 180 | scb_s->ecb3 |= ecb3_flags; | 
|  | 181 | scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 | | 
|  | 182 | CRYCB_FORMAT2; | 
|  | 183 |  | 
|  | 184 | /* xor both blocks in one run */ | 
|  | 185 | b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask; | 
|  | 186 | b2 = (unsigned long *) | 
|  | 187 | vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask; | 
|  | 188 | /* as 56%8 == 0, bitmap_xor won't overwrite any data */ | 
|  | 189 | bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56); | 
|  | 190 | return 0; | 
|  | 191 | } | 
|  | 192 |  | 
|  | 193 | /* shadow (round up/down) the ibc to avoid validity icpt */ | 
|  | 194 | static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 195 | { | 
|  | 196 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 197 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 198 | /* READ_ONCE does not work on bitfields - use a temporary variable */ | 
|  | 199 | const uint32_t __new_ibc = scb_o->ibc; | 
|  | 200 | const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU; | 
|  | 201 | __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU; | 
|  | 202 |  | 
|  | 203 | scb_s->ibc = 0; | 
|  | 204 | /* ibc installed in g2 and requested for g3 */ | 
|  | 205 | if (vcpu->kvm->arch.model.ibc && new_ibc) { | 
|  | 206 | scb_s->ibc = new_ibc; | 
|  | 207 | /* takte care of the minimum ibc level of the machine */ | 
|  | 208 | if (scb_s->ibc < min_ibc) | 
|  | 209 | scb_s->ibc = min_ibc; | 
|  | 210 | /* take care of the maximum ibc level set for the guest */ | 
|  | 211 | if (scb_s->ibc > vcpu->kvm->arch.model.ibc) | 
|  | 212 | scb_s->ibc = vcpu->kvm->arch.model.ibc; | 
|  | 213 | } | 
|  | 214 | } | 
|  | 215 |  | 
|  | 216 | /* unshadow the scb, copying parameters back to the real scb */ | 
|  | 217 | static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 218 | { | 
|  | 219 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 220 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 221 |  | 
|  | 222 | /* interception */ | 
|  | 223 | scb_o->icptcode = scb_s->icptcode; | 
|  | 224 | scb_o->icptstatus = scb_s->icptstatus; | 
|  | 225 | scb_o->ipa = scb_s->ipa; | 
|  | 226 | scb_o->ipb = scb_s->ipb; | 
|  | 227 | scb_o->gbea = scb_s->gbea; | 
|  | 228 |  | 
|  | 229 | /* timer */ | 
|  | 230 | scb_o->cputm = scb_s->cputm; | 
|  | 231 | scb_o->ckc = scb_s->ckc; | 
|  | 232 | scb_o->todpr = scb_s->todpr; | 
|  | 233 |  | 
|  | 234 | /* guest state */ | 
|  | 235 | scb_o->gpsw = scb_s->gpsw; | 
|  | 236 | scb_o->gg14 = scb_s->gg14; | 
|  | 237 | scb_o->gg15 = scb_s->gg15; | 
|  | 238 | memcpy(scb_o->gcr, scb_s->gcr, 128); | 
|  | 239 | scb_o->pp = scb_s->pp; | 
|  | 240 |  | 
|  | 241 | /* branch prediction */ | 
|  | 242 | if (test_kvm_facility(vcpu->kvm, 82)) { | 
|  | 243 | scb_o->fpf &= ~FPF_BPBC; | 
|  | 244 | scb_o->fpf |= scb_s->fpf & FPF_BPBC; | 
|  | 245 | } | 
|  | 246 |  | 
|  | 247 | /* interrupt intercept */ | 
|  | 248 | switch (scb_s->icptcode) { | 
|  | 249 | case ICPT_PROGI: | 
|  | 250 | case ICPT_INSTPROGI: | 
|  | 251 | case ICPT_EXTINT: | 
|  | 252 | memcpy((void *)((u64)scb_o + 0xc0), | 
|  | 253 | (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); | 
|  | 254 | break; | 
|  | 255 | case ICPT_PARTEXEC: | 
|  | 256 | /* MVPG only */ | 
|  | 257 | memcpy((void *)((u64)scb_o + 0xc0), | 
|  | 258 | (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); | 
|  | 259 | break; | 
|  | 260 | } | 
|  | 261 |  | 
|  | 262 | if (scb_s->ihcpu != 0xffffU) | 
|  | 263 | scb_o->ihcpu = scb_s->ihcpu; | 
|  | 264 | } | 
|  | 265 |  | 
|  | 266 | /* | 
|  | 267 | * Setup the shadow scb by copying and checking the relevant parts of the g2 | 
|  | 268 | * provided scb. | 
|  | 269 | * | 
|  | 270 | * Returns: - 0 if the scb has been shadowed | 
|  | 271 | *          - > 0 if control has to be given to guest 2 | 
|  | 272 | */ | 
|  | 273 | static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 274 | { | 
|  | 275 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 276 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 277 | /* READ_ONCE does not work on bitfields - use a temporary variable */ | 
|  | 278 | const uint32_t __new_prefix = scb_o->prefix; | 
|  | 279 | const uint32_t new_prefix = READ_ONCE(__new_prefix); | 
|  | 280 | const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE; | 
|  | 281 | bool had_tx = scb_s->ecb & ECB_TE; | 
|  | 282 | unsigned long new_mso = 0; | 
|  | 283 | int rc; | 
|  | 284 |  | 
|  | 285 | /* make sure we don't have any leftovers when reusing the scb */ | 
|  | 286 | scb_s->icptcode = 0; | 
|  | 287 | scb_s->eca = 0; | 
|  | 288 | scb_s->ecb = 0; | 
|  | 289 | scb_s->ecb2 = 0; | 
|  | 290 | scb_s->ecb3 = 0; | 
|  | 291 | scb_s->ecd = 0; | 
|  | 292 | scb_s->fac = 0; | 
|  | 293 | scb_s->fpf = 0; | 
|  | 294 |  | 
|  | 295 | rc = prepare_cpuflags(vcpu, vsie_page); | 
|  | 296 | if (rc) | 
|  | 297 | goto out; | 
|  | 298 |  | 
|  | 299 | /* timer */ | 
|  | 300 | scb_s->cputm = scb_o->cputm; | 
|  | 301 | scb_s->ckc = scb_o->ckc; | 
|  | 302 | scb_s->todpr = scb_o->todpr; | 
|  | 303 | scb_s->epoch = scb_o->epoch; | 
|  | 304 |  | 
|  | 305 | /* guest state */ | 
|  | 306 | scb_s->gpsw = scb_o->gpsw; | 
|  | 307 | scb_s->gg14 = scb_o->gg14; | 
|  | 308 | scb_s->gg15 = scb_o->gg15; | 
|  | 309 | memcpy(scb_s->gcr, scb_o->gcr, 128); | 
|  | 310 | scb_s->pp = scb_o->pp; | 
|  | 311 |  | 
|  | 312 | /* interception / execution handling */ | 
|  | 313 | scb_s->gbea = scb_o->gbea; | 
|  | 314 | scb_s->lctl = scb_o->lctl; | 
|  | 315 | scb_s->svcc = scb_o->svcc; | 
|  | 316 | scb_s->ictl = scb_o->ictl; | 
|  | 317 | /* | 
|  | 318 | * SKEY handling functions can't deal with false setting of PTE invalid | 
|  | 319 | * bits. Therefore we cannot provide interpretation and would later | 
|  | 320 | * have to provide own emulation handlers. | 
|  | 321 | */ | 
|  | 322 | if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS)) | 
|  | 323 | scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; | 
|  | 324 |  | 
|  | 325 | scb_s->icpua = scb_o->icpua; | 
|  | 326 |  | 
|  | 327 | if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) | 
|  | 328 | new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL; | 
|  | 329 | /* if the hva of the prefix changes, we have to remap the prefix */ | 
|  | 330 | if (scb_s->mso != new_mso || scb_s->prefix != new_prefix) | 
|  | 331 | prefix_unmapped(vsie_page); | 
|  | 332 | /* SIE will do mso/msl validity and exception checks for us */ | 
|  | 333 | scb_s->msl = scb_o->msl & 0xfffffffffff00000UL; | 
|  | 334 | scb_s->mso = new_mso; | 
|  | 335 | scb_s->prefix = new_prefix; | 
|  | 336 |  | 
|  | 337 | /* We have to definetly flush the tlb if this scb never ran */ | 
|  | 338 | if (scb_s->ihcpu != 0xffffU) | 
|  | 339 | scb_s->ihcpu = scb_o->ihcpu; | 
|  | 340 |  | 
|  | 341 | /* MVPG and Protection Exception Interpretation are always available */ | 
|  | 342 | scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI); | 
|  | 343 | /* Host-protection-interruption introduced with ESOP */ | 
|  | 344 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) | 
|  | 345 | scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; | 
|  | 346 | /* transactional execution */ | 
|  | 347 | if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) { | 
|  | 348 | /* remap the prefix is tx is toggled on */ | 
|  | 349 | if (!had_tx) | 
|  | 350 | prefix_unmapped(vsie_page); | 
|  | 351 | scb_s->ecb |= ECB_TE; | 
|  | 352 | } | 
|  | 353 | /* branch prediction */ | 
|  | 354 | if (test_kvm_facility(vcpu->kvm, 82)) | 
|  | 355 | scb_s->fpf |= scb_o->fpf & FPF_BPBC; | 
|  | 356 | /* SIMD */ | 
|  | 357 | if (test_kvm_facility(vcpu->kvm, 129)) { | 
|  | 358 | scb_s->eca |= scb_o->eca & ECA_VX; | 
|  | 359 | scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; | 
|  | 360 | } | 
|  | 361 | /* Run-time-Instrumentation */ | 
|  | 362 | if (test_kvm_facility(vcpu->kvm, 64)) | 
|  | 363 | scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI; | 
|  | 364 | /* Instruction Execution Prevention */ | 
|  | 365 | if (test_kvm_facility(vcpu->kvm, 130)) | 
|  | 366 | scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP; | 
|  | 367 | /* Guarded Storage */ | 
|  | 368 | if (test_kvm_facility(vcpu->kvm, 133)) { | 
|  | 369 | scb_s->ecb |= scb_o->ecb & ECB_GS; | 
|  | 370 | scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; | 
|  | 371 | } | 
|  | 372 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) | 
|  | 373 | scb_s->eca |= scb_o->eca & ECA_SII; | 
|  | 374 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) | 
|  | 375 | scb_s->eca |= scb_o->eca & ECA_IB; | 
|  | 376 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) | 
|  | 377 | scb_s->eca |= scb_o->eca & ECA_CEI; | 
|  | 378 | /* Epoch Extension */ | 
|  | 379 | if (test_kvm_facility(vcpu->kvm, 139)) | 
|  | 380 | scb_s->ecd |= scb_o->ecd & ECD_MEF; | 
|  | 381 |  | 
|  | 382 | /* etoken */ | 
|  | 383 | if (test_kvm_facility(vcpu->kvm, 156)) | 
|  | 384 | scb_s->ecd |= scb_o->ecd & ECD_ETOKENF; | 
|  | 385 |  | 
|  | 386 | prepare_ibc(vcpu, vsie_page); | 
|  | 387 | rc = shadow_crycb(vcpu, vsie_page); | 
|  | 388 | out: | 
|  | 389 | if (rc) | 
|  | 390 | unshadow_scb(vcpu, vsie_page); | 
|  | 391 | return rc; | 
|  | 392 | } | 
|  | 393 |  | 
|  | 394 | void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, | 
|  | 395 | unsigned long end) | 
|  | 396 | { | 
|  | 397 | struct kvm *kvm = gmap->private; | 
|  | 398 | struct vsie_page *cur; | 
|  | 399 | unsigned long prefix; | 
|  | 400 | struct page *page; | 
|  | 401 | int i; | 
|  | 402 |  | 
|  | 403 | if (!gmap_is_shadow(gmap)) | 
|  | 404 | return; | 
|  | 405 | if (start >= 1UL << 31) | 
|  | 406 | /* We are only interested in prefix pages */ | 
|  | 407 | return; | 
|  | 408 |  | 
|  | 409 | /* | 
|  | 410 | * Only new shadow blocks are added to the list during runtime, | 
|  | 411 | * therefore we can safely reference them all the time. | 
|  | 412 | */ | 
|  | 413 | for (i = 0; i < kvm->arch.vsie.page_count; i++) { | 
|  | 414 | page = READ_ONCE(kvm->arch.vsie.pages[i]); | 
|  | 415 | if (!page) | 
|  | 416 | continue; | 
|  | 417 | cur = page_to_virt(page); | 
|  | 418 | if (READ_ONCE(cur->gmap) != gmap) | 
|  | 419 | continue; | 
|  | 420 | prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; | 
|  | 421 | /* with mso/msl, the prefix lies at an offset */ | 
|  | 422 | prefix += cur->scb_s.mso; | 
|  | 423 | if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1) | 
|  | 424 | prefix_unmapped_sync(cur); | 
|  | 425 | } | 
|  | 426 | } | 
|  | 427 |  | 
|  | 428 | /* | 
|  | 429 | * Map the first prefix page and if tx is enabled also the second prefix page. | 
|  | 430 | * | 
|  | 431 | * The prefix will be protected, a gmap notifier will inform about unmaps. | 
|  | 432 | * The shadow scb must not be executed until the prefix is remapped, this is | 
|  | 433 | * guaranteed by properly handling PROG_REQUEST. | 
|  | 434 | * | 
|  | 435 | * Returns: - 0 on if successfully mapped or already mapped | 
|  | 436 | *          - > 0 if control has to be given to guest 2 | 
|  | 437 | *          - -EAGAIN if the caller can retry immediately | 
|  | 438 | *          - -ENOMEM if out of memory | 
|  | 439 | */ | 
|  | 440 | static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 441 | { | 
|  | 442 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 443 | u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT; | 
|  | 444 | int rc; | 
|  | 445 |  | 
|  | 446 | if (prefix_is_mapped(vsie_page)) | 
|  | 447 | return 0; | 
|  | 448 |  | 
|  | 449 | /* mark it as mapped so we can catch any concurrent unmappers */ | 
|  | 450 | prefix_mapped(vsie_page); | 
|  | 451 |  | 
|  | 452 | /* with mso/msl, the prefix lies at offset *mso* */ | 
|  | 453 | prefix += scb_s->mso; | 
|  | 454 |  | 
|  | 455 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); | 
|  | 456 | if (!rc && (scb_s->ecb & ECB_TE)) | 
|  | 457 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, | 
|  | 458 | prefix + PAGE_SIZE); | 
|  | 459 | /* | 
|  | 460 | * We don't have to mprotect, we will be called for all unshadows. | 
|  | 461 | * SIE will detect if protection applies and trigger a validity. | 
|  | 462 | */ | 
|  | 463 | if (rc) | 
|  | 464 | prefix_unmapped(vsie_page); | 
|  | 465 | if (rc > 0 || rc == -EFAULT) | 
|  | 466 | rc = set_validity_icpt(scb_s, 0x0037U); | 
|  | 467 | return rc; | 
|  | 468 | } | 
|  | 469 |  | 
|  | 470 | /* | 
|  | 471 | * Pin the guest page given by gpa and set hpa to the pinned host address. | 
|  | 472 | * Will always be pinned writable. | 
|  | 473 | * | 
|  | 474 | * Returns: - 0 on success | 
|  | 475 | *          - -EINVAL if the gpa is not valid guest storage | 
|  | 476 | */ | 
|  | 477 | static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) | 
|  | 478 | { | 
|  | 479 | struct page *page; | 
|  | 480 |  | 
|  | 481 | page = gfn_to_page(kvm, gpa_to_gfn(gpa)); | 
|  | 482 | if (is_error_page(page)) | 
|  | 483 | return -EINVAL; | 
|  | 484 | *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); | 
|  | 485 | return 0; | 
|  | 486 | } | 
|  | 487 |  | 
|  | 488 | /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */ | 
|  | 489 | static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) | 
|  | 490 | { | 
|  | 491 | kvm_release_pfn_dirty(hpa >> PAGE_SHIFT); | 
|  | 492 | /* mark the page always as dirty for migration */ | 
|  | 493 | mark_page_dirty(kvm, gpa_to_gfn(gpa)); | 
|  | 494 | } | 
|  | 495 |  | 
|  | 496 | /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ | 
|  | 497 | static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 498 | { | 
|  | 499 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 500 | hpa_t hpa; | 
|  | 501 |  | 
|  | 502 | hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; | 
|  | 503 | if (hpa) { | 
|  | 504 | unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa); | 
|  | 505 | vsie_page->sca_gpa = 0; | 
|  | 506 | scb_s->scaol = 0; | 
|  | 507 | scb_s->scaoh = 0; | 
|  | 508 | } | 
|  | 509 |  | 
|  | 510 | hpa = scb_s->itdba; | 
|  | 511 | if (hpa) { | 
|  | 512 | unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa); | 
|  | 513 | vsie_page->itdba_gpa = 0; | 
|  | 514 | scb_s->itdba = 0; | 
|  | 515 | } | 
|  | 516 |  | 
|  | 517 | hpa = scb_s->gvrd; | 
|  | 518 | if (hpa) { | 
|  | 519 | unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa); | 
|  | 520 | vsie_page->gvrd_gpa = 0; | 
|  | 521 | scb_s->gvrd = 0; | 
|  | 522 | } | 
|  | 523 |  | 
|  | 524 | hpa = scb_s->riccbd; | 
|  | 525 | if (hpa) { | 
|  | 526 | unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa); | 
|  | 527 | vsie_page->riccbd_gpa = 0; | 
|  | 528 | scb_s->riccbd = 0; | 
|  | 529 | } | 
|  | 530 |  | 
|  | 531 | hpa = scb_s->sdnxo; | 
|  | 532 | if (hpa) { | 
|  | 533 | unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa); | 
|  | 534 | vsie_page->sdnx_gpa = 0; | 
|  | 535 | scb_s->sdnxo = 0; | 
|  | 536 | } | 
|  | 537 | } | 
|  | 538 |  | 
|  | 539 | /* | 
|  | 540 | * Instead of shadowing some blocks, we can simply forward them because the | 
|  | 541 | * addresses in the scb are 64 bit long. | 
|  | 542 | * | 
|  | 543 | * This works as long as the data lies in one page. If blocks ever exceed one | 
|  | 544 | * page, we have to fall back to shadowing. | 
|  | 545 | * | 
|  | 546 | * As we reuse the sca, the vcpu pointers contained in it are invalid. We must | 
|  | 547 | * therefore not enable any facilities that access these pointers (e.g. SIGPIF). | 
|  | 548 | * | 
|  | 549 | * Returns: - 0 if all blocks were pinned. | 
|  | 550 | *          - > 0 if control has to be given to guest 2 | 
|  | 551 | *          - -ENOMEM if out of memory | 
|  | 552 | */ | 
|  | 553 | static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 554 | { | 
|  | 555 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 556 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 557 | hpa_t hpa; | 
|  | 558 | gpa_t gpa; | 
|  | 559 | int rc = 0; | 
|  | 560 |  | 
|  | 561 | gpa = READ_ONCE(scb_o->scaol) & ~0xfUL; | 
|  | 562 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) | 
|  | 563 | gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32; | 
|  | 564 | if (gpa) { | 
|  | 565 | if (gpa < 2 * PAGE_SIZE) | 
|  | 566 | rc = set_validity_icpt(scb_s, 0x0038U); | 
|  | 567 | else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu)) | 
|  | 568 | rc = set_validity_icpt(scb_s, 0x0011U); | 
|  | 569 | else if ((gpa & PAGE_MASK) != | 
|  | 570 | ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK)) | 
|  | 571 | rc = set_validity_icpt(scb_s, 0x003bU); | 
|  | 572 | if (!rc) { | 
|  | 573 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 574 | if (rc) | 
|  | 575 | rc = set_validity_icpt(scb_s, 0x0034U); | 
|  | 576 | } | 
|  | 577 | if (rc) | 
|  | 578 | goto unpin; | 
|  | 579 | vsie_page->sca_gpa = gpa; | 
|  | 580 | scb_s->scaoh = (u32)((u64)hpa >> 32); | 
|  | 581 | scb_s->scaol = (u32)(u64)hpa; | 
|  | 582 | } | 
|  | 583 |  | 
|  | 584 | gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; | 
|  | 585 | if (gpa && (scb_s->ecb & ECB_TE)) { | 
|  | 586 | if (gpa < 2 * PAGE_SIZE) { | 
|  | 587 | rc = set_validity_icpt(scb_s, 0x0080U); | 
|  | 588 | goto unpin; | 
|  | 589 | } | 
|  | 590 | /* 256 bytes cannot cross page boundaries */ | 
|  | 591 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 592 | if (rc) { | 
|  | 593 | rc = set_validity_icpt(scb_s, 0x0080U); | 
|  | 594 | goto unpin; | 
|  | 595 | } | 
|  | 596 | vsie_page->itdba_gpa = gpa; | 
|  | 597 | scb_s->itdba = hpa; | 
|  | 598 | } | 
|  | 599 |  | 
|  | 600 | gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL; | 
|  | 601 | if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { | 
|  | 602 | if (gpa < 2 * PAGE_SIZE) { | 
|  | 603 | rc = set_validity_icpt(scb_s, 0x1310U); | 
|  | 604 | goto unpin; | 
|  | 605 | } | 
|  | 606 | /* | 
|  | 607 | * 512 bytes vector registers cannot cross page boundaries | 
|  | 608 | * if this block gets bigger, we have to shadow it. | 
|  | 609 | */ | 
|  | 610 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 611 | if (rc) { | 
|  | 612 | rc = set_validity_icpt(scb_s, 0x1310U); | 
|  | 613 | goto unpin; | 
|  | 614 | } | 
|  | 615 | vsie_page->gvrd_gpa = gpa; | 
|  | 616 | scb_s->gvrd = hpa; | 
|  | 617 | } | 
|  | 618 |  | 
|  | 619 | gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL; | 
|  | 620 | if (gpa && (scb_s->ecb3 & ECB3_RI)) { | 
|  | 621 | if (gpa < 2 * PAGE_SIZE) { | 
|  | 622 | rc = set_validity_icpt(scb_s, 0x0043U); | 
|  | 623 | goto unpin; | 
|  | 624 | } | 
|  | 625 | /* 64 bytes cannot cross page boundaries */ | 
|  | 626 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 627 | if (rc) { | 
|  | 628 | rc = set_validity_icpt(scb_s, 0x0043U); | 
|  | 629 | goto unpin; | 
|  | 630 | } | 
|  | 631 | /* Validity 0x0044 will be checked by SIE */ | 
|  | 632 | vsie_page->riccbd_gpa = gpa; | 
|  | 633 | scb_s->riccbd = hpa; | 
|  | 634 | } | 
|  | 635 | if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) || | 
|  | 636 | (scb_s->ecd & ECD_ETOKENF)) { | 
|  | 637 | unsigned long sdnxc; | 
|  | 638 |  | 
|  | 639 | gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL; | 
|  | 640 | sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL; | 
|  | 641 | if (!gpa || gpa < 2 * PAGE_SIZE) { | 
|  | 642 | rc = set_validity_icpt(scb_s, 0x10b0U); | 
|  | 643 | goto unpin; | 
|  | 644 | } | 
|  | 645 | if (sdnxc < 6 || sdnxc > 12) { | 
|  | 646 | rc = set_validity_icpt(scb_s, 0x10b1U); | 
|  | 647 | goto unpin; | 
|  | 648 | } | 
|  | 649 | if (gpa & ((1 << sdnxc) - 1)) { | 
|  | 650 | rc = set_validity_icpt(scb_s, 0x10b2U); | 
|  | 651 | goto unpin; | 
|  | 652 | } | 
|  | 653 | /* Due to alignment rules (checked above) this cannot | 
|  | 654 | * cross page boundaries | 
|  | 655 | */ | 
|  | 656 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 657 | if (rc) { | 
|  | 658 | rc = set_validity_icpt(scb_s, 0x10b0U); | 
|  | 659 | goto unpin; | 
|  | 660 | } | 
|  | 661 | vsie_page->sdnx_gpa = gpa; | 
|  | 662 | scb_s->sdnxo = hpa | sdnxc; | 
|  | 663 | } | 
|  | 664 | return 0; | 
|  | 665 | unpin: | 
|  | 666 | unpin_blocks(vcpu, vsie_page); | 
|  | 667 | return rc; | 
|  | 668 | } | 
|  | 669 |  | 
|  | 670 | /* unpin the scb provided by guest 2, marking it as dirty */ | 
|  | 671 | static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, | 
|  | 672 | gpa_t gpa) | 
|  | 673 | { | 
|  | 674 | hpa_t hpa = (hpa_t) vsie_page->scb_o; | 
|  | 675 |  | 
|  | 676 | if (hpa) | 
|  | 677 | unpin_guest_page(vcpu->kvm, gpa, hpa); | 
|  | 678 | vsie_page->scb_o = NULL; | 
|  | 679 | } | 
|  | 680 |  | 
|  | 681 | /* | 
|  | 682 | * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o. | 
|  | 683 | * | 
|  | 684 | * Returns: - 0 if the scb was pinned. | 
|  | 685 | *          - > 0 if control has to be given to guest 2 | 
|  | 686 | */ | 
|  | 687 | static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, | 
|  | 688 | gpa_t gpa) | 
|  | 689 | { | 
|  | 690 | hpa_t hpa; | 
|  | 691 | int rc; | 
|  | 692 |  | 
|  | 693 | rc = pin_guest_page(vcpu->kvm, gpa, &hpa); | 
|  | 694 | if (rc) { | 
|  | 695 | rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 
|  | 696 | WARN_ON_ONCE(rc); | 
|  | 697 | return 1; | 
|  | 698 | } | 
|  | 699 | vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; | 
|  | 700 | return 0; | 
|  | 701 | } | 
|  | 702 |  | 
|  | 703 | /* | 
|  | 704 | * Inject a fault into guest 2. | 
|  | 705 | * | 
|  | 706 | * Returns: - > 0 if control has to be given to guest 2 | 
|  | 707 | *            < 0 if an error occurred during injection. | 
|  | 708 | */ | 
|  | 709 | static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr, | 
|  | 710 | bool write_flag) | 
|  | 711 | { | 
|  | 712 | struct kvm_s390_pgm_info pgm = { | 
|  | 713 | .code = code, | 
|  | 714 | .trans_exc_code = | 
|  | 715 | /* 0-51: virtual address */ | 
|  | 716 | (vaddr & 0xfffffffffffff000UL) | | 
|  | 717 | /* 52-53: store / fetch */ | 
|  | 718 | (((unsigned int) !write_flag) + 1) << 10, | 
|  | 719 | /* 62-63: asce id (alway primary == 0) */ | 
|  | 720 | .exc_access_id = 0, /* always primary */ | 
|  | 721 | .op_access_id = 0, /* not MVPG */ | 
|  | 722 | }; | 
|  | 723 | int rc; | 
|  | 724 |  | 
|  | 725 | if (code == PGM_PROTECTION) | 
|  | 726 | pgm.trans_exc_code |= 0x4UL; | 
|  | 727 |  | 
|  | 728 | rc = kvm_s390_inject_prog_irq(vcpu, &pgm); | 
|  | 729 | return rc ? rc : 1; | 
|  | 730 | } | 
|  | 731 |  | 
|  | 732 | /* | 
|  | 733 | * Handle a fault during vsie execution on a gmap shadow. | 
|  | 734 | * | 
|  | 735 | * Returns: - 0 if the fault was resolved | 
|  | 736 | *          - > 0 if control has to be given to guest 2 | 
|  | 737 | *          - < 0 if an error occurred | 
|  | 738 | */ | 
|  | 739 | static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 740 | { | 
|  | 741 | int rc; | 
|  | 742 |  | 
|  | 743 | if (current->thread.gmap_int_code == PGM_PROTECTION) | 
|  | 744 | /* we can directly forward all protection exceptions */ | 
|  | 745 | return inject_fault(vcpu, PGM_PROTECTION, | 
|  | 746 | current->thread.gmap_addr, 1); | 
|  | 747 |  | 
|  | 748 | rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, | 
|  | 749 | current->thread.gmap_addr); | 
|  | 750 | if (rc > 0) { | 
|  | 751 | rc = inject_fault(vcpu, rc, | 
|  | 752 | current->thread.gmap_addr, | 
|  | 753 | current->thread.gmap_write_flag); | 
|  | 754 | if (rc >= 0) | 
|  | 755 | vsie_page->fault_addr = current->thread.gmap_addr; | 
|  | 756 | } | 
|  | 757 | return rc; | 
|  | 758 | } | 
|  | 759 |  | 
|  | 760 | /* | 
|  | 761 | * Retry the previous fault that required guest 2 intervention. This avoids | 
|  | 762 | * one superfluous SIE re-entry and direct exit. | 
|  | 763 | * | 
|  | 764 | * Will ignore any errors. The next SIE fault will do proper fault handling. | 
|  | 765 | */ | 
|  | 766 | static void handle_last_fault(struct kvm_vcpu *vcpu, | 
|  | 767 | struct vsie_page *vsie_page) | 
|  | 768 | { | 
|  | 769 | if (vsie_page->fault_addr) | 
|  | 770 | kvm_s390_shadow_fault(vcpu, vsie_page->gmap, | 
|  | 771 | vsie_page->fault_addr); | 
|  | 772 | vsie_page->fault_addr = 0; | 
|  | 773 | } | 
|  | 774 |  | 
|  | 775 | static inline void clear_vsie_icpt(struct vsie_page *vsie_page) | 
|  | 776 | { | 
|  | 777 | vsie_page->scb_s.icptcode = 0; | 
|  | 778 | } | 
|  | 779 |  | 
|  | 780 | /* rewind the psw and clear the vsie icpt, so we can retry execution */ | 
|  | 781 | static void retry_vsie_icpt(struct vsie_page *vsie_page) | 
|  | 782 | { | 
|  | 783 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 784 | int ilen = insn_length(scb_s->ipa >> 8); | 
|  | 785 |  | 
|  | 786 | /* take care of EXECUTE instructions */ | 
|  | 787 | if (scb_s->icptstatus & 1) { | 
|  | 788 | ilen = (scb_s->icptstatus >> 4) & 0x6; | 
|  | 789 | if (!ilen) | 
|  | 790 | ilen = 4; | 
|  | 791 | } | 
|  | 792 | scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen); | 
|  | 793 | clear_vsie_icpt(vsie_page); | 
|  | 794 | } | 
|  | 795 |  | 
|  | 796 | /* | 
|  | 797 | * Try to shadow + enable the guest 2 provided facility list. | 
|  | 798 | * Retry instruction execution if enabled for and provided by guest 2. | 
|  | 799 | * | 
|  | 800 | * Returns: - 0 if handled (retry or guest 2 icpt) | 
|  | 801 | *          - > 0 if control has to be given to guest 2 | 
|  | 802 | */ | 
|  | 803 | static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 804 | { | 
|  | 805 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 806 | __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U; | 
|  | 807 |  | 
|  | 808 | if (fac && test_kvm_facility(vcpu->kvm, 7)) { | 
|  | 809 | retry_vsie_icpt(vsie_page); | 
|  | 810 | if (read_guest_real(vcpu, fac, &vsie_page->fac, | 
|  | 811 | sizeof(vsie_page->fac))) | 
|  | 812 | return set_validity_icpt(scb_s, 0x1090U); | 
|  | 813 | scb_s->fac = (__u32)(__u64) &vsie_page->fac; | 
|  | 814 | } | 
|  | 815 | return 0; | 
|  | 816 | } | 
|  | 817 |  | 
|  | 818 | /* | 
|  | 819 | * Run the vsie on a shadow scb and a shadow gmap, without any further | 
|  | 820 | * sanity checks, handling SIE faults. | 
|  | 821 | * | 
|  | 822 | * Returns: - 0 everything went fine | 
|  | 823 | *          - > 0 if control has to be given to guest 2 | 
|  | 824 | *          - < 0 if an error occurred | 
|  | 825 | */ | 
|  | 826 | static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 827 | __releases(vcpu->kvm->srcu) | 
|  | 828 | __acquires(vcpu->kvm->srcu) | 
|  | 829 | { | 
|  | 830 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 831 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 
|  | 832 | int guest_bp_isolation; | 
|  | 833 | int rc; | 
|  | 834 |  | 
|  | 835 | handle_last_fault(vcpu, vsie_page); | 
|  | 836 |  | 
|  | 837 | if (need_resched()) | 
|  | 838 | schedule(); | 
|  | 839 | if (test_cpu_flag(CIF_MCCK_PENDING)) | 
|  | 840 | s390_handle_mcck(); | 
|  | 841 |  | 
|  | 842 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 
|  | 843 |  | 
|  | 844 | /* save current guest state of bp isolation override */ | 
|  | 845 | guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); | 
|  | 846 |  | 
|  | 847 | /* | 
|  | 848 | * The guest is running with BPBC, so we have to force it on for our | 
|  | 849 | * nested guest. This is done by enabling BPBC globally, so the BPBC | 
|  | 850 | * control in the SCB (which the nested guest can modify) is simply | 
|  | 851 | * ignored. | 
|  | 852 | */ | 
|  | 853 | if (test_kvm_facility(vcpu->kvm, 82) && | 
|  | 854 | vcpu->arch.sie_block->fpf & FPF_BPBC) | 
|  | 855 | set_thread_flag(TIF_ISOLATE_BP_GUEST); | 
|  | 856 |  | 
|  | 857 | local_irq_disable(); | 
|  | 858 | guest_enter_irqoff(); | 
|  | 859 | local_irq_enable(); | 
|  | 860 |  | 
|  | 861 | rc = sie64a(scb_s, vcpu->run->s.regs.gprs); | 
|  | 862 |  | 
|  | 863 | local_irq_disable(); | 
|  | 864 | guest_exit_irqoff(); | 
|  | 865 | local_irq_enable(); | 
|  | 866 |  | 
|  | 867 | /* restore guest state for bp isolation override */ | 
|  | 868 | if (!guest_bp_isolation) | 
|  | 869 | clear_thread_flag(TIF_ISOLATE_BP_GUEST); | 
|  | 870 |  | 
|  | 871 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 
|  | 872 |  | 
|  | 873 | if (rc == -EINTR) { | 
|  | 874 | VCPU_EVENT(vcpu, 3, "%s", "machine check"); | 
|  | 875 | kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); | 
|  | 876 | return 0; | 
|  | 877 | } | 
|  | 878 |  | 
|  | 879 | if (rc > 0) | 
|  | 880 | rc = 0; /* we could still have an icpt */ | 
|  | 881 | else if (rc == -EFAULT) | 
|  | 882 | return handle_fault(vcpu, vsie_page); | 
|  | 883 |  | 
|  | 884 | switch (scb_s->icptcode) { | 
|  | 885 | case ICPT_INST: | 
|  | 886 | if (scb_s->ipa == 0xb2b0) | 
|  | 887 | rc = handle_stfle(vcpu, vsie_page); | 
|  | 888 | break; | 
|  | 889 | case ICPT_STOP: | 
|  | 890 | /* stop not requested by g2 - must have been a kick */ | 
|  | 891 | if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT)) | 
|  | 892 | clear_vsie_icpt(vsie_page); | 
|  | 893 | break; | 
|  | 894 | case ICPT_VALIDITY: | 
|  | 895 | if ((scb_s->ipa & 0xf000) != 0xf000) | 
|  | 896 | scb_s->ipa += 0x1000; | 
|  | 897 | break; | 
|  | 898 | } | 
|  | 899 | return rc; | 
|  | 900 | } | 
|  | 901 |  | 
|  | 902 | static void release_gmap_shadow(struct vsie_page *vsie_page) | 
|  | 903 | { | 
|  | 904 | if (vsie_page->gmap) | 
|  | 905 | gmap_put(vsie_page->gmap); | 
|  | 906 | WRITE_ONCE(vsie_page->gmap, NULL); | 
|  | 907 | prefix_unmapped(vsie_page); | 
|  | 908 | } | 
|  | 909 |  | 
|  | 910 | static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, | 
|  | 911 | struct vsie_page *vsie_page) | 
|  | 912 | { | 
|  | 913 | unsigned long asce; | 
|  | 914 | union ctlreg0 cr0; | 
|  | 915 | struct gmap *gmap; | 
|  | 916 | int edat; | 
|  | 917 |  | 
|  | 918 | asce = vcpu->arch.sie_block->gcr[1]; | 
|  | 919 | cr0.val = vcpu->arch.sie_block->gcr[0]; | 
|  | 920 | edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); | 
|  | 921 | edat += edat && test_kvm_facility(vcpu->kvm, 78); | 
|  | 922 |  | 
|  | 923 | /* | 
|  | 924 | * ASCE or EDAT could have changed since last icpt, or the gmap | 
|  | 925 | * we're holding has been unshadowed. If the gmap is still valid, | 
|  | 926 | * we can safely reuse it. | 
|  | 927 | */ | 
|  | 928 | if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) | 
|  | 929 | return 0; | 
|  | 930 |  | 
|  | 931 | /* release the old shadow - if any, and mark the prefix as unmapped */ | 
|  | 932 | release_gmap_shadow(vsie_page); | 
|  | 933 | gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); | 
|  | 934 | if (IS_ERR(gmap)) | 
|  | 935 | return PTR_ERR(gmap); | 
|  | 936 | gmap->private = vcpu->kvm; | 
|  | 937 | WRITE_ONCE(vsie_page->gmap, gmap); | 
|  | 938 | return 0; | 
|  | 939 | } | 
|  | 940 |  | 
|  | 941 | /* | 
|  | 942 | * Register the shadow scb at the VCPU, e.g. for kicking out of vsie. | 
|  | 943 | */ | 
|  | 944 | static void register_shadow_scb(struct kvm_vcpu *vcpu, | 
|  | 945 | struct vsie_page *vsie_page) | 
|  | 946 | { | 
|  | 947 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 948 |  | 
|  | 949 | WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s); | 
|  | 950 | /* | 
|  | 951 | * External calls have to lead to a kick of the vcpu and | 
|  | 952 | * therefore the vsie -> Simulate Wait state. | 
|  | 953 | */ | 
|  | 954 | kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); | 
|  | 955 | /* | 
|  | 956 | * We have to adjust the g3 epoch by the g2 epoch. The epoch will | 
|  | 957 | * automatically be adjusted on tod clock changes via kvm_sync_clock. | 
|  | 958 | */ | 
|  | 959 | preempt_disable(); | 
|  | 960 | scb_s->epoch += vcpu->kvm->arch.epoch; | 
|  | 961 |  | 
|  | 962 | if (scb_s->ecd & ECD_MEF) { | 
|  | 963 | scb_s->epdx += vcpu->kvm->arch.epdx; | 
|  | 964 | if (scb_s->epoch < vcpu->kvm->arch.epoch) | 
|  | 965 | scb_s->epdx += 1; | 
|  | 966 | } | 
|  | 967 |  | 
|  | 968 | preempt_enable(); | 
|  | 969 | } | 
|  | 970 |  | 
|  | 971 | /* | 
|  | 972 | * Unregister a shadow scb from a VCPU. | 
|  | 973 | */ | 
|  | 974 | static void unregister_shadow_scb(struct kvm_vcpu *vcpu) | 
|  | 975 | { | 
|  | 976 | kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); | 
|  | 977 | WRITE_ONCE(vcpu->arch.vsie_block, NULL); | 
|  | 978 | } | 
|  | 979 |  | 
|  | 980 | /* | 
|  | 981 | * Run the vsie on a shadowed scb, managing the gmap shadow, handling | 
|  | 982 | * prefix pages and faults. | 
|  | 983 | * | 
|  | 984 | * Returns: - 0 if no errors occurred | 
|  | 985 | *          - > 0 if control has to be given to guest 2 | 
|  | 986 | *          - -ENOMEM if out of memory | 
|  | 987 | */ | 
|  | 988 | static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | 
|  | 989 | { | 
|  | 990 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 
|  | 991 | int rc = 0; | 
|  | 992 |  | 
|  | 993 | while (1) { | 
|  | 994 | rc = acquire_gmap_shadow(vcpu, vsie_page); | 
|  | 995 | if (!rc) | 
|  | 996 | rc = map_prefix(vcpu, vsie_page); | 
|  | 997 | if (!rc) { | 
|  | 998 | gmap_enable(vsie_page->gmap); | 
|  | 999 | update_intervention_requests(vsie_page); | 
|  | 1000 | rc = do_vsie_run(vcpu, vsie_page); | 
|  | 1001 | gmap_enable(vcpu->arch.gmap); | 
|  | 1002 | } | 
|  | 1003 | atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20); | 
|  | 1004 |  | 
|  | 1005 | if (rc == -EAGAIN) | 
|  | 1006 | rc = 0; | 
|  | 1007 | if (rc || scb_s->icptcode || signal_pending(current) || | 
|  | 1008 | kvm_s390_vcpu_has_irq(vcpu, 0)) | 
|  | 1009 | break; | 
|  | 1010 | } | 
|  | 1011 |  | 
|  | 1012 | if (rc == -EFAULT) { | 
|  | 1013 | /* | 
|  | 1014 | * Addressing exceptions are always presentes as intercepts. | 
|  | 1015 | * As addressing exceptions are suppressing and our guest 3 PSW | 
|  | 1016 | * points at the responsible instruction, we have to | 
|  | 1017 | * forward the PSW and set the ilc. If we can't read guest 3 | 
|  | 1018 | * instruction, we can use an arbitrary ilc. Let's always use | 
|  | 1019 | * ilen = 4 for now, so we can avoid reading in guest 3 virtual | 
|  | 1020 | * memory. (we could also fake the shadow so the hardware | 
|  | 1021 | * handles it). | 
|  | 1022 | */ | 
|  | 1023 | scb_s->icptcode = ICPT_PROGI; | 
|  | 1024 | scb_s->iprcc = PGM_ADDRESSING; | 
|  | 1025 | scb_s->pgmilc = 4; | 
|  | 1026 | scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); | 
|  | 1027 | } | 
|  | 1028 | return rc; | 
|  | 1029 | } | 
|  | 1030 |  | 
|  | 1031 | /* | 
|  | 1032 | * Get or create a vsie page for a scb address. | 
|  | 1033 | * | 
|  | 1034 | * Returns: - address of a vsie page (cached or new one) | 
|  | 1035 | *          - NULL if the same scb address is already used by another VCPU | 
|  | 1036 | *          - ERR_PTR(-ENOMEM) if out of memory | 
|  | 1037 | */ | 
|  | 1038 | static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) | 
|  | 1039 | { | 
|  | 1040 | struct vsie_page *vsie_page; | 
|  | 1041 | struct page *page; | 
|  | 1042 | int nr_vcpus; | 
|  | 1043 |  | 
|  | 1044 | rcu_read_lock(); | 
|  | 1045 | page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); | 
|  | 1046 | rcu_read_unlock(); | 
|  | 1047 | if (page) { | 
|  | 1048 | if (page_ref_inc_return(page) == 2) | 
|  | 1049 | return page_to_virt(page); | 
|  | 1050 | page_ref_dec(page); | 
|  | 1051 | } | 
|  | 1052 |  | 
|  | 1053 | /* | 
|  | 1054 | * We want at least #online_vcpus shadows, so every VCPU can execute | 
|  | 1055 | * the VSIE in parallel. | 
|  | 1056 | */ | 
|  | 1057 | nr_vcpus = atomic_read(&kvm->online_vcpus); | 
|  | 1058 |  | 
|  | 1059 | mutex_lock(&kvm->arch.vsie.mutex); | 
|  | 1060 | if (kvm->arch.vsie.page_count < nr_vcpus) { | 
|  | 1061 | page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA); | 
|  | 1062 | if (!page) { | 
|  | 1063 | mutex_unlock(&kvm->arch.vsie.mutex); | 
|  | 1064 | return ERR_PTR(-ENOMEM); | 
|  | 1065 | } | 
|  | 1066 | page_ref_inc(page); | 
|  | 1067 | kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; | 
|  | 1068 | kvm->arch.vsie.page_count++; | 
|  | 1069 | } else { | 
|  | 1070 | /* reuse an existing entry that belongs to nobody */ | 
|  | 1071 | while (true) { | 
|  | 1072 | page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; | 
|  | 1073 | if (page_ref_inc_return(page) == 2) | 
|  | 1074 | break; | 
|  | 1075 | page_ref_dec(page); | 
|  | 1076 | kvm->arch.vsie.next++; | 
|  | 1077 | kvm->arch.vsie.next %= nr_vcpus; | 
|  | 1078 | } | 
|  | 1079 | radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); | 
|  | 1080 | } | 
|  | 1081 | page->index = addr; | 
|  | 1082 | /* double use of the same address */ | 
|  | 1083 | if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { | 
|  | 1084 | page_ref_dec(page); | 
|  | 1085 | mutex_unlock(&kvm->arch.vsie.mutex); | 
|  | 1086 | return NULL; | 
|  | 1087 | } | 
|  | 1088 | mutex_unlock(&kvm->arch.vsie.mutex); | 
|  | 1089 |  | 
|  | 1090 | vsie_page = page_to_virt(page); | 
|  | 1091 | memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); | 
|  | 1092 | release_gmap_shadow(vsie_page); | 
|  | 1093 | vsie_page->fault_addr = 0; | 
|  | 1094 | vsie_page->scb_s.ihcpu = 0xffffU; | 
|  | 1095 | return vsie_page; | 
|  | 1096 | } | 
|  | 1097 |  | 
|  | 1098 | /* put a vsie page acquired via get_vsie_page */ | 
|  | 1099 | static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) | 
|  | 1100 | { | 
|  | 1101 | struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); | 
|  | 1102 |  | 
|  | 1103 | page_ref_dec(page); | 
|  | 1104 | } | 
|  | 1105 |  | 
|  | 1106 | int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) | 
|  | 1107 | { | 
|  | 1108 | struct vsie_page *vsie_page; | 
|  | 1109 | unsigned long scb_addr; | 
|  | 1110 | int rc; | 
|  | 1111 |  | 
|  | 1112 | vcpu->stat.instruction_sie++; | 
|  | 1113 | if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2)) | 
|  | 1114 | return -EOPNOTSUPP; | 
|  | 1115 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 
|  | 1116 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 
|  | 1117 |  | 
|  | 1118 | BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE); | 
|  | 1119 | scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL); | 
|  | 1120 |  | 
|  | 1121 | /* 512 byte alignment */ | 
|  | 1122 | if (unlikely(scb_addr & 0x1ffUL)) | 
|  | 1123 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 
|  | 1124 |  | 
|  | 1125 | if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0)) | 
|  | 1126 | return 0; | 
|  | 1127 |  | 
|  | 1128 | vsie_page = get_vsie_page(vcpu->kvm, scb_addr); | 
|  | 1129 | if (IS_ERR(vsie_page)) | 
|  | 1130 | return PTR_ERR(vsie_page); | 
|  | 1131 | else if (!vsie_page) | 
|  | 1132 | /* double use of sie control block - simply do nothing */ | 
|  | 1133 | return 0; | 
|  | 1134 |  | 
|  | 1135 | rc = pin_scb(vcpu, vsie_page, scb_addr); | 
|  | 1136 | if (rc) | 
|  | 1137 | goto out_put; | 
|  | 1138 | rc = shadow_scb(vcpu, vsie_page); | 
|  | 1139 | if (rc) | 
|  | 1140 | goto out_unpin_scb; | 
|  | 1141 | rc = pin_blocks(vcpu, vsie_page); | 
|  | 1142 | if (rc) | 
|  | 1143 | goto out_unshadow; | 
|  | 1144 | register_shadow_scb(vcpu, vsie_page); | 
|  | 1145 | rc = vsie_run(vcpu, vsie_page); | 
|  | 1146 | unregister_shadow_scb(vcpu); | 
|  | 1147 | unpin_blocks(vcpu, vsie_page); | 
|  | 1148 | out_unshadow: | 
|  | 1149 | unshadow_scb(vcpu, vsie_page); | 
|  | 1150 | out_unpin_scb: | 
|  | 1151 | unpin_scb(vcpu, vsie_page, scb_addr); | 
|  | 1152 | out_put: | 
|  | 1153 | put_vsie_page(vcpu->kvm, vsie_page); | 
|  | 1154 |  | 
|  | 1155 | return rc < 0 ? rc : 0; | 
|  | 1156 | } | 
|  | 1157 |  | 
|  | 1158 | /* Init the vsie data structures. To be called when a vm is initialized. */ | 
|  | 1159 | void kvm_s390_vsie_init(struct kvm *kvm) | 
|  | 1160 | { | 
|  | 1161 | mutex_init(&kvm->arch.vsie.mutex); | 
|  | 1162 | INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL); | 
|  | 1163 | } | 
|  | 1164 |  | 
|  | 1165 | /* Destroy the vsie data structures. To be called when a vm is destroyed. */ | 
|  | 1166 | void kvm_s390_vsie_destroy(struct kvm *kvm) | 
|  | 1167 | { | 
|  | 1168 | struct vsie_page *vsie_page; | 
|  | 1169 | struct page *page; | 
|  | 1170 | int i; | 
|  | 1171 |  | 
|  | 1172 | mutex_lock(&kvm->arch.vsie.mutex); | 
|  | 1173 | for (i = 0; i < kvm->arch.vsie.page_count; i++) { | 
|  | 1174 | page = kvm->arch.vsie.pages[i]; | 
|  | 1175 | kvm->arch.vsie.pages[i] = NULL; | 
|  | 1176 | vsie_page = page_to_virt(page); | 
|  | 1177 | release_gmap_shadow(vsie_page); | 
|  | 1178 | /* free the radix tree entry */ | 
|  | 1179 | radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); | 
|  | 1180 | __free_page(page); | 
|  | 1181 | } | 
|  | 1182 | kvm->arch.vsie.page_count = 0; | 
|  | 1183 | mutex_unlock(&kvm->arch.vsie.mutex); | 
|  | 1184 | } | 
|  | 1185 |  | 
|  | 1186 | void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu) | 
|  | 1187 | { | 
|  | 1188 | struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block); | 
|  | 1189 |  | 
|  | 1190 | /* | 
|  | 1191 | * Even if the VCPU lets go of the shadow sie block reference, it is | 
|  | 1192 | * still valid in the cache. So we can safely kick it. | 
|  | 1193 | */ | 
|  | 1194 | if (scb) { | 
|  | 1195 | atomic_or(PROG_BLOCK_SIE, &scb->prog20); | 
|  | 1196 | if (scb->prog0c & PROG_IN_SIE) | 
|  | 1197 | atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags); | 
|  | 1198 | } | 
|  | 1199 | } |