Blame - marvell/linux/arch/arm64/kvm/reset.c - T108

blob: a3105ae464be1bc584f324b10549ce7189c31b9f [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* Copyright (C) 2012,2013 - ARM Ltd
				4	* Author: Marc Zyngier <marc.zyngier@arm.com>
				5	*
				6	* Derived from arch/arm/kvm/reset.c
				7	* Copyright (C) 2012 - Virtual Open Systems and Columbia University
				8	* Author: Christoffer Dall <c.dall@virtualopensystems.com>
				9	*/
				10
				11	#include <linux/errno.h>
				12	#include <linux/kernel.h>
				13	#include <linux/kvm_host.h>
				14	#include <linux/kvm.h>
				15	#include <linux/hw_breakpoint.h>
				16	#include <linux/slab.h>
				17	#include <linux/string.h>
				18	#include <linux/types.h>
				19
				20	#include <kvm/arm_arch_timer.h>
				21
				22	#include <asm/cpufeature.h>
				23	#include <asm/cputype.h>
				24	#include <asm/fpsimd.h>
				25	#include <asm/ptrace.h>
				26	#include <asm/kvm_arm.h>
				27	#include <asm/kvm_asm.h>
				28	#include <asm/kvm_coproc.h>
				29	#include <asm/kvm_emulate.h>
				30	#include <asm/kvm_mmu.h>
				31	#include <asm/virt.h>
				32
				33	/* Maximum phys_shift supported for any VM on this host */
				34	static u32 kvm_ipa_limit;
				35
				36	/*
				37	* ARMv8 Reset Values
				38	*/
				39	static const struct kvm_regs default_regs_reset = {
				40	.regs.pstate = (PSR_MODE_EL1h \| PSR_A_BIT \| PSR_I_BIT \|
				41	PSR_F_BIT \| PSR_D_BIT),
				42	};
				43
				44	static const struct kvm_regs default_regs_reset32 = {
				45	.regs.pstate = (PSR_AA32_MODE_SVC \| PSR_AA32_A_BIT \|
				46	PSR_AA32_I_BIT \| PSR_AA32_F_BIT),
				47	};
				48
				49	static bool cpu_has_32bit_el1(void)
				50	{
				51	u64 pfr0;
				52
				53	pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
				54	return !!(pfr0 & 0x20);
				55	}
				56
				57	/**
				58	* kvm_arch_vm_ioctl_check_extension
				59	*
				60	* We currently assume that the number of HW registers is uniform
				61	* across all CPUs (see cpuinfo_sanity_check).
				62	*/
				63	int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
				64	{
				65	int r;
				66
				67	switch (ext) {
				68	case KVM_CAP_ARM_EL1_32BIT:
				69	r = cpu_has_32bit_el1();
				70	break;
				71	case KVM_CAP_GUEST_DEBUG_HW_BPS:
				72	r = get_num_brps();
				73	break;
				74	case KVM_CAP_GUEST_DEBUG_HW_WPS:
				75	r = get_num_wrps();
				76	break;
				77	case KVM_CAP_ARM_PMU_V3:
				78	r = kvm_arm_support_pmu_v3();
				79	break;
				80	case KVM_CAP_ARM_INJECT_SERROR_ESR:
				81	r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
				82	break;
				83	case KVM_CAP_SET_GUEST_DEBUG:
				84	case KVM_CAP_VCPU_ATTRIBUTES:
				85	r = 1;
				86	break;
				87	case KVM_CAP_ARM_VM_IPA_SIZE:
				88	r = kvm_ipa_limit;
				89	break;
				90	case KVM_CAP_ARM_SVE:
				91	r = system_supports_sve();
				92	break;
				93	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
				94	case KVM_CAP_ARM_PTRAUTH_GENERIC:
				95	r = has_vhe() && system_supports_address_auth() &&
				96	system_supports_generic_auth();
				97	break;
				98	default:
				99	r = 0;
				100	}
				101
				102	return r;
				103	}
				104
				105	unsigned int kvm_sve_max_vl;
				106
				107	int kvm_arm_init_sve(void)
				108	{
				109	if (system_supports_sve()) {
				110	kvm_sve_max_vl = sve_max_virtualisable_vl;
				111
				112	/*
				113	* The get_sve_reg()/set_sve_reg() ioctl interface will need
				114	* to be extended with multiple register slice support in
				115	* order to support vector lengths greater than
				116	* SVE_VL_ARCH_MAX:
				117	*/
				118	if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
				119	kvm_sve_max_vl = SVE_VL_ARCH_MAX;
				120
				121	/*
				122	* Don't even try to make use of vector lengths that
				123	* aren't available on all CPUs, for now:
				124	*/
				125	if (kvm_sve_max_vl < sve_max_vl)
				126	pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
				127	kvm_sve_max_vl);
				128	}
				129
				130	return 0;
				131	}
				132
				133	static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
				134	{
				135	if (!system_supports_sve())
				136	return -EINVAL;
				137
				138	/* Verify that KVM startup enforced this when SVE was detected: */
				139	if (WARN_ON(!has_vhe()))
				140	return -EINVAL;
				141
				142	vcpu->arch.sve_max_vl = kvm_sve_max_vl;
				143
				144	/*
				145	* Userspace can still customize the vector lengths by writing
				146	* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
				147	* kvm_arm_vcpu_finalize(), which freezes the configuration.
				148	*/
				149	vcpu->arch.flags \|= KVM_ARM64_GUEST_HAS_SVE;
				150
				151	return 0;
				152	}
				153
				154	/*
				155	* Finalize vcpu's maximum SVE vector length, allocating
				156	* vcpu->arch.sve_state as necessary.
				157	*/
				158	static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
				159	{
				160	void *buf;
				161	unsigned int vl;
				162
				163	vl = vcpu->arch.sve_max_vl;
				164
				165	/*
				166	* Resposibility for these properties is shared between
				167	* kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
				168	* set_sve_vls(). Double-check here just to be sure:
				169	*/
				170	if (WARN_ON(!sve_vl_valid(vl) \|\| vl > sve_max_virtualisable_vl \|\|
				171	vl > SVE_VL_ARCH_MAX))
				172	return -EIO;
				173
				174	buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
				175	if (!buf)
				176	return -ENOMEM;
				177
				178	vcpu->arch.sve_state = buf;
				179	vcpu->arch.flags \|= KVM_ARM64_VCPU_SVE_FINALIZED;
				180	return 0;
				181	}
				182
				183	int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
				184	{
				185	switch (feature) {
				186	case KVM_ARM_VCPU_SVE:
				187	if (!vcpu_has_sve(vcpu))
				188	return -EINVAL;
				189
				190	if (kvm_arm_vcpu_sve_finalized(vcpu))
				191	return -EPERM;
				192
				193	return kvm_vcpu_finalize_sve(vcpu);
				194	}
				195
				196	return -EINVAL;
				197	}
				198
				199	bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
				200	{
				201	if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
				202	return false;
				203
				204	return true;
				205	}
				206
				207	void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
				208	{
				209	kfree(vcpu->arch.sve_state);
				210	}
				211
				212	static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
				213	{
				214	if (vcpu_has_sve(vcpu))
				215	memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
				216	}
				217
				218	static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
				219	{
				220	/* Support ptrauth only if the system supports these capabilities. */
				221	if (!has_vhe())
				222	return -EINVAL;
				223
				224	if (!system_supports_address_auth() \|\|
				225	!system_supports_generic_auth())
				226	return -EINVAL;
				227	/*
				228	* For now make sure that both address/generic pointer authentication
				229	* features are requested by the userspace together.
				230	*/
				231	if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) \|\|
				232	!test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
				233	return -EINVAL;
				234
				235	vcpu->arch.flags \|= KVM_ARM64_GUEST_HAS_PTRAUTH;
				236	return 0;
				237	}
				238
				239	/**
				240	* kvm_reset_vcpu - sets core registers and sys_regs to reset value
				241	* @vcpu: The VCPU pointer
				242	*
				243	* This function finds the right table above and sets the registers on
				244	* the virtual CPU struct to their architecturally defined reset
				245	* values, except for registers whose reset is deferred until
				246	* kvm_arm_vcpu_finalize().
				247	*
				248	* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
				249	* ioctl or as part of handling a request issued by another VCPU in the PSCI
				250	* handling code. In the first case, the VCPU will not be loaded, and in the
				251	* second case the VCPU will be loaded. Because this function operates purely
				252	* on the memory-backed valus of system registers, we want to do a full put if
				253	* we were loaded (handling a request) and load the values back at the end of
				254	* the function. Otherwise we leave the state alone. In both cases, we
				255	* disable preemption around the vcpu reset as we would otherwise race with
				256	* preempt notifiers which also call put/load.
				257	*/
				258	int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
				259	{
				260	const struct kvm_regs *cpu_reset;
				261	int ret;
				262	bool loaded;
				263
				264	/* Reset PMU outside of the non-preemptible section */
				265	kvm_pmu_vcpu_reset(vcpu);
				266
				267	preempt_disable();
				268	loaded = (vcpu->cpu != -1);
				269	if (loaded)
				270	kvm_arch_vcpu_put(vcpu);
				271
				272	if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
				273	if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
				274	ret = kvm_vcpu_enable_sve(vcpu);
				275	if (ret)
				276	goto out;
				277	}
				278	} else {
				279	kvm_vcpu_reset_sve(vcpu);
				280	}
				281
				282	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) \|\|
				283	test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
				284	if (kvm_vcpu_enable_ptrauth(vcpu)) {
				285	ret = -EINVAL;
				286	goto out;
				287	}
				288	}
				289
				290	switch (vcpu->arch.target) {
				291	default:
				292	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
				293	if (!cpu_has_32bit_el1()) {
				294	ret = -EINVAL;
				295	goto out;
				296	}
				297	cpu_reset = &default_regs_reset32;
				298	} else {
				299	cpu_reset = &default_regs_reset;
				300	}
				301
				302	break;
				303	}
				304
				305	/* Reset core registers */
				306	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
				307
				308	/* Reset system registers */
				309	kvm_reset_sys_regs(vcpu);
				310
				311	/*
				312	* Additional reset state handling that PSCI may have imposed on us.
				313	* Must be done after all the sys_reg reset.
				314	*/
				315	if (vcpu->arch.reset_state.reset) {
				316	unsigned long target_pc = vcpu->arch.reset_state.pc;
				317
				318	/* Gracefully handle Thumb2 entry point */
				319	if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
				320	target_pc &= ~1UL;
				321	vcpu_set_thumb(vcpu);
				322	}
				323
				324	/* Propagate caller endianness */
				325	if (vcpu->arch.reset_state.be)
				326	kvm_vcpu_set_be(vcpu);
				327
				328	*vcpu_pc(vcpu) = target_pc;
				329	vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);
				330
				331	vcpu->arch.reset_state.reset = false;
				332	}
				333
				334	/* Default workaround setup is enabled (if supported) */
				335	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
				336	vcpu->arch.workaround_flags \|= VCPU_WORKAROUND_2_FLAG;
				337
				338	/* Reset timer */
				339	ret = kvm_timer_vcpu_reset(vcpu);
				340	out:
				341	if (loaded)
				342	kvm_arch_vcpu_load(vcpu, smp_processor_id());
				343	preempt_enable();
				344	return ret;
				345	}
				346
				347	void kvm_set_ipa_limit(void)
				348	{
				349	unsigned int ipa_max, pa_max, va_max, parange;
				350
				351	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
				352	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
				353
				354	/* Clamp the IPA limit to the PA size supported by the kernel */
				355	ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
				356	/*
				357	* Since our stage2 table is dependent on the stage1 page table code,
				358	* we must always honor the following condition:
				359	*
				360	* Number of levels in Stage1 >= Number of levels in Stage2.
				361	*
				362	* So clamp the ipa limit further down to limit the number of levels.
				363	* Since we can concatenate upto 16 tables at entry level, we could
				364	* go upto 4bits above the maximum VA addressible with the current
				365	* number of levels.
				366	*/
				367	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
				368	va_max += 4;
				369
				370	if (va_max < ipa_max)
				371	ipa_max = va_max;
				372
				373	/*
				374	* If the final limit is lower than the real physical address
				375	* limit of the CPUs, report the reason.
				376	*/
				377	if (ipa_max < pa_max)
				378	pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
				379	(va_max < pa_max) ? "Virtual" : "Physical");
				380
				381	kvm_ipa_limit = ipa_max;
				382	kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
				383	((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
				384	" (Reduced IPA size, limited VM/VMM compatibility)" : ""));
				385	}
				386
				387	/*
				388	* Configure the VTCR_EL2 for this VM. The VTCR value is common
				389	* across all the physical CPUs on the system. We use system wide
				390	* sanitised values to fill in different fields, except for Hardware
				391	* Management of Access Flags. HA Flag is set unconditionally on
				392	* all CPUs, as it is safe to run with or without the feature and
				393	* the bit is RES0 on CPUs that don't support it.
				394	*/
				395	int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
				396	{
				397	u64 vtcr = VTCR_EL2_FLAGS;
				398	u32 parange, phys_shift;
				399	u8 lvls;
				400
				401	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
				402	return -EINVAL;
				403
				404	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
				405	if (phys_shift) {
				406	if (phys_shift > kvm_ipa_limit \|\|
				407	phys_shift < 32)
				408	return -EINVAL;
				409	} else {
				410	phys_shift = KVM_PHYS_SHIFT;
				411	if (phys_shift > kvm_ipa_limit) {
				412	pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
				413	current->comm);
				414	return -EINVAL;
				415	}
				416	}
				417
				418	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
				419	if (parange > ID_AA64MMFR0_PARANGE_MAX)
				420	parange = ID_AA64MMFR0_PARANGE_MAX;
				421	vtcr \|= parange << VTCR_EL2_PS_SHIFT;
				422
				423	vtcr \|= VTCR_EL2_T0SZ(phys_shift);
				424	/*
				425	* Use a minimum 2 level page table to prevent splitting
				426	* host PMD huge pages at stage2.
				427	*/
				428	lvls = stage2_pgtable_levels(phys_shift);
				429	if (lvls < 2)
				430	lvls = 2;
				431	vtcr \|= VTCR_EL2_LVLS_TO_SL0(lvls);
				432
				433	/*
				434	* Enable the Hardware Access Flag management, unconditionally
				435	* on all CPUs. The features is RES0 on CPUs without the support
				436	* and must be ignored by the CPUs.
				437	*/
				438	vtcr \|= VTCR_EL2_HA;
				439
				440	/* Set the vmid bits */
				441	vtcr \|= (kvm_get_vmid_bits() == 16) ?
				442	VTCR_EL2_VS_16BIT :
				443	VTCR_EL2_VS_8BIT;
				444	kvm->arch.vtcr = vtcr;
				445	return 0;
				446	}