Blame - src/kernel/linux/v4.14/arch/s390/kvm/kvm-s390.c - T103

blob: 46fee3f4deddac150549dc217a04f82fb935edef [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* hosting zSeries kernel virtual machines
				3	*
				4	* Copyright IBM Corp. 2008, 2009
				5	*
				6	* This program is free software; you can redistribute it and/or modify
				7	* it under the terms of the GNU General Public License (version 2 only)
				8	* as published by the Free Software Foundation.
				9	*
				10	* Author(s): Carsten Otte <cotte@de.ibm.com>
				11	* Christian Borntraeger <borntraeger@de.ibm.com>
				12	* Heiko Carstens <heiko.carstens@de.ibm.com>
				13	* Christian Ehrhardt <ehrhardt@de.ibm.com>
				14	* Jason J. Herne <jjherne@us.ibm.com>
				15	*/
				16
				17	#include <linux/compiler.h>
				18	#include <linux/err.h>
				19	#include <linux/fs.h>
				20	#include <linux/hrtimer.h>
				21	#include <linux/init.h>
				22	#include <linux/kvm.h>
				23	#include <linux/kvm_host.h>
				24	#include <linux/mman.h>
				25	#include <linux/module.h>
				26	#include <linux/moduleparam.h>
				27	#include <linux/random.h>
				28	#include <linux/slab.h>
				29	#include <linux/timer.h>
				30	#include <linux/vmalloc.h>
				31	#include <linux/bitmap.h>
				32	#include <linux/sched/signal.h>
				33	#include <linux/string.h>
				34
				35	#include <asm/asm-offsets.h>
				36	#include <asm/lowcore.h>
				37	#include <asm/stp.h>
				38	#include <asm/pgtable.h>
				39	#include <asm/gmap.h>
				40	#include <asm/nmi.h>
				41	#include <asm/switch_to.h>
				42	#include <asm/isc.h>
				43	#include <asm/sclp.h>
				44	#include <asm/cpacf.h>
				45	#include <asm/timex.h>
				46	#include "kvm-s390.h"
				47	#include "gaccess.h"
				48
				49	#define KMSG_COMPONENT "kvm-s390"
				50	#undef pr_fmt
				51	#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
				52
				53	#define CREATE_TRACE_POINTS
				54	#include "trace.h"
				55	#include "trace-s390.h"
				56
				57	#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
				58	#define LOCAL_IRQS 32
				59	#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
				60	(KVM_MAX_VCPUS + LOCAL_IRQS))
				61
				62	#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
				63
				64	struct kvm_stats_debugfs_item debugfs_entries[] = {
				65	{ "userspace_handled", VCPU_STAT(exit_userspace) },
				66	{ "exit_null", VCPU_STAT(exit_null) },
				67	{ "exit_validity", VCPU_STAT(exit_validity) },
				68	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
				69	{ "exit_external_request", VCPU_STAT(exit_external_request) },
				70	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
				71	{ "exit_instruction", VCPU_STAT(exit_instruction) },
				72	{ "exit_pei", VCPU_STAT(exit_pei) },
				73	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
				74	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
				75	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
				76	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
				77	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
				78	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
				79	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
				80	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
				81	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
				82	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
				83	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
				84	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
				85	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
				86	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
				87	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
				88	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
				89	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
				90	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
				91	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
				92	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
				93	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
				94	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
				95	{ "instruction_spx", VCPU_STAT(instruction_spx) },
				96	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
				97	{ "instruction_stap", VCPU_STAT(instruction_stap) },
				98	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
				99	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
				100	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
				101	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
				102	{ "instruction_essa", VCPU_STAT(instruction_essa) },
				103	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
				104	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
				105	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
				106	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
				107	{ "instruction_sie", VCPU_STAT(instruction_sie) },
				108	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
				109	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
				110	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
				111	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
				112	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
				113	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
				114	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
				115	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
				116	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
				117	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
				118	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
				119	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
				120	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
				121	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
				122	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
				123	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
				124	{ "diagnose_10", VCPU_STAT(diagnose_10) },
				125	{ "diagnose_44", VCPU_STAT(diagnose_44) },
				126	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
				127	{ "diagnose_258", VCPU_STAT(diagnose_258) },
				128	{ "diagnose_308", VCPU_STAT(diagnose_308) },
				129	{ "diagnose_500", VCPU_STAT(diagnose_500) },
				130	{ NULL }
				131	};
				132
				133	struct kvm_s390_tod_clock_ext {
				134	__u8 epoch_idx;
				135	__u64 tod;
				136	__u8 reserved[7];
				137	} __packed;
				138
				139	/* allow nested virtualization in KVM (if enabled by user space) */
				140	static int nested;
				141	module_param(nested, int, S_IRUGO);
				142	MODULE_PARM_DESC(nested, "Nested virtualization support");
				143
				144	/* upper facilities limit for kvm */
				145	unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
				146
				147	unsigned long kvm_s390_fac_list_mask_size(void)
				148	{
				149	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
				150	return ARRAY_SIZE(kvm_s390_fac_list_mask);
				151	}
				152
				153	/* available cpu features supported by kvm */
				154	static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
				155	/* available subfunctions indicated via query / "test bit" */
				156	static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
				157
				158	static struct gmap_notifier gmap_notifier;
				159	static struct gmap_notifier vsie_gmap_notifier;
				160	debug_info_t *kvm_s390_dbf;
				161
				162	/* Section: not file related */
				163	int kvm_arch_hardware_enable(void)
				164	{
				165	/* every s390 is virtualization enabled ;-) */
				166	return 0;
				167	}
				168
				169	static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
				170	unsigned long end);
				171
				172	static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
				173	{
				174	u8 delta_idx = 0;
				175
				176	/*
				177	* The TOD jumps by delta, we have to compensate this by adding
				178	* -delta to the epoch.
				179	*/
				180	delta = -delta;
				181
				182	/* sign-extension - we're adding to signed values below */
				183	if ((s64)delta < 0)
				184	delta_idx = -1;
				185
				186	scb->epoch += delta;
				187	if (scb->ecd & ECD_MEF) {
				188	scb->epdx += delta_idx;
				189	if (scb->epoch < delta)
				190	scb->epdx += 1;
				191	}
				192	}
				193
				194	/*
				195	* This callback is executed during stop_machine(). All CPUs are therefore
				196	* temporarily stopped. In order not to change guest behavior, we have to
				197	* disable preemption whenever we touch the epoch of kvm and the VCPUs,
				198	* so a CPU won't be stopped while calculating with the epoch.
				199	*/
				200	static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
				201	void *v)
				202	{
				203	struct kvm *kvm;
				204	struct kvm_vcpu *vcpu;
				205	int i;
				206	unsigned long long *delta = v;
				207
				208	list_for_each_entry(kvm, &vm_list, vm_list) {
				209	kvm_for_each_vcpu(i, vcpu, kvm) {
				210	kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
				211	if (i == 0) {
				212	kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				213	kvm->arch.epdx = vcpu->arch.sie_block->epdx;
				214	}
				215	if (vcpu->arch.cputm_enabled)
				216	vcpu->arch.cputm_start += *delta;
				217	if (vcpu->arch.vsie_block)
				218	kvm_clock_sync_scb(vcpu->arch.vsie_block,
				219	*delta);
				220	}
				221	}
				222	return NOTIFY_OK;
				223	}
				224
				225	static struct notifier_block kvm_clock_notifier = {
				226	.notifier_call = kvm_clock_sync,
				227	};
				228
				229	int kvm_arch_hardware_setup(void)
				230	{
				231	gmap_notifier.notifier_call = kvm_gmap_notifier;
				232	gmap_register_pte_notifier(&gmap_notifier);
				233	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
				234	gmap_register_pte_notifier(&vsie_gmap_notifier);
				235	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				236	&kvm_clock_notifier);
				237	return 0;
				238	}
				239
				240	void kvm_arch_hardware_unsetup(void)
				241	{
				242	gmap_unregister_pte_notifier(&gmap_notifier);
				243	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
				244	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
				245	&kvm_clock_notifier);
				246	}
				247
				248	static void allow_cpu_feat(unsigned long nr)
				249	{
				250	set_bit_inv(nr, kvm_s390_available_cpu_feat);
				251	}
				252
				253	static inline int plo_test_bit(unsigned char nr)
				254	{
				255	register unsigned long r0 asm("0") = (unsigned long) nr \| 0x100;
				256	int cc;
				257
				258	asm volatile(
				259	/* Parameter registers are ignored for "test bit" */
				260	" plo 0,0,0,0(0)\n"
				261	" ipm %0\n"
				262	" srl %0,28\n"
				263	: "=d" (cc)
				264	: "d" (r0)
				265	: "cc");
				266	return cc == 0;
				267	}
				268
				269	static void kvm_s390_cpu_feat_init(void)
				270	{
				271	int i;
				272
				273	for (i = 0; i < 256; ++i) {
				274	if (plo_test_bit(i))
				275	kvm_s390_available_subfunc.plo[i >> 3] \|= 0x80 >> (i & 7);
				276	}
				277
				278	if (test_facility(28)) /* TOD-clock steering */
				279	ptff(kvm_s390_available_subfunc.ptff,
				280	sizeof(kvm_s390_available_subfunc.ptff),
				281	PTFF_QAF);
				282
				283	if (test_facility(17)) { /* MSA */
				284	__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
				285	kvm_s390_available_subfunc.kmac);
				286	__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
				287	kvm_s390_available_subfunc.kmc);
				288	__cpacf_query(CPACF_KM, (cpacf_mask_t *)
				289	kvm_s390_available_subfunc.km);
				290	__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
				291	kvm_s390_available_subfunc.kimd);
				292	__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
				293	kvm_s390_available_subfunc.klmd);
				294	}
				295	if (test_facility(76)) /* MSA3 */
				296	__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
				297	kvm_s390_available_subfunc.pckmo);
				298	if (test_facility(77)) { /* MSA4 */
				299	__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
				300	kvm_s390_available_subfunc.kmctr);
				301	__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
				302	kvm_s390_available_subfunc.kmf);
				303	__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
				304	kvm_s390_available_subfunc.kmo);
				305	__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
				306	kvm_s390_available_subfunc.pcc);
				307	}
				308	if (test_facility(57)) /* MSA5 */
				309	__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
				310	kvm_s390_available_subfunc.ppno);
				311
				312	if (test_facility(146)) /* MSA8 */
				313	__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
				314	kvm_s390_available_subfunc.kma);
				315
				316	if (MACHINE_HAS_ESOP)
				317	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
				318	/*
				319	* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
				320	* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
				321	*/
				322	if (!sclp.has_sief2 \|\| !MACHINE_HAS_ESOP \|\| !sclp.has_64bscao \|\|
				323	!test_facility(3) \|\| !nested)
				324	return;
				325	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
				326	if (sclp.has_64bscao)
				327	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
				328	if (sclp.has_siif)
				329	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
				330	if (sclp.has_gpere)
				331	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
				332	if (sclp.has_gsls)
				333	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
				334	if (sclp.has_ib)
				335	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
				336	if (sclp.has_cei)
				337	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
				338	if (sclp.has_ibs)
				339	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
				340	if (sclp.has_kss)
				341	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
				342	/*
				343	* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
				344	* all skey handling functions read/set the skey from the PGSTE
				345	* instead of the real storage key.
				346	*
				347	* KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
				348	* pages being detected as preserved although they are resident.
				349	*
				350	* KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
				351	* have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
				352	*
				353	* For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
				354	* KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
				355	* correctly shadowed. We can do that for the PGSTE but not for PTE.I.
				356	*
				357	* KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
				358	* cannot easily shadow the SCA because of the ipte lock.
				359	*/
				360	}
				361
				362	int kvm_arch_init(void *opaque)
				363	{
				364	int rc;
				365
				366	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
				367	if (!kvm_s390_dbf)
				368	return -ENOMEM;
				369
				370	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
				371	rc = -ENOMEM;
				372	goto out_debug_unreg;
				373	}
				374
				375	kvm_s390_cpu_feat_init();
				376
				377	/* Register floating interrupt controller interface. */
				378	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
				379	if (rc) {
				380	pr_err("Failed to register FLIC rc=%d\n", rc);
				381	goto out_debug_unreg;
				382	}
				383	return 0;
				384
				385	out_debug_unreg:
				386	debug_unregister(kvm_s390_dbf);
				387	return rc;
				388	}
				389
				390	void kvm_arch_exit(void)
				391	{
				392	debug_unregister(kvm_s390_dbf);
				393	}
				394
				395	/* Section: device related */
				396	long kvm_arch_dev_ioctl(struct file *filp,
				397	unsigned int ioctl, unsigned long arg)
				398	{
				399	if (ioctl == KVM_S390_ENABLE_SIE)
				400	return s390_enable_sie();
				401	return -EINVAL;
				402	}
				403
				404	int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
				405	{
				406	int r;
				407
				408	switch (ext) {
				409	case KVM_CAP_S390_PSW:
				410	case KVM_CAP_S390_GMAP:
				411	case KVM_CAP_SYNC_MMU:
				412	#ifdef CONFIG_KVM_S390_UCONTROL
				413	case KVM_CAP_S390_UCONTROL:
				414	#endif
				415	case KVM_CAP_ASYNC_PF:
				416	case KVM_CAP_SYNC_REGS:
				417	case KVM_CAP_ONE_REG:
				418	case KVM_CAP_ENABLE_CAP:
				419	case KVM_CAP_S390_CSS_SUPPORT:
				420	case KVM_CAP_IOEVENTFD:
				421	case KVM_CAP_DEVICE_CTRL:
				422	case KVM_CAP_ENABLE_CAP_VM:
				423	case KVM_CAP_S390_IRQCHIP:
				424	case KVM_CAP_VM_ATTRIBUTES:
				425	case KVM_CAP_MP_STATE:
				426	case KVM_CAP_IMMEDIATE_EXIT:
				427	case KVM_CAP_S390_INJECT_IRQ:
				428	case KVM_CAP_S390_USER_SIGP:
				429	case KVM_CAP_S390_USER_STSI:
				430	case KVM_CAP_S390_SKEYS:
				431	case KVM_CAP_S390_IRQ_STATE:
				432	case KVM_CAP_S390_USER_INSTR0:
				433	case KVM_CAP_S390_CMMA_MIGRATION:
				434	case KVM_CAP_S390_AIS:
				435	r = 1;
				436	break;
				437	case KVM_CAP_S390_MEM_OP:
				438	r = MEM_OP_MAX_SIZE;
				439	break;
				440	case KVM_CAP_NR_VCPUS:
				441	case KVM_CAP_MAX_VCPUS:
				442	case KVM_CAP_MAX_VCPU_ID:
				443	r = KVM_S390_BSCA_CPU_SLOTS;
				444	if (!kvm_s390_use_sca_entries())
				445	r = KVM_MAX_VCPUS;
				446	else if (sclp.has_esca && sclp.has_64bscao)
				447	r = KVM_S390_ESCA_CPU_SLOTS;
				448	break;
				449	case KVM_CAP_NR_MEMSLOTS:
				450	r = KVM_USER_MEM_SLOTS;
				451	break;
				452	case KVM_CAP_S390_COW:
				453	r = MACHINE_HAS_ESOP;
				454	break;
				455	case KVM_CAP_S390_VECTOR_REGISTERS:
				456	r = MACHINE_HAS_VX;
				457	break;
				458	case KVM_CAP_S390_RI:
				459	r = test_facility(64);
				460	break;
				461	case KVM_CAP_S390_GS:
				462	r = test_facility(133);
				463	break;
				464	case KVM_CAP_S390_BPB:
				465	r = test_facility(82);
				466	break;
				467	default:
				468	r = 0;
				469	}
				470	return r;
				471	}
				472
				473	static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				474	struct kvm_memory_slot *memslot)
				475	{
				476	gfn_t cur_gfn, last_gfn;
				477	unsigned long address;
				478	struct gmap *gmap = kvm->arch.gmap;
				479
				480	/* Loop over all guest pages */
				481	last_gfn = memslot->base_gfn + memslot->npages;
				482	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
				483	address = gfn_to_hva_memslot(memslot, cur_gfn);
				484
				485	if (test_and_clear_guest_dirty(gmap->mm, address))
				486	mark_page_dirty(kvm, cur_gfn);
				487	if (fatal_signal_pending(current))
				488	return;
				489	cond_resched();
				490	}
				491	}
				492
				493	/* Section: vm related */
				494	static void sca_del_vcpu(struct kvm_vcpu *vcpu);
				495
				496	/*
				497	* Get (and clear) the dirty memory log for a memory slot.
				498	*/
				499	int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
				500	struct kvm_dirty_log *log)
				501	{
				502	int r;
				503	unsigned long n;
				504	struct kvm_memslots *slots;
				505	struct kvm_memory_slot *memslot;
				506	int is_dirty = 0;
				507
				508	if (kvm_is_ucontrol(kvm))
				509	return -EINVAL;
				510
				511	mutex_lock(&kvm->slots_lock);
				512
				513	r = -EINVAL;
				514	if (log->slot >= KVM_USER_MEM_SLOTS)
				515	goto out;
				516
				517	slots = kvm_memslots(kvm);
				518	memslot = id_to_memslot(slots, log->slot);
				519	r = -ENOENT;
				520	if (!memslot->dirty_bitmap)
				521	goto out;
				522
				523	kvm_s390_sync_dirty_log(kvm, memslot);
				524	r = kvm_get_dirty_log(kvm, log, &is_dirty);
				525	if (r)
				526	goto out;
				527
				528	/* Clear the dirty log */
				529	if (is_dirty) {
				530	n = kvm_dirty_bitmap_bytes(memslot);
				531	memset(memslot->dirty_bitmap, 0, n);
				532	}
				533	r = 0;
				534	out:
				535	mutex_unlock(&kvm->slots_lock);
				536	return r;
				537	}
				538
				539	static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
				540	{
				541	unsigned int i;
				542	struct kvm_vcpu *vcpu;
				543
				544	kvm_for_each_vcpu(i, vcpu, kvm) {
				545	kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
				546	}
				547	}
				548
				549	static int kvm_vm_ioctl_enable_cap(struct kvm kvm, struct kvm_enable_cap cap)
				550	{
				551	int r;
				552
				553	if (cap->flags)
				554	return -EINVAL;
				555
				556	switch (cap->cap) {
				557	case KVM_CAP_S390_IRQCHIP:
				558	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
				559	kvm->arch.use_irqchip = 1;
				560	r = 0;
				561	break;
				562	case KVM_CAP_S390_USER_SIGP:
				563	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
				564	kvm->arch.user_sigp = 1;
				565	r = 0;
				566	break;
				567	case KVM_CAP_S390_VECTOR_REGISTERS:
				568	mutex_lock(&kvm->lock);
				569	if (kvm->created_vcpus) {
				570	r = -EBUSY;
				571	} else if (MACHINE_HAS_VX) {
				572	set_kvm_facility(kvm->arch.model.fac_mask, 129);
				573	set_kvm_facility(kvm->arch.model.fac_list, 129);
				574	if (test_facility(134)) {
				575	set_kvm_facility(kvm->arch.model.fac_mask, 134);
				576	set_kvm_facility(kvm->arch.model.fac_list, 134);
				577	}
				578	if (test_facility(135)) {
				579	set_kvm_facility(kvm->arch.model.fac_mask, 135);
				580	set_kvm_facility(kvm->arch.model.fac_list, 135);
				581	}
				582	r = 0;
				583	} else
				584	r = -EINVAL;
				585	mutex_unlock(&kvm->lock);
				586	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
				587	r ? "(not available)" : "(success)");
				588	break;
				589	case KVM_CAP_S390_RI:
				590	r = -EINVAL;
				591	mutex_lock(&kvm->lock);
				592	if (kvm->created_vcpus) {
				593	r = -EBUSY;
				594	} else if (test_facility(64)) {
				595	set_kvm_facility(kvm->arch.model.fac_mask, 64);
				596	set_kvm_facility(kvm->arch.model.fac_list, 64);
				597	r = 0;
				598	}
				599	mutex_unlock(&kvm->lock);
				600	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
				601	r ? "(not available)" : "(success)");
				602	break;
				603	case KVM_CAP_S390_AIS:
				604	mutex_lock(&kvm->lock);
				605	if (kvm->created_vcpus) {
				606	r = -EBUSY;
				607	} else {
				608	set_kvm_facility(kvm->arch.model.fac_mask, 72);
				609	set_kvm_facility(kvm->arch.model.fac_list, 72);
				610	r = 0;
				611	}
				612	mutex_unlock(&kvm->lock);
				613	VM_EVENT(kvm, 3, "ENABLE: AIS %s",
				614	r ? "(not available)" : "(success)");
				615	break;
				616	case KVM_CAP_S390_GS:
				617	r = -EINVAL;
				618	mutex_lock(&kvm->lock);
				619	if (kvm->created_vcpus) {
				620	r = -EBUSY;
				621	} else if (test_facility(133)) {
				622	set_kvm_facility(kvm->arch.model.fac_mask, 133);
				623	set_kvm_facility(kvm->arch.model.fac_list, 133);
				624	r = 0;
				625	}
				626	mutex_unlock(&kvm->lock);
				627	VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
				628	r ? "(not available)" : "(success)");
				629	break;
				630	case KVM_CAP_S390_USER_STSI:
				631	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
				632	kvm->arch.user_stsi = 1;
				633	r = 0;
				634	break;
				635	case KVM_CAP_S390_USER_INSTR0:
				636	VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
				637	kvm->arch.user_instr0 = 1;
				638	icpt_operexc_on_all_vcpus(kvm);
				639	r = 0;
				640	break;
				641	default:
				642	r = -EINVAL;
				643	break;
				644	}
				645	return r;
				646	}
				647
				648	static int kvm_s390_get_mem_control(struct kvm kvm, struct kvm_device_attr attr)
				649	{
				650	int ret;
				651
				652	switch (attr->attr) {
				653	case KVM_S390_VM_MEM_LIMIT_SIZE:
				654	ret = 0;
				655	VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
				656	kvm->arch.mem_limit);
				657	if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
				658	ret = -EFAULT;
				659	break;
				660	default:
				661	ret = -ENXIO;
				662	break;
				663	}
				664	return ret;
				665	}
				666
				667	static int kvm_s390_set_mem_control(struct kvm kvm, struct kvm_device_attr attr)
				668	{
				669	int ret;
				670	unsigned int idx;
				671	switch (attr->attr) {
				672	case KVM_S390_VM_MEM_ENABLE_CMMA:
				673	ret = -ENXIO;
				674	if (!sclp.has_cmma)
				675	break;
				676
				677	ret = -EBUSY;
				678	VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
				679	mutex_lock(&kvm->lock);
				680	if (!kvm->created_vcpus) {
				681	kvm->arch.use_cmma = 1;
				682	ret = 0;
				683	}
				684	mutex_unlock(&kvm->lock);
				685	break;
				686	case KVM_S390_VM_MEM_CLR_CMMA:
				687	ret = -ENXIO;
				688	if (!sclp.has_cmma)
				689	break;
				690	ret = -EINVAL;
				691	if (!kvm->arch.use_cmma)
				692	break;
				693
				694	VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
				695	mutex_lock(&kvm->lock);
				696	idx = srcu_read_lock(&kvm->srcu);
				697	s390_reset_cmma(kvm->arch.gmap->mm);
				698	srcu_read_unlock(&kvm->srcu, idx);
				699	mutex_unlock(&kvm->lock);
				700	ret = 0;
				701	break;
				702	case KVM_S390_VM_MEM_LIMIT_SIZE: {
				703	unsigned long new_limit;
				704
				705	if (kvm_is_ucontrol(kvm))
				706	return -EINVAL;
				707
				708	if (get_user(new_limit, (u64 __user *)attr->addr))
				709	return -EFAULT;
				710
				711	if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
				712	new_limit > kvm->arch.mem_limit)
				713	return -E2BIG;
				714
				715	if (!new_limit)
				716	return -EINVAL;
				717
				718	/* gmap_create takes last usable address */
				719	if (new_limit != KVM_S390_NO_MEM_LIMIT)
				720	new_limit -= 1;
				721
				722	ret = -EBUSY;
				723	mutex_lock(&kvm->lock);
				724	if (!kvm->created_vcpus) {
				725	/* gmap_create will round the limit up */
				726	struct gmap *new = gmap_create(current->mm, new_limit);
				727
				728	if (!new) {
				729	ret = -ENOMEM;
				730	} else {
				731	gmap_remove(kvm->arch.gmap);
				732	new->private = kvm;
				733	kvm->arch.gmap = new;
				734	ret = 0;
				735	}
				736	}
				737	mutex_unlock(&kvm->lock);
				738	VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
				739	VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
				740	(void *) kvm->arch.gmap->asce);
				741	break;
				742	}
				743	default:
				744	ret = -ENXIO;
				745	break;
				746	}
				747	return ret;
				748	}
				749
				750	static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
				751
				752	static int kvm_s390_vm_set_crypto(struct kvm kvm, struct kvm_device_attr attr)
				753	{
				754	struct kvm_vcpu *vcpu;
				755	int i;
				756
				757	if (!test_kvm_facility(kvm, 76))
				758	return -EINVAL;
				759
				760	mutex_lock(&kvm->lock);
				761	switch (attr->attr) {
				762	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
				763	get_random_bytes(
				764	kvm->arch.crypto.crycb->aes_wrapping_key_mask,
				765	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				766	kvm->arch.crypto.aes_kw = 1;
				767	VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
				768	break;
				769	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
				770	get_random_bytes(
				771	kvm->arch.crypto.crycb->dea_wrapping_key_mask,
				772	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				773	kvm->arch.crypto.dea_kw = 1;
				774	VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
				775	break;
				776	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
				777	kvm->arch.crypto.aes_kw = 0;
				778	memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
				779	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				780	VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
				781	break;
				782	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
				783	kvm->arch.crypto.dea_kw = 0;
				784	memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
				785	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				786	VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
				787	break;
				788	default:
				789	mutex_unlock(&kvm->lock);
				790	return -ENXIO;
				791	}
				792
				793	kvm_for_each_vcpu(i, vcpu, kvm) {
				794	kvm_s390_vcpu_crypto_setup(vcpu);
				795	exit_sie(vcpu);
				796	}
				797	mutex_unlock(&kvm->lock);
				798	return 0;
				799	}
				800
				801	static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
				802	{
				803	int cx;
				804	struct kvm_vcpu *vcpu;
				805
				806	kvm_for_each_vcpu(cx, vcpu, kvm)
				807	kvm_s390_sync_request(req, vcpu);
				808	}
				809
				810	/*
				811	* Must be called with kvm->srcu held to avoid races on memslots, and with
				812	* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
				813	*/
				814	static int kvm_s390_vm_start_migration(struct kvm *kvm)
				815	{
				816	struct kvm_s390_migration_state *mgs;
				817	struct kvm_memory_slot *ms;
				818	/* should be the only one */
				819	struct kvm_memslots *slots;
				820	unsigned long ram_pages;
				821	int slotnr;
				822
				823	/* migration mode already enabled */
				824	if (kvm->arch.migration_state)
				825	return 0;
				826
				827	slots = kvm_memslots(kvm);
				828	if (!slots \|\| !slots->used_slots)
				829	return -EINVAL;
				830
				831	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
				832	if (!mgs)
				833	return -ENOMEM;
				834	kvm->arch.migration_state = mgs;
				835
				836	if (kvm->arch.use_cmma) {
				837	/*
				838	* Get the first slot. They are reverse sorted by base_gfn, so
				839	* the first slot is also the one at the end of the address
				840	* space. We have verified above that at least one slot is
				841	* present.
				842	*/
				843	ms = slots->memslots;
				844	/* round up so we only use full longs */
				845	ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
				846	/* allocate enough bytes to store all the bits */
				847	mgs->pgste_bitmap = vmalloc(ram_pages / 8);
				848	if (!mgs->pgste_bitmap) {
				849	kfree(mgs);
				850	kvm->arch.migration_state = NULL;
				851	return -ENOMEM;
				852	}
				853
				854	mgs->bitmap_size = ram_pages;
				855	atomic64_set(&mgs->dirty_pages, ram_pages);
				856	/* mark all the pages in active slots as dirty */
				857	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
				858	ms = slots->memslots + slotnr;
				859	bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
				860	}
				861
				862	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
				863	}
				864	return 0;
				865	}
				866
				867	/*
				868	* Must be called with kvm->slots_lock to avoid races with ourselves and
				869	* kvm_s390_vm_start_migration.
				870	*/
				871	static int kvm_s390_vm_stop_migration(struct kvm *kvm)
				872	{
				873	struct kvm_s390_migration_state *mgs;
				874
				875	/* migration mode already disabled */
				876	if (!kvm->arch.migration_state)
				877	return 0;
				878	mgs = kvm->arch.migration_state;
				879	kvm->arch.migration_state = NULL;
				880
				881	if (kvm->arch.use_cmma) {
				882	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
				883	/* We have to wait for the essa emulation to finish */
				884	synchronize_srcu(&kvm->srcu);
				885	vfree(mgs->pgste_bitmap);
				886	}
				887	kfree(mgs);
				888	return 0;
				889	}
				890
				891	static int kvm_s390_vm_set_migration(struct kvm *kvm,
				892	struct kvm_device_attr *attr)
				893	{
				894	int res = -ENXIO;
				895
				896	mutex_lock(&kvm->slots_lock);
				897	switch (attr->attr) {
				898	case KVM_S390_VM_MIGRATION_START:
				899	res = kvm_s390_vm_start_migration(kvm);
				900	break;
				901	case KVM_S390_VM_MIGRATION_STOP:
				902	res = kvm_s390_vm_stop_migration(kvm);
				903	break;
				904	default:
				905	break;
				906	}
				907	mutex_unlock(&kvm->slots_lock);
				908
				909	return res;
				910	}
				911
				912	static int kvm_s390_vm_get_migration(struct kvm *kvm,
				913	struct kvm_device_attr *attr)
				914	{
				915	u64 mig = (kvm->arch.migration_state != NULL);
				916
				917	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
				918	return -ENXIO;
				919
				920	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
				921	return -EFAULT;
				922	return 0;
				923	}
				924
				925	static int kvm_s390_set_tod_ext(struct kvm kvm, struct kvm_device_attr attr)
				926	{
				927	struct kvm_s390_vm_tod_clock gtod;
				928
				929	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
				930	return -EFAULT;
				931
				932	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
				933	return -EINVAL;
				934	kvm_s390_set_tod_clock(kvm, &gtod);
				935
				936	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
				937	gtod.epoch_idx, gtod.tod);
				938
				939	return 0;
				940	}
				941
				942	static int kvm_s390_set_tod_high(struct kvm kvm, struct kvm_device_attr attr)
				943	{
				944	u8 gtod_high;
				945
				946	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
				947	sizeof(gtod_high)))
				948	return -EFAULT;
				949
				950	if (gtod_high != 0)
				951	return -EINVAL;
				952	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
				953
				954	return 0;
				955	}
				956
				957	static int kvm_s390_set_tod_low(struct kvm kvm, struct kvm_device_attr attr)
				958	{
				959	struct kvm_s390_vm_tod_clock gtod = { 0 };
				960
				961	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
				962	sizeof(gtod.tod)))
				963	return -EFAULT;
				964
				965	kvm_s390_set_tod_clock(kvm, &gtod);
				966	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
				967	return 0;
				968	}
				969
				970	static int kvm_s390_set_tod(struct kvm kvm, struct kvm_device_attr attr)
				971	{
				972	int ret;
				973
				974	if (attr->flags)
				975	return -EINVAL;
				976
				977	switch (attr->attr) {
				978	case KVM_S390_VM_TOD_EXT:
				979	ret = kvm_s390_set_tod_ext(kvm, attr);
				980	break;
				981	case KVM_S390_VM_TOD_HIGH:
				982	ret = kvm_s390_set_tod_high(kvm, attr);
				983	break;
				984	case KVM_S390_VM_TOD_LOW:
				985	ret = kvm_s390_set_tod_low(kvm, attr);
				986	break;
				987	default:
				988	ret = -ENXIO;
				989	break;
				990	}
				991	return ret;
				992	}
				993
				994	static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				995	struct kvm_s390_vm_tod_clock *gtod)
				996	{
				997	struct kvm_s390_tod_clock_ext htod;
				998
				999	preempt_disable();
				1000
				1001	get_tod_clock_ext((char *)&htod);
				1002
				1003	gtod->tod = htod.tod + kvm->arch.epoch;
				1004	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
				1005
				1006	if (gtod->tod < htod.tod)
				1007	gtod->epoch_idx += 1;
				1008
				1009	preempt_enable();
				1010	}
				1011
				1012	static int kvm_s390_get_tod_ext(struct kvm kvm, struct kvm_device_attr attr)
				1013	{
				1014	struct kvm_s390_vm_tod_clock gtod;
				1015
				1016	memset(&gtod, 0, sizeof(gtod));
				1017
				1018	if (test_kvm_facility(kvm, 139))
				1019	kvm_s390_get_tod_clock_ext(kvm, &gtod);
				1020	else
				1021	gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
				1022
				1023	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
				1024	return -EFAULT;
				1025
				1026	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
				1027	gtod.epoch_idx, gtod.tod);
				1028	return 0;
				1029	}
				1030
				1031	static int kvm_s390_get_tod_high(struct kvm kvm, struct kvm_device_attr attr)
				1032	{
				1033	u8 gtod_high = 0;
				1034
				1035	if (copy_to_user((void __user *)attr->addr, &gtod_high,
				1036	sizeof(gtod_high)))
				1037	return -EFAULT;
				1038	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
				1039
				1040	return 0;
				1041	}
				1042
				1043	static int kvm_s390_get_tod_low(struct kvm kvm, struct kvm_device_attr attr)
				1044	{
				1045	u64 gtod;
				1046
				1047	gtod = kvm_s390_get_tod_clock_fast(kvm);
				1048	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
				1049	return -EFAULT;
				1050	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
				1051
				1052	return 0;
				1053	}
				1054
				1055	static int kvm_s390_get_tod(struct kvm kvm, struct kvm_device_attr attr)
				1056	{
				1057	int ret;
				1058
				1059	if (attr->flags)
				1060	return -EINVAL;
				1061
				1062	switch (attr->attr) {
				1063	case KVM_S390_VM_TOD_EXT:
				1064	ret = kvm_s390_get_tod_ext(kvm, attr);
				1065	break;
				1066	case KVM_S390_VM_TOD_HIGH:
				1067	ret = kvm_s390_get_tod_high(kvm, attr);
				1068	break;
				1069	case KVM_S390_VM_TOD_LOW:
				1070	ret = kvm_s390_get_tod_low(kvm, attr);
				1071	break;
				1072	default:
				1073	ret = -ENXIO;
				1074	break;
				1075	}
				1076	return ret;
				1077	}
				1078
				1079	static int kvm_s390_set_processor(struct kvm kvm, struct kvm_device_attr attr)
				1080	{
				1081	struct kvm_s390_vm_cpu_processor *proc;
				1082	u16 lowest_ibc, unblocked_ibc;
				1083	int ret = 0;
				1084
				1085	mutex_lock(&kvm->lock);
				1086	if (kvm->created_vcpus) {
				1087	ret = -EBUSY;
				1088	goto out;
				1089	}
				1090	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
				1091	if (!proc) {
				1092	ret = -ENOMEM;
				1093	goto out;
				1094	}
				1095	if (!copy_from_user(proc, (void __user *)attr->addr,
				1096	sizeof(*proc))) {
				1097	kvm->arch.model.cpuid = proc->cpuid;
				1098	lowest_ibc = sclp.ibc >> 16 & 0xfff;
				1099	unblocked_ibc = sclp.ibc & 0xfff;
				1100	if (lowest_ibc && proc->ibc) {
				1101	if (proc->ibc > unblocked_ibc)
				1102	kvm->arch.model.ibc = unblocked_ibc;
				1103	else if (proc->ibc < lowest_ibc)
				1104	kvm->arch.model.ibc = lowest_ibc;
				1105	else
				1106	kvm->arch.model.ibc = proc->ibc;
				1107	}
				1108	memcpy(kvm->arch.model.fac_list, proc->fac_list,
				1109	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1110	VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
				1111	kvm->arch.model.ibc,
				1112	kvm->arch.model.cpuid);
				1113	VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1114	kvm->arch.model.fac_list[0],
				1115	kvm->arch.model.fac_list[1],
				1116	kvm->arch.model.fac_list[2]);
				1117	} else
				1118	ret = -EFAULT;
				1119	kfree(proc);
				1120	out:
				1121	mutex_unlock(&kvm->lock);
				1122	return ret;
				1123	}
				1124
				1125	static int kvm_s390_set_processor_feat(struct kvm *kvm,
				1126	struct kvm_device_attr *attr)
				1127	{
				1128	struct kvm_s390_vm_cpu_feat data;
				1129	int ret = -EBUSY;
				1130
				1131	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
				1132	return -EFAULT;
				1133	if (!bitmap_subset((unsigned long *) data.feat,
				1134	kvm_s390_available_cpu_feat,
				1135	KVM_S390_VM_CPU_FEAT_NR_BITS))
				1136	return -EINVAL;
				1137
				1138	mutex_lock(&kvm->lock);
				1139	if (!kvm->created_vcpus) {
				1140	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
				1141	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1142	ret = 0;
				1143	}
				1144	mutex_unlock(&kvm->lock);
				1145	return ret;
				1146	}
				1147
				1148	static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
				1149	struct kvm_device_attr *attr)
				1150	{
				1151	/*
				1152	* Once supported by kernel + hw, we have to store the subfunctions
				1153	* in kvm->arch and remember that user space configured them.
				1154	*/
				1155	return -ENXIO;
				1156	}
				1157
				1158	static int kvm_s390_set_cpu_model(struct kvm kvm, struct kvm_device_attr attr)
				1159	{
				1160	int ret = -ENXIO;
				1161
				1162	switch (attr->attr) {
				1163	case KVM_S390_VM_CPU_PROCESSOR:
				1164	ret = kvm_s390_set_processor(kvm, attr);
				1165	break;
				1166	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1167	ret = kvm_s390_set_processor_feat(kvm, attr);
				1168	break;
				1169	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1170	ret = kvm_s390_set_processor_subfunc(kvm, attr);
				1171	break;
				1172	}
				1173	return ret;
				1174	}
				1175
				1176	static int kvm_s390_get_processor(struct kvm kvm, struct kvm_device_attr attr)
				1177	{
				1178	struct kvm_s390_vm_cpu_processor *proc;
				1179	int ret = 0;
				1180
				1181	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
				1182	if (!proc) {
				1183	ret = -ENOMEM;
				1184	goto out;
				1185	}
				1186	proc->cpuid = kvm->arch.model.cpuid;
				1187	proc->ibc = kvm->arch.model.ibc;
				1188	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
				1189	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1190	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
				1191	kvm->arch.model.ibc,
				1192	kvm->arch.model.cpuid);
				1193	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1194	kvm->arch.model.fac_list[0],
				1195	kvm->arch.model.fac_list[1],
				1196	kvm->arch.model.fac_list[2]);
				1197	if (copy_to_user((void __user )attr->addr, proc, sizeof(proc)))
				1198	ret = -EFAULT;
				1199	kfree(proc);
				1200	out:
				1201	return ret;
				1202	}
				1203
				1204	static int kvm_s390_get_machine(struct kvm kvm, struct kvm_device_attr attr)
				1205	{
				1206	struct kvm_s390_vm_cpu_machine *mach;
				1207	int ret = 0;
				1208
				1209	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
				1210	if (!mach) {
				1211	ret = -ENOMEM;
				1212	goto out;
				1213	}
				1214	get_cpu_id((struct cpuid *) &mach->cpuid);
				1215	mach->ibc = sclp.ibc;
				1216	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
				1217	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1218	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
				1219	sizeof(S390_lowcore.stfle_fac_list));
				1220	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
				1221	kvm->arch.model.ibc,
				1222	kvm->arch.model.cpuid);
				1223	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
				1224	mach->fac_mask[0],
				1225	mach->fac_mask[1],
				1226	mach->fac_mask[2]);
				1227	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
				1228	mach->fac_list[0],
				1229	mach->fac_list[1],
				1230	mach->fac_list[2]);
				1231	if (copy_to_user((void __user )attr->addr, mach, sizeof(mach)))
				1232	ret = -EFAULT;
				1233	kfree(mach);
				1234	out:
				1235	return ret;
				1236	}
				1237
				1238	static int kvm_s390_get_processor_feat(struct kvm *kvm,
				1239	struct kvm_device_attr *attr)
				1240	{
				1241	struct kvm_s390_vm_cpu_feat data;
				1242
				1243	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
				1244	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1245	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
				1246	return -EFAULT;
				1247	return 0;
				1248	}
				1249
				1250	static int kvm_s390_get_machine_feat(struct kvm *kvm,
				1251	struct kvm_device_attr *attr)
				1252	{
				1253	struct kvm_s390_vm_cpu_feat data;
				1254
				1255	bitmap_copy((unsigned long *) data.feat,
				1256	kvm_s390_available_cpu_feat,
				1257	KVM_S390_VM_CPU_FEAT_NR_BITS);
				1258	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
				1259	return -EFAULT;
				1260	return 0;
				1261	}
				1262
				1263	static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
				1264	struct kvm_device_attr *attr)
				1265	{
				1266	/*
				1267	* Once we can actually configure subfunctions (kernel + hw support),
				1268	* we have to check if they were already set by user space, if so copy
				1269	* them from kvm->arch.
				1270	*/
				1271	return -ENXIO;
				1272	}
				1273
				1274	static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
				1275	struct kvm_device_attr *attr)
				1276	{
				1277	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
				1278	sizeof(struct kvm_s390_vm_cpu_subfunc)))
				1279	return -EFAULT;
				1280	return 0;
				1281	}
				1282	static int kvm_s390_get_cpu_model(struct kvm kvm, struct kvm_device_attr attr)
				1283	{
				1284	int ret = -ENXIO;
				1285
				1286	switch (attr->attr) {
				1287	case KVM_S390_VM_CPU_PROCESSOR:
				1288	ret = kvm_s390_get_processor(kvm, attr);
				1289	break;
				1290	case KVM_S390_VM_CPU_MACHINE:
				1291	ret = kvm_s390_get_machine(kvm, attr);
				1292	break;
				1293	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1294	ret = kvm_s390_get_processor_feat(kvm, attr);
				1295	break;
				1296	case KVM_S390_VM_CPU_MACHINE_FEAT:
				1297	ret = kvm_s390_get_machine_feat(kvm, attr);
				1298	break;
				1299	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1300	ret = kvm_s390_get_processor_subfunc(kvm, attr);
				1301	break;
				1302	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
				1303	ret = kvm_s390_get_machine_subfunc(kvm, attr);
				1304	break;
				1305	}
				1306	return ret;
				1307	}
				1308
				1309	static int kvm_s390_vm_set_attr(struct kvm kvm, struct kvm_device_attr attr)
				1310	{
				1311	int ret;
				1312
				1313	switch (attr->group) {
				1314	case KVM_S390_VM_MEM_CTRL:
				1315	ret = kvm_s390_set_mem_control(kvm, attr);
				1316	break;
				1317	case KVM_S390_VM_TOD:
				1318	ret = kvm_s390_set_tod(kvm, attr);
				1319	break;
				1320	case KVM_S390_VM_CPU_MODEL:
				1321	ret = kvm_s390_set_cpu_model(kvm, attr);
				1322	break;
				1323	case KVM_S390_VM_CRYPTO:
				1324	ret = kvm_s390_vm_set_crypto(kvm, attr);
				1325	break;
				1326	case KVM_S390_VM_MIGRATION:
				1327	ret = kvm_s390_vm_set_migration(kvm, attr);
				1328	break;
				1329	default:
				1330	ret = -ENXIO;
				1331	break;
				1332	}
				1333
				1334	return ret;
				1335	}
				1336
				1337	static int kvm_s390_vm_get_attr(struct kvm kvm, struct kvm_device_attr attr)
				1338	{
				1339	int ret;
				1340
				1341	switch (attr->group) {
				1342	case KVM_S390_VM_MEM_CTRL:
				1343	ret = kvm_s390_get_mem_control(kvm, attr);
				1344	break;
				1345	case KVM_S390_VM_TOD:
				1346	ret = kvm_s390_get_tod(kvm, attr);
				1347	break;
				1348	case KVM_S390_VM_CPU_MODEL:
				1349	ret = kvm_s390_get_cpu_model(kvm, attr);
				1350	break;
				1351	case KVM_S390_VM_MIGRATION:
				1352	ret = kvm_s390_vm_get_migration(kvm, attr);
				1353	break;
				1354	default:
				1355	ret = -ENXIO;
				1356	break;
				1357	}
				1358
				1359	return ret;
				1360	}
				1361
				1362	static int kvm_s390_vm_has_attr(struct kvm kvm, struct kvm_device_attr attr)
				1363	{
				1364	int ret;
				1365
				1366	switch (attr->group) {
				1367	case KVM_S390_VM_MEM_CTRL:
				1368	switch (attr->attr) {
				1369	case KVM_S390_VM_MEM_ENABLE_CMMA:
				1370	case KVM_S390_VM_MEM_CLR_CMMA:
				1371	ret = sclp.has_cmma ? 0 : -ENXIO;
				1372	break;
				1373	case KVM_S390_VM_MEM_LIMIT_SIZE:
				1374	ret = 0;
				1375	break;
				1376	default:
				1377	ret = -ENXIO;
				1378	break;
				1379	}
				1380	break;
				1381	case KVM_S390_VM_TOD:
				1382	switch (attr->attr) {
				1383	case KVM_S390_VM_TOD_LOW:
				1384	case KVM_S390_VM_TOD_HIGH:
				1385	ret = 0;
				1386	break;
				1387	default:
				1388	ret = -ENXIO;
				1389	break;
				1390	}
				1391	break;
				1392	case KVM_S390_VM_CPU_MODEL:
				1393	switch (attr->attr) {
				1394	case KVM_S390_VM_CPU_PROCESSOR:
				1395	case KVM_S390_VM_CPU_MACHINE:
				1396	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
				1397	case KVM_S390_VM_CPU_MACHINE_FEAT:
				1398	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
				1399	ret = 0;
				1400	break;
				1401	/* configuring subfunctions is not supported yet */
				1402	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
				1403	default:
				1404	ret = -ENXIO;
				1405	break;
				1406	}
				1407	break;
				1408	case KVM_S390_VM_CRYPTO:
				1409	switch (attr->attr) {
				1410	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
				1411	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
				1412	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
				1413	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
				1414	ret = 0;
				1415	break;
				1416	default:
				1417	ret = -ENXIO;
				1418	break;
				1419	}
				1420	break;
				1421	case KVM_S390_VM_MIGRATION:
				1422	ret = 0;
				1423	break;
				1424	default:
				1425	ret = -ENXIO;
				1426	break;
				1427	}
				1428
				1429	return ret;
				1430	}
				1431
				1432	static long kvm_s390_get_skeys(struct kvm kvm, struct kvm_s390_skeys args)
				1433	{
				1434	uint8_t *keys;
				1435	uint64_t hva;
				1436	int srcu_idx, i, r = 0;
				1437
				1438	if (args->flags != 0)
				1439	return -EINVAL;
				1440
				1441	/* Is this guest using storage keys? */
				1442	if (!mm_use_skey(current->mm))
				1443	return KVM_S390_GET_SKEYS_NONE;
				1444
				1445	/* Enforce sane limit on memory allocation */
				1446	if (args->count < 1 \|\| args->count > KVM_S390_SKEYS_MAX)
				1447	return -EINVAL;
				1448
				1449	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
				1450	if (!keys)
				1451	return -ENOMEM;
				1452
				1453	down_read(&current->mm->mmap_sem);
				1454	srcu_idx = srcu_read_lock(&kvm->srcu);
				1455	for (i = 0; i < args->count; i++) {
				1456	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1457	if (kvm_is_error_hva(hva)) {
				1458	r = -EFAULT;
				1459	break;
				1460	}
				1461
				1462	r = get_guest_storage_key(current->mm, hva, &keys[i]);
				1463	if (r)
				1464	break;
				1465	}
				1466	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1467	up_read(&current->mm->mmap_sem);
				1468
				1469	if (!r) {
				1470	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				1471	sizeof(uint8_t) * args->count);
				1472	if (r)
				1473	r = -EFAULT;
				1474	}
				1475
				1476	kvfree(keys);
				1477	return r;
				1478	}
				1479
				1480	static long kvm_s390_set_skeys(struct kvm kvm, struct kvm_s390_skeys args)
				1481	{
				1482	uint8_t *keys;
				1483	uint64_t hva;
				1484	int srcu_idx, i, r = 0;
				1485
				1486	if (args->flags != 0)
				1487	return -EINVAL;
				1488
				1489	/* Enforce sane limit on memory allocation */
				1490	if (args->count < 1 \|\| args->count > KVM_S390_SKEYS_MAX)
				1491	return -EINVAL;
				1492
				1493	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
				1494	if (!keys)
				1495	return -ENOMEM;
				1496
				1497	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
				1498	sizeof(uint8_t) * args->count);
				1499	if (r) {
				1500	r = -EFAULT;
				1501	goto out;
				1502	}
				1503
				1504	/* Enable storage key handling for the guest */
				1505	r = s390_enable_skey();
				1506	if (r)
				1507	goto out;
				1508
				1509	down_read(&current->mm->mmap_sem);
				1510	srcu_idx = srcu_read_lock(&kvm->srcu);
				1511	for (i = 0; i < args->count; i++) {
				1512	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1513	if (kvm_is_error_hva(hva)) {
				1514	r = -EFAULT;
				1515	break;
				1516	}
				1517
				1518	/* Lowest order bit is reserved */
				1519	if (keys[i] & 0x01) {
				1520	r = -EINVAL;
				1521	break;
				1522	}
				1523
				1524	r = set_guest_storage_key(current->mm, hva, keys[i], 0);
				1525	if (r)
				1526	break;
				1527	}
				1528	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1529	up_read(&current->mm->mmap_sem);
				1530	out:
				1531	kvfree(keys);
				1532	return r;
				1533	}
				1534
				1535	/*
				1536	* Base address and length must be sent at the start of each block, therefore
				1537	* it's cheaper to send some clean data, as long as it's less than the size of
				1538	* two longs.
				1539	*/
				1540	#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
				1541	/* for consistency */
				1542	#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
				1543
				1544	/*
				1545	* This function searches for the next page with dirty CMMA attributes, and
				1546	* saves the attributes in the buffer up to either the end of the buffer or
				1547	* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
				1548	* no trailing clean bytes are saved.
				1549	* In case no dirty bits were found, or if CMMA was not enabled or used, the
				1550	* output buffer will indicate 0 as length.
				1551	*/
				1552	static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				1553	struct kvm_s390_cmma_log *args)
				1554	{
				1555	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
				1556	unsigned long bufsize, hva, pgstev, i, next, cur;
				1557	int srcu_idx, peek, r = 0, rr;
				1558	u8 *res;
				1559
				1560	cur = args->start_gfn;
				1561	i = next = pgstev = 0;
				1562
				1563	if (unlikely(!kvm->arch.use_cmma))
				1564	return -ENXIO;
				1565	/* Invalid/unsupported flags were specified */
				1566	if (args->flags & ~KVM_S390_CMMA_PEEK)
				1567	return -EINVAL;
				1568	/* Migration mode query, and we are not doing a migration */
				1569	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
				1570	if (!peek && !s)
				1571	return -EINVAL;
				1572	/* CMMA is disabled or was not used, or the buffer has length zero */
				1573	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
				1574	if (!bufsize \|\| !kvm->mm->context.use_cmma) {
				1575	memset(args, 0, sizeof(*args));
				1576	return 0;
				1577	}
				1578
				1579	if (!peek) {
				1580	/* We are not peeking, and there are no dirty pages */
				1581	if (!atomic64_read(&s->dirty_pages)) {
				1582	memset(args, 0, sizeof(*args));
				1583	return 0;
				1584	}
				1585	cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				1586	args->start_gfn);
				1587	if (cur >= s->bitmap_size) /* nothing found, loop back */
				1588	cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
				1589	if (cur >= s->bitmap_size) { /* again! (very unlikely) */
				1590	memset(args, 0, sizeof(*args));
				1591	return 0;
				1592	}
				1593	next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
				1594	}
				1595
				1596	res = vmalloc(bufsize);
				1597	if (!res)
				1598	return -ENOMEM;
				1599
				1600	args->start_gfn = cur;
				1601
				1602	down_read(&kvm->mm->mmap_sem);
				1603	srcu_idx = srcu_read_lock(&kvm->srcu);
				1604	while (i < bufsize) {
				1605	hva = gfn_to_hva(kvm, cur);
				1606	if (kvm_is_error_hva(hva)) {
				1607	r = -EFAULT;
				1608	break;
				1609	}
				1610	/* decrement only if we actually flipped the bit to 0 */
				1611	if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
				1612	atomic64_dec(&s->dirty_pages);
				1613	r = get_pgste(kvm->mm, hva, &pgstev);
				1614	if (r < 0)
				1615	pgstev = 0;
				1616	/* save the value */
				1617	res[i++] = (pgstev >> 24) & 0x43;
				1618	/*
				1619	* if the next bit is too far away, stop.
				1620	* if we reached the previous "next", find the next one
				1621	*/
				1622	if (!peek) {
				1623	if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				1624	break;
				1625	if (cur == next)
				1626	next = find_next_bit(s->pgste_bitmap,
				1627	s->bitmap_size, cur + 1);
				1628	/* reached the end of the bitmap or of the buffer, stop */
				1629	if ((next >= s->bitmap_size) \|\|
				1630	(next >= args->start_gfn + bufsize))
				1631	break;
				1632	}
				1633	cur++;
				1634	}
				1635	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1636	up_read(&kvm->mm->mmap_sem);
				1637	args->count = i;
				1638	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
				1639
				1640	rr = copy_to_user((void __user *)args->values, res, args->count);
				1641	if (rr)
				1642	r = -EFAULT;
				1643
				1644	vfree(res);
				1645	return r;
				1646	}
				1647
				1648	/*
				1649	* This function sets the CMMA attributes for the given pages. If the input
				1650	* buffer has zero length, no action is taken, otherwise the attributes are
				1651	* set and the mm->context.use_cmma flag is set.
				1652	*/
				1653	static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				1654	const struct kvm_s390_cmma_log *args)
				1655	{
				1656	unsigned long hva, mask, pgstev, i;
				1657	uint8_t *bits;
				1658	int srcu_idx, r = 0;
				1659
				1660	mask = args->mask;
				1661
				1662	if (!kvm->arch.use_cmma)
				1663	return -ENXIO;
				1664	/* invalid/unsupported flags */
				1665	if (args->flags != 0)
				1666	return -EINVAL;
				1667	/* Enforce sane limit on memory allocation */
				1668	if (args->count > KVM_S390_CMMA_SIZE_MAX)
				1669	return -EINVAL;
				1670	/* Nothing to do */
				1671	if (args->count == 0)
				1672	return 0;
				1673
				1674	bits = vmalloc(sizeof(bits) args->count);
				1675	if (!bits)
				1676	return -ENOMEM;
				1677
				1678	r = copy_from_user(bits, (void __user *)args->values, args->count);
				1679	if (r) {
				1680	r = -EFAULT;
				1681	goto out;
				1682	}
				1683
				1684	down_read(&kvm->mm->mmap_sem);
				1685	srcu_idx = srcu_read_lock(&kvm->srcu);
				1686	for (i = 0; i < args->count; i++) {
				1687	hva = gfn_to_hva(kvm, args->start_gfn + i);
				1688	if (kvm_is_error_hva(hva)) {
				1689	r = -EFAULT;
				1690	break;
				1691	}
				1692
				1693	pgstev = bits[i];
				1694	pgstev = pgstev << 24;
				1695	mask &= _PGSTE_GPS_USAGE_MASK \| _PGSTE_GPS_NODAT;
				1696	set_pgste_bits(kvm->mm, hva, mask, pgstev);
				1697	}
				1698	srcu_read_unlock(&kvm->srcu, srcu_idx);
				1699	up_read(&kvm->mm->mmap_sem);
				1700
				1701	if (!kvm->mm->context.use_cmma) {
				1702	down_write(&kvm->mm->mmap_sem);
				1703	kvm->mm->context.use_cmma = 1;
				1704	up_write(&kvm->mm->mmap_sem);
				1705	}
				1706	out:
				1707	vfree(bits);
				1708	return r;
				1709	}
				1710
				1711	long kvm_arch_vm_ioctl(struct file *filp,
				1712	unsigned int ioctl, unsigned long arg)
				1713	{
				1714	struct kvm *kvm = filp->private_data;
				1715	void __user argp = (void __user )arg;
				1716	struct kvm_device_attr attr;
				1717	int r;
				1718
				1719	switch (ioctl) {
				1720	case KVM_S390_INTERRUPT: {
				1721	struct kvm_s390_interrupt s390int;
				1722
				1723	r = -EFAULT;
				1724	if (copy_from_user(&s390int, argp, sizeof(s390int)))
				1725	break;
				1726	r = kvm_s390_inject_vm(kvm, &s390int);
				1727	break;
				1728	}
				1729	case KVM_ENABLE_CAP: {
				1730	struct kvm_enable_cap cap;
				1731	r = -EFAULT;
				1732	if (copy_from_user(&cap, argp, sizeof(cap)))
				1733	break;
				1734	r = kvm_vm_ioctl_enable_cap(kvm, &cap);
				1735	break;
				1736	}
				1737	case KVM_CREATE_IRQCHIP: {
				1738	struct kvm_irq_routing_entry routing;
				1739
				1740	r = -EINVAL;
				1741	if (kvm->arch.use_irqchip) {
				1742	/* Set up dummy routing. */
				1743	memset(&routing, 0, sizeof(routing));
				1744	r = kvm_set_irq_routing(kvm, &routing, 0, 0);
				1745	}
				1746	break;
				1747	}
				1748	case KVM_SET_DEVICE_ATTR: {
				1749	r = -EFAULT;
				1750	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1751	break;
				1752	r = kvm_s390_vm_set_attr(kvm, &attr);
				1753	break;
				1754	}
				1755	case KVM_GET_DEVICE_ATTR: {
				1756	r = -EFAULT;
				1757	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1758	break;
				1759	r = kvm_s390_vm_get_attr(kvm, &attr);
				1760	break;
				1761	}
				1762	case KVM_HAS_DEVICE_ATTR: {
				1763	r = -EFAULT;
				1764	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
				1765	break;
				1766	r = kvm_s390_vm_has_attr(kvm, &attr);
				1767	break;
				1768	}
				1769	case KVM_S390_GET_SKEYS: {
				1770	struct kvm_s390_skeys args;
				1771
				1772	r = -EFAULT;
				1773	if (copy_from_user(&args, argp,
				1774	sizeof(struct kvm_s390_skeys)))
				1775	break;
				1776	r = kvm_s390_get_skeys(kvm, &args);
				1777	break;
				1778	}
				1779	case KVM_S390_SET_SKEYS: {
				1780	struct kvm_s390_skeys args;
				1781
				1782	r = -EFAULT;
				1783	if (copy_from_user(&args, argp,
				1784	sizeof(struct kvm_s390_skeys)))
				1785	break;
				1786	r = kvm_s390_set_skeys(kvm, &args);
				1787	break;
				1788	}
				1789	case KVM_S390_GET_CMMA_BITS: {
				1790	struct kvm_s390_cmma_log args;
				1791
				1792	r = -EFAULT;
				1793	if (copy_from_user(&args, argp, sizeof(args)))
				1794	break;
				1795	mutex_lock(&kvm->slots_lock);
				1796	r = kvm_s390_get_cmma_bits(kvm, &args);
				1797	mutex_unlock(&kvm->slots_lock);
				1798	if (!r) {
				1799	r = copy_to_user(argp, &args, sizeof(args));
				1800	if (r)
				1801	r = -EFAULT;
				1802	}
				1803	break;
				1804	}
				1805	case KVM_S390_SET_CMMA_BITS: {
				1806	struct kvm_s390_cmma_log args;
				1807
				1808	r = -EFAULT;
				1809	if (copy_from_user(&args, argp, sizeof(args)))
				1810	break;
				1811	mutex_lock(&kvm->slots_lock);
				1812	r = kvm_s390_set_cmma_bits(kvm, &args);
				1813	mutex_unlock(&kvm->slots_lock);
				1814	break;
				1815	}
				1816	default:
				1817	r = -ENOTTY;
				1818	}
				1819
				1820	return r;
				1821	}
				1822
				1823	static int kvm_s390_query_ap_config(u8 *config)
				1824	{
				1825	u32 fcn_code = 0x04000000UL;
				1826	u32 cc = 0;
				1827
				1828	memset(config, 0, 128);
				1829	asm volatile(
				1830	"lgr 0,%1\n"
				1831	"lgr 2,%2\n"
				1832	".long 0xb2af0000\n" /* PQAP(QCI) */
				1833	"0: ipm %0\n"
				1834	"srl %0,28\n"
				1835	"1:\n"
				1836	EX_TABLE(0b, 1b)
				1837	: "+r" (cc)
				1838	: "r" (fcn_code), "r" (config)
				1839	: "cc", "0", "2", "memory"
				1840	);
				1841
				1842	return cc;
				1843	}
				1844
				1845	static int kvm_s390_apxa_installed(void)
				1846	{
				1847	u8 config[128];
				1848	int cc;
				1849
				1850	if (test_facility(12)) {
				1851	cc = kvm_s390_query_ap_config(config);
				1852
				1853	if (cc)
				1854	pr_err("PQAP(QCI) failed with cc=%d", cc);
				1855	else
				1856	return config[0] & 0x40;
				1857	}
				1858
				1859	return 0;
				1860	}
				1861
				1862	static void kvm_s390_set_crycb_format(struct kvm *kvm)
				1863	{
				1864	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
				1865
				1866	if (kvm_s390_apxa_installed())
				1867	kvm->arch.crypto.crycbd \|= CRYCB_FORMAT2;
				1868	else
				1869	kvm->arch.crypto.crycbd \|= CRYCB_FORMAT1;
				1870	}
				1871
				1872	static u64 kvm_s390_get_initial_cpuid(void)
				1873	{
				1874	struct cpuid cpuid;
				1875
				1876	get_cpu_id(&cpuid);
				1877	cpuid.version = 0xff;
				1878	return ((u64 ) &cpuid);
				1879	}
				1880
				1881	static void kvm_s390_crypto_init(struct kvm *kvm)
				1882	{
				1883	if (!test_kvm_facility(kvm, 76))
				1884	return;
				1885
				1886	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
				1887	kvm_s390_set_crycb_format(kvm);
				1888
				1889	/* Enable AES/DEA protected key functions by default */
				1890	kvm->arch.crypto.aes_kw = 1;
				1891	kvm->arch.crypto.dea_kw = 1;
				1892	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
				1893	sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
				1894	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
				1895	sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
				1896	}
				1897
				1898	static void sca_dispose(struct kvm *kvm)
				1899	{
				1900	if (kvm->arch.use_esca)
				1901	free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
				1902	else
				1903	free_page((unsigned long)(kvm->arch.sca));
				1904	kvm->arch.sca = NULL;
				1905	}
				1906
				1907	int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
				1908	{
				1909	gfp_t alloc_flags = GFP_KERNEL;
				1910	int i, rc;
				1911	char debug_name[16];
				1912	static unsigned long sca_offset;
				1913
				1914	rc = -EINVAL;
				1915	#ifdef CONFIG_KVM_S390_UCONTROL
				1916	if (type & ~KVM_VM_S390_UCONTROL)
				1917	goto out_err;
				1918	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
				1919	goto out_err;
				1920	#else
				1921	if (type)
				1922	goto out_err;
				1923	#endif
				1924
				1925	rc = s390_enable_sie();
				1926	if (rc)
				1927	goto out_err;
				1928
				1929	rc = -ENOMEM;
				1930
				1931	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
				1932
				1933	kvm->arch.use_esca = 0; /* start with basic SCA */
				1934	if (!sclp.has_64bscao)
				1935	alloc_flags \|= GFP_DMA;
				1936	rwlock_init(&kvm->arch.sca_lock);
				1937	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
				1938	if (!kvm->arch.sca)
				1939	goto out_err;
				1940	mutex_lock(&kvm_lock);
				1941	sca_offset += 16;
				1942	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
				1943	sca_offset = 0;
				1944	kvm->arch.sca = (struct bsca_block *)
				1945	((char *) kvm->arch.sca + sca_offset);
				1946	mutex_unlock(&kvm_lock);
				1947
				1948	sprintf(debug_name, "kvm-%u", current->pid);
				1949
				1950	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
				1951	if (!kvm->arch.dbf)
				1952	goto out_err;
				1953
				1954	kvm->arch.sie_page2 =
				1955	(struct sie_page2 *) get_zeroed_page(GFP_KERNEL \| GFP_DMA);
				1956	if (!kvm->arch.sie_page2)
				1957	goto out_err;
				1958
				1959	/* Populate the facility mask initially. */
				1960	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
				1961	sizeof(S390_lowcore.stfle_fac_list));
				1962	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
				1963	if (i < kvm_s390_fac_list_mask_size())
				1964	kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
				1965	else
				1966	kvm->arch.model.fac_mask[i] = 0UL;
				1967	}
				1968
				1969	/* Populate the facility list initially. */
				1970	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
				1971	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
				1972	S390_ARCH_FAC_LIST_SIZE_BYTE);
				1973
				1974	/* we are always in czam mode - even on pre z14 machines */
				1975	set_kvm_facility(kvm->arch.model.fac_mask, 138);
				1976	set_kvm_facility(kvm->arch.model.fac_list, 138);
				1977	/* we emulate STHYI in kvm */
				1978	set_kvm_facility(kvm->arch.model.fac_mask, 74);
				1979	set_kvm_facility(kvm->arch.model.fac_list, 74);
				1980	if (MACHINE_HAS_TLB_GUEST) {
				1981	set_kvm_facility(kvm->arch.model.fac_mask, 147);
				1982	set_kvm_facility(kvm->arch.model.fac_list, 147);
				1983	}
				1984
				1985	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
				1986	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
				1987
				1988	kvm_s390_crypto_init(kvm);
				1989
				1990	mutex_init(&kvm->arch.float_int.ais_lock);
				1991	kvm->arch.float_int.simm = 0;
				1992	kvm->arch.float_int.nimm = 0;
				1993	spin_lock_init(&kvm->arch.float_int.lock);
				1994	for (i = 0; i < FIRQ_LIST_COUNT; i++)
				1995	INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
				1996	init_waitqueue_head(&kvm->arch.ipte_wq);
				1997	mutex_init(&kvm->arch.ipte_mutex);
				1998
				1999	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
				2000	VM_EVENT(kvm, 3, "vm created with type %lu", type);
				2001
				2002	if (type & KVM_VM_S390_UCONTROL) {
				2003	kvm->arch.gmap = NULL;
				2004	kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
				2005	} else {
				2006	if (sclp.hamax == U64_MAX)
				2007	kvm->arch.mem_limit = TASK_SIZE_MAX;
				2008	else
				2009	kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
				2010	sclp.hamax + 1);
				2011	kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
				2012	if (!kvm->arch.gmap)
				2013	goto out_err;
				2014	kvm->arch.gmap->private = kvm;
				2015	kvm->arch.gmap->pfault_enabled = 0;
				2016	}
				2017
				2018	kvm->arch.css_support = 0;
				2019	kvm->arch.use_irqchip = 0;
				2020	kvm->arch.epoch = 0;
				2021
				2022	spin_lock_init(&kvm->arch.start_stop_lock);
				2023	kvm_s390_vsie_init(kvm);
				2024	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
				2025
				2026	return 0;
				2027	out_err:
				2028	free_page((unsigned long)kvm->arch.sie_page2);
				2029	debug_unregister(kvm->arch.dbf);
				2030	sca_dispose(kvm);
				2031	KVM_EVENT(3, "creation of vm failed: %d", rc);
				2032	return rc;
				2033	}
				2034
				/* This architecture code provides no per-vcpu debugfs entries. */
				bool kvm_arch_has_vcpu_debugfs(void)
				{
					return false;
				}
				2039
				/* Nothing to create for @vcpu; always reports success (0). */
				int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
				{
					return 0;
				}
				2044
				2045	void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
				2046	{
				2047	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
				2048	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
				2049	kvm_s390_clear_local_irqs(vcpu);
				2050	kvm_clear_async_pf_completion_queue(vcpu);
				2051	if (!kvm_is_ucontrol(vcpu->kvm))
				2052	sca_del_vcpu(vcpu);
				2053
				2054	if (kvm_is_ucontrol(vcpu->kvm))
				2055	gmap_remove(vcpu->arch.gmap);
				2056
				2057	if (vcpu->kvm->arch.use_cmma)
				2058	kvm_s390_vcpu_unsetup_cmma(vcpu);
				2059	free_page((unsigned long)(vcpu->arch.sie_block));
				2060
				2061	kvm_vcpu_uninit(vcpu);
				2062	kmem_cache_free(kvm_vcpu_cache, vcpu);
				2063	}
				2064
				2065	static void kvm_free_vcpus(struct kvm *kvm)
				2066	{
				2067	unsigned int i;
				2068	struct kvm_vcpu *vcpu;
				2069
				2070	kvm_for_each_vcpu(i, vcpu, kvm)
				2071	kvm_arch_vcpu_destroy(vcpu);
				2072
				2073	mutex_lock(&kvm->lock);
				2074	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
				2075	kvm->vcpus[i] = NULL;
				2076
				2077	atomic_set(&kvm->online_vcpus, 0);
				2078	mutex_unlock(&kvm->lock);
				2079	}
				2080
				2081	void kvm_arch_destroy_vm(struct kvm *kvm)
				2082	{
				2083	kvm_free_vcpus(kvm);
				2084	sca_dispose(kvm);
				2085	debug_unregister(kvm->arch.dbf);
				2086	free_page((unsigned long)kvm->arch.sie_page2);
				2087	if (!kvm_is_ucontrol(kvm))
				2088	gmap_remove(kvm->arch.gmap);
				2089	kvm_s390_destroy_adapters(kvm);
				2090	kvm_s390_clear_float_irqs(kvm);
				2091	kvm_s390_vsie_destroy(kvm);
				2092	if (kvm->arch.migration_state) {
				2093	vfree(kvm->arch.migration_state->pgste_bitmap);
				2094	kfree(kvm->arch.migration_state);
				2095	}
				2096	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
				2097	}
				2098
				2099	/* Section: vcpu related */
				2100	static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
				2101	{
				2102	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
				2103	if (!vcpu->arch.gmap)
				2104	return -ENOMEM;
				2105	vcpu->arch.gmap->private = vcpu->kvm;
				2106
				2107	return 0;
				2108	}
				2109
				2110	static void sca_del_vcpu(struct kvm_vcpu *vcpu)
				2111	{
				2112	if (!kvm_s390_use_sca_entries())
				2113	return;
				2114	read_lock(&vcpu->kvm->arch.sca_lock);
				2115	if (vcpu->kvm->arch.use_esca) {
				2116	struct esca_block *sca = vcpu->kvm->arch.sca;
				2117
				2118	clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
				2119	sca->cpu[vcpu->vcpu_id].sda = 0;
				2120	} else {
				2121	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2122
				2123	clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
				2124	sca->cpu[vcpu->vcpu_id].sda = 0;
				2125	}
				2126	read_unlock(&vcpu->kvm->arch.sca_lock);
				2127	}
				2128
				2129	static void sca_add_vcpu(struct kvm_vcpu *vcpu)
				2130	{
				2131	if (!kvm_s390_use_sca_entries()) {
				2132	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2133
				2134	/* we still need the basic sca for the ipte control */
				2135	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2136	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
				2137	return;
				2138	}
				2139	read_lock(&vcpu->kvm->arch.sca_lock);
				2140	if (vcpu->kvm->arch.use_esca) {
				2141	struct esca_block *sca = vcpu->kvm->arch.sca;
				2142
				2143	sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
				2144	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2145	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
				2146	vcpu->arch.sie_block->ecb2 \|= ECB2_ESCA;
				2147	set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
				2148	} else {
				2149	struct bsca_block *sca = vcpu->kvm->arch.sca;
				2150
				2151	sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
				2152	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
				2153	vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
				2154	set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
				2155	}
				2156	read_unlock(&vcpu->kvm->arch.sca_lock);
				2157	}
				2158
				2159	/* Basic SCA to Extended SCA data copy routines */
				2160	static inline void sca_copy_entry(struct esca_entry d, struct bsca_entry s)
				2161	{
				2162	d->sda = s->sda;
				2163	d->sigp_ctrl.c = s->sigp_ctrl.c;
				2164	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
				2165	}
				2166
				2167	static void sca_copy_b_to_e(struct esca_block d, struct bsca_block s)
				2168	{
				2169	int i;
				2170
				2171	d->ipte_control = s->ipte_control;
				2172	d->mcn[0] = s->mcn;
				2173	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
				2174	sca_copy_entry(&d->cpu[i], &s->cpu[i]);
				2175	}
				2176
				2177	static int sca_switch_to_extended(struct kvm *kvm)
				2178	{
				2179	struct bsca_block *old_sca = kvm->arch.sca;
				2180	struct esca_block *new_sca;
				2181	struct kvm_vcpu *vcpu;
				2182	unsigned int vcpu_idx;
				2183	u32 scaol, scaoh;
				2184
				2185	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL\|__GFP_ZERO);
				2186	if (!new_sca)
				2187	return -ENOMEM;
				2188
				2189	scaoh = (u32)((u64)(new_sca) >> 32);
				2190	scaol = (u32)(u64)(new_sca) & ~0x3fU;
				2191
				2192	kvm_s390_vcpu_block_all(kvm);
				2193	write_lock(&kvm->arch.sca_lock);
				2194
				2195	sca_copy_b_to_e(new_sca, old_sca);
				2196
				2197	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
				2198	vcpu->arch.sie_block->scaoh = scaoh;
				2199	vcpu->arch.sie_block->scaol = scaol;
				2200	vcpu->arch.sie_block->ecb2 \|= ECB2_ESCA;
				2201	}
				2202	kvm->arch.sca = new_sca;
				2203	kvm->arch.use_esca = 1;
				2204
				2205	write_unlock(&kvm->arch.sca_lock);
				2206	kvm_s390_vcpu_unblock_all(kvm);
				2207
				2208	free_page((unsigned long)old_sca);
				2209
				2210	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
				2211	old_sca, kvm->arch.sca);
				2212	return 0;
				2213	}
				2214
				2215	static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
				2216	{
				2217	int rc;
				2218
				2219	if (!kvm_s390_use_sca_entries()) {
				2220	if (id < KVM_MAX_VCPUS)
				2221	return true;
				2222	return false;
				2223	}
				2224	if (id < KVM_S390_BSCA_CPU_SLOTS)
				2225	return true;
				2226	if (!sclp.has_esca \|\| !sclp.has_64bscao)
				2227	return false;
				2228
				2229	mutex_lock(&kvm->lock);
				2230	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
				2231	mutex_unlock(&kvm->lock);
				2232
				2233	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
				2234	}
				2235
				2236	int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
				2237	{
				2238	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
				2239	kvm_clear_async_pf_completion_queue(vcpu);
				2240	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX \|
				2241	KVM_SYNC_GPRS \|
				2242	KVM_SYNC_ACRS \|
				2243	KVM_SYNC_CRS \|
				2244	KVM_SYNC_ARCH0 \|
				2245	KVM_SYNC_PFAULT;
				2246	kvm_s390_set_prefix(vcpu, 0);
				2247	if (test_kvm_facility(vcpu->kvm, 64))
				2248	vcpu->run->kvm_valid_regs \|= KVM_SYNC_RICCB;
				2249	if (test_kvm_facility(vcpu->kvm, 82))
				2250	vcpu->run->kvm_valid_regs \|= KVM_SYNC_BPBC;
				2251	if (test_kvm_facility(vcpu->kvm, 133))
				2252	vcpu->run->kvm_valid_regs \|= KVM_SYNC_GSCB;
				2253	/* fprs can be synchronized via vrs, even if the guest has no vx. With
				2254	* MACHINE_HAS_VX, (load\|store)_fpu_regs() will work with vrs format.
				2255	*/
				2256	if (MACHINE_HAS_VX)
				2257	vcpu->run->kvm_valid_regs \|= KVM_SYNC_VRS;
				2258	else
				2259	vcpu->run->kvm_valid_regs \|= KVM_SYNC_FPRS;
				2260
				2261	if (kvm_is_ucontrol(vcpu->kvm))
				2262	return __kvm_ucontrol_vcpu_init(vcpu);
				2263
				2264	return 0;
				2265	}
				2266
				2267	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2268	static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2269	{
				2270	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
				2271	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2272	vcpu->arch.cputm_start = get_tod_clock_fast();
				2273	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2274	}
				2275
				2276	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2277	static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2278	{
				2279	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
				2280	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2281	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
				2282	vcpu->arch.cputm_start = 0;
				2283	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2284	}
				2285
				2286	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2287	static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2288	{
				2289	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
				2290	vcpu->arch.cputm_enabled = true;
				2291	__start_cpu_timer_accounting(vcpu);
				2292	}
				2293
				2294	/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
				2295	static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				2296	{
				2297	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
				2298	__stop_cpu_timer_accounting(vcpu);
				2299	vcpu->arch.cputm_enabled = false;
				2300	}
				2301
				/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
				static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				{
					/* protect from TOD sync and vcpu_load/put */
					preempt_disable();
					__enable_cpu_timer_accounting(vcpu);
					preempt_enable();
				}
				2308
				/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
				static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
				{
					/* protect from TOD sync and vcpu_load/put */
					preempt_disable();
					__disable_cpu_timer_accounting(vcpu);
					preempt_enable();
				}
				2315
				2316	/* set the cpu timer - may only be called from the VCPU thread itself */
				2317	void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
				2318	{
				2319	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
				2320	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
				2321	if (vcpu->arch.cputm_enabled)
				2322	vcpu->arch.cputm_start = get_tod_clock_fast();
				2323	vcpu->arch.sie_block->cputm = cputm;
				2324	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
				2325	preempt_enable();
				2326	}
				2327
				2328	/* update and get the cpu timer - can also be called from other VCPU threads */
				2329	__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
				2330	{
				2331	unsigned int seq;
				2332	__u64 value;
				2333
				2334	if (unlikely(!vcpu->arch.cputm_enabled))
				2335	return vcpu->arch.sie_block->cputm;
				2336
				2337	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
				2338	do {
				2339	seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
				2340	/*
				2341	* If the writer would ever execute a read in the critical
				2342	* section, e.g. in irq context, we have a deadlock.
				2343	*/
				2344	WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
				2345	value = vcpu->arch.sie_block->cputm;
				2346	/* if cputm_start is 0, accounting is being started/stopped */
				2347	if (likely(vcpu->arch.cputm_start))
				2348	value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
				2349	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
				2350	preempt_enable();
				2351	return value;
				2352	}
				2353
				2354	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
				2355	{
				2356
				2357	gmap_enable(vcpu->arch.enabled_gmap);
				2358	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
				2359	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
				2360	__start_cpu_timer_accounting(vcpu);
				2361	vcpu->cpu = cpu;
				2362	}
				2363
				2364	void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
				2365	{
				2366	vcpu->cpu = -1;
				2367	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
				2368	__stop_cpu_timer_accounting(vcpu);
				2369	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
				2370	vcpu->arch.enabled_gmap = gmap_get_enabled();
				2371	gmap_disable(vcpu->arch.enabled_gmap);
				2372
				2373	}
				2374
				2375	static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
				2376	{
				2377	/* this equals initial cpu reset in pop, but we don't switch to ESA */
				2378	vcpu->arch.sie_block->gpsw.mask = 0UL;
				2379	vcpu->arch.sie_block->gpsw.addr = 0UL;
				2380	kvm_s390_set_prefix(vcpu, 0);
				2381	kvm_s390_set_cpu_timer(vcpu, 0);
				2382	vcpu->arch.sie_block->ckc = 0UL;
				2383	vcpu->arch.sie_block->todpr = 0;
				2384	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
				2385	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
				2386	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
				2387	vcpu->run->s.regs.fpc = 0;
				2388	vcpu->arch.sie_block->gbea = 1;
				2389	vcpu->arch.sie_block->pp = 0;
				2390	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
				2391	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
				2392	kvm_clear_async_pf_completion_queue(vcpu);
				2393	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
				2394	kvm_s390_vcpu_stop(vcpu);
				2395	kvm_s390_clear_local_irqs(vcpu);
				2396	}
				2397
				2398	void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
				2399	{
				2400	mutex_lock(&vcpu->kvm->lock);
				2401	preempt_disable();
				2402	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
				2403	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
				2404	preempt_enable();
				2405	mutex_unlock(&vcpu->kvm->lock);
				2406	if (!kvm_is_ucontrol(vcpu->kvm)) {
				2407	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
				2408	sca_add_vcpu(vcpu);
				2409	}
				2410	if (test_kvm_facility(vcpu->kvm, 74) \|\| vcpu->kvm->arch.user_instr0)
				2411	vcpu->arch.sie_block->ictl \|= ICTL_OPEREXC;
				2412	/* make vcpu_load load the right gmap on the first trigger */
				2413	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
				2414	}
				2415
				2416	static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
				2417	{
				2418	if (!test_kvm_facility(vcpu->kvm, 76))
				2419	return;
				2420
				2421	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES \| ECB3_DEA);
				2422
				2423	if (vcpu->kvm->arch.crypto.aes_kw)
				2424	vcpu->arch.sie_block->ecb3 \|= ECB3_AES;
				2425	if (vcpu->kvm->arch.crypto.dea_kw)
				2426	vcpu->arch.sie_block->ecb3 \|= ECB3_DEA;
				2427
				2428	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
				2429	}
				2430
				2431	void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
				2432	{
				2433	free_page(vcpu->arch.sie_block->cbrlo);
				2434	vcpu->arch.sie_block->cbrlo = 0;
				2435	}
				2436
				2437	int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
				2438	{
				2439	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
				2440	if (!vcpu->arch.sie_block->cbrlo)
				2441	return -ENOMEM;
				2442
				2443	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
				2444	return 0;
				2445	}
				2446
				2447	static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
				2448	{
				2449	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
				2450
				2451	vcpu->arch.sie_block->ibc = model->ibc;
				2452	if (test_kvm_facility(vcpu->kvm, 7))
				2453	vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
				2454	}
				2455
				2456	int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
				2457	{
				2458	int rc = 0;
				2459
				2460	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH \|
				2461	CPUSTAT_SM \|
				2462	CPUSTAT_STOPPED);
				2463
				2464	if (test_kvm_facility(vcpu->kvm, 78))
				2465	atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
				2466	else if (test_kvm_facility(vcpu->kvm, 8))
				2467	atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
				2468
				2469	kvm_s390_vcpu_setup_model(vcpu);
				2470
				2471	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
				2472	if (MACHINE_HAS_ESOP)
				2473	vcpu->arch.sie_block->ecb \|= ECB_HOSTPROTINT;
				2474	if (test_kvm_facility(vcpu->kvm, 9))
				2475	vcpu->arch.sie_block->ecb \|= ECB_SRSI;
				2476	if (test_kvm_facility(vcpu->kvm, 73))
				2477	vcpu->arch.sie_block->ecb \|= ECB_TE;
				2478
				2479	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
				2480	vcpu->arch.sie_block->ecb2 \|= ECB2_PFMFI;
				2481	if (test_kvm_facility(vcpu->kvm, 130))
				2482	vcpu->arch.sie_block->ecb2 \|= ECB2_IEP;
				2483	vcpu->arch.sie_block->eca = ECA_MVPGI \| ECA_PROTEXCI;
				2484	if (sclp.has_cei)
				2485	vcpu->arch.sie_block->eca \|= ECA_CEI;
				2486	if (sclp.has_ib)
				2487	vcpu->arch.sie_block->eca \|= ECA_IB;
				2488	if (sclp.has_siif)
				2489	vcpu->arch.sie_block->eca \|= ECA_SII;
				2490	if (sclp.has_sigpif)
				2491	vcpu->arch.sie_block->eca \|= ECA_SIGPI;
				2492	if (test_kvm_facility(vcpu->kvm, 129)) {
				2493	vcpu->arch.sie_block->eca \|= ECA_VX;
				2494	vcpu->arch.sie_block->ecd \|= ECD_HOSTREGMGMT;
				2495	}
				2496	if (test_kvm_facility(vcpu->kvm, 139))
				2497	vcpu->arch.sie_block->ecd \|= ECD_MEF;
				2498
				2499	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
				2500	\| SDNXC;
				2501	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
				2502
				2503	if (sclp.has_kss)
				2504	atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
				2505	else
				2506	vcpu->arch.sie_block->ictl \|= ICTL_ISKE \| ICTL_SSKE \| ICTL_RRBE;
				2507
				2508	if (vcpu->kvm->arch.use_cmma) {
				2509	rc = kvm_s390_vcpu_setup_cmma(vcpu);
				2510	if (rc)
				2511	return rc;
				2512	}
				2513	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				2514	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
				2515
				2516	kvm_s390_vcpu_crypto_setup(vcpu);
				2517
				2518	return rc;
				2519	}
				2520
				2521	struct kvm_vcpu kvm_arch_vcpu_create(struct kvm kvm,
				2522	unsigned int id)
				2523	{
				2524	struct kvm_vcpu *vcpu;
				2525	struct sie_page *sie_page;
				2526	int rc = -EINVAL;
				2527
				2528	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
				2529	goto out;
				2530
				2531	rc = -ENOMEM;
				2532
				2533	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
				2534	if (!vcpu)
				2535	goto out;
				2536
				2537	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
				2538	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
				2539	if (!sie_page)
				2540	goto out_free_cpu;
				2541
				2542	vcpu->arch.sie_block = &sie_page->sie_block;
				2543	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
				2544
				2545	/* the real guest size will always be smaller than msl */
				2546	vcpu->arch.sie_block->mso = 0;
				2547	vcpu->arch.sie_block->msl = sclp.hamax;
				2548
				2549	vcpu->arch.sie_block->icpua = id;
				2550	spin_lock_init(&vcpu->arch.local_int.lock);
				2551	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
				2552	vcpu->arch.local_int.wq = &vcpu->wq;
				2553	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
				2554	seqcount_init(&vcpu->arch.cputm_seqcount);
				2555
				2556	rc = kvm_vcpu_init(vcpu, kvm, id);
				2557	if (rc)
				2558	goto out_free_sie_block;
				2559	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
				2560	vcpu->arch.sie_block);
				2561	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
				2562
				2563	return vcpu;
				2564	out_free_sie_block:
				2565	free_page((unsigned long)(vcpu->arch.sie_block));
				2566	out_free_cpu:
				2567	kmem_cache_free(kvm_vcpu_cache, vcpu);
				2568	out:
				2569	return ERR_PTR(rc);
				2570	}
				2571
				/* Returns the result of kvm_s390_vcpu_has_irq() for @vcpu (second arg 0). */
				int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
				{
					int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

					return has_irq;
				}
				2576
				2577	bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
				2578	{
				2579	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
				2580	}
				2581
				2582	void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
				2583	{
				2584	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
				2585	exit_sie(vcpu);
				2586	}
				2587
				2588	void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
				2589	{
				2590	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
				2591	}
				2592
				2593	static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
				2594	{
				2595	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
				2596	exit_sie(vcpu);
				2597	}
				2598
				2599	static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
				2600	{
				2601	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
				2602	}
				2603
				2604	/*
				2605	* Kick a guest cpu out of SIE and wait until SIE is not running.
				2606	* If the CPU is not running (e.g. waiting as idle) the function will
				2607	* return immediately. */
				2608	void exit_sie(struct kvm_vcpu *vcpu)
				2609	{
				2610	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
				2611	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
				2612	cpu_relax();
				2613	}
				2614
				/*
				 * Kick a guest cpu out of SIE to process a request synchronously:
				 * queue @req on @vcpu, then flag the request and force a SIE exit.
				 */
				void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
				{
					kvm_make_request(req, vcpu);

					kvm_s390_vcpu_request(vcpu);
				}
				2621
				2622	static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
				2623	unsigned long end)
				2624	{
				2625	struct kvm *kvm = gmap->private;
				2626	struct kvm_vcpu *vcpu;
				2627	unsigned long prefix;
				2628	int i;
				2629
				2630	if (gmap_is_shadow(gmap))
				2631	return;
				2632	if (start >= 1UL << 31)
				2633	/* We are only interested in prefix pages */
				2634	return;
				2635	kvm_for_each_vcpu(i, vcpu, kvm) {
				2636	/* match against both prefix pages */
				2637	prefix = kvm_s390_get_prefix(vcpu);
				2638	if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
				2639	VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				2640	start, end);
				2641	kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
				2642	}
				2643	}
				2644	}
				2645
				/*
				 * kvm common code refers to this, but never calls it - reaching it is
				 * a bug, hence the BUG().
				 */
				int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
				{
					BUG();

					return 0;
				}
				2652
				2653	static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
				2654	struct kvm_one_reg *reg)
				2655	{
				2656	int r = -EINVAL;
				2657
				2658	switch (reg->id) {
				2659	case KVM_REG_S390_TODPR:
				2660	r = put_user(vcpu->arch.sie_block->todpr,
				2661	(u32 __user *)reg->addr);
				2662	break;
				2663	case KVM_REG_S390_EPOCHDIFF:
				2664	r = put_user(vcpu->arch.sie_block->epoch,
				2665	(u64 __user *)reg->addr);
				2666	break;
				2667	case KVM_REG_S390_CPU_TIMER:
				2668	r = put_user(kvm_s390_get_cpu_timer(vcpu),
				2669	(u64 __user *)reg->addr);
				2670	break;
				2671	case KVM_REG_S390_CLOCK_COMP:
				2672	r = put_user(vcpu->arch.sie_block->ckc,
				2673	(u64 __user *)reg->addr);
				2674	break;
				2675	case KVM_REG_S390_PFTOKEN:
				2676	r = put_user(vcpu->arch.pfault_token,
				2677	(u64 __user *)reg->addr);
				2678	break;
				2679	case KVM_REG_S390_PFCOMPARE:
				2680	r = put_user(vcpu->arch.pfault_compare,
				2681	(u64 __user *)reg->addr);
				2682	break;
				2683	case KVM_REG_S390_PFSELECT:
				2684	r = put_user(vcpu->arch.pfault_select,
				2685	(u64 __user *)reg->addr);
				2686	break;
				2687	case KVM_REG_S390_PP:
				2688	r = put_user(vcpu->arch.sie_block->pp,
				2689	(u64 __user *)reg->addr);
				2690	break;
				2691	case KVM_REG_S390_GBEA:
				2692	r = put_user(vcpu->arch.sie_block->gbea,
				2693	(u64 __user *)reg->addr);
				2694	break;
				2695	default:
				2696	break;
				2697	}
				2698
				2699	return r;
				2700	}
				2701
				2702	static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
				2703	struct kvm_one_reg *reg)
				2704	{
				2705	int r = -EINVAL;
				2706	__u64 val;
				2707
				2708	switch (reg->id) {
				2709	case KVM_REG_S390_TODPR:
				2710	r = get_user(vcpu->arch.sie_block->todpr,
				2711	(u32 __user *)reg->addr);
				2712	break;
				2713	case KVM_REG_S390_EPOCHDIFF:
				2714	r = get_user(vcpu->arch.sie_block->epoch,
				2715	(u64 __user *)reg->addr);
				2716	break;
				2717	case KVM_REG_S390_CPU_TIMER:
				2718	r = get_user(val, (u64 __user *)reg->addr);
				2719	if (!r)
				2720	kvm_s390_set_cpu_timer(vcpu, val);
				2721	break;
				2722	case KVM_REG_S390_CLOCK_COMP:
				2723	r = get_user(vcpu->arch.sie_block->ckc,
				2724	(u64 __user *)reg->addr);
				2725	break;
				2726	case KVM_REG_S390_PFTOKEN:
				2727	r = get_user(vcpu->arch.pfault_token,
				2728	(u64 __user *)reg->addr);
				2729	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				2730	kvm_clear_async_pf_completion_queue(vcpu);
				2731	break;
				2732	case KVM_REG_S390_PFCOMPARE:
				2733	r = get_user(vcpu->arch.pfault_compare,
				2734	(u64 __user *)reg->addr);
				2735	break;
				2736	case KVM_REG_S390_PFSELECT:
				2737	r = get_user(vcpu->arch.pfault_select,
				2738	(u64 __user *)reg->addr);
				2739	break;
				2740	case KVM_REG_S390_PP:
				2741	r = get_user(vcpu->arch.sie_block->pp,
				2742	(u64 __user *)reg->addr);
				2743	break;
				2744	case KVM_REG_S390_GBEA:
				2745	r = get_user(vcpu->arch.sie_block->gbea,
				2746	(u64 __user *)reg->addr);
				2747	break;
				2748	default:
				2749	break;
				2750	}
				2751
				2752	return r;
				2753	}
				2754
				/* ioctl wrapper: perform the initial cpu reset; always returns 0. */
				static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
				{
					kvm_s390_vcpu_initial_reset(vcpu);

					return 0;
				}
				2760
				2761	int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu vcpu, struct kvm_regs regs)
				2762	{
				2763	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
				2764	return 0;
				2765	}
				2766
				2767	int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu vcpu, struct kvm_regs regs)
				2768	{
				2769	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
				2770	return 0;
				2771	}
				2772
				2773	int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				2774	struct kvm_sregs *sregs)
				2775	{
				2776	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
				2777	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
				2778	return 0;
				2779	}
				2780
				2781	int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				2782	struct kvm_sregs *sregs)
				2783	{
				2784	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
				2785	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
				2786	return 0;
				2787	}
				2788
				2789	int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu vcpu, struct kvm_fpu fpu)
				2790	{
				2791	if (test_fp_ctl(fpu->fpc))
				2792	return -EINVAL;
				2793	vcpu->run->s.regs.fpc = fpu->fpc;
				2794	if (MACHINE_HAS_VX)
				2795	convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				2796	(freg_t *) fpu->fprs);
				2797	else
				2798	memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
				2799	return 0;
				2800	}
				2801
				2802	int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu vcpu, struct kvm_fpu fpu)
				2803	{
				2804	/* make sure we have the latest values */
				2805	save_fpu_regs();
				2806	if (MACHINE_HAS_VX)
				2807	convert_vx_to_fp((freg_t *) fpu->fprs,
				2808	(__vector128 *) vcpu->run->s.regs.vrs);
				2809	else
				2810	memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
				2811	fpu->fpc = vcpu->run->s.regs.fpc;
				2812	return 0;
				2813	}
				2814
				2815	static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
				2816	{
				2817	int rc = 0;
				2818
				2819	if (!is_vcpu_stopped(vcpu))
				2820	rc = -EBUSY;
				2821	else {
				2822	vcpu->run->psw_mask = psw.mask;
				2823	vcpu->run->psw_addr = psw.addr;
				2824	}
				2825	return rc;
				2826	}
				2827
				/* Address translation ioctl: not implemented yet, always -EINVAL. */
				int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
								  struct kvm_translation *tr)
				{
					return -EINVAL;
				}
				2833
				2834	#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP \| \
				2835	KVM_GUESTDBG_USE_HW_BP \| \
				2836	KVM_GUESTDBG_ENABLE)
				2837
				2838	int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
				2839	struct kvm_guest_debug *dbg)
				2840	{
				2841	int rc = 0;
				2842
				2843	vcpu->guest_debug = 0;
				2844	kvm_s390_clear_bp_data(vcpu);
				2845
				2846	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
				2847	return -EINVAL;
				2848	if (!sclp.has_gpere)
				2849	return -EINVAL;
				2850
				2851	if (dbg->control & KVM_GUESTDBG_ENABLE) {
				2852	vcpu->guest_debug = dbg->control;
				2853	/* enforce guest PER */
				2854	atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
				2855
				2856	if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
				2857	rc = kvm_s390_import_bp_data(vcpu, dbg);
				2858	} else {
				2859	atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
				2860	vcpu->arch.guestdbg.last_bp = 0;
				2861	}
				2862
				2863	if (rc) {
				2864	vcpu->guest_debug = 0;
				2865	kvm_s390_clear_bp_data(vcpu);
				2866	atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
				2867	}
				2868
				2869	return rc;
				2870	}
				2871
				2872	int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				2873	struct kvm_mp_state *mp_state)
				2874	{
				2875	/* CHECK_STOP and LOAD are not supported yet */
				2876	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				2877	KVM_MP_STATE_OPERATING;
				2878	}
				2879
				2880	int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				2881	struct kvm_mp_state *mp_state)
				2882	{
				2883	int rc = 0;
				2884
				2885	/* user space knows about this interface - let it control the state */
				2886	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
				2887
				2888	switch (mp_state->mp_state) {
				2889	case KVM_MP_STATE_STOPPED:
				2890	kvm_s390_vcpu_stop(vcpu);
				2891	break;
				2892	case KVM_MP_STATE_OPERATING:
				2893	kvm_s390_vcpu_start(vcpu);
				2894	break;
				2895	case KVM_MP_STATE_LOAD:
				2896	case KVM_MP_STATE_CHECK_STOP:
				2897	/* fall through - CHECK_STOP and LOAD are not supported yet */
				2898	default:
				2899	rc = -ENXIO;
				2900	}
				2901
				2902	return rc;
				2903	}
				2904
				2905	static bool ibs_enabled(struct kvm_vcpu *vcpu)
				2906	{
				2907	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
				2908	}
				2909
				2910	static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
				2911	{
				2912	retry:
				2913	kvm_s390_vcpu_request_handled(vcpu);
				2914	if (!kvm_request_pending(vcpu))
				2915	return 0;
				2916	/*
				2917	* We use MMU_RELOAD just to re-arm the ipte notifier for the
				2918	* guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
				2919	* This ensures that the ipte instruction for this request has
				2920	* already finished. We might race against a second unmapper that
				2921	* wants to set the blocking bit. Lets just retry the request loop.
				2922	*/
				2923	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
				2924	int rc;
				2925	rc = gmap_mprotect_notify(vcpu->arch.gmap,
				2926	kvm_s390_get_prefix(vcpu),
				2927	PAGE_SIZE * 2, PROT_WRITE);
				2928	if (rc) {
				2929	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
				2930	return rc;
				2931	}
				2932	goto retry;
				2933	}
				2934
				2935	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
				2936	vcpu->arch.sie_block->ihcpu = 0xffff;
				2937	goto retry;
				2938	}
				2939
				2940	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
				2941	if (!ibs_enabled(vcpu)) {
				2942	trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
				2943	atomic_or(CPUSTAT_IBS,
				2944	&vcpu->arch.sie_block->cpuflags);
				2945	}
				2946	goto retry;
				2947	}
				2948
				2949	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
				2950	if (ibs_enabled(vcpu)) {
				2951	trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
				2952	atomic_andnot(CPUSTAT_IBS,
				2953	&vcpu->arch.sie_block->cpuflags);
				2954	}
				2955	goto retry;
				2956	}
				2957
				2958	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
				2959	vcpu->arch.sie_block->ictl \|= ICTL_OPEREXC;
				2960	goto retry;
				2961	}
				2962
				2963	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
				2964	/*
				2965	* Disable CMMA virtualization; we will emulate the ESSA
				2966	* instruction manually, in order to provide additional
				2967	* functionalities needed for live migration.
				2968	*/
				2969	vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
				2970	goto retry;
				2971	}
				2972
				2973	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
				2974	/*
				2975	* Re-enable CMMA virtualization if CMMA is available and
				2976	* was used.
				2977	*/
				2978	if ((vcpu->kvm->arch.use_cmma) &&
				2979	(vcpu->kvm->mm->context.use_cmma))
				2980	vcpu->arch.sie_block->ecb2 \|= ECB2_CMMA;
				2981	goto retry;
				2982	}
				2983
				2984	/* nothing to do, just clear the request */
				2985	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
				2986
				2987	return 0;
				2988	}
				2989
				2990	void kvm_s390_set_tod_clock(struct kvm *kvm,
				2991	const struct kvm_s390_vm_tod_clock *gtod)
				2992	{
				2993	struct kvm_vcpu *vcpu;
				2994	struct kvm_s390_tod_clock_ext htod;
				2995	int i;
				2996
				2997	mutex_lock(&kvm->lock);
				2998	preempt_disable();
				2999
				3000	get_tod_clock_ext((char *)&htod);
				3001
				3002	kvm->arch.epoch = gtod->tod - htod.tod;
				3003	kvm->arch.epdx = 0;
				3004	if (test_kvm_facility(kvm, 139)) {
				3005	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
				3006	if (kvm->arch.epoch > gtod->tod)
				3007	kvm->arch.epdx -= 1;
				3008	}
				3009
				3010	kvm_s390_vcpu_block_all(kvm);
				3011	kvm_for_each_vcpu(i, vcpu, kvm) {
				3012	vcpu->arch.sie_block->epoch = kvm->arch.epoch;
				3013	vcpu->arch.sie_block->epdx = kvm->arch.epdx;
				3014	}
				3015
				3016	kvm_s390_vcpu_unblock_all(kvm);
				3017	preempt_enable();
				3018	mutex_unlock(&kvm->lock);
				3019	}
				3020
				3021	/**
				3022	* kvm_arch_fault_in_page - fault-in guest page if necessary
				3023	* @vcpu: The corresponding virtual cpu
				3024	* @gpa: Guest physical address
				3025	* @writable: Whether the page should be writable or not
				3026	*
				3027	* Make sure that a guest page has been faulted-in on the host.
				3028	*
				3029	* Return: Zero on success, negative error code otherwise.
				3030	*/
				3031	long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
				3032	{
				3033	return gmap_fault(vcpu->arch.gmap, gpa,
				3034	writable ? FAULT_FLAG_WRITE : 0);
				3035	}
				3036
				3037	static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				3038	unsigned long token)
				3039	{
				3040	struct kvm_s390_interrupt inti;
				3041	struct kvm_s390_irq irq;
				3042
				3043	if (start_token) {
				3044	irq.u.ext.ext_params2 = token;
				3045	irq.type = KVM_S390_INT_PFAULT_INIT;
				3046	WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
				3047	} else {
				3048	inti.type = KVM_S390_INT_PFAULT_DONE;
				3049	inti.parm64 = token;
				3050	WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
				3051	}
				3052	}
				3053
				3054	void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				3055	struct kvm_async_pf *work)
				3056	{
				3057	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
				3058	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
				3059	}
				3060
				3061	void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				3062	struct kvm_async_pf *work)
				3063	{
				3064	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
				3065	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
				3066	}
				3067
				3068	void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
				3069	struct kvm_async_pf *work)
				3070	{
				3071	/* s390 will always inject the page directly */
				3072	}
				3073
				3074	bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
				3075	{
				3076	/*
				3077	* s390 will always inject the page directly,
				3078	* but we still want check_async_completion to cleanup
				3079	*/
				3080	return true;
				3081	}
				3082
				3083	static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
				3084	{
				3085	hva_t hva;
				3086	struct kvm_arch_async_pf arch;
				3087	int rc;
				3088
				3089	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				3090	return 0;
				3091	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
				3092	vcpu->arch.pfault_compare)
				3093	return 0;
				3094	if (psw_extint_disabled(vcpu))
				3095	return 0;
				3096	if (kvm_s390_vcpu_has_irq(vcpu, 0))
				3097	return 0;
				3098	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
				3099	return 0;
				3100	if (!vcpu->arch.gmap->pfault_enabled)
				3101	return 0;
				3102
				3103	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
				3104	hva += current->thread.gmap_addr & ~PAGE_MASK;
				3105	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
				3106	return 0;
				3107
				3108	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
				3109	return rc;
				3110	}
				3111
				3112	static int vcpu_pre_run(struct kvm_vcpu *vcpu)
				3113	{
				3114	int rc, cpuflags;
				3115
				3116	/*
				3117	* On s390 notifications for arriving pages will be delivered directly
				3118	* to the guest but the house keeping for completed pfaults is
				3119	* handled outside the worker.
				3120	*/
				3121	kvm_check_async_pf_completion(vcpu);
				3122
				3123	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
				3124	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
				3125
				3126	if (need_resched())
				3127	schedule();
				3128
				3129	if (test_cpu_flag(CIF_MCCK_PENDING))
				3130	s390_handle_mcck();
				3131
				3132	if (!kvm_is_ucontrol(vcpu->kvm)) {
				3133	rc = kvm_s390_deliver_pending_interrupts(vcpu);
				3134	if (rc)
				3135	return rc;
				3136	}
				3137
				3138	rc = kvm_s390_handle_requests(vcpu);
				3139	if (rc)
				3140	return rc;
				3141
				3142	if (guestdbg_enabled(vcpu)) {
				3143	kvm_s390_backup_guest_per_regs(vcpu);
				3144	kvm_s390_patch_guest_per_regs(vcpu);
				3145	}
				3146
				3147	vcpu->arch.sie_block->icptcode = 0;
				3148	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
				3149	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
				3150	trace_kvm_s390_sie_enter(vcpu, cpuflags);
				3151
				3152	return 0;
				3153	}
				3154
				3155	static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
				3156	{
				3157	struct kvm_s390_pgm_info pgm_info = {
				3158	.code = PGM_ADDRESSING,
				3159	};
				3160	u8 opcode, ilen;
				3161	int rc;
				3162
				3163	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
				3164	trace_kvm_s390_sie_fault(vcpu);
				3165
				3166	/*
				3167	* We want to inject an addressing exception, which is defined as a
				3168	* suppressing or terminating exception. However, since we came here
				3169	* by a DAT access exception, the PSW still points to the faulting
				3170	* instruction since DAT exceptions are nullifying. So we've got
				3171	* to look up the current opcode to get the length of the instruction
				3172	* to be able to forward the PSW.
				3173	*/
				3174	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
				3175	ilen = insn_length(opcode);
				3176	if (rc < 0) {
				3177	return rc;
				3178	} else if (rc) {
				3179	/* Instruction-Fetching Exceptions - we can't detect the ilen.
				3180	* Forward by arbitrary ilc, injection will take care of
				3181	* nullification if necessary.
				3182	*/
				3183	pgm_info = vcpu->arch.pgm;
				3184	ilen = 4;
				3185	}
				3186	pgm_info.flags = ilen \| KVM_S390_PGM_FLAGS_ILC_VALID;
				3187	kvm_s390_forward_psw(vcpu, ilen);
				3188	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
				3189	}
				3190
				3191	static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
				3192	{
				3193	struct mcck_volatile_info *mcck_info;
				3194	struct sie_page *sie_page;
				3195
				3196	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
				3197	vcpu->arch.sie_block->icptcode);
				3198	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
				3199
				3200	if (guestdbg_enabled(vcpu))
				3201	kvm_s390_restore_guest_per_regs(vcpu);
				3202
				3203	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
				3204	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
				3205
				3206	if (exit_reason == -EINTR) {
				3207	VCPU_EVENT(vcpu, 3, "%s", "machine check");
				3208	sie_page = container_of(vcpu->arch.sie_block,
				3209	struct sie_page, sie_block);
				3210	mcck_info = &sie_page->mcck_info;
				3211	kvm_s390_reinject_machine_check(vcpu, mcck_info);
				3212	return 0;
				3213	}
				3214
				3215	if (vcpu->arch.sie_block->icptcode > 0) {
				3216	int rc = kvm_handle_sie_intercept(vcpu);
				3217
				3218	if (rc != -EOPNOTSUPP)
				3219	return rc;
				3220	vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
				3221	vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
				3222	vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
				3223	vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
				3224	return -EREMOTE;
				3225	} else if (exit_reason != -EFAULT) {
				3226	vcpu->stat.exit_null++;
				3227	return 0;
				3228	} else if (kvm_is_ucontrol(vcpu->kvm)) {
				3229	vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
				3230	vcpu->run->s390_ucontrol.trans_exc_code =
				3231	current->thread.gmap_addr;
				3232	vcpu->run->s390_ucontrol.pgm_code = 0x10;
				3233	return -EREMOTE;
				3234	} else if (current->thread.gmap_pfault) {
				3235	trace_kvm_s390_major_guest_pfault(vcpu);
				3236	current->thread.gmap_pfault = 0;
				3237	if (kvm_arch_setup_async_pf(vcpu))
				3238	return 0;
				3239	return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
				3240	}
				3241	return vcpu_post_run_fault_in_sie(vcpu);
				3242	}
				3243
				3244	static int __vcpu_run(struct kvm_vcpu *vcpu)
				3245	{
				3246	int rc, exit_reason;
				3247
				3248	/*
				3249	* We try to hold kvm->srcu during most of vcpu_run (except when run-
				3250	* ning the guest), so that memslots (and other stuff) are protected
				3251	*/
				3252	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3253
				3254	do {
				3255	rc = vcpu_pre_run(vcpu);
				3256	if (rc)
				3257	break;
				3258
				3259	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
				3260	/*
				3261	* As PF_VCPU will be used in fault handler, between
				3262	* guest_enter and guest_exit should be no uaccess.
				3263	*/
				3264	local_irq_disable();
				3265	guest_enter_irqoff();
				3266	__disable_cpu_timer_accounting(vcpu);
				3267	local_irq_enable();
				3268	exit_reason = sie64a(vcpu->arch.sie_block,
				3269	vcpu->run->s.regs.gprs);
				3270	local_irq_disable();
				3271	__enable_cpu_timer_accounting(vcpu);
				3272	guest_exit_irqoff();
				3273	local_irq_enable();
				3274	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3275
				3276	rc = vcpu_post_run(vcpu, exit_reason);
				3277	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
				3278
				3279	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
				3280	return rc;
				3281	}
				3282
				3283	static void sync_regs(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3284	{
				3285	struct runtime_instr_cb *riccb;
				3286	struct gs_cb *gscb;
				3287
				3288	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
				3289	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
				3290	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
				3291	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
				3292	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
				3293	kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
				3294	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
				3295	memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
				3296	/* some control register changes require a tlb flush */
				3297	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
				3298	}
				3299	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
				3300	kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
				3301	vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
				3302	vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
				3303	vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
				3304	vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
				3305	}
				3306	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
				3307	vcpu->arch.pfault_token = kvm_run->s.regs.pft;
				3308	vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
				3309	vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
				3310	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
				3311	kvm_clear_async_pf_completion_queue(vcpu);
				3312	}
				3313	/*
				3314	* If userspace sets the riccb (e.g. after migration) to a valid state,
				3315	* we should enable RI here instead of doing the lazy enablement.
				3316	*/
				3317	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
				3318	test_kvm_facility(vcpu->kvm, 64) &&
				3319	riccb->valid &&
				3320	!(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
				3321	VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
				3322	vcpu->arch.sie_block->ecb3 \|= ECB3_RI;
				3323	}
				3324	/*
				3325	* If userspace sets the gscb (e.g. after migration) to non-zero,
				3326	* we should enable GS here instead of doing the lazy enablement.
				3327	*/
				3328	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
				3329	test_kvm_facility(vcpu->kvm, 133) &&
				3330	gscb->gssm &&
				3331	!vcpu->arch.gs_enabled) {
				3332	VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
				3333	vcpu->arch.sie_block->ecb \|= ECB_GS;
				3334	vcpu->arch.sie_block->ecd \|= ECD_HOSTREGMGMT;
				3335	vcpu->arch.gs_enabled = 1;
				3336	}
				3337	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
				3338	test_kvm_facility(vcpu->kvm, 82)) {
				3339	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
				3340	vcpu->arch.sie_block->fpf \|= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
				3341	}
				3342	save_access_regs(vcpu->arch.host_acrs);
				3343	restore_access_regs(vcpu->run->s.regs.acrs);
				3344	/* save host (userspace) fprs/vrs */
				3345	save_fpu_regs();
				3346	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
				3347	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
				3348	if (MACHINE_HAS_VX)
				3349	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
				3350	else
				3351	current->thread.fpu.regs = vcpu->run->s.regs.fprs;
				3352	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
				3353	if (test_fp_ctl(current->thread.fpu.fpc))
				3354	/* User space provided an invalid FPC, let's clear it */
				3355	current->thread.fpu.fpc = 0;
				3356	if (MACHINE_HAS_GS) {
				3357	preempt_disable();
				3358	__ctl_set_bit(2, 4);
				3359	if (current->thread.gs_cb) {
				3360	vcpu->arch.host_gscb = current->thread.gs_cb;
				3361	save_gs_cb(vcpu->arch.host_gscb);
				3362	}
				3363	if (vcpu->arch.gs_enabled) {
				3364	current->thread.gs_cb = (struct gs_cb *)
				3365	&vcpu->run->s.regs.gscb;
				3366	restore_gs_cb(current->thread.gs_cb);
				3367	}
				3368	preempt_enable();
				3369	}
				3370
				3371	kvm_run->kvm_dirty_regs = 0;
				3372	}
				3373
				3374	static void store_regs(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3375	{
				3376	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
				3377	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
				3378	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
				3379	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
				3380	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
				3381	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
				3382	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
				3383	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
				3384	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
				3385	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
				3386	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
				3387	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
				3388	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
				3389	save_access_regs(vcpu->run->s.regs.acrs);
				3390	restore_access_regs(vcpu->arch.host_acrs);
				3391	/* Save guest register state */
				3392	save_fpu_regs();
				3393	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
				3394	/* Restore will be done lazily at return */
				3395	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
				3396	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
				3397	if (MACHINE_HAS_GS) {
				3398	__ctl_set_bit(2, 4);
				3399	if (vcpu->arch.gs_enabled)
				3400	save_gs_cb(current->thread.gs_cb);
				3401	preempt_disable();
				3402	current->thread.gs_cb = vcpu->arch.host_gscb;
				3403	restore_gs_cb(vcpu->arch.host_gscb);
				3404	preempt_enable();
				3405	if (!vcpu->arch.host_gscb)
				3406	__ctl_clear_bit(2, 4);
				3407	vcpu->arch.host_gscb = NULL;
				3408	}
				3409
				3410	}
				3411
				3412	int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu vcpu, struct kvm_run kvm_run)
				3413	{
				3414	int rc;
				3415
				3416	if (kvm_run->immediate_exit)
				3417	return -EINTR;
				3418
				3419	if (guestdbg_exit_pending(vcpu)) {
				3420	kvm_s390_prepare_debug_exit(vcpu);
				3421	return 0;
				3422	}
				3423
				3424	kvm_sigset_activate(vcpu);
				3425
				3426	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
				3427	kvm_s390_vcpu_start(vcpu);
				3428	} else if (is_vcpu_stopped(vcpu)) {
				3429	pr_err_ratelimited("can't run stopped vcpu %d\n",
				3430	vcpu->vcpu_id);
				3431	return -EINVAL;
				3432	}
				3433
				3434	sync_regs(vcpu, kvm_run);
				3435	enable_cpu_timer_accounting(vcpu);
				3436
				3437	might_fault();
				3438	rc = __vcpu_run(vcpu);
				3439
				3440	if (signal_pending(current) && !rc) {
				3441	kvm_run->exit_reason = KVM_EXIT_INTR;
				3442	rc = -EINTR;
				3443	}
				3444
				3445	if (guestdbg_exit_pending(vcpu) && !rc) {
				3446	kvm_s390_prepare_debug_exit(vcpu);
				3447	rc = 0;
				3448	}
				3449
				3450	if (rc == -EREMOTE) {
				3451	/* userspace support is needed, kvm_run has been prepared */
				3452	rc = 0;
				3453	}
				3454
				3455	disable_cpu_timer_accounting(vcpu);
				3456	store_regs(vcpu, kvm_run);
				3457
				3458	kvm_sigset_deactivate(vcpu);
				3459
				3460	vcpu->stat.exit_userspace++;
				3461	return rc;
				3462	}
				3463
				3464	/*
				3465	* store status at address
				3466	* we use have two special cases:
				3467	* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
				3468	* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
				3469	*/
				3470	int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
				3471	{
				3472	unsigned char archmode = 1;
				3473	freg_t fprs[NUM_FPRS];
				3474	unsigned int px;
				3475	u64 clkcomp, cputm;
				3476	int rc;
				3477
				3478	px = kvm_s390_get_prefix(vcpu);
				3479	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
				3480	if (write_guest_abs(vcpu, 163, &archmode, 1))
				3481	return -EFAULT;
				3482	gpa = 0;
				3483	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
				3484	if (write_guest_real(vcpu, 163, &archmode, 1))
				3485	return -EFAULT;
				3486	gpa = px;
				3487	} else
				3488	gpa -= __LC_FPREGS_SAVE_AREA;
				3489
				3490	/* manually convert vector registers if necessary */
				3491	if (MACHINE_HAS_VX) {
				3492	convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
				3493	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				3494	fprs, 128);
				3495	} else {
				3496	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				3497	vcpu->run->s.regs.fprs, 128);
				3498	}
				3499	rc \|= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
				3500	vcpu->run->s.regs.gprs, 128);
				3501	rc \|= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
				3502	&vcpu->arch.sie_block->gpsw, 16);
				3503	rc \|= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
				3504	&px, 4);
				3505	rc \|= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
				3506	&vcpu->run->s.regs.fpc, 4);
				3507	rc \|= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
				3508	&vcpu->arch.sie_block->todpr, 4);
				3509	cputm = kvm_s390_get_cpu_timer(vcpu);
				3510	rc \|= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
				3511	&cputm, 8);
				3512	clkcomp = vcpu->arch.sie_block->ckc >> 8;
				3513	rc \|= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
				3514	&clkcomp, 8);
				3515	rc \|= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
				3516	&vcpu->run->s.regs.acrs, 64);
				3517	rc \|= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
				3518	&vcpu->arch.sie_block->gcr, 128);
				3519	return rc ? -EFAULT : 0;
				3520	}
				3521
				3522	int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
				3523	{
				3524	/*
				3525	* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
				3526	* switch in the run ioctl. Let's update our copies before we save
				3527	* it into the save area
				3528	*/
				3529	save_fpu_regs();
				3530	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
				3531	save_access_regs(vcpu->run->s.regs.acrs);
				3532
				3533	return kvm_s390_store_status_unloaded(vcpu, addr);
				3534	}
				3535
				3536	static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
				3537	{
				3538	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
				3539	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
				3540	}
				3541
				3542	static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
				3543	{
				3544	unsigned int i;
				3545	struct kvm_vcpu *vcpu;
				3546
				3547	kvm_for_each_vcpu(i, vcpu, kvm) {
				3548	__disable_ibs_on_vcpu(vcpu);
				3549	}
				3550	}
				3551
				3552	static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
				3553	{
				3554	if (!sclp.has_ibs)
				3555	return;
				3556	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
				3557	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
				3558	}
				3559
				3560	void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
				3561	{
				3562	int i, online_vcpus, started_vcpus = 0;
				3563
				3564	if (!is_vcpu_stopped(vcpu))
				3565	return;
				3566
				3567	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
				3568	/* Only one cpu at a time may enter/leave the STOPPED state. */
				3569	spin_lock(&vcpu->kvm->arch.start_stop_lock);
				3570	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
				3571
				3572	for (i = 0; i < online_vcpus; i++) {
				3573	if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
				3574	started_vcpus++;
				3575	}
				3576
				3577	if (started_vcpus == 0) {
				3578	/* we're the only active VCPU -> speed it up */
				3579	__enable_ibs_on_vcpu(vcpu);
				3580	} else if (started_vcpus == 1) {
				3581	/*
				3582	* As we are starting a second VCPU, we have to disable
				3583	* the IBS facility on all VCPUs to remove potentially
				3584	* oustanding ENABLE requests.
				3585	*/
				3586	__disable_ibs_on_all_vcpus(vcpu->kvm);
				3587	}
				3588
				3589	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
				3590	/*
				3591	* Another VCPU might have used IBS while we were offline.
				3592	* Let's play safe and flush the VCPU at startup.
				3593	*/
				3594	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
				3595	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
				3596	return;
				3597	}
				3598
				3599	void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
				3600	{
				3601	int i, online_vcpus, started_vcpus = 0;
				3602	struct kvm_vcpu *started_vcpu = NULL;
				3603
				3604	if (is_vcpu_stopped(vcpu))
				3605	return;
				3606
				3607	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
				3608	/* Only one cpu at a time may enter/leave the STOPPED state. */
				3609	spin_lock(&vcpu->kvm->arch.start_stop_lock);
				3610	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
				3611
				3612	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
				3613	kvm_s390_clear_stop_irq(vcpu);
				3614
				3615	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
				3616	__disable_ibs_on_vcpu(vcpu);
				3617
				3618	for (i = 0; i < online_vcpus; i++) {
				3619	if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
				3620	started_vcpus++;
				3621	started_vcpu = vcpu->kvm->vcpus[i];
				3622	}
				3623	}
				3624
				3625	if (started_vcpus == 1) {
				3626	/*
				3627	* As we only have one VCPU left, we want to enable the
				3628	* IBS facility for that VCPU to speed it up.
				3629	*/
				3630	__enable_ibs_on_vcpu(started_vcpu);
				3631	}
				3632
				3633	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
				3634	return;
				3635	}
				3636
				3637	static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				3638	struct kvm_enable_cap *cap)
				3639	{
				3640	int r;
				3641
				3642	if (cap->flags)
				3643	return -EINVAL;
				3644
				3645	switch (cap->cap) {
				3646	case KVM_CAP_S390_CSS_SUPPORT:
				3647	if (!vcpu->kvm->arch.css_support) {
				3648	vcpu->kvm->arch.css_support = 1;
				3649	VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
				3650	trace_kvm_s390_enable_css(vcpu->kvm);
				3651	}
				3652	r = 0;
				3653	break;
				3654	default:
				3655	r = -EINVAL;
				3656	break;
				3657	}
				3658	return r;
				3659	}
				3660
				3661	static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				3662	struct kvm_s390_mem_op *mop)
				3663	{
				3664	void __user uaddr = (void __user )mop->buf;
				3665	void *tmpbuf = NULL;
				3666	int r, srcu_idx;
				3667	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				3668	\| KVM_S390_MEMOP_F_CHECK_ONLY;
				3669
				3670	if (mop->flags & ~supported_flags \|\| mop->ar >= NUM_ACRS \|\| !mop->size)
				3671	return -EINVAL;
				3672
				3673	if (mop->size > MEM_OP_MAX_SIZE)
				3674	return -E2BIG;
				3675
				3676	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
				3677	tmpbuf = vmalloc(mop->size);
				3678	if (!tmpbuf)
				3679	return -ENOMEM;
				3680	}
				3681
				3682	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				3683
				3684	switch (mop->op) {
				3685	case KVM_S390_MEMOP_LOGICAL_READ:
				3686	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
				3687	r = check_gva_range(vcpu, mop->gaddr, mop->ar,
				3688	mop->size, GACC_FETCH);
				3689	break;
				3690	}
				3691	r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
				3692	if (r == 0) {
				3693	if (copy_to_user(uaddr, tmpbuf, mop->size))
				3694	r = -EFAULT;
				3695	}
				3696	break;
				3697	case KVM_S390_MEMOP_LOGICAL_WRITE:
				3698	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
				3699	r = check_gva_range(vcpu, mop->gaddr, mop->ar,
				3700	mop->size, GACC_STORE);
				3701	break;
				3702	}
				3703	if (copy_from_user(tmpbuf, uaddr, mop->size)) {
				3704	r = -EFAULT;
				3705	break;
				3706	}
				3707	r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
				3708	break;
				3709	default:
				3710	r = -EINVAL;
				3711	}
				3712
				3713	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
				3714
				3715	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
				3716	kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
				3717
				3718	vfree(tmpbuf);
				3719	return r;
				3720	}
				3721
				3722	long kvm_arch_vcpu_ioctl(struct file *filp,
				3723	unsigned int ioctl, unsigned long arg)
				3724	{
				3725	struct kvm_vcpu *vcpu = filp->private_data;
				3726	void __user argp = (void __user )arg;
				3727	int idx;
				3728	long r;
				3729
				3730	switch (ioctl) {
				3731	case KVM_S390_IRQ: {
				3732	struct kvm_s390_irq s390irq;
				3733
				3734	r = -EFAULT;
				3735	if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
				3736	break;
				3737	r = kvm_s390_inject_vcpu(vcpu, &s390irq);
				3738	break;
				3739	}
				3740	case KVM_S390_INTERRUPT: {
				3741	struct kvm_s390_interrupt s390int;
				3742	struct kvm_s390_irq s390irq = {};
				3743
				3744	r = -EFAULT;
				3745	if (copy_from_user(&s390int, argp, sizeof(s390int)))
				3746	break;
				3747	if (s390int_to_s390irq(&s390int, &s390irq))
				3748	return -EINVAL;
				3749	r = kvm_s390_inject_vcpu(vcpu, &s390irq);
				3750	break;
				3751	}
				3752	case KVM_S390_STORE_STATUS:
				3753	idx = srcu_read_lock(&vcpu->kvm->srcu);
				3754	r = kvm_s390_store_status_unloaded(vcpu, arg);
				3755	srcu_read_unlock(&vcpu->kvm->srcu, idx);
				3756	break;
				3757	case KVM_S390_SET_INITIAL_PSW: {
				3758	psw_t psw;
				3759
				3760	r = -EFAULT;
				3761	if (copy_from_user(&psw, argp, sizeof(psw)))
				3762	break;
				3763	r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
				3764	break;
				3765	}
				3766	case KVM_S390_INITIAL_RESET:
				3767	r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
				3768	break;
				3769	case KVM_SET_ONE_REG:
				3770	case KVM_GET_ONE_REG: {
				3771	struct kvm_one_reg reg;
				3772	r = -EFAULT;
				3773	if (copy_from_user(&reg, argp, sizeof(reg)))
				3774	break;
				3775	if (ioctl == KVM_SET_ONE_REG)
				3776	r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
				3777	else
				3778	r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
				3779	break;
				3780	}
				3781	#ifdef CONFIG_KVM_S390_UCONTROL
				3782	case KVM_S390_UCAS_MAP: {
				3783	struct kvm_s390_ucas_mapping ucasmap;
				3784
				3785	if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
				3786	r = -EFAULT;
				3787	break;
				3788	}
				3789
				3790	if (!kvm_is_ucontrol(vcpu->kvm)) {
				3791	r = -EINVAL;
				3792	break;
				3793	}
				3794
				3795	r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				3796	ucasmap.vcpu_addr, ucasmap.length);
				3797	break;
				3798	}
				3799	case KVM_S390_UCAS_UNMAP: {
				3800	struct kvm_s390_ucas_mapping ucasmap;
				3801
				3802	if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
				3803	r = -EFAULT;
				3804	break;
				3805	}
				3806
				3807	if (!kvm_is_ucontrol(vcpu->kvm)) {
				3808	r = -EINVAL;
				3809	break;
				3810	}
				3811
				3812	r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				3813	ucasmap.length);
				3814	break;
				3815	}
				3816	#endif
				3817	case KVM_S390_VCPU_FAULT: {
				3818	r = gmap_fault(vcpu->arch.gmap, arg, 0);
				3819	break;
				3820	}
				3821	case KVM_ENABLE_CAP:
				3822	{
				3823	struct kvm_enable_cap cap;
				3824	r = -EFAULT;
				3825	if (copy_from_user(&cap, argp, sizeof(cap)))
				3826	break;
				3827	r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
				3828	break;
				3829	}
				3830	case KVM_S390_MEM_OP: {
				3831	struct kvm_s390_mem_op mem_op;
				3832
				3833	if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
				3834	r = kvm_s390_guest_mem_op(vcpu, &mem_op);
				3835	else
				3836	r = -EFAULT;
				3837	break;
				3838	}
				3839	case KVM_S390_SET_IRQ_STATE: {
				3840	struct kvm_s390_irq_state irq_state;
				3841
				3842	r = -EFAULT;
				3843	if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
				3844	break;
				3845	if (irq_state.len > VCPU_IRQS_MAX_BUF \|\|
				3846	irq_state.len == 0 \|\|
				3847	irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
				3848	r = -EINVAL;
				3849	break;
				3850	}
				3851	r = kvm_s390_set_irq_state(vcpu,
				3852	(void __user *) irq_state.buf,
				3853	irq_state.len);
				3854	break;
				3855	}
				3856	case KVM_S390_GET_IRQ_STATE: {
				3857	struct kvm_s390_irq_state irq_state;
				3858
				3859	r = -EFAULT;
				3860	if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
				3861	break;
				3862	if (irq_state.len == 0) {
				3863	r = -EINVAL;
				3864	break;
				3865	}
				3866	r = kvm_s390_get_irq_state(vcpu,
				3867	(__u8 __user *) irq_state.buf,
				3868	irq_state.len);
				3869	break;
				3870	}
				3871	default:
				3872	r = -ENOTTY;
				3873	}
				3874	return r;
				3875	}
				3876
				3877	int kvm_arch_vcpu_fault(struct kvm_vcpu vcpu, struct vm_fault vmf)
				3878	{
				3879	#ifdef CONFIG_KVM_S390_UCONTROL
				3880	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
				3881	&& (kvm_is_ucontrol(vcpu->kvm))) {
				3882	vmf->page = virt_to_page(vcpu->arch.sie_block);
				3883	get_page(vmf->page);
				3884	return 0;
				3885	}
				3886	#endif
				3887	return VM_FAULT_SIGBUS;
				3888	}
				3889
				/*
				 * kvm_arch_create_memslot - memslot creation hook
				 * @kvm:    VM the slot belongs to (unused here)
				 * @slot:   memory slot being created (unused here)
				 * @npages: number of pages in the slot (unused here)
				 *
				 * This implementation does nothing and always returns 0.
				 */
				int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
							    unsigned long npages)
				{
					return 0;
				}
				3895
				3896	/* Section: memory related */
				3897	int kvm_arch_prepare_memory_region(struct kvm *kvm,
				3898	struct kvm_memory_slot *memslot,
				3899	const struct kvm_userspace_memory_region *mem,
				3900	enum kvm_mr_change change)
				3901	{
				3902	/* A few sanity checks. We can have memory slots which have to be
				3903	located/ended at a segment boundary (1MB). The memory in userland is
				3904	ok to be fragmented into various different vmas. It is okay to mmap()
				3905	and munmap() stuff in this slot after doing this call at any time */
				3906
				3907	if (mem->userspace_addr & 0xffffful)
				3908	return -EINVAL;
				3909
				3910	if (mem->memory_size & 0xffffful)
				3911	return -EINVAL;
				3912
				3913	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
				3914	return -EINVAL;
				3915
				3916	return 0;
				3917	}
				3918
				3919	void kvm_arch_commit_memory_region(struct kvm *kvm,
				3920	const struct kvm_userspace_memory_region *mem,
				3921	const struct kvm_memory_slot *old,
				3922	const struct kvm_memory_slot *new,
				3923	enum kvm_mr_change change)
				3924	{
				3925	int rc = 0;
				3926
				3927	switch (change) {
				3928	case KVM_MR_DELETE:
				3929	rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
				3930	old->npages * PAGE_SIZE);
				3931	break;
				3932	case KVM_MR_MOVE:
				3933	rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
				3934	old->npages * PAGE_SIZE);
				3935	if (rc)
				3936	break;
				3937	/* FALLTHROUGH */
				3938	case KVM_MR_CREATE:
				3939	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				3940	mem->guest_phys_addr, mem->memory_size);
				3941	break;
				3942	case KVM_MR_FLAGS_ONLY:
				3943	break;
				3944	default:
				3945	WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
				3946	}
				3947	if (rc)
				3948	pr_warn("failed to commit memory region\n");
				3949	return;
				3950	}
				3951
				3952	static inline unsigned long nonhyp_mask(int i)
				3953	{
				3954	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
				3955
				3956	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
				3957	}
				3958
				3959	void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
				3960	{
				3961	vcpu->valid_wakeup = false;
				3962	}
				3963
				3964	static int __init kvm_s390_init(void)
				3965	{
				3966	int i;
				3967
				3968	if (!sclp.has_sief2) {
				3969	pr_info("SIE not available\n");
				3970	return -ENODEV;
				3971	}
				3972
				3973	for (i = 0; i < 16; i++)
				3974	kvm_s390_fac_list_mask[i] \|=
				3975	S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
				3976
				3977	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
				3978	}
				3979
				3980	static void __exit kvm_s390_exit(void)
				3981	{
				3982	kvm_exit();
				3983	}
				3984
				/* Register kvm_s390_init() and kvm_s390_exit() as the module entry and exit hooks. */
				3985	module_init(kvm_s390_init);
				3986	module_exit(kvm_s390_exit);
				3987	
				3988	/*
				3989	* Enable autoloading of the kvm module.
				3990	* Note that we add the module alias here instead of virt/kvm/kvm_main.c
				3991	* since x86 takes a different approach.
				3992	*/
				3993	#include <linux/miscdevice.h>
				3994	MODULE_ALIAS_MISCDEV(KVM_MINOR);
				3995	MODULE_ALIAS("devname:kvm");