Blame - src/kernel/linux/v4.19/arch/x86/kvm/hyperv.c - T800

blob: 5842c5f587fe910b9358eda88cb619bd958413c4 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/*
				2	* KVM Microsoft Hyper-V emulation
				3	*
				4	* derived from arch/x86/kvm/x86.c
				5	*
				6	* Copyright (C) 2006 Qumranet, Inc.
				7	* Copyright (C) 2008 Qumranet, Inc.
				8	* Copyright IBM Corporation, 2008
				9	* Copyright 2010 Red Hat, Inc. and/or its affiliates.
				10	* Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
				11	*
				12	* Authors:
				13	* Avi Kivity <avi@qumranet.com>
				14	* Yaniv Kamay <yaniv@qumranet.com>
				15	* Amit Shah <amit.shah@qumranet.com>
				16	* Ben-Ami Yassour <benami@il.ibm.com>
				17	* Andrey Smetanin <asmetanin@virtuozzo.com>
				18	*
				19	* This work is licensed under the terms of the GNU GPL, version 2. See
				20	* the COPYING file in the top-level directory.
				21	*
				22	*/
				23
				24	#include "x86.h"
				25	#include "lapic.h"
				26	#include "ioapic.h"
				27	#include "hyperv.h"
				28
				29	#include <linux/kvm_host.h>
				30	#include <linux/highmem.h>
				31	#include <linux/sched/cputime.h>
				32	#include <linux/eventfd.h>
				33
				34	#include <asm/apicdef.h>
				35	#include <trace/events/kvm.h>
				36
				37	#include "trace.h"
				38
				39	static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
				40	{
				41	return atomic64_read(&synic->sint[sint]);
				42	}
				43
				44	static inline int synic_get_sint_vector(u64 sint_value)
				45	{
				46	if (sint_value & HV_SYNIC_SINT_MASKED)
				47	return -1;
				48	return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
				49	}
				50
				51	static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
				52	int vector)
				53	{
				54	int i;
				55
				56	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
				57	if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
				58	return true;
				59	}
				60	return false;
				61	}
				62
				63	static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
				64	int vector)
				65	{
				66	int i;
				67	u64 sint_value;
				68
				69	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
				70	sint_value = synic_read_sint(synic, i);
				71	if (synic_get_sint_vector(sint_value) == vector &&
				72	sint_value & HV_SYNIC_SINT_AUTO_EOI)
				73	return true;
				74	}
				75	return false;
				76	}
				77
				78	static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				79	int vector)
				80	{
				81	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
				82	return;
				83
				84	if (synic_has_vector_connected(synic, vector))
				85	__set_bit(vector, synic->vec_bitmap);
				86	else
				87	__clear_bit(vector, synic->vec_bitmap);
				88
				89	if (synic_has_vector_auto_eoi(synic, vector))
				90	__set_bit(vector, synic->auto_eoi_bitmap);
				91	else
				92	__clear_bit(vector, synic->auto_eoi_bitmap);
				93	}
				94
				95	static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
				96	u64 data, bool host)
				97	{
				98	int vector, old_vector;
				99	bool masked;
				100
				101	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
				102	masked = data & HV_SYNIC_SINT_MASKED;
				103
				104	/*
				105	* Valid vectors are 16-255, however, nested Hyper-V attempts to write
				106	* default '0x10000' value on boot and this should not #GP. We need to
				107	* allow zero-initing the register from host as well.
				108	*/
				109	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
				110	return 1;
				111	/*
				112	* Guest may configure multiple SINTs to use the same vector, so
				113	* we maintain a bitmap of vectors handled by synic, and a
				114	* bitmap of vectors with auto-eoi behavior. The bitmaps are
				115	* updated here, and atomically queried on fast paths.
				116	*/
				117	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
				118
				119	atomic64_set(&synic->sint[sint], data);
				120
				121	synic_update_vector(synic, old_vector);
				122
				123	synic_update_vector(synic, vector);
				124
				125	/* Load SynIC vectors into EOI exit bitmap */
				126	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
				127	return 0;
				128	}
				129
				130	static struct kvm_vcpu get_vcpu_by_vpidx(struct kvm kvm, u32 vpidx)
				131	{
				132	struct kvm_vcpu *vcpu = NULL;
				133	int i;
				134
				135	if (vpidx >= KVM_MAX_VCPUS)
				136	return NULL;
				137
				138	vcpu = kvm_get_vcpu(kvm, vpidx);
				139	if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
				140	return vcpu;
				141	kvm_for_each_vcpu(i, vcpu, kvm)
				142	if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
				143	return vcpu;
				144	return NULL;
				145	}
				146
				147	static struct kvm_vcpu_hv_synic synic_get(struct kvm kvm, u32 vpidx)
				148	{
				149	struct kvm_vcpu *vcpu;
				150	struct kvm_vcpu_hv_synic *synic;
				151
				152	vcpu = get_vcpu_by_vpidx(kvm, vpidx);
				153	if (!vcpu)
				154	return NULL;
				155	synic = vcpu_to_synic(vcpu);
				156	return (synic->active) ? synic : NULL;
				157	}
				158
				159	static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
				160	u32 sint)
				161	{
				162	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
				163	struct page *page;
				164	gpa_t gpa;
				165	struct hv_message *msg;
				166	struct hv_message_page *msg_page;
				167
				168	gpa = synic->msg_page & PAGE_MASK;
				169	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
				170	if (is_error_page(page)) {
				171	vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
				172	gpa);
				173	return;
				174	}
				175	msg_page = kmap_atomic(page);
				176
				177	msg = &msg_page->sint_message[sint];
				178	msg->header.message_flags.msg_pending = 0;
				179
				180	kunmap_atomic(msg_page);
				181	kvm_release_page_dirty(page);
				182	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
				183	}
				184
				185	static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
				186	{
				187	struct kvm *kvm = vcpu->kvm;
				188	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
				189	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
				190	struct kvm_vcpu_hv_stimer *stimer;
				191	int gsi, idx, stimers_pending;
				192
				193	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
				194
				195	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
				196	synic_clear_sint_msg_pending(synic, sint);
				197
				198	/* Try to deliver pending Hyper-V SynIC timers messages */
				199	stimers_pending = 0;
				200	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
				201	stimer = &hv_vcpu->stimer[idx];
				202	if (stimer->msg_pending &&
				203	(stimer->config & HV_STIMER_ENABLE) &&
				204	HV_STIMER_SINT(stimer->config) == sint) {
				205	set_bit(stimer->index,
				206	hv_vcpu->stimer_pending_bitmap);
				207	stimers_pending++;
				208	}
				209	}
				210	if (stimers_pending)
				211	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
				212
				213	idx = srcu_read_lock(&kvm->irq_srcu);
				214	gsi = atomic_read(&synic->sint_to_gsi[sint]);
				215	if (gsi != -1)
				216	kvm_notify_acked_gsi(kvm, gsi);
				217	srcu_read_unlock(&kvm->irq_srcu, idx);
				218	}
				219
				220	static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
				221	{
				222	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
				223	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
				224
				225	hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
				226	hv_vcpu->exit.u.synic.msr = msr;
				227	hv_vcpu->exit.u.synic.control = synic->control;
				228	hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
				229	hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
				230
				231	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
				232	}
				233
				234	static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
				235	u32 msr, u64 data, bool host)
				236	{
				237	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
				238	int ret;
				239
				240	if (!synic->active && !host)
				241	return 1;
				242
				243	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
				244
				245	ret = 0;
				246	switch (msr) {
				247	case HV_X64_MSR_SCONTROL:
				248	synic->control = data;
				249	if (!host)
				250	synic_exit(synic, msr);
				251	break;
				252	case HV_X64_MSR_SVERSION:
				253	if (!host) {
				254	ret = 1;
				255	break;
				256	}
				257	synic->version = data;
				258	break;
				259	case HV_X64_MSR_SIEFP:
				260	if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
				261	!synic->dont_zero_synic_pages)
				262	if (kvm_clear_guest(vcpu->kvm,
				263	data & PAGE_MASK, PAGE_SIZE)) {
				264	ret = 1;
				265	break;
				266	}
				267	synic->evt_page = data;
				268	if (!host)
				269	synic_exit(synic, msr);
				270	break;
				271	case HV_X64_MSR_SIMP:
				272	if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
				273	!synic->dont_zero_synic_pages)
				274	if (kvm_clear_guest(vcpu->kvm,
				275	data & PAGE_MASK, PAGE_SIZE)) {
				276	ret = 1;
				277	break;
				278	}
				279	synic->msg_page = data;
				280	if (!host)
				281	synic_exit(synic, msr);
				282	break;
				283	case HV_X64_MSR_EOM: {
				284	int i;
				285
				286	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
				287	kvm_hv_notify_acked_sint(vcpu, i);
				288	break;
				289	}
				290	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
				291	ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
				292	break;
				293	default:
				294	ret = 1;
				295	break;
				296	}
				297	return ret;
				298	}
				299
				300	static int synic_get_msr(struct kvm_vcpu_hv_synic synic, u32 msr, u64 pdata,
				301	bool host)
				302	{
				303	int ret;
				304
				305	if (!synic->active && !host)
				306	return 1;
				307
				308	ret = 0;
				309	switch (msr) {
				310	case HV_X64_MSR_SCONTROL:
				311	*pdata = synic->control;
				312	break;
				313	case HV_X64_MSR_SVERSION:
				314	*pdata = synic->version;
				315	break;
				316	case HV_X64_MSR_SIEFP:
				317	*pdata = synic->evt_page;
				318	break;
				319	case HV_X64_MSR_SIMP:
				320	*pdata = synic->msg_page;
				321	break;
				322	case HV_X64_MSR_EOM:
				323	*pdata = 0;
				324	break;
				325	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
				326	*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
				327	break;
				328	default:
				329	ret = 1;
				330	break;
				331	}
				332	return ret;
				333	}
				334
				335	static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
				336	{
				337	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
				338	struct kvm_lapic_irq irq;
				339	int ret, vector;
				340
				341	if (sint >= ARRAY_SIZE(synic->sint))
				342	return -EINVAL;
				343
				344	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
				345	if (vector < 0)
				346	return -ENOENT;
				347
				348	memset(&irq, 0, sizeof(irq));
				349	irq.shorthand = APIC_DEST_SELF;
				350	irq.dest_mode = APIC_DEST_PHYSICAL;
				351	irq.delivery_mode = APIC_DM_FIXED;
				352	irq.vector = vector;
				353	irq.level = 1;
				354
				355	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
				356	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
				357	return ret;
				358	}
				359
				360	int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
				361	{
				362	struct kvm_vcpu_hv_synic *synic;
				363
				364	synic = synic_get(kvm, vpidx);
				365	if (!synic)
				366	return -EINVAL;
				367
				368	return synic_set_irq(synic, sint);
				369	}
				370
				371	void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
				372	{
				373	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
				374	int i;
				375
				376	trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
				377
				378	for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
				379	if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
				380	kvm_hv_notify_acked_sint(vcpu, i);
				381	}
				382
				383	static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
				384	{
				385	struct kvm_vcpu_hv_synic *synic;
				386
				387	synic = synic_get(kvm, vpidx);
				388	if (!synic)
				389	return -EINVAL;
				390
				391	if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
				392	return -EINVAL;
				393
				394	atomic_set(&synic->sint_to_gsi[sint], gsi);
				395	return 0;
				396	}
				397
				398	void kvm_hv_irq_routing_update(struct kvm *kvm)
				399	{
				400	struct kvm_irq_routing_table *irq_rt;
				401	struct kvm_kernel_irq_routing_entry *e;
				402	u32 gsi;
				403
				404	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
				405	lockdep_is_held(&kvm->irq_lock));
				406
				407	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
				408	hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
				409	if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				410	kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
				411	e->hv_sint.sint, gsi);
				412	}
				413	}
				414	}
				415
				416	static void synic_init(struct kvm_vcpu_hv_synic *synic)
				417	{
				418	int i;
				419
				420	memset(synic, 0, sizeof(*synic));
				421	synic->version = HV_SYNIC_VERSION_1;
				422	for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
				423	atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
				424	atomic_set(&synic->sint_to_gsi[i], -1);
				425	}
				426	}
				427
				428	static u64 get_time_ref_counter(struct kvm *kvm)
				429	{
				430	struct kvm_hv *hv = &kvm->arch.hyperv;
				431	struct kvm_vcpu *vcpu;
				432	u64 tsc;
				433
				434	/*
				435	* The guest has not set up the TSC page or the clock isn't
				436	* stable, fall back to get_kvmclock_ns.
				437	*/
				438	if (!hv->tsc_ref.tsc_sequence)
				439	return div_u64(get_kvmclock_ns(kvm), 100);
				440
				441	vcpu = kvm_get_vcpu(kvm, 0);
				442	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
				443	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
				444	+ hv->tsc_ref.tsc_offset;
				445	}
				446
				447	static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
				448	bool vcpu_kick)
				449	{
				450	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
				451
				452	set_bit(stimer->index,
				453	vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
				454	kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
				455	if (vcpu_kick)
				456	kvm_vcpu_kick(vcpu);
				457	}
				458
				459	static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
				460	{
				461	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
				462
				463	trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
				464	stimer->index);
				465
				466	hrtimer_cancel(&stimer->timer);
				467	clear_bit(stimer->index,
				468	vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
				469	stimer->msg_pending = false;
				470	stimer->exp_time = 0;
				471	}
				472
				473	static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
				474	{
				475	struct kvm_vcpu_hv_stimer *stimer;
				476
				477	stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
				478	trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
				479	stimer->index);
				480	stimer_mark_pending(stimer, true);
				481
				482	return HRTIMER_NORESTART;
				483	}
				484
				485	/*
				486	* stimer_start() assumptions:
				487	* a) stimer->count is not equal to 0
				488	* b) stimer->config has HV_STIMER_ENABLE flag
				489	*/
				490	static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
				491	{
				492	u64 time_now;
				493	ktime_t ktime_now;
				494
				495	time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
				496	ktime_now = ktime_get();
				497
				498	if (stimer->config & HV_STIMER_PERIODIC) {
				499	if (stimer->exp_time) {
				500	if (time_now >= stimer->exp_time) {
				501	u64 remainder;
				502
				503	div64_u64_rem(time_now - stimer->exp_time,
				504	stimer->count, &remainder);
				505	stimer->exp_time =
				506	time_now + (stimer->count - remainder);
				507	}
				508	} else
				509	stimer->exp_time = time_now + stimer->count;
				510
				511	trace_kvm_hv_stimer_start_periodic(
				512	stimer_to_vcpu(stimer)->vcpu_id,
				513	stimer->index,
				514	time_now, stimer->exp_time);
				515
				516	hrtimer_start(&stimer->timer,
				517	ktime_add_ns(ktime_now,
				518	100 * (stimer->exp_time - time_now)),
				519	HRTIMER_MODE_ABS);
				520	return 0;
				521	}
				522	stimer->exp_time = stimer->count;
				523	if (time_now >= stimer->count) {
				524	/*
				525	* Expire timer according to Hypervisor Top-Level Functional
				526	* specification v4(15.3.1):
				527	* "If a one shot is enabled and the specified count is in
				528	* the past, it will expire immediately."
				529	*/
				530	stimer_mark_pending(stimer, false);
				531	return 0;
				532	}
				533
				534	trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
				535	stimer->index,
				536	time_now, stimer->count);
				537
				538	hrtimer_start(&stimer->timer,
				539	ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
				540	HRTIMER_MODE_ABS);
				541	return 0;
				542	}
				543
				544	static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
				545	bool host)
				546	{
				547	trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
				548	stimer->index, config, host);
				549
				550	stimer_cleanup(stimer);
				551	if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
				552	config &= ~HV_STIMER_ENABLE;
				553	stimer->config = config;
				554	stimer_mark_pending(stimer, false);
				555	return 0;
				556	}
				557
				558	static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
				559	bool host)
				560	{
				561	trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
				562	stimer->index, count, host);
				563
				564	stimer_cleanup(stimer);
				565	stimer->count = count;
				566	if (stimer->count == 0)
				567	stimer->config &= ~HV_STIMER_ENABLE;
				568	else if (stimer->config & HV_STIMER_AUTOENABLE)
				569	stimer->config \|= HV_STIMER_ENABLE;
				570	stimer_mark_pending(stimer, false);
				571	return 0;
				572	}
				573
				574	static int stimer_get_config(struct kvm_vcpu_hv_stimer stimer, u64 pconfig)
				575	{
				576	*pconfig = stimer->config;
				577	return 0;
				578	}
				579
				580	static int stimer_get_count(struct kvm_vcpu_hv_stimer stimer, u64 pcount)
				581	{
				582	*pcount = stimer->count;
				583	return 0;
				584	}
				585
				586	static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
				587	struct hv_message *src_msg)
				588	{
				589	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
				590	struct page *page;
				591	gpa_t gpa;
				592	struct hv_message *dst_msg;
				593	int r;
				594	struct hv_message_page *msg_page;
				595
				596	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
				597	return -ENOENT;
				598
				599	gpa = synic->msg_page & PAGE_MASK;
				600	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
				601	if (is_error_page(page))
				602	return -EFAULT;
				603
				604	msg_page = kmap_atomic(page);
				605	dst_msg = &msg_page->sint_message[sint];
				606	if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
				607	src_msg->header.message_type) != HVMSG_NONE) {
				608	dst_msg->header.message_flags.msg_pending = 1;
				609	r = -EAGAIN;
				610	} else {
				611	memcpy(&dst_msg->u.payload, &src_msg->u.payload,
				612	src_msg->header.payload_size);
				613	dst_msg->header.message_type = src_msg->header.message_type;
				614	dst_msg->header.payload_size = src_msg->header.payload_size;
				615	r = synic_set_irq(synic, sint);
				616	if (r >= 1)
				617	r = 0;
				618	else if (r == 0)
				619	r = -EFAULT;
				620	}
				621	kunmap_atomic(msg_page);
				622	kvm_release_page_dirty(page);
				623	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
				624	return r;
				625	}
				626
				627	static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
				628	{
				629	struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
				630	struct hv_message *msg = &stimer->msg;
				631	struct hv_timer_message_payload *payload =
				632	(struct hv_timer_message_payload *)&msg->u.payload;
				633
				634	payload->expiration_time = stimer->exp_time;
				635	payload->delivery_time = get_time_ref_counter(vcpu->kvm);
				636	return synic_deliver_msg(vcpu_to_synic(vcpu),
				637	HV_STIMER_SINT(stimer->config), msg);
				638	}
				639
				640	static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
				641	{
				642	int r;
				643
				644	stimer->msg_pending = true;
				645	r = stimer_send_msg(stimer);
				646	trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
				647	stimer->index, r);
				648	if (!r) {
				649	stimer->msg_pending = false;
				650	if (!(stimer->config & HV_STIMER_PERIODIC))
				651	stimer->config &= ~HV_STIMER_ENABLE;
				652	}
				653	}
				654
				655	void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
				656	{
				657	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
				658	struct kvm_vcpu_hv_stimer *stimer;
				659	u64 time_now, exp_time;
				660	int i;
				661
				662	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
				663	if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
				664	stimer = &hv_vcpu->stimer[i];
				665	if (stimer->config & HV_STIMER_ENABLE) {
				666	exp_time = stimer->exp_time;
				667
				668	if (exp_time) {
				669	time_now =
				670	get_time_ref_counter(vcpu->kvm);
				671	if (time_now >= exp_time)
				672	stimer_expiration(stimer);
				673	}
				674
				675	if ((stimer->config & HV_STIMER_ENABLE) &&
				676	stimer->count) {
				677	if (!stimer->msg_pending)
				678	stimer_start(stimer);
				679	} else
				680	stimer_cleanup(stimer);
				681	}
				682	}
				683	}
				684
				685	void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
				686	{
				687	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
				688	int i;
				689
				690	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
				691	stimer_cleanup(&hv_vcpu->stimer[i]);
				692	}
				693
				694	bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
				695	{
				696	if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
				697	return false;
				698	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
				699	}
				700	EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
				701
				702	bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
				703	struct hv_vp_assist_page *assist_page)
				704	{
				705	if (!kvm_hv_assist_page_enabled(vcpu))
				706	return false;
				707	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
				708	assist_page, sizeof(*assist_page));
				709	}
				710	EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
				711
				712	static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
				713	{
				714	struct hv_message *msg = &stimer->msg;
				715	struct hv_timer_message_payload *payload =
				716	(struct hv_timer_message_payload *)&msg->u.payload;
				717
				718	memset(&msg->header, 0, sizeof(msg->header));
				719	msg->header.message_type = HVMSG_TIMER_EXPIRED;
				720	msg->header.payload_size = sizeof(*payload);
				721
				722	payload->timer_index = stimer->index;
				723	payload->expiration_time = 0;
				724	payload->delivery_time = 0;
				725	}
				726
				727	static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
				728	{
				729	memset(stimer, 0, sizeof(*stimer));
				730	stimer->index = timer_index;
				731	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
				732	stimer->timer.function = stimer_timer_callback;
				733	stimer_prepare_msg(stimer);
				734	}
				735
				736	void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
				737	{
				738	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
				739	int i;
				740
				741	synic_init(&hv_vcpu->synic);
				742
				743	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
				744	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
				745	stimer_init(&hv_vcpu->stimer[i], i);
				746	}
				747
				748	void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
				749	{
				750	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
				751
				752	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
				753	}
				754
				755	int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
				756	{
				757	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
				758
				759	/*
				760	* Hyper-V SynIC auto EOI SINT's are
				761	* not compatible with APICV, so deactivate APICV
				762	*/
				763	kvm_vcpu_deactivate_apicv(vcpu);
				764	synic->active = true;
				765	synic->dont_zero_synic_pages = dont_zero_synic_pages;
				766	return 0;
				767	}
				768
				769	static bool kvm_hv_msr_partition_wide(u32 msr)
				770	{
				771	bool r = false;
				772
				773	switch (msr) {
				774	case HV_X64_MSR_GUEST_OS_ID:
				775	case HV_X64_MSR_HYPERCALL:
				776	case HV_X64_MSR_REFERENCE_TSC:
				777	case HV_X64_MSR_TIME_REF_COUNT:
				778	case HV_X64_MSR_CRASH_CTL:
				779	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
				780	case HV_X64_MSR_RESET:
				781	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
				782	case HV_X64_MSR_TSC_EMULATION_CONTROL:
				783	case HV_X64_MSR_TSC_EMULATION_STATUS:
				784	r = true;
				785	break;
				786	}
				787
				788	return r;
				789	}
				790
				791	static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
				792	u32 index, u64 *pdata)
				793	{
				794	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
				795
				796	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
				797	return -EINVAL;
				798
				799	*pdata = hv->hv_crash_param[index];
				800	return 0;
				801	}
				802
				803	static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu vcpu, u64 pdata)
				804	{
				805	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
				806
				807	*pdata = hv->hv_crash_ctl;
				808	return 0;
				809	}
				810
				811	static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
				812	{
				813	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
				814
				815	if (host)
				816	hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
				817
				818	if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
				819
				820	vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
				821	hv->hv_crash_param[0],
				822	hv->hv_crash_param[1],
				823	hv->hv_crash_param[2],
				824	hv->hv_crash_param[3],
				825	hv->hv_crash_param[4]);
				826
				827	/* Send notification about crash to user space */
				828	kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
				829	}
				830
				831	return 0;
				832	}
				833
				834	static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
				835	u32 index, u64 data)
				836	{
				837	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
				838
				839	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
				840	return -EINVAL;
				841
				842	hv->hv_crash_param[index] = data;
				843	return 0;
				844	}
				845
				846	/*
				847	* The kvmclock and Hyper-V TSC page use similar formulas, and converting
				848	* between them is possible:
				849	*
				850	* kvmclock formula:
				851	* nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
				852	* + system_time
				853	*
				854	* Hyper-V formula:
				855	* nsec/100 = ticks * scale / 2^64 + offset
				856	*
				857	* When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
				858	* By dividing the kvmclock formula by 100 and equating what's left we get:
				859	* ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
				860	* scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
				861	* scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
				862	*
				863	* Now expand the kvmclock formula and divide by 100:
				864	* nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
				865	* - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
				866	* + system_time
				867	* nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
				868	* - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
				869	* + system_time / 100
				870	*
				871	* Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
				872	* nsec/100 = ticks * scale / 2^64
				873	* - tsc_timestamp * scale / 2^64
				874	* + system_time / 100
				875	*
				876	* Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
				877	* offset = system_time / 100 - tsc_timestamp * scale / 2^64
				878	*
				879	* These two equivalencies are implemented in this function.
				880	*/
				881	static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
				882	HV_REFERENCE_TSC_PAGE *tsc_ref)
				883	{
				884	u64 max_mul;
				885
				886	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
				887	return false;
				888
				889	/*
				890	* check if scale would overflow, if so we use the time ref counter
				891	* tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
				892	* tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
				893	* tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
				894	*/
				895	max_mul = 100ull << (32 - hv_clock->tsc_shift);
				896	if (hv_clock->tsc_to_system_mul >= max_mul)
				897	return false;
				898
				899	/*
				900	* Otherwise compute the scale and offset according to the formulas
				901	* derived above.
				902	*/
				903	tsc_ref->tsc_scale =
				904	mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				905	hv_clock->tsc_to_system_mul,
				906	100);
				907
				908	tsc_ref->tsc_offset = hv_clock->system_time;
				909	do_div(tsc_ref->tsc_offset, 100);
				910	tsc_ref->tsc_offset -=
				911	mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
				912	return true;
				913	}
				914
				915	void kvm_hv_setup_tsc_page(struct kvm *kvm,
				916	struct pvclock_vcpu_time_info *hv_clock)
				917	{
				918	struct kvm_hv *hv = &kvm->arch.hyperv;
				919	u32 tsc_seq;
				920	u64 gfn;
				921
				922	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
				923	BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
				924
				925	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
				926	return;
				927
				928	mutex_lock(&kvm->arch.hyperv.hv_lock);
				929	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
				930	goto out_unlock;
				931
				932	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
				933	/*
				934	* Because the TSC parameters only vary when there is a
				935	* change in the master clock, do not bother with caching.
				936	*/
				937	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				938	&tsc_seq, sizeof(tsc_seq))))
				939	goto out_unlock;
				940
				941	/*
				942	* While we're computing and writing the parameters, force the
				943	* guest to use the time reference count MSR.
				944	*/
				945	hv->tsc_ref.tsc_sequence = 0;
				946	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
				947	&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
				948	goto out_unlock;
				949
				950	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
				951	goto out_unlock;
				952
				953	/* Ensure sequence is zero before writing the rest of the struct. */
				954	smp_wmb();
				955	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
				956	goto out_unlock;
				957
				958	/*
				959	* Now switch to the TSC page mechanism by writing the sequence.
				960	*/
				961	tsc_seq++;
				962	if (tsc_seq == 0xFFFFFFFF \|\| tsc_seq == 0)
				963	tsc_seq = 1;
				964
				965	/* Write the struct entirely before the non-zero sequence. */
				966	smp_wmb();
				967
				968	hv->tsc_ref.tsc_sequence = tsc_seq;
				969	kvm_write_guest(kvm, gfn_to_gpa(gfn),
				970	&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
				971	out_unlock:
				972	mutex_unlock(&kvm->arch.hyperv.hv_lock);
				973	}
				974
				975	static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
				976	bool host)
				977	{
				978	struct kvm *kvm = vcpu->kvm;
				979	struct kvm_hv *hv = &kvm->arch.hyperv;
				980
				981	switch (msr) {
				982	case HV_X64_MSR_GUEST_OS_ID:
				983	hv->hv_guest_os_id = data;
				984	/* setting guest os id to zero disables hypercall page */
				985	if (!hv->hv_guest_os_id)
				986	hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
				987	break;
				988	case HV_X64_MSR_HYPERCALL: {
				989	u64 gfn;
				990	unsigned long addr;
				991	u8 instructions[4];
				992
				993	/* if guest os id is not set hypercall should remain disabled */
				994	if (!hv->hv_guest_os_id)
				995	break;
				996	if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
				997	hv->hv_hypercall = data;
				998	break;
				999	}
				1000	gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
				1001	addr = gfn_to_hva(kvm, gfn);
				1002	if (kvm_is_error_hva(addr))
				1003	return 1;
				1004	kvm_x86_ops->patch_hypercall(vcpu, instructions);
				1005	((unsigned char )instructions)[3] = 0xc3; / ret */
				1006	if (__copy_to_user((void __user *)addr, instructions, 4))
				1007	return 1;
				1008	hv->hv_hypercall = data;
				1009	mark_page_dirty(kvm, gfn);
				1010	break;
				1011	}
				1012	case HV_X64_MSR_REFERENCE_TSC:
				1013	hv->hv_tsc_page = data;
				1014	if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
				1015	kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
				1016	break;
				1017	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
				1018	return kvm_hv_msr_set_crash_data(vcpu,
				1019	msr - HV_X64_MSR_CRASH_P0,
				1020	data);
				1021	case HV_X64_MSR_CRASH_CTL:
				1022	return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
				1023	case HV_X64_MSR_RESET:
				1024	if (data == 1) {
				1025	vcpu_debug(vcpu, "hyper-v reset requested\n");
				1026	kvm_make_request(KVM_REQ_HV_RESET, vcpu);
				1027	}
				1028	break;
				1029	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
				1030	hv->hv_reenlightenment_control = data;
				1031	break;
				1032	case HV_X64_MSR_TSC_EMULATION_CONTROL:
				1033	hv->hv_tsc_emulation_control = data;
				1034	break;
				1035	case HV_X64_MSR_TSC_EMULATION_STATUS:
				1036	hv->hv_tsc_emulation_status = data;
				1037	break;
				1038	case HV_X64_MSR_TIME_REF_COUNT:
				1039	/* read-only, but still ignore it if host-initiated */
				1040	if (!host)
				1041	return 1;
				1042	break;
				1043	default:
				1044	vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
				1045	msr, data);
				1046	return 1;
				1047	}
				1048	return 0;
				1049	}
				1050
				1051	/* Calculate cpu time spent by current task in 100ns units */
				1052	static u64 current_task_runtime_100ns(void)
				1053	{
				1054	u64 utime, stime;
				1055
				1056	task_cputime_adjusted(current, &utime, &stime);
				1057
				1058	return div_u64(utime + stime, 100);
				1059	}
				1060
				1061	static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
				1062	{
				1063	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
				1064
				1065	switch (msr) {
				1066	case HV_X64_MSR_VP_INDEX: {
				1067	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
				1068	int vcpu_idx = kvm_vcpu_get_idx(vcpu);
				1069	u32 new_vp_index = (u32)data;
				1070
				1071	if (!host \|\| new_vp_index >= KVM_MAX_VCPUS)
				1072	return 1;
				1073
				1074	if (new_vp_index == hv_vcpu->vp_index)
				1075	return 0;
				1076
				1077	/*
				1078	* The VP index is initialized to vcpu_index by
				1079	* kvm_hv_vcpu_postcreate so they initially match. Now the
				1080	* VP index is changing, adjust num_mismatched_vp_indexes if
				1081	* it now matches or no longer matches vcpu_idx.
				1082	*/
				1083	if (hv_vcpu->vp_index == vcpu_idx)
				1084	atomic_inc(&hv->num_mismatched_vp_indexes);
				1085	else if (new_vp_index == vcpu_idx)
				1086	atomic_dec(&hv->num_mismatched_vp_indexes);
				1087
				1088	hv_vcpu->vp_index = new_vp_index;
				1089	break;
				1090	}
				1091	case HV_X64_MSR_VP_ASSIST_PAGE: {
				1092	u64 gfn;
				1093	unsigned long addr;
				1094
				1095	if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
				1096	hv_vcpu->hv_vapic = data;
				1097	if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
				1098	return 1;
				1099	break;
				1100	}
				1101	gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
				1102	addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
				1103	if (kvm_is_error_hva(addr))
				1104	return 1;
				1105	if (__clear_user((void __user *)addr, PAGE_SIZE))
				1106	return 1;
				1107	hv_vcpu->hv_vapic = data;
				1108	kvm_vcpu_mark_page_dirty(vcpu, gfn);
				1109	if (kvm_lapic_enable_pv_eoi(vcpu,
				1110	gfn_to_gpa(gfn) \| KVM_MSR_ENABLED,
				1111	sizeof(struct hv_vp_assist_page)))
				1112	return 1;
				1113	break;
				1114	}
				1115	case HV_X64_MSR_EOI:
				1116	return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
				1117	case HV_X64_MSR_ICR:
				1118	return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
				1119	case HV_X64_MSR_TPR:
				1120	return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
				1121	case HV_X64_MSR_VP_RUNTIME:
				1122	if (!host)
				1123	return 1;
				1124	hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
				1125	break;
				1126	case HV_X64_MSR_SCONTROL:
				1127	case HV_X64_MSR_SVERSION:
				1128	case HV_X64_MSR_SIEFP:
				1129	case HV_X64_MSR_SIMP:
				1130	case HV_X64_MSR_EOM:
				1131	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
				1132	return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
				1133	case HV_X64_MSR_STIMER0_CONFIG:
				1134	case HV_X64_MSR_STIMER1_CONFIG:
				1135	case HV_X64_MSR_STIMER2_CONFIG:
				1136	case HV_X64_MSR_STIMER3_CONFIG: {
				1137	int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
				1138
				1139	return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
				1140	data, host);
				1141	}
				1142	case HV_X64_MSR_STIMER0_COUNT:
				1143	case HV_X64_MSR_STIMER1_COUNT:
				1144	case HV_X64_MSR_STIMER2_COUNT:
				1145	case HV_X64_MSR_STIMER3_COUNT: {
				1146	int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
				1147
				1148	return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
				1149	data, host);
				1150	}
				1151	case HV_X64_MSR_TSC_FREQUENCY:
				1152	case HV_X64_MSR_APIC_FREQUENCY:
				1153	/* read-only, but still ignore it if host-initiated */
				1154	if (!host)
				1155	return 1;
				1156	break;
				1157	default:
				1158	vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
				1159	msr, data);
				1160	return 1;
				1161	}
				1162
				1163	return 0;
				1164	}
				1165
				1166	static int kvm_hv_get_msr_pw(struct kvm_vcpu vcpu, u32 msr, u64 pdata)
				1167	{
				1168	u64 data = 0;
				1169	struct kvm *kvm = vcpu->kvm;
				1170	struct kvm_hv *hv = &kvm->arch.hyperv;
				1171
				1172	switch (msr) {
				1173	case HV_X64_MSR_GUEST_OS_ID:
				1174	data = hv->hv_guest_os_id;
				1175	break;
				1176	case HV_X64_MSR_HYPERCALL:
				1177	data = hv->hv_hypercall;
				1178	break;
				1179	case HV_X64_MSR_TIME_REF_COUNT:
				1180	data = get_time_ref_counter(kvm);
				1181	break;
				1182	case HV_X64_MSR_REFERENCE_TSC:
				1183	data = hv->hv_tsc_page;
				1184	break;
				1185	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
				1186	return kvm_hv_msr_get_crash_data(vcpu,
				1187	msr - HV_X64_MSR_CRASH_P0,
				1188	pdata);
				1189	case HV_X64_MSR_CRASH_CTL:
				1190	return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
				1191	case HV_X64_MSR_RESET:
				1192	data = 0;
				1193	break;
				1194	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
				1195	data = hv->hv_reenlightenment_control;
				1196	break;
				1197	case HV_X64_MSR_TSC_EMULATION_CONTROL:
				1198	data = hv->hv_tsc_emulation_control;
				1199	break;
				1200	case HV_X64_MSR_TSC_EMULATION_STATUS:
				1201	data = hv->hv_tsc_emulation_status;
				1202	break;
				1203	default:
				1204	vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
				1205	return 1;
				1206	}
				1207
				1208	*pdata = data;
				1209	return 0;
				1210	}
				1211
				1212	static int kvm_hv_get_msr(struct kvm_vcpu vcpu, u32 msr, u64 pdata,
				1213	bool host)
				1214	{
				1215	u64 data = 0;
				1216	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
				1217
				1218	switch (msr) {
				1219	case HV_X64_MSR_VP_INDEX:
				1220	data = hv_vcpu->vp_index;
				1221	break;
				1222	case HV_X64_MSR_EOI:
				1223	return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
				1224	case HV_X64_MSR_ICR:
				1225	return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
				1226	case HV_X64_MSR_TPR:
				1227	return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
				1228	case HV_X64_MSR_VP_ASSIST_PAGE:
				1229	data = hv_vcpu->hv_vapic;
				1230	break;
				1231	case HV_X64_MSR_VP_RUNTIME:
				1232	data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
				1233	break;
				1234	case HV_X64_MSR_SCONTROL:
				1235	case HV_X64_MSR_SVERSION:
				1236	case HV_X64_MSR_SIEFP:
				1237	case HV_X64_MSR_SIMP:
				1238	case HV_X64_MSR_EOM:
				1239	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
				1240	return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
				1241	case HV_X64_MSR_STIMER0_CONFIG:
				1242	case HV_X64_MSR_STIMER1_CONFIG:
				1243	case HV_X64_MSR_STIMER2_CONFIG:
				1244	case HV_X64_MSR_STIMER3_CONFIG: {
				1245	int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
				1246
				1247	return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
				1248	pdata);
				1249	}
				1250	case HV_X64_MSR_STIMER0_COUNT:
				1251	case HV_X64_MSR_STIMER1_COUNT:
				1252	case HV_X64_MSR_STIMER2_COUNT:
				1253	case HV_X64_MSR_STIMER3_COUNT: {
				1254	int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
				1255
				1256	return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
				1257	pdata);
				1258	}
				1259	case HV_X64_MSR_TSC_FREQUENCY:
				1260	data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
				1261	break;
				1262	case HV_X64_MSR_APIC_FREQUENCY:
				1263	data = APIC_BUS_FREQUENCY;
				1264	break;
				1265	default:
				1266	vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
				1267	return 1;
				1268	}
				1269	*pdata = data;
				1270	return 0;
				1271	}
				1272
				1273	int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
				1274	{
				1275	if (kvm_hv_msr_partition_wide(msr)) {
				1276	int r;
				1277
				1278	mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
				1279	r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
				1280	mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
				1281	return r;
				1282	} else
				1283	return kvm_hv_set_msr(vcpu, msr, data, host);
				1284	}
				1285
				1286	int kvm_hv_get_msr_common(struct kvm_vcpu vcpu, u32 msr, u64 pdata, bool host)
				1287	{
				1288	if (kvm_hv_msr_partition_wide(msr)) {
				1289	int r;
				1290
				1291	mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
				1292	r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
				1293	mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
				1294	return r;
				1295	} else
				1296	return kvm_hv_get_msr(vcpu, msr, pdata, host);
				1297	}
				1298
				1299	static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
				1300	{
				1301	int i = 0, j;
				1302
				1303	if (!(valid_bank_mask & BIT_ULL(bank_no)))
				1304	return -1;
				1305
				1306	for (j = 0; j < bank_no; j++)
				1307	if (valid_bank_mask & BIT_ULL(j))
				1308	i++;
				1309
				1310	return i;
				1311	}
				1312
				1313	static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
				1314	u16 rep_cnt, bool ex)
				1315	{
				1316	struct kvm *kvm = current_vcpu->kvm;
				1317	struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
				1318	struct hv_tlb_flush_ex flush_ex;
				1319	struct hv_tlb_flush flush;
				1320	struct kvm_vcpu *vcpu;
				1321	unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
				1322	unsigned long valid_bank_mask = 0;
				1323	u64 sparse_banks[64];
				1324	int sparse_banks_len, i;
				1325	bool all_cpus;
				1326
				1327	if (!ex) {
				1328	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
				1329	return HV_STATUS_INVALID_HYPERCALL_INPUT;
				1330
				1331	trace_kvm_hv_flush_tlb(flush.processor_mask,
				1332	flush.address_space, flush.flags);
				1333
				1334	sparse_banks[0] = flush.processor_mask;
				1335
				1336	/*
				1337	* Work around possible WS2012 bug: it sends hypercalls
				1338	* with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
				1339	* while also expecting us to flush something and crashing if
				1340	* we don't. Let's treat processor_mask == 0 same as
				1341	* HV_FLUSH_ALL_PROCESSORS.
				1342	*/
				1343	all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) \|\|
				1344	flush.processor_mask == 0;
				1345	} else {
				1346	if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
				1347	sizeof(flush_ex))))
				1348	return HV_STATUS_INVALID_HYPERCALL_INPUT;
				1349
				1350	trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
				1351	flush_ex.hv_vp_set.format,
				1352	flush_ex.address_space,
				1353	flush_ex.flags);
				1354
				1355	valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
				1356	all_cpus = flush_ex.hv_vp_set.format !=
				1357	HV_GENERIC_SET_SPARSE_4K;
				1358
				1359	sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
				1360	sizeof(sparse_banks[0]);
				1361
				1362	if (!sparse_banks_len && !all_cpus)
				1363	goto ret_success;
				1364
				1365	if (!all_cpus &&
				1366	kvm_read_guest(kvm,
				1367	ingpa + offsetof(struct hv_tlb_flush_ex,
				1368	hv_vp_set.bank_contents),
				1369	sparse_banks,
				1370	sparse_banks_len))
				1371	return HV_STATUS_INVALID_HYPERCALL_INPUT;
				1372	}
				1373
				1374	cpumask_clear(&hv_current->tlb_lush);
				1375
				1376	kvm_for_each_vcpu(i, vcpu, kvm) {
				1377	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
				1378	int bank = hv->vp_index / 64, sbank = 0;
				1379
				1380	if (!all_cpus) {
				1381	/* Banks >64 can't be represented */
				1382	if (bank >= 64)
				1383	continue;
				1384
				1385	/* Non-ex hypercalls can only address first 64 vCPUs */
				1386	if (!ex && bank)
				1387	continue;
				1388
				1389	if (ex) {
				1390	/*
				1391	* Check is the bank of this vCPU is in sparse
				1392	* set and get the sparse bank number.
				1393	*/
				1394	sbank = get_sparse_bank_no(valid_bank_mask,
				1395	bank);
				1396
				1397	if (sbank < 0)
				1398	continue;
				1399	}
				1400
				1401	if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
				1402	continue;
				1403	}
				1404
				1405	/*
				1406	* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
				1407	* can't analyze it here, flush TLB regardless of the specified
				1408	* address space.
				1409	*/
				1410	__set_bit(i, vcpu_bitmap);
				1411	}
				1412
				1413	kvm_make_vcpus_request_mask(kvm,
				1414	KVM_REQ_TLB_FLUSH \| KVM_REQUEST_NO_WAKEUP,
				1415	vcpu_bitmap, &hv_current->tlb_lush);
				1416
				1417	ret_success:
				1418	/* We always do full TLB flush, set rep_done = rep_cnt. */
				1419	return (u64)HV_STATUS_SUCCESS \|
				1420	((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
				1421	}
				1422
				1423	bool kvm_hv_hypercall_enabled(struct kvm *kvm)
				1424	{
				1425	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
				1426	}
				1427
				1428	static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
				1429	{
				1430	bool longmode;
				1431
				1432	longmode = is_64_bit_mode(vcpu);
				1433	if (longmode)
				1434	kvm_register_write(vcpu, VCPU_REGS_RAX, result);
				1435	else {
				1436	kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
				1437	kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
				1438	}
				1439	}
				1440
				1441	static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
				1442	{
				1443	kvm_hv_hypercall_set_result(vcpu, result);
				1444	++vcpu->stat.hypercalls;
				1445	return kvm_skip_emulated_instruction(vcpu);
				1446	}
				1447
				1448	static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
				1449	{
				1450	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
				1451	}
				1452
				1453	static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
				1454	{
				1455	struct eventfd_ctx *eventfd;
				1456
				1457	if (unlikely(!fast)) {
				1458	int ret;
				1459	gpa_t gpa = param;
				1460
				1461	if ((gpa & (__alignof__(param) - 1)) \|\|
				1462	offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
				1463	return HV_STATUS_INVALID_ALIGNMENT;
				1464
				1465	ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
				1466	if (ret < 0)
				1467	return HV_STATUS_INVALID_ALIGNMENT;
				1468	}
				1469
				1470	/*
				1471	* Per spec, bits 32-47 contain the extra "flag number". However, we
				1472	* have no use for it, and in all known usecases it is zero, so just
				1473	* report lookup failure if it isn't.
				1474	*/
				1475	if (param & 0xffff00000000ULL)
				1476	return HV_STATUS_INVALID_PORT_ID;
				1477	/* remaining bits are reserved-zero */
				1478	if (param & ~KVM_HYPERV_CONN_ID_MASK)
				1479	return HV_STATUS_INVALID_HYPERCALL_INPUT;
				1480
				1481	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
				1482	rcu_read_lock();
				1483	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
				1484	rcu_read_unlock();
				1485	if (!eventfd)
				1486	return HV_STATUS_INVALID_PORT_ID;
				1487
				1488	eventfd_signal(eventfd, 1);
				1489	return HV_STATUS_SUCCESS;
				1490	}
				1491
				1492	int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
				1493	{
				1494	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
				1495	uint16_t code, rep_idx, rep_cnt;
				1496	bool fast, longmode, rep;
				1497
				1498	/*
				1499	* hypercall generates UD from non zero cpl and real mode
				1500	* per HYPER-V spec
				1501	*/
				1502	if (kvm_x86_ops->get_cpl(vcpu) != 0 \|\| !is_protmode(vcpu)) {
				1503	kvm_queue_exception(vcpu, UD_VECTOR);
				1504	return 1;
				1505	}
				1506
				1507	longmode = is_64_bit_mode(vcpu);
				1508
				1509	if (!longmode) {
				1510	param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) \|
				1511	(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
				1512	ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) \|
				1513	(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
				1514	outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) \|
				1515	(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
				1516	}
				1517	#ifdef CONFIG_X86_64
				1518	else {
				1519	param = kvm_register_read(vcpu, VCPU_REGS_RCX);
				1520	ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
				1521	outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
				1522	}
				1523	#endif
				1524
				1525	code = param & 0xffff;
				1526	fast = !!(param & HV_HYPERCALL_FAST_BIT);
				1527	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
				1528	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
				1529	rep = !!(rep_cnt \|\| rep_idx);
				1530
				1531	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
				1532
				1533	switch (code) {
				1534	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
				1535	if (unlikely(rep)) {
				1536	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1537	break;
				1538	}
				1539	kvm_vcpu_on_spin(vcpu, true);
				1540	break;
				1541	case HVCALL_SIGNAL_EVENT:
				1542	if (unlikely(rep)) {
				1543	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1544	break;
				1545	}
				1546	ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
				1547	if (ret != HV_STATUS_INVALID_PORT_ID)
				1548	break;
				1549	/* maybe userspace knows this conn_id: fall through */
				1550	case HVCALL_POST_MESSAGE:
				1551	/* don't bother userspace if it has no way to handle it */
				1552	if (unlikely(rep \|\| !vcpu_to_synic(vcpu)->active)) {
				1553	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1554	break;
				1555	}
				1556	vcpu->run->exit_reason = KVM_EXIT_HYPERV;
				1557	vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
				1558	vcpu->run->hyperv.u.hcall.input = param;
				1559	vcpu->run->hyperv.u.hcall.params[0] = ingpa;
				1560	vcpu->run->hyperv.u.hcall.params[1] = outgpa;
				1561	vcpu->arch.complete_userspace_io =
				1562	kvm_hv_hypercall_complete_userspace;
				1563	return 0;
				1564	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
				1565	if (unlikely(fast \|\| !rep_cnt \|\| rep_idx)) {
				1566	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1567	break;
				1568	}
				1569	ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
				1570	break;
				1571	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
				1572	if (unlikely(fast \|\| rep)) {
				1573	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1574	break;
				1575	}
				1576	ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
				1577	break;
				1578	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
				1579	if (unlikely(fast \|\| !rep_cnt \|\| rep_idx)) {
				1580	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1581	break;
				1582	}
				1583	ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
				1584	break;
				1585	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
				1586	if (unlikely(fast \|\| rep)) {
				1587	ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
				1588	break;
				1589	}
				1590	ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
				1591	break;
				1592	default:
				1593	ret = HV_STATUS_INVALID_HYPERCALL_CODE;
				1594	break;
				1595	}
				1596
				1597	return kvm_hv_hypercall_complete(vcpu, ret);
				1598	}
				1599
				1600	void kvm_hv_init_vm(struct kvm *kvm)
				1601	{
				1602	mutex_init(&kvm->arch.hyperv.hv_lock);
				1603	idr_init(&kvm->arch.hyperv.conn_to_evt);
				1604	}
				1605
				1606	void kvm_hv_destroy_vm(struct kvm *kvm)
				1607	{
				1608	struct eventfd_ctx *eventfd;
				1609	int i;
				1610
				1611	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
				1612	eventfd_ctx_put(eventfd);
				1613	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
				1614	}
				1615
				1616	static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
				1617	{
				1618	struct kvm_hv *hv = &kvm->arch.hyperv;
				1619	struct eventfd_ctx *eventfd;
				1620	int ret;
				1621
				1622	eventfd = eventfd_ctx_fdget(fd);
				1623	if (IS_ERR(eventfd))
				1624	return PTR_ERR(eventfd);
				1625
				1626	mutex_lock(&hv->hv_lock);
				1627	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
				1628	GFP_KERNEL);
				1629	mutex_unlock(&hv->hv_lock);
				1630
				1631	if (ret >= 0)
				1632	return 0;
				1633
				1634	if (ret == -ENOSPC)
				1635	ret = -EEXIST;
				1636	eventfd_ctx_put(eventfd);
				1637	return ret;
				1638	}
				1639
				1640	static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
				1641	{
				1642	struct kvm_hv *hv = &kvm->arch.hyperv;
				1643	struct eventfd_ctx *eventfd;
				1644
				1645	mutex_lock(&hv->hv_lock);
				1646	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
				1647	mutex_unlock(&hv->hv_lock);
				1648
				1649	if (!eventfd)
				1650	return -ENOENT;
				1651
				1652	synchronize_srcu(&kvm->srcu);
				1653	eventfd_ctx_put(eventfd);
				1654	return 0;
				1655	}
				1656
				1657	int kvm_vm_ioctl_hv_eventfd(struct kvm kvm, struct kvm_hyperv_eventfd args)
				1658	{
				1659	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) \|\|
				1660	(args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
				1661	return -EINVAL;
				1662
				1663	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
				1664	return kvm_hv_eventfd_deassign(kvm, args->conn_id);
				1665	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
				1666	}