Blame - src/kernel/linux/v4.19/arch/x86/hyperv/mmu.c - T800

blob: 2f34d52753526bf30a6457c93c94f09fe334c7f0 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	#define pr_fmt(fmt) "Hyper-V: " fmt
				2
				3	#include <linux/hyperv.h>
				4	#include <linux/log2.h>
				5	#include <linux/slab.h>
				6	#include <linux/types.h>
				7
				8	#include <asm/fpu/api.h>
				9	#include <asm/mshyperv.h>
				10	#include <asm/msr.h>
				11	#include <asm/tlbflush.h>
				12	#include <asm/tlb.h>
				13
				14	#define CREATE_TRACE_POINTS
				15	#include <asm/trace/hyperv.h>
				16
				17	/* Each gva in gva_list encodes up to 4096 pages to flush */
				18	#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
				19
				20	static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				21	const struct flush_tlb_info *info);
				22
				23	/*
				24	* Fills in gva_list starting from offset. Returns the number of items added.
				25	*/
				26	static inline int fill_gva_list(u64 gva_list[], int offset,
				27	unsigned long start, unsigned long end)
				28	{
				29	int gva_n = offset;
				30	unsigned long cur = start, diff;
				31
				32	do {
				33	diff = end > cur ? end - cur : 0;
				34
				35	gva_list[gva_n] = cur & PAGE_MASK;
				36	/*
				37	* Lower 12 bits encode the number of additional
				38	* pages to flush (in addition to the 'cur' page).
				39	*/
				40	if (diff >= HV_TLB_FLUSH_UNIT) {
				41	gva_list[gva_n] \|= ~PAGE_MASK;
				42	cur += HV_TLB_FLUSH_UNIT;
				43	} else if (diff) {
				44	gva_list[gva_n] \|= (diff - 1) >> PAGE_SHIFT;
				45	cur = end;
				46	}
				47
				48	gva_n++;
				49
				50	} while (cur < end);
				51
				52	return gva_n - offset;
				53	}
				54
				55	static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				56	const struct flush_tlb_info *info)
				57	{
				58	int cpu, vcpu, gva_n, max_gvas;
				59	struct hv_tlb_flush **flush_pcpu;
				60	struct hv_tlb_flush *flush;
				61	u64 status = U64_MAX;
				62	unsigned long flags;
				63
				64	trace_hyperv_mmu_flush_tlb_others(cpus, info);
				65
				66	if (!hv_hypercall_pg)
				67	goto do_native;
				68
				69	if (cpumask_empty(cpus))
				70	return;
				71
				72	local_irq_save(flags);
				73
				74	flush_pcpu = (struct hv_tlb_flush **)
				75	this_cpu_ptr(hyperv_pcpu_input_arg);
				76
				77	flush = *flush_pcpu;
				78
				79	if (unlikely(!flush)) {
				80	local_irq_restore(flags);
				81	goto do_native;
				82	}
				83
				84	if (info->mm) {
				85	/*
				86	* AddressSpace argument must match the CR3 with PCID bits
				87	* stripped out.
				88	*/
				89	flush->address_space = virt_to_phys(info->mm->pgd);
				90	flush->address_space &= CR3_ADDR_MASK;
				91	flush->flags = 0;
				92	} else {
				93	flush->address_space = 0;
				94	flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
				95	}
				96
				97	flush->processor_mask = 0;
				98	if (cpumask_equal(cpus, cpu_present_mask)) {
				99	flush->flags \|= HV_FLUSH_ALL_PROCESSORS;
				100	} else {
				101	/*
				102	* From the supplied CPU set we need to figure out if we can get
				103	* away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
				104	* hypercalls. This is possible when the highest VP number in
				105	* the set is < 64. As VP numbers are usually in ascending order
				106	* and match Linux CPU ids, here is an optimization: we check
				107	* the VP number for the highest bit in the supplied set first
				108	* so we can quickly find out if using *_EX hypercalls is a
				109	* must. We will also check all VP numbers when walking the
				110	* supplied CPU set to remain correct in all cases.
				111	*/
				112	if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
				113	goto do_ex_hypercall;
				114
				115	for_each_cpu(cpu, cpus) {
				116	vcpu = hv_cpu_number_to_vp_number(cpu);
				117	if (vcpu == VP_INVAL) {
				118	local_irq_restore(flags);
				119	goto do_native;
				120	}
				121
				122	if (vcpu >= 64)
				123	goto do_ex_hypercall;
				124
				125	__set_bit(vcpu, (unsigned long *)
				126	&flush->processor_mask);
				127	}
				128	}
				129
				130	/*
				131	* We can flush not more than max_gvas with one hypercall. Flush the
				132	* whole address space if we were asked to do more.
				133	*/
				134	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
				135
				136	if (info->end == TLB_FLUSH_ALL) {
				137	flush->flags \|= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
				138	status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
				139	flush, NULL);
				140	} else if (info->end &&
				141	((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
				142	status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
				143	flush, NULL);
				144	} else {
				145	gva_n = fill_gva_list(flush->gva_list, 0,
				146	info->start, info->end);
				147	status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
				148	gva_n, 0, flush, NULL);
				149	}
				150	goto check_status;
				151
				152	do_ex_hypercall:
				153	status = hyperv_flush_tlb_others_ex(cpus, info);
				154
				155	check_status:
				156	local_irq_restore(flags);
				157
				158	if (!(status & HV_HYPERCALL_RESULT_MASK))
				159	return;
				160	do_native:
				161	native_flush_tlb_others(cpus, info);
				162	}
				163
				164	static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				165	const struct flush_tlb_info *info)
				166	{
				167	int nr_bank = 0, max_gvas, gva_n;
				168	struct hv_tlb_flush_ex **flush_pcpu;
				169	struct hv_tlb_flush_ex *flush;
				170	u64 status;
				171
				172	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
				173	return U64_MAX;
				174
				175	flush_pcpu = (struct hv_tlb_flush_ex **)
				176	this_cpu_ptr(hyperv_pcpu_input_arg);
				177
				178	flush = *flush_pcpu;
				179
				180	if (info->mm) {
				181	/*
				182	* AddressSpace argument must match the CR3 with PCID bits
				183	* stripped out.
				184	*/
				185	flush->address_space = virt_to_phys(info->mm->pgd);
				186	flush->address_space &= CR3_ADDR_MASK;
				187	flush->flags = 0;
				188	} else {
				189	flush->address_space = 0;
				190	flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
				191	}
				192
				193	flush->hv_vp_set.valid_bank_mask = 0;
				194
				195	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
				196	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
				197	if (nr_bank < 0)
				198	return U64_MAX;
				199
				200	/*
				201	* We can flush not more than max_gvas with one hypercall. Flush the
				202	* whole address space if we were asked to do more.
				203	*/
				204	max_gvas =
				205	(PAGE_SIZE - sizeof(flush) - nr_bank
				206	sizeof(flush->hv_vp_set.bank_contents[0])) /
				207	sizeof(flush->gva_list[0]);
				208
				209	if (info->end == TLB_FLUSH_ALL) {
				210	flush->flags \|= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
				211	status = hv_do_rep_hypercall(
				212	HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				213	0, nr_bank, flush, NULL);
				214	} else if (info->end &&
				215	((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
				216	status = hv_do_rep_hypercall(
				217	HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				218	0, nr_bank, flush, NULL);
				219	} else {
				220	gva_n = fill_gva_list(flush->gva_list, nr_bank,
				221	info->start, info->end);
				222	status = hv_do_rep_hypercall(
				223	HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				224	gva_n, nr_bank, flush, NULL);
				225	}
				226
				227	return status;
				228	}
				229
				230	void hyperv_setup_mmu_ops(void)
				231	{
				232	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
				233	return;
				234
				235	pr_info("Using hypercall for remote TLB flush\n");
				236	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
				237	pv_mmu_ops.tlb_remove_table = tlb_remove_table;
				238	}