blob: 6024fafed1642d0c2a5d09143a55849dbdb69aa0 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/acpi.h>
4#include <linux/cpu.h>
5#include <linux/kexec.h>
6#include <linux/memblock.h>
7
8#include <xen/features.h>
9#include <xen/events.h>
10#include <xen/interface/memory.h>
11
12#include <asm/cpu.h>
13#include <asm/smp.h>
14#include <asm/reboot.h>
15#include <asm/setup.h>
16#include <asm/hypervisor.h>
17#include <asm/e820/api.h>
18#include <asm/early_ioremap.h>
19
20#include <asm/xen/cpuid.h>
21#include <asm/xen/hypervisor.h>
22#include <asm/xen/page.h>
23
24#include "xen-ops.h"
25#include "mmu.h"
26#include "smp.h"
27
/*
 * Guest frame number used for the Xen shared_info page.  It is chosen in
 * reserve_shared_info() and handed to the hypervisor by
 * xen_hvm_init_shared_info().
 */
static unsigned long shared_info_pfn;
29
30void xen_hvm_init_shared_info(void)
31{
32 struct xen_add_to_physmap xatp;
33
34 xatp.domid = DOMID_SELF;
35 xatp.idx = 0;
36 xatp.space = XENMAPSPACE_shared_info;
37 xatp.gpfn = shared_info_pfn;
38 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
39 BUG();
40}
41
42static void __init reserve_shared_info(void)
43{
44 u64 pa;
45
46 /*
47 * Search for a free page starting at 4kB physical address.
48 * Low memory is preferred to avoid an EPT large page split up
49 * by the mapping.
50 * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
51 * the BIOS used for HVM guests is well behaved and won't
52 * clobber memory other than the first 4kB.
53 */
54 for (pa = PAGE_SIZE;
55 !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
56 memblock_is_reserved(pa);
57 pa += PAGE_SIZE)
58 ;
59
60 shared_info_pfn = PHYS_PFN(pa);
61
62 memblock_reserve(pa, PAGE_SIZE);
63 HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE);
64}
65
66static void __init xen_hvm_init_mem_mapping(void)
67{
68 early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
69 HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
70
71 /*
72 * The virtual address of the shared_info page has changed, so
73 * the vcpu_info pointer for VCPU 0 is now stale.
74 *
75 * The prepare_boot_cpu callback will re-initialize it via
76 * xen_vcpu_setup, but we can't rely on that to be called for
77 * old Xen versions (xen_have_vector_callback == 0).
78 *
79 * It is, in any case, bad to have a stale vcpu_info pointer
80 * so reset it now.
81 */
82 xen_vcpu_info_reset(0);
83}
84
85static void __init init_hvm_pv_info(void)
86{
87 int major, minor;
88 uint32_t eax, ebx, ecx, edx, base;
89
90 base = xen_cpuid_base();
91 eax = cpuid_eax(base + 1);
92
93 major = eax >> 16;
94 minor = eax & 0xffff;
95 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
96
97 xen_domain_type = XEN_HVM_DOMAIN;
98
99 /* PVH set up hypercall page in xen_prepare_pvh(). */
100 if (xen_pvh_domain())
101 pv_info.name = "Xen PVH";
102 else {
103 u64 pfn;
104 uint32_t msr;
105
106 pv_info.name = "Xen HVM";
107 msr = cpuid_ebx(base + 2);
108 pfn = __pa(hypercall_page);
109 wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
110 }
111
112 xen_setup_features();
113
114 cpuid(base + 4, &eax, &ebx, &ecx, &edx);
115 if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
116 this_cpu_write(xen_vcpu_id, ebx);
117 else
118 this_cpu_write(xen_vcpu_id, smp_processor_id());
119}
120
121#ifdef CONFIG_KEXEC_CORE
122static void xen_hvm_shutdown(void)
123{
124 native_machine_shutdown();
125 if (kexec_in_progress)
126 xen_reboot(SHUTDOWN_soft_reset);
127}
128
129static void xen_hvm_crash_shutdown(struct pt_regs *regs)
130{
131 native_machine_crash_shutdown(regs);
132 xen_reboot(SHUTDOWN_soft_reset);
133}
134#endif
135
136static int xen_cpu_up_prepare_hvm(unsigned int cpu)
137{
138 int rc = 0;
139
140 /*
141 * This can happen if CPU was offlined earlier and
142 * offlining timed out in common_cpu_die().
143 */
144 if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
145 xen_smp_intr_free(cpu);
146 xen_uninit_lock_cpu(cpu);
147 }
148
149 if (cpu_acpi_id(cpu) != U32_MAX)
150 per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
151 else
152 per_cpu(xen_vcpu_id, cpu) = cpu;
153 rc = xen_vcpu_setup(cpu);
154 if (rc)
155 return rc;
156
157 if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
158 xen_setup_timer(cpu);
159
160 rc = xen_smp_intr_init(cpu);
161 if (rc) {
162 WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
163 cpu, rc);
164 }
165 return rc;
166}
167
168static int xen_cpu_dead_hvm(unsigned int cpu)
169{
170 xen_smp_intr_free(cpu);
171
172 if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
173 xen_teardown_timer(cpu);
174
175 return 0;
176}
177
178static bool no_vector_callback __initdata;
179
180static void __init xen_hvm_guest_init(void)
181{
182 if (xen_pv_domain())
183 return;
184
185 init_hvm_pv_info();
186
187 reserve_shared_info();
188 xen_hvm_init_shared_info();
189
190 /*
191 * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
192 * page, we use it in the event channel upcall and in some pvclock
193 * related functions.
194 */
195 xen_vcpu_info_reset(0);
196
197 xen_panic_handler_init();
198
199 if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector))
200 xen_have_vector_callback = 1;
201
202 xen_hvm_smp_init();
203 WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
204 xen_unplug_emulated_devices();
205 x86_init.irqs.intr_init = xen_init_IRQ;
206 xen_hvm_init_time_ops();
207 xen_hvm_init_mmu_ops();
208
209#ifdef CONFIG_KEXEC_CORE
210 machine_ops.shutdown = xen_hvm_shutdown;
211 machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
212#endif
213}
214
215static __init int xen_parse_nopv(char *arg)
216{
217 pr_notice("\"xen_nopv\" is deprecated, please use \"nopv\" instead\n");
218
219 if (xen_cpuid_base())
220 nopv = true;
221 return 0;
222}
223early_param("xen_nopv", xen_parse_nopv);
224
225static __init int xen_parse_no_vector_callback(char *arg)
226{
227 no_vector_callback = true;
228 return 0;
229}
230early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
231
232bool __init xen_hvm_need_lapic(void)
233{
234 if (xen_pv_domain())
235 return false;
236 if (!xen_hvm_domain())
237 return false;
238 if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
239 return false;
240 return true;
241}
242
243static __init void xen_hvm_guest_late_init(void)
244{
245#ifdef CONFIG_XEN_PVH
246 /* Test for PVH domain (PVH boot path taken overrides ACPI flags). */
247 if (!xen_pvh &&
248 (x86_platform.legacy.rtc || !x86_platform.legacy.no_vga))
249 return;
250
251 /* PVH detected. */
252 xen_pvh = true;
253
254 if (nopv)
255 panic("\"nopv\" and \"xen_nopv\" parameters are unsupported in PVH guest.");
256
257 /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
258 if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC)
259 acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
260
261 machine_ops.emergency_restart = xen_emergency_restart;
262 pv_info.name = "Xen PVH";
263#endif
264}
265
266static uint32_t __init xen_platform_hvm(void)
267{
268 uint32_t xen_domain = xen_cpuid_base();
269 struct x86_hyper_init *h = &x86_hyper_xen_hvm.init;
270
271 if (xen_pv_domain())
272 return 0;
273
274 if (xen_pvh_domain() && nopv) {
275 /* Guest booting via the Xen-PVH boot entry goes here */
276 pr_info("\"nopv\" parameter is ignored in PVH guest\n");
277 nopv = false;
278 } else if (nopv && xen_domain) {
279 /*
280 * Guest booting via normal boot entry (like via grub2) goes
281 * here.
282 *
283 * Use interface functions for bare hardware if nopv,
284 * xen_hvm_guest_late_init is an exception as we need to
285 * detect PVH and panic there.
286 */
287 h->init_platform = x86_init_noop;
288 h->x2apic_available = bool_x86_init_noop;
289 h->init_mem_mapping = x86_init_noop;
290 h->init_after_bootmem = x86_init_noop;
291 h->guest_late_init = xen_hvm_guest_late_init;
292 x86_hyper_xen_hvm.runtime.pin_vcpu = x86_op_int_noop;
293 }
294 return xen_domain;
295}
296
297struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
298 .name = "Xen HVM",
299 .detect = xen_platform_hvm,
300 .type = X86_HYPER_XEN_HVM,
301 .init.init_platform = xen_hvm_guest_init,
302 .init.x2apic_available = xen_x2apic_para_available,
303 .init.init_mem_mapping = xen_hvm_init_mem_mapping,
304 .init.guest_late_init = xen_hvm_guest_late_init,
305 .runtime.pin_vcpu = xen_pin_vcpu,
306 .ignore_nopv = true,
307};