blob: c7912fc072b10b05293250172c125c707d7a1fed [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Local APIC handling, local APIC timers
 *
 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *
 * Fixes
 * Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *				thanks to Eric Gilmore
 *				and Rolf G. Tews
 *				for testing these extensively.
 * Maciej W. Rozycki	:	Various updates and fixes.
 * Mikael Pettersson	:	Power Management for UP-APIC.
 * Pavel Machek and
 * Mikael Pettersson	:	PM converted to driver model.
 */
17
18#include <linux/perf_event.h>
19#include <linux/kernel_stat.h>
20#include <linux/mc146818rtc.h>
21#include <linux/acpi_pmtmr.h>
22#include <linux/clockchips.h>
23#include <linux/interrupt.h>
24#include <linux/memblock.h>
25#include <linux/ftrace.h>
26#include <linux/ioport.h>
27#include <linux/export.h>
28#include <linux/syscore_ops.h>
29#include <linux/delay.h>
30#include <linux/timex.h>
31#include <linux/i8253.h>
32#include <linux/dmar.h>
33#include <linux/init.h>
34#include <linux/cpu.h>
35#include <linux/dmi.h>
36#include <linux/smp.h>
37#include <linux/mm.h>
38
39#include <asm/trace/irq_vectors.h>
40#include <asm/irq_remapping.h>
41#include <asm/perf_event.h>
42#include <asm/x86_init.h>
43#include <asm/pgalloc.h>
44#include <linux/atomic.h>
45#include <asm/barrier.h>
46#include <asm/mpspec.h>
47#include <asm/i8259.h>
48#include <asm/proto.h>
49#include <asm/traps.h>
50#include <asm/apic.h>
51#include <asm/io_apic.h>
52#include <asm/desc.h>
53#include <asm/hpet.h>
54#include <asm/mtrr.h>
55#include <asm/time.h>
56#include <asm/smp.h>
57#include <asm/mce.h>
58#include <asm/tsc.h>
59#include <asm/hypervisor.h>
60#include <asm/cpu_device_id.h>
61#include <asm/intel-family.h>
62#include <asm/irq_regs.h>
63
64unsigned int num_processors;
65
66unsigned disabled_cpus;
67
68/* Processor that is doing the boot up */
69unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
70EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
71
72u8 boot_cpu_apic_version __ro_after_init;
73
74/*
75 * The highest APIC ID seen during enumeration.
76 */
77static unsigned int max_physical_apicid;
78
79/*
80 * Bitmask of physically existing CPUs:
81 */
82physid_mask_t phys_cpu_present_map;
83
84/*
85 * Processor to be disabled specified by kernel parameter
86 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
87 * avoid undefined behaviour caused by sending INIT from AP to BSP.
88 */
89static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
90
91/*
92 * This variable controls which CPUs receive external NMIs. By default,
93 * external NMIs are delivered only to the BSP.
94 */
95static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
96
97/*
98 * Map cpu index to physical APIC ID
99 */
100DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
101DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
102DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
103EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
104EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
105EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
106
107#ifdef CONFIG_X86_32
108
109/*
110 * On x86_32, the mapping between cpu and logical apicid may vary
111 * depending on apic in use. The following early percpu variable is
112 * used for the mapping. This is where the behaviors of x86_64 and 32
113 * actually diverge. Let's keep it ugly for now.
114 */
115DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
116
117/* Local APIC was disabled by the BIOS and enabled by the kernel */
118static int enabled_via_apicbase __ro_after_init;
119
120/*
121 * Handle interrupt mode configuration register (IMCR).
122 * This register controls whether the interrupt signals
123 * that reach the BSP come from the master PIC or from the
124 * local APIC. Before entering Symmetric I/O Mode, either
125 * the BIOS or the operating system must switch out of
126 * PIC Mode by changing the IMCR.
127 */
/* Route NMI and 8259 INTR through the local APIC (enter symmetric I/O mode) */
static inline void imcr_pic_to_apic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go through APIC */
	outb(0x01, 0x23);
}
135
/* Route NMI and 8259 INTR directly to the BSP again (leave symmetric I/O mode) */
static inline void imcr_apic_to_pic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go directly to BSP */
	outb(0x00, 0x23);
}
143#endif
144
145/*
146 * Knob to control our willingness to enable the local APIC.
147 *
148 * +1=force-enable
149 */
150static int force_enable_local_apic __initdata;
151
152/*
153 * APIC command line parameters
154 */
155static int __init parse_lapic(char *arg)
156{
157 if (IS_ENABLED(CONFIG_X86_32) && !arg)
158 force_enable_local_apic = 1;
159 else if (arg && !strncmp(arg, "notscdeadline", 13))
160 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
161 return 0;
162}
163early_param("lapic", parse_lapic);
164
165#ifdef CONFIG_X86_64
166static int apic_calibrate_pmtmr __initdata;
/*
 * "apicpmtimer" command line parameter: calibrate the APIC timer with
 * the ACPI PM timer; this also implies disabling the TSC (notsc).
 */
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 1;
}
173__setup("apicpmtimer", setup_apicpmtimer);
174#endif
175
176unsigned long mp_lapic_addr __ro_after_init;
177int disable_apic __ro_after_init;
178/* Disable local APIC timer from the kernel commandline or via dmi quirk */
179static int disable_apic_timer __initdata;
180/* Local APIC timer works in C2 */
181int local_apic_timer_c2_ok __ro_after_init;
182EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
183
184/*
185 * Debug level, exported for io_apic.c
186 */
187int apic_verbosity __ro_after_init;
188
189int pic_mode __ro_after_init;
190
191/* Have we found an MP table */
192int smp_found_config __ro_after_init;
193
194static struct resource lapic_resource = {
195 .name = "Local APIC",
196 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
197};
198
199unsigned int lapic_timer_period = 0;
200
201static void apic_pm_activate(void);
202
203static unsigned long apic_phys __ro_after_init;
204
205/*
206 * Get the LAPIC version
207 */
208static inline int lapic_get_version(void)
209{
210 return GET_APIC_VERSION(apic_read(APIC_LVR));
211}
212
213/*
214 * Check, if the APIC is integrated or a separate chip
215 */
/* Distinguish an integrated APIC from a discrete (82489DX) chip */
static inline int lapic_is_integrated(void)
{
	int version = lapic_get_version();

	return APIC_INTEGRATED(version);
}
220
221/*
222 * Check, whether this is a modern or a first generation APIC
223 */
224static int modern_apic(void)
225{
226 /* AMD systems use old APIC versions, so check the CPU */
227 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
228 boot_cpu_data.x86 >= 0xf)
229 return 1;
230
231 /* Hygon systems use modern APIC */
232 if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
233 return 1;
234
235 return lapic_get_version() >= 0x14;
236}
237
238/*
239 * right after this call apic become NOOP driven
240 * so apic->write/read doesn't do anything
241 */
static void __init apic_disable(void)
{
	pr_info("APIC: switched to apic NOOP\n");
	/* After this, apic->read()/write() hit the no-op driver */
	apic = &apic_noop;
}
247
/* Busy-wait (unbounded) until the ICR delivery-status bit reports idle */
void native_apic_wait_icr_idle(void)
{
	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
		cpu_relax();
}
253
254u32 native_safe_apic_wait_icr_idle(void)
255{
256 u32 send_status;
257 int timeout;
258
259 timeout = 0;
260 do {
261 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
262 if (!send_status)
263 break;
264 inc_irq_stat(icr_read_retry_count);
265 udelay(100);
266 } while (timeout++ < 1000);
267
268 return send_status;
269}
270
/*
 * Write the xAPIC interrupt command register. The destination (ICR2)
 * must be written before the low word, since writing APIC_ICR is what
 * triggers delivery. IRQs are disabled so an interrupt handler on this
 * CPU cannot interleave its own ICR writes between the two.
 */
void native_apic_icr_write(u32 low, u32 id)
{
	unsigned long flags;

	local_irq_save(flags);
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
	apic_write(APIC_ICR, low);
	local_irq_restore(flags);
}
280
281u64 native_apic_icr_read(void)
282{
283 u32 icr1, icr2;
284
285 icr2 = apic_read(APIC_ICR2);
286 icr1 = apic_read(APIC_ICR);
287
288 return icr1 | ((u64)icr2 << 32);
289}
290
291#ifdef CONFIG_X86_32
292/**
293 * get_physical_broadcast - Get number of physical broadcast IDs
294 */
int get_physical_broadcast(void)
{
	/* Modern APICs use 8-bit physical IDs, old ones only 4 bits */
	if (modern_apic())
		return 0xff;

	return 0xf;
}
299#endif
300
301/**
302 * lapic_get_maxlvt - get the maximum number of local vector table entries
303 */
304int lapic_get_maxlvt(void)
305{
306 /*
307 * - we always have APIC integrated on 64bit mode
308 * - 82489DXs do not report # of LVT entries
309 */
310 return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
311}
312
313/*
314 * Local APIC timer
315 */
316
317/* Clock divisor */
318#define APIC_DIVISOR 16
319#define TSC_DIVISOR 8
320
321/*
322 * This function sets up the local APIC timer, with a timeout of
323 * 'clocks' APIC bus clock. During calibration we actually call
324 * this function twice on the boot CPU, once with a bogus timeout
325 * value, second time for real. The other (noncalibrating) CPUs
326 * call this function only once, with the real, calibrated value.
327 *
328 * We do reads before writes even if unnecessary, to get around the
329 * P5 APIC double write bug.
330 */
/*
 * Program the local APIC timer LVT entry.
 *
 * @clocks:  initial count; divided by APIC_DIVISOR before being written,
 *           only used in periodic mode
 * @oneshot: 0 selects periodic mode; non-zero selects oneshot, upgraded
 *           to TSC-deadline mode when the CPU supports it
 * @irqen:   0 programs the entry masked (no interrupts delivered)
 */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
	unsigned int lvtt_value, tmp_value;

	lvtt_value = LOCAL_TIMER_VECTOR;
	if (!oneshot)
		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

	/* Discrete 82489DX APICs need an explicit timer base selection */
	if (!lapic_is_integrated())
		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);

	if (!irqen)
		lvtt_value |= APIC_LVT_MASKED;

	apic_write(APIC_LVTT, lvtt_value);

	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
		/*
		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");
		return;
	}

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR,
		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
		APIC_TDR_DIV_16);

	if (!oneshot)
		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
370
371/*
372 * Setup extended LVT, AMD specific
373 *
374 * Software should use the LVT offsets the BIOS provides. The offsets
375 * are determined by the subsystems using it like those for MCE
376 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
377 * are supported. Beginning with family 10h at least 4 offsets are
378 * available.
379 *
380 * Since the offsets must be consistent for all cores, we keep track
381 * of the LVT offsets in software and reserve the offset for the same
382 * vector also to be used on other cores. An offset is freed by
383 * setting the entry to APIC_EILVT_MASKED.
384 *
385 * If the BIOS is right, there should be no conflicts. Otherwise a
386 * "[Firmware Bug]: ..." error message is generated. However, if
387 * software does not properly determines the offsets, it is not
388 * necessarily a BIOS bug.
389 */
390
391static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
392
393static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
394{
395 return (old & APIC_EILVT_MASKED)
396 || (new == APIC_EILVT_MASKED)
397 || ((new & ~APIC_EILVT_MASKED) == old);
398}
399
/*
 * Atomically reserve EILVT offset @offset for entry value @new in the
 * software bookkeeping table. Returns @new on success, the conflicting
 * owner's value on a vector clash, or ~0 for an out-of-range offset.
 */
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
	unsigned int rsvd, vector;

	if (offset >= APIC_EILVT_NR_MAX)
		return ~0;

	/* cmpxchg loop: retry until the slot is claimed or found incompatible */
	rsvd = atomic_read(&eilvt_offsets[offset]);
	do {
		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
		if (vector && !eilvt_entry_is_changeable(vector, new))
			/* may not change if vectors are different */
			return rsvd;
	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));

	/* Log only a real (unmasked) vector that newly claimed the offset */
	rsvd = new & ~APIC_EILVT_MASKED;
	if (rsvd && rsvd != vector)
		pr_info("LVT offset %d assigned for vector 0x%02x\n",
			offset, rsvd);

	return new;
}
422
423/*
424 * If mask=1, the LVT entry does not generate interrupts while mask=0
425 * enables the vector. See also the BKDGs. Must be called with
426 * preemption disabled.
427 */
428
/*
 * Program an AMD extended LVT entry on this CPU.
 *
 * @offset:   EILVT register index
 * @vector:   interrupt vector
 * @msg_type: delivery/message type field
 * @mask:     1 = entry masked (no interrupts), 0 = enabled
 *
 * Returns 0 on success, -EINVAL when another CPU already reserved the
 * offset for a different vector, -EBUSY when this CPU's register holds
 * an incompatible value. Must be called with preemption disabled
 * (see comment above).
 */
int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
	unsigned long reg = APIC_EILVTn(offset);
	unsigned int new, old, reserved;

	new = (mask << 16) | (msg_type << 8) | vector;
	old = apic_read(reg);
	/* Cross-CPU consistency check via the software reservation table */
	reserved = reserve_eilvt_offset(offset, new);

	if (reserved != new) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on another cpu\n",
		       smp_processor_id(), reg, offset, new, reserved);
		return -EINVAL;
	}

	if (!eilvt_entry_is_changeable(old, new)) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on this cpu\n",
		       smp_processor_id(), reg, offset, new, old);
		return -EBUSY;
	}

	apic_write(reg, new);

	return 0;
}
458EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
459
460/*
461 * Program the next event, relative to now
462 */
/*
 * clockevents set_next_event() callback (oneshot, non-deadline mode):
 * arm the timer @delta ticks from now by rewriting the initial count.
 */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	apic_write(APIC_TMICT, delta);
	return 0;
}
469
/*
 * clockevents set_next_event() callback for TSC-deadline mode. @delta
 * is scaled by TSC_DIVISOR because the device is registered with a
 * frequency of tsc_khz * 1000 / TSC_DIVISOR (see setup_APIC_timer()).
 */
static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 tsc;

	/* This MSR is special and need a special fence: */
	weak_wrmsr_fence();

	tsc = rdtsc();
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
	return 0;
}
482
/* clockevents shutdown/oneshot-stopped callback: silence the APIC timer */
static int lapic_timer_shutdown(struct clock_event_device *evt)
{
	unsigned int v;

	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	/* Mask the timer LVT entry */
	v = apic_read(APIC_LVTT);
	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
	apic_write(APIC_LVTT, v);

	/*
	 * Setting APIC_LVT_MASKED (above) should be enough to tell
	 * the hardware that this timer will never fire. But AMD
	 * erratum 411 and some Intel CPU behavior circa 2024 say
	 * otherwise. Time for belt and suspenders programming: mask
	 * the timer _and_ zero the counter registers:
	 */
	if (v & APIC_LVT_TIMER_TSCDEADLINE)
		wrmsrl(MSR_IA32_TSC_DEADLINE, 0);
	else
		apic_write(APIC_TMICT, 0);

	return 0;
}
509
510static inline int
511lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
512{
513 /* Lapic used as dummy for broadcast ? */
514 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
515 return 0;
516
517 __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
518 return 0;
519}
520
/* clockevents set_state_periodic() callback */
static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}
525
/* clockevents set_state_oneshot() callback */
static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}
530
531/*
532 * Local APIC timer broadcast function
533 */
/*
 * Local APIC timer broadcast function: deliver the timer tick to all
 * CPUs in @mask via an IPI. A no-op on UP builds.
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
540
541
542/*
543 * The local apic timer can be used for any function which is CPU local.
544 */
/*
 * Template clockevent device, copied into each CPU's lapic_events
 * entry by setup_APIC_timer(). Registered as DUMMY until calibration
 * succeeds; C3STOP is dropped on CPUs with ARAT.
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};
/* Per-CPU clockevent instances, initialized from the template above */
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
561
562#define DEADLINE_MODEL_MATCH_FUNC(model, func) \
563 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }
564
565#define DEADLINE_MODEL_MATCH_REV(model, rev) \
566 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }
567
568static __init u32 hsx_deadline_rev(void)
569{
570 switch (boot_cpu_data.x86_stepping) {
571 case 0x02: return 0x3a; /* EP */
572 case 0x04: return 0x0f; /* EX */
573 }
574
575 return ~0U;
576}
577
578static __init u32 bdx_deadline_rev(void)
579{
580 switch (boot_cpu_data.x86_stepping) {
581 case 0x02: return 0x00000011;
582 case 0x03: return 0x0700000e;
583 case 0x04: return 0x0f00000c;
584 case 0x05: return 0x0e000003;
585 }
586
587 return ~0U;
588}
589
590static __init u32 skx_deadline_rev(void)
591{
592 switch (boot_cpu_data.x86_stepping) {
593 case 0x03: return 0x01000136;
594 case 0x04: return 0x02000014;
595 }
596
597 if (boot_cpu_data.x86_stepping > 4)
598 return 0;
599
600 return ~0U;
601}
602
/*
 * Intel CPUs whose TSC-deadline timer is broken below a minimum
 * microcode revision. driver_data holds either a literal revision or
 * a pointer to a stepping-dependent lookup function; the two are told
 * apart by the pointer's sign bit in apic_validate_deadline_timer().
 */
static const struct x86_cpu_id deadline_match[] __initconst = {
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,	bdx_deadline_rev),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,		0x22),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,	0x20),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,	0x25),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,	0xb2),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,		0xb2),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,	0x52),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,		0x52),

	{},
};
624
/*
 * Validate the TSC-deadline timer: on affected Intel CPUs, clear
 * X86_FEATURE_TSC_DEADLINE_TIMER unless the microcode revision meets
 * the minimum from deadline_match[]. Returns true when the feature is
 * usable (or absent errata data), false when it had to be disabled.
 */
static __init bool apic_validate_deadline_timer(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;
	/* Under a hypervisor the host is responsible for microcode */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return true;

	/*
	 * Function pointers will have the MSB set due to address layout,
	 * immediate revisions will not.
	 */
	if ((long)m->driver_data < 0)
		rev = ((u32 (*)(void))(m->driver_data))();
	else
		rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return true;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
	return false;
}
656
657/*
658 * Setup the local APIC timer for this CPU. Copy the initialized values
659 * of the boot CPU and register the clock event in the framework.
660 */
static void setup_APIC_timer(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	/* ARAT (always-running APIC timer): no need to stop in C3 */
	if (this_cpu_has(X86_FEATURE_ARAT)) {
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
		/* Make LAPIC timer preferrable over percpu HPET */
		lapic_clockevent.rating = 150;
	}

	memcpy(levt, &lapic_clockevent, sizeof(*levt));
	levt->cpumask = cpumask_of(smp_processor_id());

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		levt->name = "lapic-deadline";
		/* Deadline mode is oneshot-only and not a dummy */
		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
				    CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		/* Frequency is TSC rate scaled down by TSC_DIVISOR */
		clockevents_config_and_register(levt,
						tsc_khz * (1000 / TSC_DIVISOR),
						0xF, ~0UL);
	} else
		clockevents_register_device(levt);
}
685
686/*
687 * Install the updated TSC frequency from recalibration at the TSC
688 * deadline clockevent devices.
689 */
/*
 * Per-CPU worker for lapic_update_tsc_freq(): push the recalibrated
 * TSC frequency into this CPU's deadline clockevent device.
 */
static void __lapic_update_tsc_freq(void *info)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	/* Only deadline-mode devices are TSC based */
	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return;

	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
}
699
/* Propagate a recalibrated TSC frequency to all CPUs' deadline clockevents */
void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}
709
710/*
711 * In this functions we calibrate APIC bus clocks to the external timer.
712 *
713 * We want to do the calibration only once since we want to have local timer
714 * irqs syncron. CPUs connected by the same APIC bus have the very same bus
715 * frequency.
716 *
717 * This was previously done by reading the PIT/HPET and waiting for a wrap
718 * around to find out, that a tick has elapsed. I have a box, where the PIT
719 * readout is broken, so it never gets out of the wait loop again. This was
720 * also reported by others.
721 *
722 * Monitoring the jiffies value is inaccurate and the clockevents
723 * infrastructure allows us to do a simple substitution of the interrupt
724 * handler.
725 *
726 * The calibration routine also uses the pm_timer when possible, as the PIT
727 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
728 * back to normal later in the boot process).
729 */
730
731#define LAPIC_CAL_LOOPS (HZ/10)
732
733static __initdata int lapic_cal_loops = -1;
734static __initdata long lapic_cal_t1, lapic_cal_t2;
735static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
736static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
737static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
738
739/*
740 * Temporary interrupt handler and polled calibration function.
741 */
/*
 * Temporary interrupt handler and polled calibration function.
 *
 * Invoked once per tick during calibration; samples the APIC timer
 * count, TSC, PM timer and jiffies on the first (loop 0) and last
 * (loop LAPIC_CAL_LOOPS) invocation, storing them in the lapic_cal_*
 * variables for calibrate_APIC_clock() to evaluate.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
	unsigned long long tsc = 0;
	long tapic = apic_read(APIC_TMCCT);
	unsigned long pm = acpi_pm_read_early();

	if (boot_cpu_has(X86_FEATURE_TSC))
		tsc = rdtsc();

	switch (lapic_cal_loops++) {
	case 0:
		/* Start-of-window sample */
		lapic_cal_t1 = tapic;
		lapic_cal_tsc1 = tsc;
		lapic_cal_pm1 = pm;
		lapic_cal_j1 = jiffies;
		break;

	case LAPIC_CAL_LOOPS:
		/* End-of-window sample; compensate a PM timer wraparound */
		lapic_cal_t2 = tapic;
		lapic_cal_tsc2 = tsc;
		if (pm < lapic_cal_pm1)
			pm += ACPI_PM_OVRRUN;
		lapic_cal_pm2 = pm;
		lapic_cal_j2 = jiffies;
		break;
	}
}
769
770static int __init
771calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
772{
773 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
774 const long pm_thresh = pm_100ms / 100;
775 unsigned long mult;
776 u64 res;
777
778#ifndef CONFIG_X86_PM_TIMER
779 return -1;
780#endif
781
782 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
783
784 /* Check, if the PM timer is available */
785 if (!deltapm)
786 return -1;
787
788 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
789
790 if (deltapm > (pm_100ms - pm_thresh) &&
791 deltapm < (pm_100ms + pm_thresh)) {
792 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
793 return 0;
794 }
795
796 res = (((u64)deltapm) * mult) >> 22;
797 do_div(res, 1000000);
798 pr_warning("APIC calibration not consistent "
799 "with PM-Timer: %ldms instead of 100ms\n",(long)res);
800
801 /* Correct the lapic counter value */
802 res = (((u64)(*delta)) * pm_100ms);
803 do_div(res, deltapm);
804 pr_info("APIC delta adjusted to PM-Timer: "
805 "%lu (%ld)\n", (unsigned long)res, *delta);
806 *delta = (long)res;
807
808 /* Correct the tsc counter value */
809 if (boot_cpu_has(X86_FEATURE_TSC)) {
810 res = (((u64)(*deltatsc)) * pm_100ms);
811 do_div(res, deltapm);
812 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
813 "PM-Timer: %lu (%ld)\n",
814 (unsigned long)res, *deltatsc);
815 *deltatsc = (long)res;
816 }
817
818 return 0;
819}
820
/*
 * Fill in lapic_clockevent's mult/shift and min/max deltas from
 * lapic_timer_period. Returns -1 when the period is not yet known
 * (i.e. calibration still required), 0 otherwise.
 */
static int __init lapic_init_clockevent(void)
{
	if (!lapic_timer_period)
		return -1;

	/* Calculate the scaled math multiplication factor */
	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
				       TICK_NSEC, lapic_clockevent.shift);
	lapic_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
	lapic_clockevent.min_delta_ns =
		clockevent_delta2ns(0xF, &lapic_clockevent);
	lapic_clockevent.min_delta_ticks = 0xF;

	return 0;
}
838
/*
 * Decide whether the PIT must be initialized: true whenever neither
 * the TSC nor the local APIC timer can cover calibration and
 * timekeeping on their own.
 */
bool __init apic_needs_pit(void)
{
	/*
	 * If the frequencies are not known, PIT is required for both TSC
	 * and apic timer calibration.
	 */
	if (!tsc_khz || !cpu_khz)
		return true;

	/* Is there an APIC at all or is it disabled? */
	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
		return true;

	/*
	 * If interrupt delivery mode is legacy PIC or virtual wire without
	 * configuration, the local APIC timer wont be set up. Make sure
	 * that the PIT is initialized.
	 */
	if (apic_intr_mode == APIC_PIC ||
	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
		return true;

	/* Virt guests may lack ARAT, but still have DEADLINE */
	if (!boot_cpu_has(X86_FEATURE_ARAT))
		return true;

	/* Deadline timer is based on TSC so no further PIT action required */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;

	/* APIC timer disabled? */
	if (disable_apic_timer)
		return true;
	/*
	 * The APIC timer frequency is known already, no PIT calibration
	 * required. If unknown, let the PIT be initialized.
	 */
	return lapic_timer_period == 0;
}
878
/*
 * Calibrate the local APIC timer: let it count down from the maximum
 * over a polled ~100ms window (LAPIC_CAL_LOOPS ticks), cross-check
 * against the ACPI PM timer when available, and otherwise verify the
 * result against jiffies. Returns 0 on success, -1 when the timer
 * must stay disabled.
 */
static int __init calibrate_APIC_clock(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
	u64 tsc_perj = 0, tsc_start = 0;
	unsigned long jif_start;
	unsigned long deltaj;
	long delta, deltatsc;
	int pm_referenced = 0;

	/* Deadline mode is TSC based; no bus clock calibration needed */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return 0;

	/*
	 * Check if lapic timer has already been calibrated by platform
	 * specific routine, such as tsc calibration code. If so just fill
	 * in the clockevent structure and return.
	 */
	if (!lapic_init_clockevent()) {
		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
			    lapic_timer_period);
		/*
		 * Direct calibration methods must have an always running
		 * local APIC timer, no need for broadcast timer.
		 */
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
		return 0;
	}

	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
		    "calibrating APIC timer ...\n");

	/*
	 * There are platforms w/o global clockevent devices. Instead of
	 * making the calibration conditional on that, use a polling based
	 * approach everywhere.
	 */
	local_irq_disable();

	/*
	 * Setup the APIC counter to maximum. There is no way the lapic
	 * can underflow in the 100ms detection time frame
	 */
	__setup_APIC_LVTT(0xffffffff, 0, 0);

	/*
	 * Methods to terminate the calibration loop:
	 *  1) Global clockevent if available (jiffies)
	 *  2) TSC if available and frequency is known
	 */
	jif_start = READ_ONCE(jiffies);

	if (tsc_khz) {
		tsc_start = rdtsc();
		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
	}

	/*
	 * Enable interrupts so the tick can fire, if a global
	 * clockevent device is available
	 */
	local_irq_enable();

	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
		/* Wait for a tick to elapse */
		while (1) {
			if (tsc_khz) {
				u64 tsc_now = rdtsc();
				if ((tsc_now - tsc_start) >= tsc_perj) {
					tsc_start += tsc_perj;
					break;
				}
			} else {
				unsigned long jif_now = READ_ONCE(jiffies);

				if (time_after(jif_now, jif_start)) {
					jif_start = jif_now;
					break;
				}
			}
			cpu_relax();
		}

		/* Invoke the calibration routine */
		local_irq_disable();
		lapic_cal_handler(NULL);
		local_irq_enable();
	}

	local_irq_disable();

	/* Build delta t1-t2 as apic timer counts down */
	delta = lapic_cal_t1 - lapic_cal_t2;
	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);

	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);

	/* we trust the PM based calibration if possible */
	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
					&delta, &deltatsc);

	/* Timer ticks per jiffy, undoing the divide-by-16 programming */
	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
	lapic_init_clockevent();

	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
		    lapic_timer_period);

	if (boot_cpu_has(X86_FEATURE_TSC)) {
		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
			    "%ld.%04ld MHz.\n",
			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
	}

	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
		    "%u.%04u MHz.\n",
		    lapic_timer_period / (1000000 / HZ),
		    lapic_timer_period % (1000000 / HZ));

	/*
	 * Do a sanity check on the APIC calibration result
	 */
	if (lapic_timer_period < (1000000 / HZ)) {
		local_irq_enable();
		pr_warning("APIC frequency too slow, disabling apic timer\n");
		return -1;
	}

	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;

	/*
	 * PM timer calibration failed or not turned on so lets try APIC
	 * timer based calibration, if a global clockevent device is
	 * available.
	 */
	if (!pm_referenced && global_clock_event) {
		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");

		/*
		 * Setup the apic timer manually
		 */
		levt->event_handler = lapic_cal_handler;
		lapic_timer_set_periodic(levt);
		lapic_cal_loops = -1;

		/* Let the interrupts run */
		local_irq_enable();

		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
			cpu_relax();

		/* Stop the lapic timer */
		local_irq_disable();
		lapic_timer_shutdown(levt);

		/* Jiffies delta */
		deltaj = lapic_cal_j2 - lapic_cal_j1;
		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);

		/* Check, if the jiffies result is consistent */
		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
		else
			levt->features |= CLOCK_EVT_FEAT_DUMMY;
	}
	local_irq_enable();

	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
		pr_warning("APIC timer disabled due to verification failure\n");
		return -1;
	}

	return 0;
}
1054
1055/*
1056 * Setup the boot APIC
1057 *
1058 * Calibrate and verify the result.
1059 */
void __init setup_boot_APIC_clock(void)
{
	/*
	 * The local apic timer can be disabled via the kernel
	 * commandline or from the CPU detection code. Register the lapic
	 * timer as a dummy clock event source on SMP systems, so the
	 * broadcast mechanism is used. On UP systems simply ignore it.
	 */
	if (disable_apic_timer) {
		pr_info("Disabling APIC timer\n");
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1) {
			lapic_clockevent.mult = 1;
			setup_APIC_timer();
		}
		return;
	}

	/* Calibration failure: register as dummy (broadcast) on SMP only */
	if (calibrate_APIC_clock()) {
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1)
			setup_APIC_timer();
		return;
	}

	/*
	 * If nmi_watchdog is set to IO_APIC, we need the
	 * PIT/HPET going. Otherwise register lapic as a dummy
	 * device.
	 */
	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;

	/* Setup the lapic or request the broadcast */
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1096
/* Secondary CPU bringup: register this CPU's local APIC timer clockevent */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1102
1103/*
1104 * The guts of the apic timer interrupt
1105 */
/*
 * The guts of the apic timer interrupt: account the interrupt and
 * dispatch to this CPU's registered clockevent handler.
 */
static void local_apic_timer_interrupt(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);

	/*
	 * Normally we should not be here till LAPIC has been initialized but
	 * in some cases like kdump, its possible that there is a pending LAPIC
	 * timer interrupt from previous kernel's context and is delivered in
	 * new kernel the moment interrupts are enabled.
	 *
	 * Interrupts are enabled early and LAPIC is setup much later, hence
	 * its possible that when we get here evt->event_handler is NULL.
	 * Check for event_handler being NULL and discard the interrupt as
	 * spurious.
	 */
	if (!evt->event_handler) {
		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
			   smp_processor_id());
		/* Switch it off */
		lapic_timer_shutdown(evt);
		return;
	}

	/*
	 * the NMI deadlock-detector uses this.
	 */
	inc_irq_stat(apic_timer_irqs);

	evt->event_handler(evt);
}
1136
1137/*
1138 * Local APIC timer interrupt. This is the most natural way for doing
1139 * local interrupts, but local timer interrupts can be emulated by
1140 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1141 *
1142 * [ if a single-CPU system runs an SMP kernel then we call the local
1143 * interrupt as well. Thus we cannot inline the local irq ... ]
1144 */
1145__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
1146{
1147 struct pt_regs *old_regs = set_irq_regs(regs);
1148
1149 /*
1150 * NOTE! We'd better ACK the irq immediately,
1151 * because timer handling can be slow.
1152 *
1153 * update_process_times() expects us to have done irq_enter().
1154 * Besides, if we don't timer interrupts ignore the global
1155 * interrupt lock, which is the WrongThing (tm) to do.
1156 */
1157 entering_ack_irq();
1158 trace_local_timer_entry(LOCAL_TIMER_VECTOR);
1159 local_apic_timer_interrupt();
1160 trace_local_timer_exit(LOCAL_TIMER_VECTOR);
1161 exiting_irq();
1162
1163 set_irq_regs(old_regs);
1164}
1165
/* Changing the profiling interrupt frequency is not supported on x86. */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
1170
1171/*
1172 * Local APIC start and shutdown
1173 */
1174
1175/**
1176 * clear_local_APIC - shutdown the local APIC
1177 *
1178 * This is called, when a CPU is disabled and before rebooting, so the state of
1179 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
1180 * leftovers during boot.
1181 */
1182void clear_local_APIC(void)
1183{
1184 int maxlvt;
1185 u32 v;
1186
1187 /* APIC hasn't been mapped yet */
1188 if (!x2apic_mode && !apic_phys)
1189 return;
1190
1191 maxlvt = lapic_get_maxlvt();
1192 /*
1193 * Masking an LVT entry can trigger a local APIC error
1194 * if the vector is zero. Mask LVTERR first to prevent this.
1195 */
1196 if (maxlvt >= 3) {
1197 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
1198 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
1199 }
1200 /*
1201 * Careful: we have to set masks only first to deassert
1202 * any level-triggered sources.
1203 */
1204 v = apic_read(APIC_LVTT);
1205 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1206 v = apic_read(APIC_LVT0);
1207 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1208 v = apic_read(APIC_LVT1);
1209 apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
1210 if (maxlvt >= 4) {
1211 v = apic_read(APIC_LVTPC);
1212 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
1213 }
1214
1215 /* lets not touch this if we didn't frob it */
1216#ifdef CONFIG_X86_THERMAL_VECTOR
1217 if (maxlvt >= 5) {
1218 v = apic_read(APIC_LVTTHMR);
1219 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1220 }
1221#endif
1222#ifdef CONFIG_X86_MCE_INTEL
1223 if (maxlvt >= 6) {
1224 v = apic_read(APIC_LVTCMCI);
1225 if (!(v & APIC_LVT_MASKED))
1226 apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1227 }
1228#endif
1229
1230 /*
1231 * Clean APIC state for other OSs:
1232 */
1233 apic_write(APIC_LVTT, APIC_LVT_MASKED);
1234 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1235 apic_write(APIC_LVT1, APIC_LVT_MASKED);
1236 if (maxlvt >= 3)
1237 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1238 if (maxlvt >= 4)
1239 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1240
1241 /* Integrated APIC (!82489DX) ? */
1242 if (lapic_is_integrated()) {
1243 if (maxlvt > 3)
1244 /* Clear ESR due to Pentium errata 3AP and 11AP */
1245 apic_write(APIC_ESR, 0);
1246 apic_read(APIC_ESR);
1247 }
1248}
1249
1250/**
1251 * apic_soft_disable - Clears and software disables the local APIC on hotplug
1252 *
1253 * Contrary to disable_local_APIC() this does not touch the enable bit in
1254 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1255 * bus would require a hardware reset as the APIC would lose track of bus
1256 * arbitration. On systems with FSB delivery APICBASE could be disabled,
1257 * but it has to be guaranteed that no interrupt is sent to the APIC while
1258 * in that state and it's not clear from the SDM whether it still responds
1259 * to INIT/SIPI messages. Stay on the safe side and use software disable.
1260 */
1261void apic_soft_disable(void)
1262{
1263 u32 value;
1264
1265 clear_local_APIC();
1266
1267 /* Soft disable APIC (implies clearing of registers for 82489DX!). */
1268 value = apic_read(APIC_SPIV);
1269 value &= ~APIC_SPIV_APIC_ENABLED;
1270 apic_write(APIC_SPIV, value);
1271}
1272
1273/**
1274 * disable_local_APIC - clear and disable the local APIC
1275 */
1276void disable_local_APIC(void)
1277{
1278 /* APIC hasn't been mapped yet */
1279 if (!x2apic_mode && !apic_phys)
1280 return;
1281
1282 apic_soft_disable();
1283
1284#ifdef CONFIG_X86_32
1285 /*
1286 * When LAPIC was disabled by the BIOS and enabled by the kernel,
1287 * restore the disabled state.
1288 */
1289 if (enabled_via_apicbase) {
1290 unsigned int l, h;
1291
1292 rdmsr(MSR_IA32_APICBASE, l, h);
1293 l &= ~MSR_IA32_APICBASE_ENABLE;
1294 wrmsr(MSR_IA32_APICBASE, l, h);
1295 }
1296#endif
1297}
1298
1299/*
1300 * If Linux enabled the LAPIC against the BIOS default disable it down before
1301 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
1302 * not power-off. Additionally clear all LVT entries before disable_local_APIC
1303 * for the case where Linux didn't enable the LAPIC.
1304 */
1305void lapic_shutdown(void)
1306{
1307 unsigned long flags;
1308
1309 if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1310 return;
1311
1312 local_irq_save(flags);
1313
1314#ifdef CONFIG_X86_32
1315 if (!enabled_via_apicbase)
1316 clear_local_APIC();
1317 else
1318#endif
1319 disable_local_APIC();
1320
1321
1322 local_irq_restore(flags);
1323}
1324
1325/**
1326 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1327 */
1328void __init sync_Arb_IDs(void)
1329{
1330 /*
1331 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1332 * needed on AMD.
1333 */
1334 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1335 return;
1336
1337 /*
1338 * Wait for idle.
1339 */
1340 apic_wait_icr_idle();
1341
1342 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1343 apic_write(APIC_ICR, APIC_DEST_ALLINC |
1344 APIC_INT_LEVELTRIG | APIC_DM_INIT);
1345}
1346
1347enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1348
/*
 * Determine the interrupt delivery mode to use on the BSP, based on the
 * kernel command line, CPU feature bits and firmware (MP/ACPI) tables.
 * Returns one of the APIC_* enum apic_intr_mode_id values.
 */
static int __init __apic_intr_mode_select(void)
{
	/* Check kernel option */
	if (disable_apic) {
		pr_info("APIC disabled via kernel command line\n");
		return APIC_PIC;
	}

	/* Check BIOS */
#ifdef CONFIG_X86_64
	/* On 64-bit, the APIC must be integrated, Check local APIC only */
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		disable_apic = 1;
		pr_info("APIC disabled by BIOS\n");
		return APIC_PIC;
	}
#else
	/* On 32-bit, the APIC may be integrated APIC or 82489DX */

	/* Neither 82489DX nor integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
		disable_apic = 1;
		return APIC_PIC;
	}

	/* If the BIOS pretends there is an integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) &&
		APIC_INTEGRATED(boot_cpu_apic_version)) {
		disable_apic = 1;
		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
				       boot_cpu_physical_apicid);
		return APIC_PIC;
	}
#endif

	/* Check MP table or ACPI MADT configuration */
	if (!smp_found_config) {
		disable_ioapic_support();
		if (!acpi_lapic) {
			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
			return APIC_VIRTUAL_WIRE_NO_CONFIG;
		}
		return APIC_VIRTUAL_WIRE;
	}

#ifdef CONFIG_SMP
	/* If SMP should be disabled, then really disable it! */
	if (!setup_max_cpus) {
		pr_info("APIC: SMP mode deactivated\n");
		return APIC_SYMMETRIC_IO_NO_ROUTING;
	}

	/* Sanity check: firmware's boot APIC ID must match the hardware. */
	if (read_apic_id() != boot_cpu_physical_apicid) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      read_apic_id(), boot_cpu_physical_apicid);
		/* Or can we switch back to PIC here? */
	}
#endif

	return APIC_SYMMETRIC_IO;
}
1410
/* Select the interrupt delivery mode for the BSP */
void __init apic_intr_mode_select(void)
{
	apic_intr_mode = __apic_intr_mode_select();
}
1416
1417/*
1418 * An initial setup of the virtual wire mode.
1419 */
1420void __init init_bsp_APIC(void)
1421{
1422 unsigned int value;
1423
1424 /*
1425 * Don't do the setup now if we have a SMP BIOS as the
1426 * through-I/O-APIC virtual wire mode might be active.
1427 */
1428 if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1429 return;
1430
1431 /*
1432 * Do not trust the local APIC being empty at bootup.
1433 */
1434 clear_local_APIC();
1435
1436 /*
1437 * Enable APIC.
1438 */
1439 value = apic_read(APIC_SPIV);
1440 value &= ~APIC_VECTOR_MASK;
1441 value |= APIC_SPIV_APIC_ENABLED;
1442
1443#ifdef CONFIG_X86_32
1444 /* This bit is reserved on P4/Xeon and should be cleared */
1445 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1446 (boot_cpu_data.x86 == 15))
1447 value &= ~APIC_SPIV_FOCUS_DISABLED;
1448 else
1449#endif
1450 value |= APIC_SPIV_FOCUS_DISABLED;
1451 value |= SPURIOUS_APIC_VECTOR;
1452 apic_write(APIC_SPIV, value);
1453
1454 /*
1455 * Set up the virtual wire mode.
1456 */
1457 apic_write(APIC_LVT0, APIC_DM_EXTINT);
1458 value = APIC_DM_NMI;
1459 if (!lapic_is_integrated()) /* 82489DX */
1460 value |= APIC_LVT_LEVEL_TRIGGER;
1461 if (apic_extnmi == APIC_EXTNMI_NONE)
1462 value |= APIC_LVT_MASKED;
1463 apic_write(APIC_LVT1, value);
1464}
1465
1466static void __init apic_bsp_setup(bool upmode);
1467
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);

	switch (apic_intr_mode) {
	case APIC_PIC:
		pr_info("APIC: Keep in PIC mode(8259)\n");
		return;
	case APIC_VIRTUAL_WIRE:
		pr_info("APIC: Switch to virtual wire mode setup\n");
		default_setup_apic_routing();
		break;
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
		/* Force UP setup: no MP/MADT configuration was found. */
		upmode = true;
		default_setup_apic_routing();
		break;
	case APIC_SYMMETRIC_IO:
		pr_info("APIC: Switch to symmetric I/O mode setup\n");
		default_setup_apic_routing();
		break;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
		break;
	}

	apic_bsp_setup(upmode);
}
1497
/* Enable APIC error reporting via the LVTERR entry, if supported. */
static void lapic_setup_esr(void)
{
	unsigned int oldvalue, value, maxlvt;

	if (!lapic_is_integrated()) {
		pr_info("No ESR for 82489DX.\n");
		return;
	}

	if (apic->disable_esr) {
		/*
		 * Something untraceable is creating bad interrupts on
		 * secondary quads ... for the moment, just leave the
		 * ESR disabled - we can't do anything useful with the
		 * errors anyway - mbligh
		 */
		pr_info("Leaving ESR disabled.\n");
		return;
	}

	maxlvt = lapic_get_maxlvt();
	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
	oldvalue = apic_read(APIC_ESR);

	/* enables sending errors */
	value = ERROR_APIC_VECTOR;
	apic_write(APIC_LVTERR, value);

	/*
	 * spec says clear errors after enabling vector.
	 */
	if (maxlvt > 3)
		apic_write(APIC_ESR, 0);
	value = apic_read(APIC_ESR);
	if (value != oldvalue)
		apic_printk(APIC_VERBOSE, "ESR value before enabling "
			"vector: 0x%08x  after: 0x%08x\n",
			oldvalue, value);
}
1538
/* IRR/ISR snapshot: APIC_ISR_NR 32-bit registers, viewable as a bitmap. */
#define APIC_IR_REGS		APIC_ISR_NR
#define APIC_IR_BITS		(APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)

union apic_ir {
	unsigned long	map[APIC_IR_MAPSIZE];	/* for bitmap helpers */
	u32		regs[APIC_IR_REGS];	/* for raw register reads */
};
1547
/*
 * Snapshot IRR and ISR into @irr/@isr and ACK any in-service interrupts.
 * Returns true if another round is needed (ISR bits were acked, or IRR
 * still has pending bits), false when both are clean.
 */
static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
{
	int i, bit;

	/* Read the IRRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);

	/* Read the ISRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);

	/*
	 * If the ISR map is not empty. ACK the APIC and run another round
	 * to verify whether a pending IRR has been unblocked and turned
	 * into a ISR.
	 */
	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
		/*
		 * There can be multiple ISR bits set when a high priority
		 * interrupt preempted a lower priority one. Issue an ACK
		 * per set bit.
		 */
		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
			ack_APIC_irq();
		return true;
	}

	return !bitmap_empty(irr->map, APIC_IR_BITS);
}
1578
1579/*
1580 * After a crash, we no longer service the interrupts and a pending
1581 * interrupt from previous kernel might still have ISR bit set.
1582 *
1583 * Most probably by now the CPU has serviced that pending interrupt and it
1584 * might not have done the ack_APIC_irq() because it thought, interrupt
1585 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1586 * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
1587 * a vector might get locked. It was noticed for timer irq (vector
1588 * 0x31). Issue an extra EOI to clear ISR.
1589 *
1590 * If there are pending IRR bits they turn into ISR bits after a higher
1591 * priority ISR bit has been acked.
1592 */
1593static void apic_pending_intr_clear(void)
1594{
1595 union apic_ir irr, isr;
1596 unsigned int i;
1597
1598 /* 512 loops are way oversized and give the APIC a chance to obey. */
1599 for (i = 0; i < 512; i++) {
1600 if (!apic_check_and_ack(&irr, &isr))
1601 return;
1602 }
1603 /* Dump the IRR/ISR content if that failed */
1604 pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1605}
1606
1607/**
1608 * setup_local_APIC - setup the local APIC
1609 *
1610 * Used to setup local APIC while initializing BSP or bringing up APs.
1611 * Always called with preemption disabled.
1612 */
1613static void setup_local_APIC(void)
1614{
1615 int cpu = smp_processor_id();
1616 unsigned int value;
1617
1618 if (disable_apic) {
1619 disable_ioapic_support();
1620 return;
1621 }
1622
1623 /*
1624 * If this comes from kexec/kcrash the APIC might be enabled in
1625 * SPIV. Soft disable it before doing further initialization.
1626 */
1627 value = apic_read(APIC_SPIV);
1628 value &= ~APIC_SPIV_APIC_ENABLED;
1629 apic_write(APIC_SPIV, value);
1630
1631#ifdef CONFIG_X86_32
1632 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1633 if (lapic_is_integrated() && apic->disable_esr) {
1634 apic_write(APIC_ESR, 0);
1635 apic_write(APIC_ESR, 0);
1636 apic_write(APIC_ESR, 0);
1637 apic_write(APIC_ESR, 0);
1638 }
1639#endif
1640 /*
1641 * Double-check whether this APIC is really registered.
1642 * This is meaningless in clustered apic mode, so we skip it.
1643 */
1644 BUG_ON(!apic->apic_id_registered());
1645
1646 /*
1647 * Intel recommends to set DFR, LDR and TPR before enabling
1648 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
1649 * document number 292116). So here it goes...
1650 */
1651 apic->init_apic_ldr();
1652
1653#ifdef CONFIG_X86_32
1654 if (apic->dest_logical) {
1655 int logical_apicid, ldr_apicid;
1656
1657 /*
1658 * APIC LDR is initialized. If logical_apicid mapping was
1659 * initialized during get_smp_config(), make sure it matches
1660 * the actual value.
1661 */
1662 logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1663 ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1664 if (logical_apicid != BAD_APICID)
1665 WARN_ON(logical_apicid != ldr_apicid);
1666 /* Always use the value from LDR. */
1667 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
1668 }
1669#endif
1670
1671 /*
1672 * Set Task Priority to 'accept all except vectors 0-31'. An APIC
1673 * vector in the 16-31 range could be delivered if TPR == 0, but we
1674 * would think it's an exception and terrible things will happen. We
1675 * never change this later on.
1676 */
1677 value = apic_read(APIC_TASKPRI);
1678 value &= ~APIC_TPRI_MASK;
1679 value |= 0x10;
1680 apic_write(APIC_TASKPRI, value);
1681
1682 /* Clear eventually stale ISR/IRR bits */
1683 apic_pending_intr_clear();
1684
1685 /*
1686 * Now that we are all set up, enable the APIC
1687 */
1688 value = apic_read(APIC_SPIV);
1689 value &= ~APIC_VECTOR_MASK;
1690 /*
1691 * Enable APIC
1692 */
1693 value |= APIC_SPIV_APIC_ENABLED;
1694
1695#ifdef CONFIG_X86_32
1696 /*
1697 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1698 * certain networking cards. If high frequency interrupts are
1699 * happening on a particular IOAPIC pin, plus the IOAPIC routing
1700 * entry is masked/unmasked at a high rate as well then sooner or
1701 * later IOAPIC line gets 'stuck', no more interrupts are received
1702 * from the device. If focus CPU is disabled then the hang goes
1703 * away, oh well :-(
1704 *
1705 * [ This bug can be reproduced easily with a level-triggered
1706 * PCI Ne2000 networking cards and PII/PIII processors, dual
1707 * BX chipset. ]
1708 */
1709 /*
1710 * Actually disabling the focus CPU check just makes the hang less
1711 * frequent as it makes the interrupt distributon model be more
1712 * like LRU than MRU (the short-term load is more even across CPUs).
1713 */
1714
1715 /*
1716 * - enable focus processor (bit==0)
1717 * - 64bit mode always use processor focus
1718 * so no need to set it
1719 */
1720 value &= ~APIC_SPIV_FOCUS_DISABLED;
1721#endif
1722
1723 /*
1724 * Set spurious IRQ vector
1725 */
1726 value |= SPURIOUS_APIC_VECTOR;
1727 apic_write(APIC_SPIV, value);
1728
1729 perf_events_lapic_init();
1730
1731 /*
1732 * Set up LVT0, LVT1:
1733 *
1734 * set up through-local-APIC on the boot CPU's LINT0. This is not
1735 * strictly necessary in pure symmetric-IO mode, but sometimes
1736 * we delegate interrupts to the 8259A.
1737 */
1738 /*
1739 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1740 */
1741 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1742 if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
1743 value = APIC_DM_EXTINT;
1744 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1745 } else {
1746 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1747 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1748 }
1749 apic_write(APIC_LVT0, value);
1750
1751 /*
1752 * Only the BSP sees the LINT1 NMI signal by default. This can be
1753 * modified by apic_extnmi= boot option.
1754 */
1755 if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
1756 apic_extnmi == APIC_EXTNMI_ALL)
1757 value = APIC_DM_NMI;
1758 else
1759 value = APIC_DM_NMI | APIC_LVT_MASKED;
1760
1761 /* Is 82489DX ? */
1762 if (!lapic_is_integrated())
1763 value |= APIC_LVT_LEVEL_TRIGGER;
1764 apic_write(APIC_LVT1, value);
1765
1766#ifdef CONFIG_X86_MCE_INTEL
1767 /* Recheck CMCI information after local APIC is up on CPU #0 */
1768 if (!cpu)
1769 cmci_recheck();
1770#endif
1771}
1772
/* Finish local APIC setup: enable error reporting and PM support. */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		unsigned int value;
		/* Disable the local apic timer */
		value = apic_read(APIC_LVTT);
		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
		apic_write(APIC_LVTT, value);
	}
#endif

	apic_pm_activate();
}
1789
1790/*
1791 * APIC setup function for application processors. Called from smpboot.c
1792 */
1793void apic_ap_setup(void)
1794{
1795 setup_local_APIC();
1796 end_local_APIC_setup();
1797}
1798
1799#ifdef CONFIG_X86_X2APIC
/* Non-zero when the APIC is accessed via MSRs (x2apic) instead of MMIO. */
int x2apic_mode;

enum {
	X2APIC_OFF,		/* supported, not (yet) enabled */
	X2APIC_ON,		/* enabled and in use */
	X2APIC_DISABLED,	/* disabled, must not be re-enabled */
};
static int x2apic_state;
1808
/* Switch the APIC from x2apic back to xapic mode via MSR_IA32_APICBASE. */
static void __x2apic_disable(void)
{
	u64 msr;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (!(msr & X2APIC_ENABLE))
		return;
	/* Disable xapic and x2apic first and then reenable xapic mode */
	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic disabled\n");
}
1824
1825static void __x2apic_enable(void)
1826{
1827 u64 msr;
1828
1829 rdmsrl(MSR_IA32_APICBASE, msr);
1830 if (msr & X2APIC_ENABLE)
1831 return;
1832 wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1833 printk_once(KERN_INFO "x2apic enabled\n");
1834}
1835
1836static int __init setup_nox2apic(char *str)
1837{
1838 if (x2apic_enabled()) {
1839 int apicid = native_apic_msr_read(APIC_ID);
1840
1841 if (apicid >= 255) {
1842 pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
1843 apicid);
1844 return 0;
1845 }
1846 pr_warning("x2apic already enabled.\n");
1847 __x2apic_disable();
1848 }
1849 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1850 x2apic_state = X2APIC_DISABLED;
1851 x2apic_mode = 0;
1852 return 0;
1853}
1854early_param("nox2apic", setup_nox2apic);
1855
/* Called from cpu_init() to enable x2apic on (secondary) cpus */
void x2apic_setup(void)
{
	/*
	 * If x2apic is not in ON state, disable it if already enabled
	 * from BIOS.
	 */
	if (x2apic_state != X2APIC_ON) {
		__x2apic_disable();
		return;
	}
	__x2apic_enable();
}
1869
/* Permanently disable x2apic mode and fall back to MMIO (xapic) access. */
static __init void x2apic_disable(void)
{
	u32 x2apic_id, state = x2apic_state;

	x2apic_mode = 0;
	x2apic_state = X2APIC_DISABLED;

	if (state != X2APIC_ON)
		return;

	/* APIC IDs >= 255 cannot be addressed in xapic mode - fatal. */
	x2apic_id = read_apic_id();
	if (x2apic_id >= 255)
		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);

	__x2apic_disable();
	/* Re-establish the MMIO mapping for xapic mode access. */
	register_lapic_address(mp_lapic_addr);
}
1887
/* Enable x2apic mode unless it is already on or permanently disabled. */
static __init void x2apic_enable(void)
{
	if (x2apic_state != X2APIC_OFF)
		return;

	x2apic_mode = 1;
	x2apic_state = X2APIC_ON;
	__x2apic_enable();
}
1897
/*
 * Enable x2apic if possible. @remap_mode is the result of interrupt
 * remapping setup (IRQ_REMAP_X2APIC_MODE when IR supports x2apic).
 */
static __init void try_to_enable_x2apic(int remap_mode)
{
	if (x2apic_state == X2APIC_DISABLED)
		return;

	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
		/*
		 * Using X2APIC without IR is not architecturally supported
		 * on bare metal but may be supported in guests.
		 */
		if (!x86_init.hyper.x2apic_available()) {
			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
			x2apic_disable();
			return;
		}

		/*
		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
		 * in physical mode, and CPUs with an APIC ID that cannot
		 * be addressed must not be brought online.
		 */
		x2apic_set_max_apicid(255);
		x2apic_phys = 1;
	}
	x2apic_enable();
}
1924
/* Early boot: record whether the BIOS left x2apic enabled or unsupported. */
void __init check_x2apic(void)
{
	if (x2apic_enabled()) {
		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
		x2apic_mode = 1;
		x2apic_state = X2APIC_ON;
	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
		x2apic_state = X2APIC_DISABLED;
	}
}
1935#else /* CONFIG_X86_X2APIC */
/* CONFIG_X86_X2APIC=n: refuse to run if the BIOS enabled x2apic anyway. */
static int __init validate_x2apic(void)
{
	if (!apic_is_x2apic_enabled())
		return 0;
	/*
	 * Checkme: Can we simply turn off x2apic here instead of panic?
	 */
	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
}
early_initcall(validate_x2apic);
1946
/* CONFIG_X86_X2APIC=n stubs: x2apic can never be enabled. */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
1949#endif /* !CONFIG_X86_X2APIC */
1950
/*
 * Prepare and enable interrupt remapping, then try to switch to x2apic.
 * IO-APIC and PIC state is saved/masked around the transition and
 * restored if remapping could not be enabled.
 */
void __init enable_IR_x2apic(void)
{
	unsigned long flags;
	int ret, ir_stat;

	if (skip_ioapic_setup) {
		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
		return;
	}

	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;

	ret = save_ioapic_entries();
	if (ret) {
		pr_info("Saving IO-APIC state failed: %d\n", ret);
		return;
	}

	local_irq_save(flags);
	legacy_pic->mask_all();
	mask_ioapic_entries();

	/* If irq_remapping_prepare() succeeded, try to enable it */
	if (ir_stat >= 0)
		ir_stat = irq_remapping_enable();
	/* ir_stat contains the remap mode or an error code */
	try_to_enable_x2apic(ir_stat);

	if (ir_stat < 0)
		restore_ioapic_entries();
	legacy_pic->restore_mask();
	local_irq_restore(flags);
}
1986
1987#ifdef CONFIG_X86_64
1988/*
1989 * Detect and enable local APICs on non-SMP boards.
1990 * Original code written by Keir Fraser.
1991 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1992 * not correctly set up (usually the APIC timer won't work etc.)
1993 */
1994static int __init detect_init_APIC(void)
1995{
1996 if (!boot_cpu_has(X86_FEATURE_APIC)) {
1997 pr_info("No local APIC present\n");
1998 return -1;
1999 }
2000
2001 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
2002 return 0;
2003}
2004#else
2005
2006static int __init apic_verify(void)
2007{
2008 u32 features, h, l;
2009
2010 /*
2011 * The APIC feature bit should now be enabled
2012 * in `cpuid'
2013 */
2014 features = cpuid_edx(1);
2015 if (!(features & (1 << X86_FEATURE_APIC))) {
2016 pr_warning("Could not enable APIC!\n");
2017 return -1;
2018 }
2019 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
2020 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
2021
2022 /* The BIOS may have set up the APIC at some other address */
2023 if (boot_cpu_data.x86 >= 6) {
2024 rdmsr(MSR_IA32_APICBASE, l, h);
2025 if (l & MSR_IA32_APICBASE_ENABLE)
2026 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
2027 }
2028
2029 pr_info("Found and enabled local APIC!\n");
2030 return 0;
2031}
2032
/*
 * Force-enable a BIOS-disabled local APIC at @addr via the APICBASE MSR
 * (Intel P6+/AMD K7+ only), then verify it. Returns 0 on success.
 */
int __init apic_force_enable(unsigned long addr)
{
	u32 h, l;

	if (disable_apic)
		return -1;

	/*
	 * Some BIOSes disable the local APIC in the APIC_BASE
	 * MSR. This can only be done in software for Intel P6 or later
	 * and AMD K7 (Model > 1) or later.
	 */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
			/* Remember for lapic_shutdown()/disable_local_APIC(). */
			enabled_via_apicbase = 1;
		}
	}
	return apic_verify();
}
2057
2058/*
2059 * Detect and initialize APIC
2060 */
2061static int __init detect_init_APIC(void)
2062{
2063 /* Disabled by kernel option? */
2064 if (disable_apic)
2065 return -1;
2066
2067 switch (boot_cpu_data.x86_vendor) {
2068 case X86_VENDOR_AMD:
2069 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2070 (boot_cpu_data.x86 >= 15))
2071 break;
2072 goto no_apic;
2073 case X86_VENDOR_HYGON:
2074 break;
2075 case X86_VENDOR_INTEL:
2076 if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
2077 (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
2078 break;
2079 goto no_apic;
2080 default:
2081 goto no_apic;
2082 }
2083
2084 if (!boot_cpu_has(X86_FEATURE_APIC)) {
2085 /*
2086 * Over-ride BIOS and try to enable the local APIC only if
2087 * "lapic" specified.
2088 */
2089 if (!force_enable_local_apic) {
2090 pr_info("Local APIC disabled by BIOS -- "
2091 "you can enable it with \"lapic\"\n");
2092 return -1;
2093 }
2094 if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2095 return -1;
2096 } else {
2097 if (apic_verify())
2098 return -1;
2099 }
2100
2101 apic_pm_activate();
2102
2103 return 0;
2104
2105no_apic:
2106 pr_info("No local APIC present or hardware disabled\n");
2107 return -1;
2108}
2109#endif
2110
2111/**
2112 * init_apic_mappings - initialize APIC mappings
2113 */
2114void __init init_apic_mappings(void)
2115{
2116 unsigned int new_apicid;
2117
2118 if (apic_validate_deadline_timer())
2119 pr_info("TSC deadline timer available\n");
2120
2121 if (x2apic_mode) {
2122 boot_cpu_physical_apicid = read_apic_id();
2123 return;
2124 }
2125
2126 /* If no local APIC can be found return early */
2127 if (!smp_found_config && detect_init_APIC()) {
2128 /* lets NOP'ify apic operations */
2129 pr_info("APIC: disable apic facility\n");
2130 apic_disable();
2131 } else {
2132 apic_phys = mp_lapic_addr;
2133
2134 /*
2135 * If the system has ACPI MADT tables or MP info, the LAPIC
2136 * address is already registered.
2137 */
2138 if (!acpi_lapic && !smp_found_config)
2139 register_lapic_address(apic_phys);
2140 }
2141
2142 /*
2143 * Fetch the APIC ID of the BSP in case we have a
2144 * default configuration (or the MP table is broken).
2145 */
2146 new_apicid = read_apic_id();
2147 if (boot_cpu_physical_apicid != new_apicid) {
2148 boot_cpu_physical_apicid = new_apicid;
2149 /*
2150 * yeah -- we lie about apic_version
2151 * in case if apic was disabled via boot option
2152 * but it's not a problem for SMP compiled kernel
2153 * since apic_intr_mode_select is prepared for such
2154 * a case and disable smp mode
2155 */
2156 boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2157 }
2158}
2159
/*
 * Record the local APIC physical base @address and, in xapic mode, map it
 * through the fixmap. Also latches the boot CPU's APIC ID/version on the
 * first call (boot_cpu_physical_apicid still at its -1U initializer).
 */
void __init register_lapic_address(unsigned long address)
{
	mp_lapic_addr = address;

	if (!x2apic_mode) {
		set_fixmap_nocache(FIX_APIC_BASE, address);
		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
			    APIC_BASE, address);
	}
	if (boot_cpu_physical_apicid == -1U) {
		boot_cpu_physical_apicid  = read_apic_id();
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}
2174
2175/*
2176 * Local APIC interrupts
2177 */
2178
2179/*
2180 * This interrupt should _never_ happen with our APIC/SMP architecture
2181 */
2182__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
2183{
2184 u8 vector = ~regs->orig_ax;
2185 u32 v;
2186
2187 entering_irq();
2188 trace_spurious_apic_entry(vector);
2189
2190 inc_irq_stat(irq_spurious_count);
2191
2192 /*
2193 * If this is a spurious interrupt then do not acknowledge
2194 */
2195 if (vector == SPURIOUS_APIC_VECTOR) {
2196 /* See SDM vol 3 */
2197 pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2198 smp_processor_id());
2199 goto out;
2200 }
2201
2202 /*
2203 * If it is a vectored one, verify it's set in the ISR. If set,
2204 * acknowledge it.
2205 */
2206 v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2207 if (v & (1 << (vector & 0x1f))) {
2208 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2209 vector, smp_processor_id());
2210 ack_APIC_irq();
2211 } else {
2212 pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2213 vector, smp_processor_id());
2214 }
2215out:
2216 trace_spurious_apic_exit(vector);
2217 exiting_irq();
2218}
2219
2220/*
2221 * This interrupt should never happen with our APIC/SMP architecture
2222 */
2223__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
2224{
2225 static const char * const error_interrupt_reason[] = {
2226 "Send CS error", /* APIC Error Bit 0 */
2227 "Receive CS error", /* APIC Error Bit 1 */
2228 "Send accept error", /* APIC Error Bit 2 */
2229 "Receive accept error", /* APIC Error Bit 3 */
2230 "Redirectable IPI", /* APIC Error Bit 4 */
2231 "Send illegal vector", /* APIC Error Bit 5 */
2232 "Received illegal vector", /* APIC Error Bit 6 */
2233 "Illegal register address", /* APIC Error Bit 7 */
2234 };
2235 u32 v, i = 0;
2236
2237 entering_irq();
2238 trace_error_apic_entry(ERROR_APIC_VECTOR);
2239
2240 /* First tickle the hardware, only then report what went on. -- REW */
2241 if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
2242 apic_write(APIC_ESR, 0);
2243 v = apic_read(APIC_ESR);
2244 ack_APIC_irq();
2245 atomic_inc(&irq_err_count);
2246
2247 apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
2248 smp_processor_id(), v);
2249
2250 v &= 0xff;
2251 while (v) {
2252 if (v & 0x1)
2253 apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
2254 i++;
2255 v >>= 1;
2256 }
2257
2258 apic_printk(APIC_DEBUG, KERN_CONT "\n");
2259
2260 trace_error_apic_exit(ERROR_APIC_VECTOR);
2261 exiting_irq();
2262}
2263
2264/**
2265 * connect_bsp_APIC - attach the APIC to the interrupt system
2266 */
2267static void __init connect_bsp_APIC(void)
2268{
2269#ifdef CONFIG_X86_32
2270 if (pic_mode) {
2271 /*
2272 * Do not trust the local APIC being empty at bootup.
2273 */
2274 clear_local_APIC();
2275 /*
2276 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
2277 * local APIC to INT and NMI lines.
2278 */
2279 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
2280 "enabling APIC mode.\n");
2281 imcr_pic_to_apic();
2282 }
2283#endif
2284}
2285
2286/**
2287 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2288 * @virt_wire_setup: indicates, whether virtual wire mode is selected
2289 *
2290 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2291 * APIC is disabled.
2292 */
2293void disconnect_bsp_APIC(int virt_wire_setup)
2294{
2295 unsigned int value;
2296
2297#ifdef CONFIG_X86_32
2298 if (pic_mode) {
2299 /*
2300 * Put the board back into PIC mode (has an effect only on
2301 * certain older boards). Note that APIC interrupts, including
2302 * IPIs, won't work beyond this point! The only exception are
2303 * INIT IPIs.
2304 */
2305 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
2306 "entering PIC mode.\n");
2307 imcr_apic_to_pic();
2308 return;
2309 }
2310#endif
2311
2312 /* Go back to Virtual Wire compatibility mode */
2313
2314 /* For the spurious interrupt use vector F, and enable it */
2315 value = apic_read(APIC_SPIV);
2316 value &= ~APIC_VECTOR_MASK;
2317 value |= APIC_SPIV_APIC_ENABLED;
2318 value |= 0xf;
2319 apic_write(APIC_SPIV, value);
2320
2321 if (!virt_wire_setup) {
2322 /*
2323 * For LVT0 make it edge triggered, active high,
2324 * external and enabled
2325 */
2326 value = apic_read(APIC_LVT0);
2327 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2328 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2329 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2330 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2331 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2332 apic_write(APIC_LVT0, value);
2333 } else {
2334 /* Disable LVT0 */
2335 apic_write(APIC_LVT0, APIC_LVT_MASKED);
2336 }
2337
2338 /*
2339 * For LVT1 make it edge triggered, active high,
2340 * nmi and enabled
2341 */
2342 value = apic_read(APIC_LVT1);
2343 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2344 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2345 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2346 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2347 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2348 apic_write(APIC_LVT1, value);
2349}
2350
2351/*
2352 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
2353 * contiguously, it equals to current allocated max logical CPU ID plus 1.
2354 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
2355 * so the maximum of nr_logical_cpuids is nr_cpu_ids.
2356 *
2357 * NOTE: Reserve 0 for BSP.
2358 */
2359static int nr_logical_cpuids = 1;
2360
2361/*
2362 * Used to store mapping between logical CPU IDs and APIC IDs.
2363 */
2364static int cpuid_to_apicid[] = {
2365 [0 ... NR_CPUS - 1] = -1,
2366};
2367
/*
 * Check whether logical CPU @cpu is mapped to physical APIC ID @phys_id.
 * Unallocated cpuid_to_apicid[] slots hold -1, which sign-extends to
 * ~0ULL in this comparison.
 */
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == cpuid_to_apicid[cpu];
}
2372
#ifdef CONFIG_SMP
/**
 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
 * @apicid: APIC ID to check
 *
 * Returns true when the SMT bits (the low bits covering smp_num_siblings)
 * of @apicid are all zero, i.e. the ID denotes a core's first thread.
 */
bool apic_id_is_primary_thread(unsigned int apicid)
{
	u32 mask;

	if (smp_num_siblings == 1)
		return true;
	/* Isolate the SMT bit(s) in the APICID and check for 0 */
	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
	return !(apicid & mask);
}
#endif
2389
2390/*
2391 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
2392 * and cpuid_to_apicid[] synchronized.
2393 */
2394static int allocate_logical_cpuid(int apicid)
2395{
2396 int i;
2397
2398 /*
2399 * cpuid <-> apicid mapping is persistent, so when a cpu is up,
2400 * check if the kernel has allocated a cpuid for it.
2401 */
2402 for (i = 0; i < nr_logical_cpuids; i++) {
2403 if (cpuid_to_apicid[i] == apicid)
2404 return i;
2405 }
2406
2407 /* Allocate a new cpuid. */
2408 if (nr_logical_cpuids >= nr_cpu_ids) {
2409 WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
2410 "Processor %d/0x%x and the rest are ignored.\n",
2411 nr_cpu_ids, nr_logical_cpuids, apicid);
2412 return -EINVAL;
2413 }
2414
2415 cpuid_to_apicid[nr_logical_cpuids] = apicid;
2416 return nr_logical_cpuids++;
2417}
2418
/*
 * Register a CPU with APIC ID @apicid and LAPIC version @version: allocate
 * a logical CPU number, record the apicid mappings and mark the CPU
 * possible/present. Returns the logical CPU number, or a negative error
 * code when the CPU is ignored (limit reached or explicitly disabled).
 */
int generic_processor_info(int apicid, int version)
{
	int cpu, max = nr_cpu_ids;
	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
				phys_cpu_present_map);

	/*
	 * boot_cpu_physical_apicid is designed to have the apicid
	 * returned by read_apic_id(), i.e, the apicid of the
	 * currently booting-up processor. However, on some platforms,
	 * it is temporarily modified by the apicid reported as BSP
	 * through MP table. Concretely:
	 *
	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
	 *
	 * This function is executed with the modified
	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
	 * parameter doesn't work to disable APs on kdump 2nd kernel.
	 *
	 * Since fixing handling of boot_cpu_physical_apicid requires
	 * another discussion and tests on each platform, we leave it
	 * for now and here we use read_apic_id() directly in this
	 * function, generic_processor_info().
	 */
	if (disabled_cpu_apicid != BAD_APICID &&
	    disabled_cpu_apicid != read_apic_id() &&
	    disabled_cpu_apicid == apicid) {
		int thiscpu = num_processors + disabled_cpus;

		pr_warning("APIC: Disabling requested cpu."
			   " Processor %d/0x%x ignored.\n",
			   thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/*
	 * If boot cpu has not been detected yet, then only allow upto
	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
	 */
	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
	    apicid != boot_cpu_physical_apicid) {
		int thiscpu = max + disabled_cpus - 1;

		pr_warning(
			"APIC: NR_CPUS/possible_cpus limit of %i almost"
			" reached. Keeping one slot for boot cpu."
			" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/* Hard limit: every logical CPU slot is already taken. */
	if (num_processors >= nr_cpu_ids) {
		int thiscpu = max + disabled_cpus;

		pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
			   "reached. Processor %d/0x%x ignored.\n",
			   max, thiscpu, apicid);

		disabled_cpus++;
		return -EINVAL;
	}

	if (apicid == boot_cpu_physical_apicid) {
		/*
		 * x86_bios_cpu_apicid is required to have processors listed
		 * in same order as logical cpu numbers. Hence the first
		 * entry is BSP, and so on.
		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
		 * for BSP.
		 */
		cpu = 0;

		/* Logical cpuid 0 is reserved for BSP. */
		cpuid_to_apicid[0] = apicid;
	} else {
		cpu = allocate_logical_cpuid(apicid);
		if (cpu < 0) {
			disabled_cpus++;
			return -EINVAL;
		}
	}

	/*
	 * Validate version
	 */
	if (version == 0x0) {
		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
			   cpu, apicid);
		version = 0x10;
	}

	if (version != boot_cpu_apic_version) {
		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
			boot_cpu_apic_version, cpu, version);
	}

	if (apicid > max_physical_apicid)
		max_physical_apicid = apicid;

#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
		apic->x86_32_early_logical_apicid(cpu);
#endif
	/* Commit: the CPU is now accounted as possible and present. */
	set_cpu_possible(cpu, true);
	physid_set(apicid, phys_cpu_present_map);
	set_cpu_present(cpu, true);
	num_processors++;

	return cpu;
}
2537
/*
 * Return the "hard" (physical) SMP processor id, i.e. the APIC ID of the
 * executing CPU as read from the local APIC.
 */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2542
2543/*
2544 * Override the generic EOI implementation with an optimized version.
2545 * Only called during early boot when only one CPU is active and with
2546 * interrupts disabled, so we know this does not race with actual APIC driver
2547 * use.
2548 */
2549void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2550{
2551 struct apic **drv;
2552
2553 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2554 /* Should happen once for each apic */
2555 WARN_ON((*drv)->eoi_write == eoi_write);
2556 (*drv)->native_eoi_write = (*drv)->eoi_write;
2557 (*drv)->eoi_write = eoi_write;
2558 }
2559}
2560
/*
 * Prepare the boot CPU's APIC state for UP operation: make sure the
 * hardware APIC ID, boot_cpu_physical_apicid and phys_cpu_present_map
 * agree with each other.
 */
static void __init apic_bsp_up_setup(void)
{
#ifdef CONFIG_X86_64
	/* Program the APIC ID register with the boot CPU's apicid. */
	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
#else
	/*
	 * Hack: In case of kdump, after a crash, kernel might be booting
	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
	 * might be zero if read from MP tables. Get it from LAPIC.
	 */
# ifdef CONFIG_CRASH_DUMP
	boot_cpu_physical_apicid = read_apic_id();
# endif
#endif
	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
}
2577
2578/**
2579 * apic_bsp_setup - Setup function for local apic and io-apic
2580 * @upmode: Force UP mode (for APIC_init_uniprocessor)
2581 */
2582static void __init apic_bsp_setup(bool upmode)
2583{
2584 connect_bsp_APIC();
2585 if (upmode)
2586 apic_bsp_up_setup();
2587 setup_local_APIC();
2588
2589 enable_IO_APIC();
2590 end_local_APIC_setup();
2591 irq_remap_enable_fault_handling();
2592 setup_IO_APIC();
2593 lapic_update_legacy_vectors();
2594}
2595
#ifdef CONFIG_UP_LATE_INIT
void __init up_late_init(void)
{
	/* Nothing to do when the interrupt mode is plain PIC. */
	if (apic_intr_mode != APIC_PIC)
		x86_init.timers.setup_percpu_clockev();
}
#endif
2606
2607/*
2608 * Power management
2609 */
2610#ifdef CONFIG_PM
2611
/*
 * Saved local APIC register state for suspend/resume; the fields mirror
 * the registers saved in lapic_suspend() and restored in lapic_resume().
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;
	unsigned int apic_cmci;
} apic_pm_state;
2635
/*
 * Syscore suspend callback: save the local APIC register state into
 * apic_pm_state, then disable the APIC and interrupt remapping with
 * interrupts off. No-op unless the kernel enabled the APIC itself.
 */
static int lapic_suspend(void)
{
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	/* LVT registers above maxlvt do not exist on this APIC. */
	maxlvt = lapic_get_maxlvt();

	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
#endif

	local_irq_save(flags);
	disable_local_APIC();

	irq_remapping_disable();

	local_irq_restore(flags);
	return 0;
}
2676
/*
 * Syscore resume callback: re-enable the APIC (x2APIC or MMIO mode),
 * restore the register state saved by lapic_suspend() and re-enable
 * interrupt remapping. Runs with interrupts disabled.
 */
static void lapic_resume(void)
{
	unsigned int l, h;
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return;

	local_irq_save(flags);

	/*
	 * IO-APIC and PIC have their own resume routines.
	 * We just mask them here to make sure the interrupt
	 * subsystem is completely quiet while we enable x2apic
	 * and interrupt-remapping.
	 */
	mask_ioapic_entries();
	legacy_pic->mask_all();

	if (x2apic_mode) {
		__x2apic_enable();
	} else {
		/*
		 * Make sure the APICBASE points to the right address
		 *
		 * FIXME! This will be wrong if we ever support suspend on
		 * SMP! We'll need to do this as part of the CPU restore!
		 */
		if (boot_cpu_data.x86 >= 6) {
			rdmsr(MSR_IA32_APICBASE, l, h);
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
		}
	}

	maxlvt = lapic_get_maxlvt();
	/* Keep the error LVT masked while the registers are rewritten. */
	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
	apic_write(APIC_ID, apic_pm_state.apic_id);
	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
#endif
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
	/* Clear any stale errors before unmasking the error LVT. */
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);
	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	irq_remapping_reenable(x2apic_mode);

	local_irq_restore(flags);
}
2746
2747/*
2748 * This device has no shutdown method - fully functioning local APICs
2749 * are needed on every CPU up until machine_halt/restart/poweroff.
2750 */
2751
2752static struct syscore_ops lapic_syscore_ops = {
2753 .resume = lapic_resume,
2754 .suspend = lapic_suspend,
2755};
2756
/*
 * Mark the local APIC as kernel-enabled so that lapic_suspend() and
 * lapic_resume() actually save/restore its state.
 */
static void apic_pm_activate(void)
{
	apic_pm_state.active = 1;
}
2761
2762static int __init init_lapic_sysfs(void)
2763{
2764 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2765 if (boot_cpu_has(X86_FEATURE_APIC))
2766 register_syscore_ops(&lapic_syscore_ops);
2767
2768 return 0;
2769}
2770
2771/* local apic needs to resume before other devices access its registers. */
2772core_initcall(init_lapic_sysfs);
2773
#else /* CONFIG_PM */

/* Without power management there is no APIC state to activate. */
static void apic_pm_activate(void) { }

#endif /* CONFIG_PM */
2779
2780#ifdef CONFIG_X86_64
2781
static int multi_checked;	/* non-zero once the DMI table has been scanned */
static int multi;		/* non-zero on detected multi-chassis systems */
2784
2785static int set_multi(const struct dmi_system_id *d)
2786{
2787 if (multi)
2788 return 0;
2789 pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2790 multi = 1;
2791 return 0;
2792}
2793
/* DMI quirk table: systems treated as multi-chassis boxes. */
static const struct dmi_system_id multi_dmi_table[] = {
	{
		.callback = set_multi,
		.ident = "IBM System Summit2",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
		},
	},
	{}	/* terminating entry */
};
2805
2806static void dmi_check_multi(void)
2807{
2808 if (multi_checked)
2809 return;
2810
2811 dmi_check_system(multi_dmi_table);
2812 multi_checked = 1;
2813}
2814
2815/*
2816 * apic_is_clustered_box() -- Check if we can expect good TSC
2817 *
2818 * Thus far, the major user of this is IBM's Summit2 series:
2819 * Clustered boxes may have unsynced TSC problems if they are
2820 * multi-chassis.
2821 * Use DMI to check them
2822 */
2823int apic_is_clustered_box(void)
2824{
2825 dmi_check_multi();
2826 return multi;
2827}
2828#endif
2829
2830/*
2831 * APIC command line parameters
2832 */
2833static int __init setup_disableapic(char *arg)
2834{
2835 disable_apic = 1;
2836 setup_clear_cpu_cap(X86_FEATURE_APIC);
2837 return 0;
2838}
2839early_param("disableapic", setup_disableapic);
2840
/* "nolapic": same as disableapic, for compatibility */
static int __init setup_nolapic(char *arg)
{
	return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);
2847
/*
 * "lapic_timer_c2_ok": set local_apic_timer_c2_ok (per the flag name,
 * the APIC timer is trusted to keep running in the C2 power state).
 */
static int __init parse_lapic_timer_c2_ok(char *arg)
{
	local_apic_timer_c2_ok = 1;
	return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2854
/* "noapictimer": disable use of the local APIC timer. */
static int __init parse_disable_apic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("noapictimer", parse_disable_apic_timer);
2861
2862static int __init parse_nolapic_timer(char *arg)
2863{
2864 disable_apic_timer = 1;
2865 return 0;
2866}
2867early_param("nolapic_timer", parse_nolapic_timer);
2868
/*
 * "apic=<level>": select APIC debug verbosity ("debug" or "verbose").
 * On x86-64 a bare "apic" with no argument instead re-enables IO-APIC
 * setup; on 32-bit an unrecognised level is silently ignored.
 */
static int __init apic_set_verbosity(char *arg)
{
	if (!arg) {
#ifdef CONFIG_X86_64
		/* Bare "apic" on x86-64: undo any earlier IO-APIC skip. */
		skip_ioapic_setup = 0;
		return 0;
#endif
		return -EINVAL;
	}

	if (strcmp("debug", arg) == 0)
		apic_verbosity = APIC_DEBUG;
	else if (strcmp("verbose", arg) == 0)
		apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
	else {
		pr_warning("APIC Verbosity level %s not recognised"
			" use apic=verbose or apic=debug\n", arg);
		return -EINVAL;
	}
#endif

	return 0;
}
early_param("apic", apic_set_verbosity);
2894
2895static int __init lapic_insert_resource(void)
2896{
2897 if (!apic_phys)
2898 return -1;
2899
2900 /* Put local APIC into the resource map. */
2901 lapic_resource.start = apic_phys;
2902 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2903 insert_resource(&iomem_resource, &lapic_resource);
2904
2905 return 0;
2906}
2907
2908/*
2909 * need call insert after e820__reserve_resources()
2910 * that is using request_resource
2911 */
2912late_initcall(lapic_insert_resource);
2913
2914static int __init apic_set_disabled_cpu_apicid(char *arg)
2915{
2916 if (!arg || !get_option(&arg, &disabled_cpu_apicid))
2917 return -EINVAL;
2918
2919 return 0;
2920}
2921early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2922
/*
 * "apic_extnmi={all|none|bsp}": select which CPUs get external NMI
 * delivery. Note: strncmp() compares only the prefix, so an argument
 * such as "allfoo" is accepted as "all".
 */
static int __init apic_set_extnmi(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (!strncmp("all", arg, 3))
		apic_extnmi = APIC_EXTNMI_ALL;
	else if (!strncmp("none", arg, 4))
		apic_extnmi = APIC_EXTNMI_NONE;
	else if (!strncmp("bsp", arg, 3))
		apic_extnmi = APIC_EXTNMI_BSP;
	else {
		pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
		return -EINVAL;
	}

	return 0;
}
early_param("apic_extnmi", apic_set_extnmi);