1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * intel_idle.c - native hardware idle loop for modern Intel processors
4 *
5 * Copyright (c) 2013, Intel Corporation.
6 * Len Brown <len.brown@intel.com>
7 */
8
9/*
10 * intel_idle is a cpuidle driver that loads on specific Intel processors
11 * in lieu of the legacy ACPI processor_idle driver. The intent is to
12 * make Linux more efficient on these processors, as intel_idle knows
13 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14 */
15
16/*
17 * Design Assumptions
18 *
19 * All CPUs have the same idle states as the boot CPU
20 *
21 * The chipset BM_STS (bus master status) bit is a NOP
22 * for preventing entry into deep C-states
23 */
24
25/*
26 * Known limitations
27 *
28 * The driver currently initializes for_each_online_cpu() upon modprobe.
29 * It is unaware of subsequent processors hot-added to the system.
30 * This means that if you boot with maxcpus=n and later online
31 * processors above n, those processors will use C1 only.
32 *
33 * ACPI has a .suspend hack to turn off deep C-states during suspend
34 * to avoid complications with the lapic timer workaround.
35 * Have not seen issues with suspend, but may need same workaround here.
36 *
37 */
38
39/* un-comment DEBUG to enable pr_debug() statements */
40/* #define DEBUG */
41
42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44#include <linux/kernel.h>
45#include <linux/cpuidle.h>
46#include <linux/tick.h>
47#include <trace/events/power.h>
48#include <linux/sched.h>
49#include <linux/sched/smt.h>
50#include <linux/notifier.h>
51#include <linux/cpu.h>
52#include <linux/moduleparam.h>
53#include <asm/cpu_device_id.h>
54#include <asm/intel-family.h>
55#include <asm/nospec-branch.h>
56#include <asm/mwait.h>
57#include <asm/msr.h>
58
59#define INTEL_IDLE_VERSION "0.4.1"
60
61static struct cpuidle_driver intel_idle_driver = {
62 .name = "intel_idle",
63 .owner = THIS_MODULE,
64};
65/* intel_idle.max_cstate=0 disables driver */
66static int max_cstate = CPUIDLE_STATE_MAX - 1;
67
68static unsigned int mwait_substates;
69
70#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
71/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
72static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
73
74struct idle_cpu {
75 struct cpuidle_state *state_table;
76
77 /*
78 * Hardware C-state auto-demotion may not always be optimal.
79 * Indicate which enable bits to clear here.
80 */
81 unsigned long auto_demotion_disable_flags;
82 bool byt_auto_demotion_disable_flag;
83 bool disable_promotion_to_c1e;
84};
85
86static const struct idle_cpu *icpu;
87static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
88static int intel_idle(struct cpuidle_device *dev,
89 struct cpuidle_driver *drv, int index);
90static int intel_idle_s2idle(struct cpuidle_device *dev,
91 struct cpuidle_driver *drv, int index);
92static struct cpuidle_state *cpuidle_state_table;
93
94/*
95 * Set this flag for states where the HW flushes the TLB for us
96 * and so we don't need cross-calls to keep it consistent.
97 * If this flag is set, SW flushes the TLB, so even if the
98 * HW doesn't do the flushing, this flag is safe to use.
99 */
100#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
101
102/*
103 * Disable IBRS across idle (when KERNEL_IBRS).  Note that this flag must
104 * not share a bit with CPUIDLE_FLAG_TLB_FLUSHED (0x10000, i.e. BIT(16)) above.
105 */
106#define CPUIDLE_FLAG_IBRS BIT(17)
107
108/*
109 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
110 * the C-state (top nibble) and sub-state (bottom nibble)
111 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
112 *
113 * We store the hint at the top of our "flags" for each state.
114 */
115#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
116#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
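/*
 * For example, the C6 entries below use MWAIT2flg(0x20): hint 0x20 (C-state
 * nibble 2, sub-state 0) lands in flags bits 31:24 and is recovered with
 * flg2MWAIT() at idle-entry time.
 */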
117
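/*
 * Enter an idle state with IBRS disabled: when SMT is active, clear
 * MSR_IA32_SPEC_CTRL around the MWAIT so the sibling thread is not slowed
 * by IBRS while this CPU sleeps, then restore the saved value on wakeup.
 */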
118static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
119 struct cpuidle_driver *drv, int index)
120{
121 bool smt_active = sched_smt_active();
122 u64 spec_ctrl = spec_ctrl_current();
123 int ret;
124
125 if (smt_active)
126 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
127
128 ret = intel_idle(dev, drv, index);
129
130 if (smt_active)
131 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
132
133 return ret;
134}
135
136/*
137 * States are indexed by the cstate number,
138 * which is also the index into the MWAIT hint array.
139 * Thus C0 is a dummy.
140 */
141static struct cpuidle_state nehalem_cstates[] = {
142 {
143 .name = "C1",
144 .desc = "MWAIT 0x00",
145 .flags = MWAIT2flg(0x00),
146 .exit_latency = 3,
147 .target_residency = 6,
148 .enter = &intel_idle,
149 .enter_s2idle = intel_idle_s2idle, },
150 {
151 .name = "C1E",
152 .desc = "MWAIT 0x01",
153 .flags = MWAIT2flg(0x01),
154 .exit_latency = 10,
155 .target_residency = 20,
156 .enter = &intel_idle,
157 .enter_s2idle = intel_idle_s2idle, },
158 {
159 .name = "C3",
160 .desc = "MWAIT 0x10",
161 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
162 .exit_latency = 20,
163 .target_residency = 80,
164 .enter = &intel_idle,
165 .enter_s2idle = intel_idle_s2idle, },
166 {
167 .name = "C6",
168 .desc = "MWAIT 0x20",
169 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
170 .exit_latency = 200,
171 .target_residency = 800,
172 .enter = &intel_idle,
173 .enter_s2idle = intel_idle_s2idle, },
174 {
175 .enter = NULL }
176};
177
178static struct cpuidle_state snb_cstates[] = {
179 {
180 .name = "C1",
181 .desc = "MWAIT 0x00",
182 .flags = MWAIT2flg(0x00),
183 .exit_latency = 2,
184 .target_residency = 2,
185 .enter = &intel_idle,
186 .enter_s2idle = intel_idle_s2idle, },
187 {
188 .name = "C1E",
189 .desc = "MWAIT 0x01",
190 .flags = MWAIT2flg(0x01),
191 .exit_latency = 10,
192 .target_residency = 20,
193 .enter = &intel_idle,
194 .enter_s2idle = intel_idle_s2idle, },
195 {
196 .name = "C3",
197 .desc = "MWAIT 0x10",
198 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
199 .exit_latency = 80,
200 .target_residency = 211,
201 .enter = &intel_idle,
202 .enter_s2idle = intel_idle_s2idle, },
203 {
204 .name = "C6",
205 .desc = "MWAIT 0x20",
206 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
207 .exit_latency = 104,
208 .target_residency = 345,
209 .enter = &intel_idle,
210 .enter_s2idle = intel_idle_s2idle, },
211 {
212 .name = "C7",
213 .desc = "MWAIT 0x30",
214 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
215 .exit_latency = 109,
216 .target_residency = 345,
217 .enter = &intel_idle,
218 .enter_s2idle = intel_idle_s2idle, },
219 {
220 .enter = NULL }
221};
222
223static struct cpuidle_state byt_cstates[] = {
224 {
225 .name = "C1",
226 .desc = "MWAIT 0x00",
227 .flags = MWAIT2flg(0x00),
228 .exit_latency = 1,
229 .target_residency = 1,
230 .enter = &intel_idle,
231 .enter_s2idle = intel_idle_s2idle, },
232 {
233 .name = "C6N",
234 .desc = "MWAIT 0x58",
235 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
236 .exit_latency = 300,
237 .target_residency = 275,
238 .enter = &intel_idle,
239 .enter_s2idle = intel_idle_s2idle, },
240 {
241 .name = "C6S",
242 .desc = "MWAIT 0x52",
243 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
244 .exit_latency = 500,
245 .target_residency = 560,
246 .enter = &intel_idle,
247 .enter_s2idle = intel_idle_s2idle, },
248 {
249 .name = "C7",
250 .desc = "MWAIT 0x60",
251 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
252 .exit_latency = 1200,
253 .target_residency = 4000,
254 .enter = &intel_idle,
255 .enter_s2idle = intel_idle_s2idle, },
256 {
257 .name = "C7S",
258 .desc = "MWAIT 0x64",
259 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
260 .exit_latency = 10000,
261 .target_residency = 20000,
262 .enter = &intel_idle,
263 .enter_s2idle = intel_idle_s2idle, },
264 {
265 .enter = NULL }
266};
267
268static struct cpuidle_state cht_cstates[] = {
269 {
270 .name = "C1",
271 .desc = "MWAIT 0x00",
272 .flags = MWAIT2flg(0x00),
273 .exit_latency = 1,
274 .target_residency = 1,
275 .enter = &intel_idle,
276 .enter_s2idle = intel_idle_s2idle, },
277 {
278 .name = "C6N",
279 .desc = "MWAIT 0x58",
280 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
281 .exit_latency = 80,
282 .target_residency = 275,
283 .enter = &intel_idle,
284 .enter_s2idle = intel_idle_s2idle, },
285 {
286 .name = "C6S",
287 .desc = "MWAIT 0x52",
288 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
289 .exit_latency = 200,
290 .target_residency = 560,
291 .enter = &intel_idle,
292 .enter_s2idle = intel_idle_s2idle, },
293 {
294 .name = "C7",
295 .desc = "MWAIT 0x60",
296 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
297 .exit_latency = 1200,
298 .target_residency = 4000,
299 .enter = &intel_idle,
300 .enter_s2idle = intel_idle_s2idle, },
301 {
302 .name = "C7S",
303 .desc = "MWAIT 0x64",
304 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
305 .exit_latency = 10000,
306 .target_residency = 20000,
307 .enter = &intel_idle,
308 .enter_s2idle = intel_idle_s2idle, },
309 {
310 .enter = NULL }
311};
312
313static struct cpuidle_state ivb_cstates[] = {
314 {
315 .name = "C1",
316 .desc = "MWAIT 0x00",
317 .flags = MWAIT2flg(0x00),
318 .exit_latency = 1,
319 .target_residency = 1,
320 .enter = &intel_idle,
321 .enter_s2idle = intel_idle_s2idle, },
322 {
323 .name = "C1E",
324 .desc = "MWAIT 0x01",
325 .flags = MWAIT2flg(0x01),
326 .exit_latency = 10,
327 .target_residency = 20,
328 .enter = &intel_idle,
329 .enter_s2idle = intel_idle_s2idle, },
330 {
331 .name = "C3",
332 .desc = "MWAIT 0x10",
333 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
334 .exit_latency = 59,
335 .target_residency = 156,
336 .enter = &intel_idle,
337 .enter_s2idle = intel_idle_s2idle, },
338 {
339 .name = "C6",
340 .desc = "MWAIT 0x20",
341 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
342 .exit_latency = 80,
343 .target_residency = 300,
344 .enter = &intel_idle,
345 .enter_s2idle = intel_idle_s2idle, },
346 {
347 .name = "C7",
348 .desc = "MWAIT 0x30",
349 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
350 .exit_latency = 87,
351 .target_residency = 300,
352 .enter = &intel_idle,
353 .enter_s2idle = intel_idle_s2idle, },
354 {
355 .enter = NULL }
356};
357
358static struct cpuidle_state ivt_cstates[] = {
359 {
360 .name = "C1",
361 .desc = "MWAIT 0x00",
362 .flags = MWAIT2flg(0x00),
363 .exit_latency = 1,
364 .target_residency = 1,
365 .enter = &intel_idle,
366 .enter_s2idle = intel_idle_s2idle, },
367 {
368 .name = "C1E",
369 .desc = "MWAIT 0x01",
370 .flags = MWAIT2flg(0x01),
371 .exit_latency = 10,
372 .target_residency = 80,
373 .enter = &intel_idle,
374 .enter_s2idle = intel_idle_s2idle, },
375 {
376 .name = "C3",
377 .desc = "MWAIT 0x10",
378 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
379 .exit_latency = 59,
380 .target_residency = 156,
381 .enter = &intel_idle,
382 .enter_s2idle = intel_idle_s2idle, },
383 {
384 .name = "C6",
385 .desc = "MWAIT 0x20",
386 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
387 .exit_latency = 82,
388 .target_residency = 300,
389 .enter = &intel_idle,
390 .enter_s2idle = intel_idle_s2idle, },
391 {
392 .enter = NULL }
393};
394
395static struct cpuidle_state ivt_cstates_4s[] = {
396 {
397 .name = "C1",
398 .desc = "MWAIT 0x00",
399 .flags = MWAIT2flg(0x00),
400 .exit_latency = 1,
401 .target_residency = 1,
402 .enter = &intel_idle,
403 .enter_s2idle = intel_idle_s2idle, },
404 {
405 .name = "C1E",
406 .desc = "MWAIT 0x01",
407 .flags = MWAIT2flg(0x01),
408 .exit_latency = 10,
409 .target_residency = 250,
410 .enter = &intel_idle,
411 .enter_s2idle = intel_idle_s2idle, },
412 {
413 .name = "C3",
414 .desc = "MWAIT 0x10",
415 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
416 .exit_latency = 59,
417 .target_residency = 300,
418 .enter = &intel_idle,
419 .enter_s2idle = intel_idle_s2idle, },
420 {
421 .name = "C6",
422 .desc = "MWAIT 0x20",
423 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
424 .exit_latency = 84,
425 .target_residency = 400,
426 .enter = &intel_idle,
427 .enter_s2idle = intel_idle_s2idle, },
428 {
429 .enter = NULL }
430};
431
432static struct cpuidle_state ivt_cstates_8s[] = {
433 {
434 .name = "C1",
435 .desc = "MWAIT 0x00",
436 .flags = MWAIT2flg(0x00),
437 .exit_latency = 1,
438 .target_residency = 1,
439 .enter = &intel_idle,
440 .enter_s2idle = intel_idle_s2idle, },
441 {
442 .name = "C1E",
443 .desc = "MWAIT 0x01",
444 .flags = MWAIT2flg(0x01),
445 .exit_latency = 10,
446 .target_residency = 500,
447 .enter = &intel_idle,
448 .enter_s2idle = intel_idle_s2idle, },
449 {
450 .name = "C3",
451 .desc = "MWAIT 0x10",
452 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
453 .exit_latency = 59,
454 .target_residency = 600,
455 .enter = &intel_idle,
456 .enter_s2idle = intel_idle_s2idle, },
457 {
458 .name = "C6",
459 .desc = "MWAIT 0x20",
460 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
461 .exit_latency = 88,
462 .target_residency = 700,
463 .enter = &intel_idle,
464 .enter_s2idle = intel_idle_s2idle, },
465 {
466 .enter = NULL }
467};
468
469static struct cpuidle_state hsw_cstates[] = {
470 {
471 .name = "C1",
472 .desc = "MWAIT 0x00",
473 .flags = MWAIT2flg(0x00),
474 .exit_latency = 2,
475 .target_residency = 2,
476 .enter = &intel_idle,
477 .enter_s2idle = intel_idle_s2idle, },
478 {
479 .name = "C1E",
480 .desc = "MWAIT 0x01",
481 .flags = MWAIT2flg(0x01),
482 .exit_latency = 10,
483 .target_residency = 20,
484 .enter = &intel_idle,
485 .enter_s2idle = intel_idle_s2idle, },
486 {
487 .name = "C3",
488 .desc = "MWAIT 0x10",
489 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
490 .exit_latency = 33,
491 .target_residency = 100,
492 .enter = &intel_idle,
493 .enter_s2idle = intel_idle_s2idle, },
494 {
495 .name = "C6",
496 .desc = "MWAIT 0x20",
497 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
498 .exit_latency = 133,
499 .target_residency = 400,
500 .enter = &intel_idle,
501 .enter_s2idle = intel_idle_s2idle, },
502 {
503 .name = "C7s",
504 .desc = "MWAIT 0x32",
505 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
506 .exit_latency = 166,
507 .target_residency = 500,
508 .enter = &intel_idle,
509 .enter_s2idle = intel_idle_s2idle, },
510 {
511 .name = "C8",
512 .desc = "MWAIT 0x40",
513 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
514 .exit_latency = 300,
515 .target_residency = 900,
516 .enter = &intel_idle,
517 .enter_s2idle = intel_idle_s2idle, },
518 {
519 .name = "C9",
520 .desc = "MWAIT 0x50",
521 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
522 .exit_latency = 600,
523 .target_residency = 1800,
524 .enter = &intel_idle,
525 .enter_s2idle = intel_idle_s2idle, },
526 {
527 .name = "C10",
528 .desc = "MWAIT 0x60",
529 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
530 .exit_latency = 2600,
531 .target_residency = 7700,
532 .enter = &intel_idle,
533 .enter_s2idle = intel_idle_s2idle, },
534 {
535 .enter = NULL }
536};
537static struct cpuidle_state bdw_cstates[] = {
538 {
539 .name = "C1",
540 .desc = "MWAIT 0x00",
541 .flags = MWAIT2flg(0x00),
542 .exit_latency = 2,
543 .target_residency = 2,
544 .enter = &intel_idle,
545 .enter_s2idle = intel_idle_s2idle, },
546 {
547 .name = "C1E",
548 .desc = "MWAIT 0x01",
549 .flags = MWAIT2flg(0x01),
550 .exit_latency = 10,
551 .target_residency = 20,
552 .enter = &intel_idle,
553 .enter_s2idle = intel_idle_s2idle, },
554 {
555 .name = "C3",
556 .desc = "MWAIT 0x10",
557 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
558 .exit_latency = 40,
559 .target_residency = 100,
560 .enter = &intel_idle,
561 .enter_s2idle = intel_idle_s2idle, },
562 {
563 .name = "C6",
564 .desc = "MWAIT 0x20",
565 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
566 .exit_latency = 133,
567 .target_residency = 400,
568 .enter = &intel_idle,
569 .enter_s2idle = intel_idle_s2idle, },
570 {
571 .name = "C7s",
572 .desc = "MWAIT 0x32",
573 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
574 .exit_latency = 166,
575 .target_residency = 500,
576 .enter = &intel_idle,
577 .enter_s2idle = intel_idle_s2idle, },
578 {
579 .name = "C8",
580 .desc = "MWAIT 0x40",
581 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
582 .exit_latency = 300,
583 .target_residency = 900,
584 .enter = &intel_idle,
585 .enter_s2idle = intel_idle_s2idle, },
586 {
587 .name = "C9",
588 .desc = "MWAIT 0x50",
589 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
590 .exit_latency = 600,
591 .target_residency = 1800,
592 .enter = &intel_idle,
593 .enter_s2idle = intel_idle_s2idle, },
594 {
595 .name = "C10",
596 .desc = "MWAIT 0x60",
597 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
598 .exit_latency = 2600,
599 .target_residency = 7700,
600 .enter = &intel_idle,
601 .enter_s2idle = intel_idle_s2idle, },
602 {
603 .enter = NULL }
604};
605
606static struct cpuidle_state skl_cstates[] = {
607 {
608 .name = "C1",
609 .desc = "MWAIT 0x00",
610 .flags = MWAIT2flg(0x00),
611 .exit_latency = 2,
612 .target_residency = 2,
613 .enter = &intel_idle,
614 .enter_s2idle = intel_idle_s2idle, },
615 {
616 .name = "C1E",
617 .desc = "MWAIT 0x01",
618 .flags = MWAIT2flg(0x01),
619 .exit_latency = 10,
620 .target_residency = 20,
621 .enter = &intel_idle,
622 .enter_s2idle = intel_idle_s2idle, },
623 {
624 .name = "C3",
625 .desc = "MWAIT 0x10",
626 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
627 .exit_latency = 70,
628 .target_residency = 100,
629 .enter = &intel_idle,
630 .enter_s2idle = intel_idle_s2idle, },
631 {
632 .name = "C6",
633 .desc = "MWAIT 0x20",
634 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
635 .exit_latency = 85,
636 .target_residency = 200,
637 .enter = &intel_idle,
638 .enter_s2idle = intel_idle_s2idle, },
639 {
640 .name = "C7s",
641 .desc = "MWAIT 0x33",
642 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
643 .exit_latency = 124,
644 .target_residency = 800,
645 .enter = &intel_idle,
646 .enter_s2idle = intel_idle_s2idle, },
647 {
648 .name = "C8",
649 .desc = "MWAIT 0x40",
650 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
651 .exit_latency = 200,
652 .target_residency = 800,
653 .enter = &intel_idle,
654 .enter_s2idle = intel_idle_s2idle, },
655 {
656 .name = "C9",
657 .desc = "MWAIT 0x50",
658 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
659 .exit_latency = 480,
660 .target_residency = 5000,
661 .enter = &intel_idle,
662 .enter_s2idle = intel_idle_s2idle, },
663 {
664 .name = "C10",
665 .desc = "MWAIT 0x60",
666 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
667 .exit_latency = 890,
668 .target_residency = 5000,
669 .enter = &intel_idle,
670 .enter_s2idle = intel_idle_s2idle, },
671 {
672 .enter = NULL }
673};
674
675static struct cpuidle_state skx_cstates[] = {
676 {
677 .name = "C1",
678 .desc = "MWAIT 0x00",
679 .flags = MWAIT2flg(0x00),
680 .exit_latency = 2,
681 .target_residency = 2,
682 .enter = &intel_idle,
683 .enter_s2idle = intel_idle_s2idle, },
684 {
685 .name = "C1E",
686 .desc = "MWAIT 0x01",
687 .flags = MWAIT2flg(0x01),
688 .exit_latency = 10,
689 .target_residency = 20,
690 .enter = &intel_idle,
691 .enter_s2idle = intel_idle_s2idle, },
692 {
693 .name = "C6",
694 .desc = "MWAIT 0x20",
695 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
696 .exit_latency = 133,
697 .target_residency = 600,
698 .enter = &intel_idle,
699 .enter_s2idle = intel_idle_s2idle, },
700 {
701 .enter = NULL }
702};
703
704static struct cpuidle_state atom_cstates[] = {
705 {
706 .name = "C1E",
707 .desc = "MWAIT 0x00",
708 .flags = MWAIT2flg(0x00),
709 .exit_latency = 10,
710 .target_residency = 20,
711 .enter = &intel_idle,
712 .enter_s2idle = intel_idle_s2idle, },
713 {
714 .name = "C2",
715 .desc = "MWAIT 0x10",
716 .flags = MWAIT2flg(0x10),
717 .exit_latency = 20,
718 .target_residency = 80,
719 .enter = &intel_idle,
720 .enter_s2idle = intel_idle_s2idle, },
721 {
722 .name = "C4",
723 .desc = "MWAIT 0x30",
724 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
725 .exit_latency = 100,
726 .target_residency = 400,
727 .enter = &intel_idle,
728 .enter_s2idle = intel_idle_s2idle, },
729 {
730 .name = "C6",
731 .desc = "MWAIT 0x52",
732 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
733 .exit_latency = 140,
734 .target_residency = 560,
735 .enter = &intel_idle,
736 .enter_s2idle = intel_idle_s2idle, },
737 {
738 .enter = NULL }
739};
740static struct cpuidle_state tangier_cstates[] = {
741 {
742 .name = "C1",
743 .desc = "MWAIT 0x00",
744 .flags = MWAIT2flg(0x00),
745 .exit_latency = 1,
746 .target_residency = 4,
747 .enter = &intel_idle,
748 .enter_s2idle = intel_idle_s2idle, },
749 {
750 .name = "C4",
751 .desc = "MWAIT 0x30",
752 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
753 .exit_latency = 100,
754 .target_residency = 400,
755 .enter = &intel_idle,
756 .enter_s2idle = intel_idle_s2idle, },
757 {
758 .name = "C6",
759 .desc = "MWAIT 0x52",
760 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
761 .exit_latency = 140,
762 .target_residency = 560,
763 .enter = &intel_idle,
764 .enter_s2idle = intel_idle_s2idle, },
765 {
766 .name = "C7",
767 .desc = "MWAIT 0x60",
768 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
769 .exit_latency = 1200,
770 .target_residency = 4000,
771 .enter = &intel_idle,
772 .enter_s2idle = intel_idle_s2idle, },
773 {
774 .name = "C9",
775 .desc = "MWAIT 0x64",
776 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
777 .exit_latency = 10000,
778 .target_residency = 20000,
779 .enter = &intel_idle,
780 .enter_s2idle = intel_idle_s2idle, },
781 {
782 .enter = NULL }
783};
784static struct cpuidle_state avn_cstates[] = {
785 {
786 .name = "C1",
787 .desc = "MWAIT 0x00",
788 .flags = MWAIT2flg(0x00),
789 .exit_latency = 2,
790 .target_residency = 2,
791 .enter = &intel_idle,
792 .enter_s2idle = intel_idle_s2idle, },
793 {
794 .name = "C6",
795 .desc = "MWAIT 0x51",
796 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
797 .exit_latency = 15,
798 .target_residency = 45,
799 .enter = &intel_idle,
800 .enter_s2idle = intel_idle_s2idle, },
801 {
802 .enter = NULL }
803};
804static struct cpuidle_state knl_cstates[] = {
805 {
806 .name = "C1",
807 .desc = "MWAIT 0x00",
808 .flags = MWAIT2flg(0x00),
809 .exit_latency = 1,
810 .target_residency = 2,
811 .enter = &intel_idle,
812 .enter_s2idle = intel_idle_s2idle },
813 {
814 .name = "C6",
815 .desc = "MWAIT 0x10",
816 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
817 .exit_latency = 120,
818 .target_residency = 500,
819 .enter = &intel_idle,
820 .enter_s2idle = intel_idle_s2idle },
821 {
822 .enter = NULL }
823};
824
825static struct cpuidle_state bxt_cstates[] = {
826 {
827 .name = "C1",
828 .desc = "MWAIT 0x00",
829 .flags = MWAIT2flg(0x00),
830 .exit_latency = 2,
831 .target_residency = 2,
832 .enter = &intel_idle,
833 .enter_s2idle = intel_idle_s2idle, },
834 {
835 .name = "C1E",
836 .desc = "MWAIT 0x01",
837 .flags = MWAIT2flg(0x01),
838 .exit_latency = 10,
839 .target_residency = 20,
840 .enter = &intel_idle,
841 .enter_s2idle = intel_idle_s2idle, },
842 {
843 .name = "C6",
844 .desc = "MWAIT 0x20",
845 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
846 .exit_latency = 133,
847 .target_residency = 133,
848 .enter = &intel_idle,
849 .enter_s2idle = intel_idle_s2idle, },
850 {
851 .name = "C7s",
852 .desc = "MWAIT 0x31",
853 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
854 .exit_latency = 155,
855 .target_residency = 155,
856 .enter = &intel_idle,
857 .enter_s2idle = intel_idle_s2idle, },
858 {
859 .name = "C8",
860 .desc = "MWAIT 0x40",
861 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
862 .exit_latency = 1000,
863 .target_residency = 1000,
864 .enter = &intel_idle,
865 .enter_s2idle = intel_idle_s2idle, },
866 {
867 .name = "C9",
868 .desc = "MWAIT 0x50",
869 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
870 .exit_latency = 2000,
871 .target_residency = 2000,
872 .enter = &intel_idle,
873 .enter_s2idle = intel_idle_s2idle, },
874 {
875 .name = "C10",
876 .desc = "MWAIT 0x60",
877 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
878 .exit_latency = 10000,
879 .target_residency = 10000,
880 .enter = &intel_idle,
881 .enter_s2idle = intel_idle_s2idle, },
882 {
883 .enter = NULL }
884};
885
886static struct cpuidle_state dnv_cstates[] = {
887 {
888 .name = "C1",
889 .desc = "MWAIT 0x00",
890 .flags = MWAIT2flg(0x00),
891 .exit_latency = 2,
892 .target_residency = 2,
893 .enter = &intel_idle,
894 .enter_s2idle = intel_idle_s2idle, },
895 {
896 .name = "C1E",
897 .desc = "MWAIT 0x01",
898 .flags = MWAIT2flg(0x01),
899 .exit_latency = 10,
900 .target_residency = 20,
901 .enter = &intel_idle,
902 .enter_s2idle = intel_idle_s2idle, },
903 {
904 .name = "C6",
905 .desc = "MWAIT 0x20",
906 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
907 .exit_latency = 50,
908 .target_residency = 500,
909 .enter = &intel_idle,
910 .enter_s2idle = intel_idle_s2idle, },
911 {
912 .enter = NULL }
913};
914
915/**
916 * intel_idle - ask the processor to enter the idle state indicated by @index
917 * @dev: cpuidle_device
918 * @drv: cpuidle driver
919 * @index: index of cpuidle state
920 *
921 * Must be called with interrupts disabled (local_irq_disable()).
922 */
923static __cpuidle int intel_idle(struct cpuidle_device *dev,
924 struct cpuidle_driver *drv, int index)
925{
926 unsigned long ecx = 1; /* break on interrupt flag */
927 struct cpuidle_state *state = &drv->states[index];
928 unsigned long eax = flg2MWAIT(state->flags);
929 unsigned int cstate;
930 bool uninitialized_var(tick);
931 int cpu = smp_processor_id();
932
933 /*
934 * leave_mm() to avoid costly and often unnecessary wakeups
935 * for flushing the user TLB's associated with the active mm.
936 */
937 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
938 leave_mm(cpu);
939
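	/*
	 * Without ARAT (Always Running APIC Timer) the local APIC timer may
	 * stop in deep C-states, so switch to the broadcast timer for any
	 * C-state not marked reliable in lapic_timer_reliable_states.
	 */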
940 if (!static_cpu_has(X86_FEATURE_ARAT)) {
941 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
942 MWAIT_CSTATE_MASK) + 1;
943 tick = false;
944 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
945 tick = true;
946 tick_broadcast_enter();
947 }
948 }
949
950 mwait_idle_with_hints(eax, ecx);
951
952 if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
953 tick_broadcast_exit();
954
955 return index;
956}
957
958/**
959 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
960 * @dev: cpuidle_device
961 * @drv: cpuidle driver
962 * @index: state index
963 */
964static int intel_idle_s2idle(struct cpuidle_device *dev,
965 struct cpuidle_driver *drv, int index)
966{
967 unsigned long ecx = 1; /* break on interrupt flag */
968 unsigned long eax = flg2MWAIT(drv->states[index].flags);
969
970 mwait_idle_with_hints(eax, ecx);
971
972 return 0;
973}
974
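/*
 * Enable or disable the tick broadcast device on this CPU; used when the
 * local APIC timer cannot be relied upon in deep C-states (no ARAT).
 */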
975static void __setup_broadcast_timer(bool on)
976{
977 if (on)
978 tick_broadcast_enable();
979 else
980 tick_broadcast_disable();
981}
982
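/*
 * Clear the auto-demotion enable bits given in auto_demotion_disable_flags
 * from MSR_PKG_CST_CONFIG_CONTROL so the hardware does not demote requested
 * deep C-states to shallower ones.
 */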
983static void auto_demotion_disable(void)
984{
985 unsigned long long msr_bits;
986
987 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
988 msr_bits &= ~(icpu->auto_demotion_disable_flags);
989 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
990}
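/*
 * Clear bit 1 (C1E enable) of MSR_IA32_POWER_CTL so the hardware does not
 * promote C1 requests to C1E.
 */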
991static void c1e_promotion_disable(void)
992{
993 unsigned long long msr_bits;
994
995 rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
996 msr_bits &= ~0x2;
997 wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
998}
999
1000static const struct idle_cpu idle_cpu_nehalem = {
1001 .state_table = nehalem_cstates,
1002 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1003 .disable_promotion_to_c1e = true,
1004};
1005
1006static const struct idle_cpu idle_cpu_atom = {
1007 .state_table = atom_cstates,
1008};
1009
1010static const struct idle_cpu idle_cpu_tangier = {
1011 .state_table = tangier_cstates,
1012};
1013
1014static const struct idle_cpu idle_cpu_lincroft = {
1015 .state_table = atom_cstates,
1016 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1017};
1018
1019static const struct idle_cpu idle_cpu_snb = {
1020 .state_table = snb_cstates,
1021 .disable_promotion_to_c1e = true,
1022};
1023
1024static const struct idle_cpu idle_cpu_byt = {
1025 .state_table = byt_cstates,
1026 .disable_promotion_to_c1e = true,
1027 .byt_auto_demotion_disable_flag = true,
1028};
1029
1030static const struct idle_cpu idle_cpu_cht = {
1031 .state_table = cht_cstates,
1032 .disable_promotion_to_c1e = true,
1033 .byt_auto_demotion_disable_flag = true,
1034};
1035
1036static const struct idle_cpu idle_cpu_ivb = {
1037 .state_table = ivb_cstates,
1038 .disable_promotion_to_c1e = true,
1039};
1040
1041static const struct idle_cpu idle_cpu_ivt = {
1042 .state_table = ivt_cstates,
1043 .disable_promotion_to_c1e = true,
1044};
1045
1046static const struct idle_cpu idle_cpu_hsw = {
1047 .state_table = hsw_cstates,
1048 .disable_promotion_to_c1e = true,
1049};
1050
1051static const struct idle_cpu idle_cpu_bdw = {
1052 .state_table = bdw_cstates,
1053 .disable_promotion_to_c1e = true,
1054};
1055
1056static const struct idle_cpu idle_cpu_skl = {
1057 .state_table = skl_cstates,
1058 .disable_promotion_to_c1e = true,
1059};
1060
1061static const struct idle_cpu idle_cpu_skx = {
1062 .state_table = skx_cstates,
1063 .disable_promotion_to_c1e = true,
1064};
1065
1066static const struct idle_cpu idle_cpu_avn = {
1067 .state_table = avn_cstates,
1068 .disable_promotion_to_c1e = true,
1069};
1070
1071static const struct idle_cpu idle_cpu_knl = {
1072 .state_table = knl_cstates,
1073};
1074
1075static const struct idle_cpu idle_cpu_bxt = {
1076 .state_table = bxt_cstates,
1077 .disable_promotion_to_c1e = true,
1078};
1079
1080static const struct idle_cpu idle_cpu_dnv = {
1081 .state_table = dnv_cstates,
1082 .disable_promotion_to_c1e = true,
1083};
1084
1085static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1086 INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem),
1087 INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem),
1088 INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem),
1089 INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem),
1090 INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem),
1091 INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem),
1092 INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom),
1093 INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft),
1094 INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem),
1095 INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb),
1096 INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb),
1097 INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom),
1098 INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt),
1099 INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier),
1100 INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht),
1101 INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb),
1102 INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt),
1103 INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw),
1104 INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw),
1105 INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw),
1106 INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw),
1107 INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn),
1108 INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw),
1109 INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw),
1110 INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw),
1111 INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdw),
1112 INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl),
1113 INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl),
1114 INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl),
1115 INTEL_CPU_FAM6(KABYLAKE, idle_cpu_skl),
1116 INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx),
1117 INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl),
1118 INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl),
1119 INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt),
1120 INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
1121 INTEL_CPU_FAM6(ATOM_GOLDMONT_D, idle_cpu_dnv),
1122 INTEL_CPU_FAM6(ATOM_TREMONT_D, idle_cpu_dnv),
1123 {}
1124};
1125
1126/*
1127 * intel_idle_probe() - match the boot CPU, verify MWAIT support, select the C-state table
1128 */
1129static int __init intel_idle_probe(void)
1130{
1131 unsigned int eax, ebx, ecx;
1132 const struct x86_cpu_id *id;
1133
1134 if (max_cstate == 0) {
1135 pr_debug("disabled\n");
1136 return -EPERM;
1137 }
1138
1139 id = x86_match_cpu(intel_idle_ids);
1140 if (!id) {
1141 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1142 boot_cpu_data.x86 == 6)
1143 pr_debug("does not run on family %d model %d\n",
1144 boot_cpu_data.x86, boot_cpu_data.x86_model);
1145 return -ENODEV;
1146 }
1147
1148 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1149 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1150 return -ENODEV;
1151 }
1152
1153 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1154 return -ENODEV;
1155
1156 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1157
1158 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1159 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1160 !mwait_substates)
1161 return -ENODEV;
1162
1163 pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1164
1165 icpu = (const struct idle_cpu *)id->driver_data;
1166 cpuidle_state_table = icpu->state_table;
1167
1168 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1169 boot_cpu_data.x86_model);
1170
1171 return 0;
1172}
1173
1174/*
1175 * intel_idle_cpuidle_devices_uninit()
1176 * Unregisters the cpuidle devices.
1177 */
1178static void intel_idle_cpuidle_devices_uninit(void)
1179{
1180 int i;
1181 struct cpuidle_device *dev;
1182
1183 for_each_online_cpu(i) {
1184 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1185 cpuidle_unregister_device(dev);
1186 }
1187}
1188
1189/*
1190 * ivt_idle_state_table_update(void)
1191 *
1192 * Tune IVT multi-socket targets
1193 * Assumption: num_sockets == (max_package_num + 1)
1194 */
1195static void ivt_idle_state_table_update(void)
1196{
1197 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1198 int cpu, package_num, num_sockets = 1;
1199
1200 for_each_online_cpu(cpu) {
1201 package_num = topology_physical_package_id(cpu);
1202 if (package_num + 1 > num_sockets) {
1203 num_sockets = package_num + 1;
1204
1205 if (num_sockets > 4) {
1206 cpuidle_state_table = ivt_cstates_8s;
1207 return;
1208 }
1209 }
1210 }
1211
1212 if (num_sockets > 2)
1213 cpuidle_state_table = ivt_cstates_4s;
1214
1215 /* else, 1 and 2 socket systems use default ivt_cstates */
1216}
1217
1218/*
1219 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
1220 */
1221
1222static unsigned int irtl_ns_units[] = {
1223 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1224
1225static unsigned long long irtl_2_usec(unsigned long long irtl)
1226{
1227 unsigned long long ns;
1228
1229 if (!irtl)
1230 return 0;
1231
1232 ns = irtl_ns_units[(irtl >> 10) & 0x7];
1233
1234 return div64_u64((irtl & 0x3FF) * ns, 1000);
1235}
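/*
 * Worked example: an IRTL value of 0x883 encodes time-unit index 2
 * (1024 ns per irtl_ns_units[]) and a count of 0x83 (131), i.e.
 * 131 * 1024 / 1000 ~= 134 usec.
 */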
1236/*
1237 * bxt_idle_state_table_update(void)
1238 *
1239 * On BXT, we trust the IRTL to show the definitive maximum latency.
1240 * We use the same value for target_residency.
1241 */
1242static void bxt_idle_state_table_update(void)
1243{
1244 unsigned long long msr;
1245 unsigned int usec;
1246
1247 rdmsrl(MSR_PKGC6_IRTL, msr);
1248 usec = irtl_2_usec(msr);
1249 if (usec) {
1250 bxt_cstates[2].exit_latency = usec;
1251 bxt_cstates[2].target_residency = usec;
1252 }
1253
1254 rdmsrl(MSR_PKGC7_IRTL, msr);
1255 usec = irtl_2_usec(msr);
1256 if (usec) {
1257 bxt_cstates[3].exit_latency = usec;
1258 bxt_cstates[3].target_residency = usec;
1259 }
1260
1261 rdmsrl(MSR_PKGC8_IRTL, msr);
1262 usec = irtl_2_usec(msr);
1263 if (usec) {
1264 bxt_cstates[4].exit_latency = usec;
1265 bxt_cstates[4].target_residency = usec;
1266 }
1267
1268 rdmsrl(MSR_PKGC9_IRTL, msr);
1269 usec = irtl_2_usec(msr);
1270 if (usec) {
1271 bxt_cstates[5].exit_latency = usec;
1272 bxt_cstates[5].target_residency = usec;
1273 }
1274
1275 rdmsrl(MSR_PKGC10_IRTL, msr);
1276 usec = irtl_2_usec(msr);
1277 if (usec) {
1278 bxt_cstates[6].exit_latency = usec;
1279 bxt_cstates[6].target_residency = usec;
1280 }
1281
1282}
1283/*
1284 * sklh_idle_state_table_update(void)
1285 *
1286 * On SKL-H (model 0x5e) disable C8 and C9 if:
1287 * C10 is enabled and SGX disabled
1288 */
1289static void sklh_idle_state_table_update(void)
1290{
1291 unsigned long long msr;
1292 unsigned int eax, ebx, ecx, edx;
1293
1294
1295 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1296 if (max_cstate <= 7)
1297 return;
1298
1299 /* if PC10 not present in CPUID.MWAIT.EDX */
1300 if ((mwait_substates & (0xF << 28)) == 0)
1301 return;
1302
1303 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1304
1305 /* PC10 is not enabled in PKG C-state limit */
1306 if ((msr & 0xF) != 8)
1307 return;
1308
1309 ecx = 0;
1310 cpuid(7, &eax, &ebx, &ecx, &edx);
1311
1312 /* if SGX is present */
1313 if (ebx & (1 << 2)) {
1314
1315 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1316
1317 /* if SGX is enabled */
1318 if (msr & (1 << 18))
1319 return;
1320 }
1321
1322 skl_cstates[5].disabled = 1; /* C8-SKL */
1323 skl_cstates[6].disabled = 1; /* C9-SKL */
1324}
1325/*
1326 * intel_idle_state_table_update()
1327 *
1328 * Update the default state_table for this CPU-id
1329 */
1330
1331static void intel_idle_state_table_update(void)
1332{
1333 switch (boot_cpu_data.x86_model) {
1334
1335 case INTEL_FAM6_IVYBRIDGE_X:
1336 ivt_idle_state_table_update();
1337 break;
1338 case INTEL_FAM6_ATOM_GOLDMONT:
1339 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1340 bxt_idle_state_table_update();
1341 break;
1342 case INTEL_FAM6_SKYLAKE:
1343 sklh_idle_state_table_update();
1344 break;
1345 }
1346}
1347
1348/*
1349 * intel_idle_cpuidle_driver_init()
1350 * allocate, initialize cpuidle_states
1351 */
1352static void __init intel_idle_cpuidle_driver_init(void)
1353{
1354 int cstate;
1355 struct cpuidle_driver *drv = &intel_idle_driver;
1356
1357 intel_idle_state_table_update();
1358
1359 cpuidle_poll_state_init(drv);
1360 drv->state_count = 1;
1361
1362 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1363 int num_substates, mwait_hint, mwait_cstate;
1364
1365 if ((cpuidle_state_table[cstate].enter == NULL) &&
1366 (cpuidle_state_table[cstate].enter_s2idle == NULL))
1367 break;
1368
1369 if (cstate + 1 > max_cstate) {
1370 pr_info("max_cstate %d reached\n", max_cstate);
1371 break;
1372 }
1373
1374 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1375 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1376
1377 /* number of sub-states for this state in CPUID.MWAIT */
1378 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1379 & MWAIT_SUBSTATE_MASK;
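		/*
		 * E.g. hint 0x20 (C6 in the big-core tables above) gives
		 * mwait_cstate 2, so its sub-state count comes from nibble 3
		 * (EDX bits 15:12) of CPUID.MWAIT.
		 */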
1380
1381 /* if NO sub-states for this state in CPUID, skip it */
1382 if (num_substates == 0)
1383 continue;
1384
1385 /* if state marked as disabled, skip it */
1386 if (cpuidle_state_table[cstate].disabled != 0) {
1387 pr_debug("state %s is disabled\n",
1388 cpuidle_state_table[cstate].name);
1389 continue;
1390 }
1391
1392
1393 if (((mwait_cstate + 1) > 2) &&
1394 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1395 mark_tsc_unstable("TSC halts in idle"
1396 " states deeper than C2");
1397
1398 drv->states[drv->state_count] = /* structure copy */
1399 cpuidle_state_table[cstate];
1400
1401 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1402 cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1403 drv->states[drv->state_count].enter = intel_idle_ibrs;
1404 }
1405
1406 drv->state_count += 1;
1407 }
1408
1409 if (icpu->byt_auto_demotion_disable_flag) {
1410 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1411 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1412 }
1413}
1414
1415
1416/*
1417 * intel_idle_cpu_init()
1418 * allocate, initialize, register cpuidle_devices
1419 * @cpu: cpu/core to initialize
1420 */
1421static int intel_idle_cpu_init(unsigned int cpu)
1422{
1423 struct cpuidle_device *dev;
1424
1425 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1426 dev->cpu = cpu;
1427
1428 if (cpuidle_register_device(dev)) {
1429 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1430 return -EIO;
1431 }
1432
1433 if (icpu->auto_demotion_disable_flags)
1434 auto_demotion_disable();
1435
1436 if (icpu->disable_promotion_to_c1e)
1437 c1e_promotion_disable();
1438
1439 return 0;
1440}
1441
1442static int intel_idle_cpu_online(unsigned int cpu)
1443{
1444 struct cpuidle_device *dev;
1445
1446 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1447 __setup_broadcast_timer(true);
1448
1449 /*
1450 * Some systems can hotplug a cpu at runtime after
1451 * the kernel has booted; we have to initialize the
1452 * driver for such a CPU in this case.
1453 */
1454 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1455 if (!dev->registered)
1456 return intel_idle_cpu_init(cpu);
1457
1458 return 0;
1459}
1460
1461static int __init intel_idle_init(void)
1462{
1463 int retval;
1464
1465 /* Do not load intel_idle at all for now if idle= is passed */
1466 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1467 return -ENODEV;
1468
1469 retval = intel_idle_probe();
1470 if (retval)
1471 return retval;
1472
1473 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1474 if (intel_idle_cpuidle_devices == NULL)
1475 return -ENOMEM;
1476
1477 intel_idle_cpuidle_driver_init();
1478 retval = cpuidle_register_driver(&intel_idle_driver);
1479 if (retval) {
1480 struct cpuidle_driver *drv = cpuidle_get_driver();
1481 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1482 drv ? drv->name : "none");
1483 goto init_driver_fail;
1484 }
1485
1486 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
1487 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1488
1489 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1490 intel_idle_cpu_online, NULL);
1491 if (retval < 0)
1492 goto hp_setup_fail;
1493
1494 pr_debug("lapic_timer_reliable_states 0x%x\n",
1495 lapic_timer_reliable_states);
1496
1497 return 0;
1498
1499hp_setup_fail:
1500 intel_idle_cpuidle_devices_uninit();
1501 cpuidle_unregister_driver(&intel_idle_driver);
1502init_driver_fail:
1503 free_percpu(intel_idle_cpuidle_devices);
1504 return retval;
1505
1506}
1507device_initcall(intel_idle_init);
1508
1509/*
1510 * We are not really modular, but we used to support that. Meaning we also
1511 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1512 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1513 * is the easiest way (currently) to continue doing that.
1514 */
1515module_param(max_cstate, int, 0444);