// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 * TODO:
 * 1. Better handle wakeups from external interrupts. Currently a fixed
 *    compensation is added to the clamping duration when an excessive
 *    number of wakeups is observed during idle time. The reason is that
 *    for external interrupts which need no ack, clamping down a CPU in
 *    non-irq context does not reduce the irq rate. For the majority of
 *    cases, clamping down a CPU does help reduce irqs as well, so we
 *    should be able to differentiate the two cases and give a
 *    quantitative solution for the irqs that we can control, perhaps
 *    based on get_cpu_iowait_time_us().
 *
 * 2. Synchronization with other hw blocks.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>

#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration; the driver adjusts the sleep time to
 * meet the target idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)
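/*
 * Example: with the default 6-jiffy injection duration and a 25% target
 * idle ratio, clamp_balancing_func() schedules one injection every
 * 6 * 100 / 25 = 24 jiffies (ignoring calibration compensation).
 */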

static unsigned int target_mwait;
static struct dentry *debug_dir;
static bool poll_pkg_cstate_enable;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
				  * update control parameters. Defaults to
				  * the BSP, but the BSP can be offlined.
				  */
static bool clamping;

static const struct sched_param sparam = {
	.sched_priority = MAX_USER_RT_PRIO / 2,
};
struct powerclamp_worker_data {
	struct kthread_worker *worker;
	struct kthread_work balancing_work;
	struct kthread_delayed_work idle_injection_work;
	unsigned int cpu;
	unsigned int count;
	unsigned int guard;
	unsigned int window_size_now;
	unsigned int target_ratio;
	unsigned int duration_jiffies;
	bool clamping;
};

static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
					  * clamping kthread worker
					  */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

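/*
 * Note: an out-of-range duration is still clamped into the 6-25 ms range
 * and stored before -EINVAL is returned to the caller.
 */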
static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu, between 6-25ms\n",
			new_duration);
		ret = -EINVAL;
	}

	duration = clamp(new_duration, 6ul, 25ul);
	smp_mb();

exit:

	return ret;
}

static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = param_get_int,
};

module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");

struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration, basically a counter
				    * that gets incremented each time a clamping
				    * period completes without extra wakeups.
				    * Once the counter reaches a given level,
				    * the compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensate excessive wakeup from idle,
				     * mostly from external interrupts.
				     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_window_size;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		goto exit_win;
	if (new_window_size > 10 || new_window_size < 2) {
		pr_err("Out of recommended window size %lu, between 2-10\n",
			new_window_size);
		ret = -EINVAL;
	}

	window_size = clamp(new_window_size, 2ul, 10ul);
	smp_mb();

exit_win:

	return ret;
}

static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but more smooth\n"
	"\tclamping results. default to 2.");

static void find_target_mwait(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

}

struct pkg_cstate_info {
	bool skip;
	int msr_index;
	int cstate_id;
};

#define PKG_CSTATE_INIT(id) {				\
		.msr_index = MSR_PKG_C##id##_RESIDENCY,	\
		.cstate_id = id				\
	}

static struct pkg_cstate_info pkg_cstates[] = {
	PKG_CSTATE_INIT(2),
	PKG_CSTATE_INIT(3),
	PKG_CSTATE_INIT(6),
	PKG_CSTATE_INIT(7),
	PKG_CSTATE_INIT(8),
	PKG_CSTATE_INIT(9),
	PKG_CSTATE_INIT(10),
	{NULL},
};

static bool has_pkg_state_counter(void)
{
	u64 val;
	struct pkg_cstate_info *info = pkg_cstates;

	/* check if any one of the counter msrs exists */
	while (info->msr_index) {
		if (!rdmsrl_safe(info->msr_index, &val))
			return true;
		info++;
	}

	return false;
}

static u64 pkg_state_counter(void)
{
	u64 val;
	u64 count = 0;
	struct pkg_cstate_info *info = pkg_cstates;

	while (info->msr_index) {
		if (!info->skip) {
			if (!rdmsrl_safe(info->msr_index, &val))
				count += val;
			else
				info->skip = true;
		}
		info++;
	}

	return count;
}

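/*
 * Return the extra idle percentage to inject on top of the requested
 * target ratio. The steady-state compensation for a ratio is only used
 * once its own calibration entry and the two adjacent entries have
 * reached CONFIDENCE_OK; the three values are then averaged.
 */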
static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	if (!poll_pkg_cstate_enable)
		return 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;
	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}

static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * Do not adjust the compensation if the confidence level has already
	 * been reached, or if there were too many wakeups during the last
	 * idle injection period; in that case the data cannot be trusted.
	 */
	if (d->confidence >= CONFIDENCE_OK ||
		atomic_read(&idle_wakeup_counter) >
		win * num_online_cpus())
		return;

	delta = set_target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1 + target_ratio / 10)) {
		if (d->steady_comp)
			d->steady_comp =
				roundup(delta + d->steady_comp, 2) / 2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

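/*
 * Compare the package C-state residency accumulated over the last window
 * against the TSC delta to get the achieved idle ratio in percent, then
 * update the calibration data. Returns true when the achieved ratio is
 * already at or above target + guard, so the next injection can be skipped.
 */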
static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now - tsc_last) {
		val64 = 100 * (msr_now - msr_last);
		do_div(val64, (tsc_now - tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);
	/*
	 * too many external interrupts, set the flag so
	 * that we can take measures later.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);
	/* if we are above target+guard, skip */
	return set_target_ratio + guard <= current_ratio;
}

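/*
 * Per-CPU balancing work: refresh the control parameters, apply the
 * calibration compensation and schedule the next idle injection so that
 * injections on all CPUs stay aligned to the same interval boundary.
 * The injection work re-queues this work, so the two works ping-pong
 * for as long as clamping is enabled.
 */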
static void clamp_balancing_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;
	int sleeptime;
	unsigned long target_jiffies;
	unsigned int compensated_ratio;
	int interval; /* jiffies to sleep for each attempt */

	w_data = container_of(work, struct powerclamp_worker_data,
			      balancing_work);

	/*
	 * make sure user selected ratio does not take effect until
	 * the next round. adjust target_ratio if user has changed
	 * target such that we can converge quickly.
	 */
	w_data->target_ratio = READ_ONCE(set_target_ratio);
	w_data->guard = 1 + w_data->target_ratio / 20;
	w_data->window_size_now = window_size;
	w_data->duration_jiffies = msecs_to_jiffies(duration);
	w_data->count++;

	/*
	 * systems may have different ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = w_data->target_ratio +
		get_compensation(w_data->target_ratio);
	if (compensated_ratio <= 0)
		compensated_ratio = 1;
	interval = w_data->duration_jiffies * 100 / compensated_ratio;

	/* align idle time */
	target_jiffies = roundup(jiffies, interval);
	sleeptime = target_jiffies - jiffies;
	if (sleeptime <= 0)
		sleeptime = 1;

	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_delayed_work(w_data->worker,
					   &w_data->idle_injection_work,
					   sleeptime);
}

static void clamp_idle_injection_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;

	w_data = container_of(work, struct powerclamp_worker_data,
			      idle_injection_work.work);

	/*
	 * only elected controlling cpu can collect stats and update
	 * control parameters.
	 */
	if (w_data->cpu == control_cpu &&
	    !(w_data->count % w_data->window_size_now)) {
		should_skip =
			powerclamp_adjust_controls(w_data->target_ratio,
						   w_data->guard,
						   w_data->window_size_now);
		smp_mb();
	}

	if (should_skip)
		goto balance;

	play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;

	u64 msr_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	if (clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

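/*
 * Create a CPU-bound kthread worker running at RT priority and queue the
 * first balancing work on it.
 */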
static void start_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
	struct kthread_worker *worker;

	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
	if (IS_ERR(worker))
		return;

	w_data->worker = worker;
	w_data->count = 0;
	w_data->cpu = cpu;
	w_data->clamping = true;
	set_bit(cpu, cpu_clamping_mask);
	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
	kthread_init_delayed_work(&w_data->idle_injection_work,
				  clamp_idle_injection_func);
	kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

static void stop_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

	if (!w_data->worker)
		return;

	w_data->clamping = false;
	/*
	 * Make sure that all works that get queued after this point see
	 * the clamping disabled. The counterpart is not needed because
	 * there is an implicit memory barrier when the queued work
	 * is processed.
	 */
	smp_wmb();
	kthread_cancel_work_sync(&w_data->balancing_work);
	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
	/*
	 * The balancing work might still be queued here because the
	 * handling of the "clamping" variable, cancel, and queue
	 * operations are not synchronized via a lock. But it is not
	 * a big deal. The balancing work is fast and destroying the
	 * kthread worker will wait for it.
	 */
	clear_bit(w_data->cpu, cpu_clamping_mask);
	kthread_destroy_worker(w_data->worker);

	w_data->worker = NULL;
}

static int start_power_clamp(void)
{
	unsigned long cpu;

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = cpumask_first(cpu_online_mask);

	clamping = true;
	if (poll_pkg_cstate_enable)
		schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one kthread worker per online cpu */
	for_each_online_cpu(cpu) {
		start_power_clamp_worker(cpu);
	}
	put_online_cpus();

	return 0;
}

static void end_power_clamp(void)
{
	int i;

	/*
	 * Block requeuing in all the kthread workers. They will flush and
	 * stop faster.
	 */
	clamping = false;
	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
			pr_debug("clamping worker for cpu %d alive, destroy\n",
				 i);
			stop_power_clamp_worker(i);
		}
	}
}

static int powerclamp_cpu_online(unsigned int cpu)
{
	if (!clamping)
		return 0;
	start_power_clamp_worker(cpu);
	/* prefer BSP as controlling CPU */
	if (cpu == 0) {
		control_cpu = 0;
		smp_mb();
	}
	return 0;
}

static int powerclamp_cpu_predown(unsigned int cpu)
{
	if (!clamping)
		return 0;

	stop_power_clamp_worker(cpu);
	if (cpu != control_cpu)
		return 0;

	control_cpu = cpumask_first(cpu_online_mask);
	if (control_cpu == cpu)
		control_cpu = cpumask_next(cpu, cpu_online_mask);
	smp_mb();
	return 0;
}

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	if (clamping) {
		if (poll_pkg_cstate_enable)
			*state = pkg_cstate_ratio_cur;
		else
			*state = set_target_ratio;
	} else {
		/* to save power, do not poll idle ratio while not clamping */
		*state = -1; /* indicates invalid state */
	}

	return 0;
}

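/*
 * The cooling device state is the requested idle injection percentage:
 * moving from 0 to a non-zero value starts clamping, moving back to 0
 * stops it, and any other change just retargets the running workers.
 */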
static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (MAX_TARGET_RATIO - 1));
	if (set_target_ratio == 0 && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		set_target_ratio = new_target_ratio;
		ret = start_power_clamp();
		goto exit_set;
	} else if (set_target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		set_target_ratio = 0;
	} else /* adjust currently running */ {
		set_target_ratio = new_target_ratio;
		/* make new set_target_ratio visible to other cpus */
		smp_mb();
	}

exit_set:
	return ret;
}

/* bind to generic thermal layer as cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};

static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

static int __init powerclamp_probe(void)
{

	if (!x86_match_cpu(intel_powerclamp_ids)) {
		pr_err("CPU does not support MWAIT\n");
		return -ENODEV;
	}

	/* The goal of aligning idle time across CPUs is to reach a package C-state. */
	if (!has_pkg_state_counter()) {
		pr_info("No package C-state available\n");
		return -ENODEV;
	}

	/* find the deepest mwait value */
	find_target_mwait();

	return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
	int i = 0;

	seq_printf(m, "controlling cpu: %d\n", control_cpu);
	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
	for (i = 0; i < MAX_TARGET_RATIO; i++) {
		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
			i,
			cal_data[i].confidence,
			cal_data[i].steady_comp,
			cal_data[i].dynamic_comp);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);

static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);

	debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
			    &powerclamp_debug_fops);
}

static enum cpuhp_state hp_state;

static int __init powerclamp_init(void)
{
	int retval;
	int bitmap_size;

	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;
	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "thermal/intel_powerclamp:online",
					   powerclamp_cpu_online,
					   powerclamp_cpu_predown);
	if (retval < 0)
		goto exit_free;

	hp_state = retval;

	worker_data = alloc_percpu(struct powerclamp_worker_data);
	if (!worker_data) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
		poll_pkg_cstate_enable = true;

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						      &powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(worker_data);
exit_unregister:
	cpuhp_remove_state_nocalls(hp_state);
exit_free:
	kfree(cpu_clamping_mask);
	return retval;
}
module_init(powerclamp_init);

static void __exit powerclamp_exit(void)
{
	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);
	free_percpu(worker_data);
	thermal_cooling_device_unregister(cooling_dev);
	kfree(cpu_clamping_mask);

	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");