// SPDX-License-Identifier: GPL-2.0
/*
 * Intel Performance and Energy Bias Hint support.
 *
 * Copyright (C) 2019 Intel Corporation
 *
 * Author:
 *	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */
| 10 | |
| 11 | #include <linux/cpuhotplug.h> |
| 12 | #include <linux/cpu.h> |
| 13 | #include <linux/device.h> |
| 14 | #include <linux/kernel.h> |
| 15 | #include <linux/string.h> |
| 16 | #include <linux/syscore_ops.h> |
| 17 | #include <linux/pm.h> |
| 18 | |
| 19 | #include <asm/cpufeature.h> |
| 20 | #include <asm/msr.h> |
| 21 | |
/**
 * DOC: overview
 *
 * The Performance and Energy Bias Hint (EPB) allows software to specify its
 * preference with respect to the power-performance tradeoffs present in the
 * processor.  Generally, the EPB is expected to be set by user space (directly
 * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
 * two reasons for the kernel to update it.
 *
 * First, there are systems where the platform firmware resets the EPB during
 * system-wide transitions from sleep states back into the working state
 * effectively causing the previous EPB updates by user space to be lost.
 * Thus the kernel needs to save the current EPB values for all CPUs during
 * system-wide transitions to sleep states and restore them on the way back to
 * the working state.  That can be achieved by saving EPB for secondary CPUs
 * when they are taken offline during transitions into system sleep states and
 * for the boot CPU in a syscore suspend operation, so that it can be restored
 * for the boot CPU in a syscore resume operation and for the other CPUs when
 * they are brought back online.  However, CPUs that are already offline when
 * a system-wide PM transition is started are not taken offline again, but their
 * EPB values may still be reset by the platform firmware during the transition,
 * so in fact it is necessary to save the EPB of any CPU taken offline and to
 * restore it when the given CPU goes back online at all times.
 *
 * Second, on many systems the initial EPB value coming from the platform
 * firmware is 0 ('performance') and at least on some of them that is because
 * the platform firmware does not initialize EPB at all with the assumption that
 * the OS will do that anyway.  That sometimes is problematic, as it may cause
 * the system battery to drain too fast, for example, so it is better to adjust
 * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
 * kernel changes it to 6 ('normal').
 */
| 54 | |
| 55 | static DEFINE_PER_CPU(u8, saved_epb); |
| 56 | |
| 57 | #define EPB_MASK 0x0fULL |
| 58 | #define EPB_SAVED 0x10ULL |
| 59 | #define MAX_EPB EPB_MASK |
| 60 | |
| 61 | static int intel_epb_save(void) |
| 62 | { |
| 63 | u64 epb; |
| 64 | |
| 65 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
| 66 | /* |
| 67 | * Ensure that saved_epb will always be nonzero after this write even if |
| 68 | * the EPB value read from the MSR is 0. |
| 69 | */ |
| 70 | this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); |
| 71 | |
| 72 | return 0; |
| 73 | } |
| 74 | |
| 75 | static void intel_epb_restore(void) |
| 76 | { |
| 77 | u64 val = this_cpu_read(saved_epb); |
| 78 | u64 epb; |
| 79 | |
| 80 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
| 81 | if (val) { |
| 82 | val &= EPB_MASK; |
| 83 | } else { |
| 84 | /* |
| 85 | * Because intel_epb_save() has not run for the current CPU yet, |
| 86 | * it is going online for the first time, so if its EPB value is |
| 87 | * 0 ('performance') at this point, assume that it has not been |
| 88 | * initialized by the platform firmware and set it to 6 |
| 89 | * ('normal'). |
| 90 | */ |
| 91 | val = epb & EPB_MASK; |
| 92 | if (val == ENERGY_PERF_BIAS_PERFORMANCE) { |
| 93 | val = ENERGY_PERF_BIAS_NORMAL; |
| 94 | pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); |
| 95 | } |
| 96 | } |
| 97 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); |
| 98 | } |
| 99 | |
| 100 | static struct syscore_ops intel_epb_syscore_ops = { |
| 101 | .suspend = intel_epb_save, |
| 102 | .resume = intel_epb_restore, |
| 103 | }; |
| 104 | |
| 105 | static const char * const energy_perf_strings[] = { |
| 106 | "performance", |
| 107 | "balance-performance", |
| 108 | "normal", |
| 109 | "balance-power", |
| 110 | "power" |
| 111 | }; |
| 112 | static const u8 energ_perf_values[] = { |
| 113 | ENERGY_PERF_BIAS_PERFORMANCE, |
| 114 | ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, |
| 115 | ENERGY_PERF_BIAS_NORMAL, |
| 116 | ENERGY_PERF_BIAS_BALANCE_POWERSAVE, |
| 117 | ENERGY_PERF_BIAS_POWERSAVE |
| 118 | }; |
| 119 | |
| 120 | static ssize_t energy_perf_bias_show(struct device *dev, |
| 121 | struct device_attribute *attr, |
| 122 | char *buf) |
| 123 | { |
| 124 | unsigned int cpu = dev->id; |
| 125 | u64 epb; |
| 126 | int ret; |
| 127 | |
| 128 | ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); |
| 129 | if (ret < 0) |
| 130 | return ret; |
| 131 | |
| 132 | return sprintf(buf, "%llu\n", epb); |
| 133 | } |
| 134 | |
| 135 | static ssize_t energy_perf_bias_store(struct device *dev, |
| 136 | struct device_attribute *attr, |
| 137 | const char *buf, size_t count) |
| 138 | { |
| 139 | unsigned int cpu = dev->id; |
| 140 | u64 epb, val; |
| 141 | int ret; |
| 142 | |
| 143 | ret = __sysfs_match_string(energy_perf_strings, |
| 144 | ARRAY_SIZE(energy_perf_strings), buf); |
| 145 | if (ret >= 0) |
| 146 | val = energ_perf_values[ret]; |
| 147 | else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) |
| 148 | return -EINVAL; |
| 149 | |
| 150 | ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); |
| 151 | if (ret < 0) |
| 152 | return ret; |
| 153 | |
| 154 | ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, |
| 155 | (epb & ~EPB_MASK) | val); |
| 156 | if (ret < 0) |
| 157 | return ret; |
| 158 | |
| 159 | return count; |
| 160 | } |
| 161 | |
| 162 | static DEVICE_ATTR_RW(energy_perf_bias); |
| 163 | |
| 164 | static struct attribute *intel_epb_attrs[] = { |
| 165 | &dev_attr_energy_perf_bias.attr, |
| 166 | NULL |
| 167 | }; |
| 168 | |
| 169 | static const struct attribute_group intel_epb_attr_group = { |
| 170 | .name = power_group_name, |
| 171 | .attrs = intel_epb_attrs |
| 172 | }; |
| 173 | |
| 174 | static int intel_epb_online(unsigned int cpu) |
| 175 | { |
| 176 | struct device *cpu_dev = get_cpu_device(cpu); |
| 177 | |
| 178 | intel_epb_restore(); |
| 179 | if (!cpuhp_tasks_frozen) |
| 180 | sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); |
| 181 | |
| 182 | return 0; |
| 183 | } |
| 184 | |
| 185 | static int intel_epb_offline(unsigned int cpu) |
| 186 | { |
| 187 | struct device *cpu_dev = get_cpu_device(cpu); |
| 188 | |
| 189 | if (!cpuhp_tasks_frozen) |
| 190 | sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); |
| 191 | |
| 192 | intel_epb_save(); |
| 193 | return 0; |
| 194 | } |
| 195 | |
| 196 | static __init int intel_epb_init(void) |
| 197 | { |
| 198 | int ret; |
| 199 | |
| 200 | if (!boot_cpu_has(X86_FEATURE_EPB)) |
| 201 | return -ENODEV; |
| 202 | |
| 203 | ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, |
| 204 | "x86/intel/epb:online", intel_epb_online, |
| 205 | intel_epb_offline); |
| 206 | if (ret < 0) |
| 207 | goto err_out_online; |
| 208 | |
| 209 | register_syscore_ops(&intel_epb_syscore_ops); |
| 210 | return 0; |
| 211 | |
| 212 | err_out_online: |
| 213 | cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); |
| 214 | return ret; |
| 215 | } |
| 216 | subsys_initcall(intel_epb_init); |