/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Released under the GPLv2 only.
 * SPDX-License-Identifier: GPL-2.0
 */

#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/sched/energy.h>
#include <linux/cpuset.h>

DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
DEFINE_PER_CPU(unsigned long, max_cpu_freq);
DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;

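/*
 * Frequency-invariance scale factor for the CPUs in @cpus, updated
 * (typically from the cpufreq layer) whenever their current frequency
 * changes:
 *
 *      freq_scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq
 *
 * e.g. cur_freq = 1000 MHz on a 2000 MHz CPU gives a scale of 512, half
 * of SCHED_CAPACITY_SCALE. The maximum frequency is also cached per CPU
 * in max_cpu_freq for use by arch_set_max_freq_scale() below.
 */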
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
                         unsigned long max_freq)
{
        unsigned long scale;
        int i;

        scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

        for_each_cpu(i, cpus) {
                per_cpu(freq_scale, i) = scale;
                per_cpu(max_cpu_freq, i) = max_freq;
        }
}

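/*
 * Scale factor describing how far the current cpufreq policy's maximum
 * frequency is below the hardware maximum cached in max_cpu_freq:
 *
 *      max_freq_scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq
 *
 * e.g. a policy capped at 1500 MHz on a 2000 MHz CPU gives a scale of 768.
 */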
void arch_set_max_freq_scale(struct cpumask *cpus,
                             unsigned long policy_max_freq)
{
        unsigned long scale, max_freq;
        int cpu = cpumask_first(cpus);

        /* cpumask_first() returns >= nr_cpu_ids for an empty mask. */
        if (cpu >= nr_cpu_ids)
                return;

        max_freq = per_cpu(max_cpu_freq, cpu);
        if (!max_freq)
                return;

        scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;

        for_each_cpu(cpu, cpus)
                per_cpu(max_freq_scale, cpu) = scale;
}

static DEFINE_MUTEX(cpu_scale_mutex);
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
        per_cpu(cpu_scale, cpu) = capacity;
}

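/*
 * sysfs interface for per-CPU capacity, typically exposed as
 * /sys/devices/system/cpu/cpuN/cpu_capacity. Reads return the current
 * scale of the CPU; writes apply the new capacity to every CPU sharing
 * this CPU's core mask. Writes below SCHED_CAPACITY_SCALE are rejected
 * unless at least one online CPU outside that mask still has full
 * capacity.
 */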
static ssize_t cpu_capacity_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
{
        struct cpu *cpu = container_of(dev, struct cpu, dev);

        return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static ssize_t cpu_capacity_store(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf,
                                  size_t count)
{
        struct cpu *cpu = container_of(dev, struct cpu, dev);
        int this_cpu = cpu->dev.id;
        int i;
        unsigned long new_capacity;
        ssize_t ret;
        cpumask_var_t mask;

        if (!count)
                return 0;

        ret = kstrtoul(buf, 0, &new_capacity);
        if (ret)
                return ret;
        if (new_capacity > SCHED_CAPACITY_SCALE)
                return -EINVAL;

        mutex_lock(&cpu_scale_mutex);

        if (new_capacity < SCHED_CAPACITY_SCALE) {
                int highest_score_cpu = 0;

                if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
                        mutex_unlock(&cpu_scale_mutex);
                        return -ENOMEM;
                }

                cpumask_andnot(mask, cpu_online_mask,
                               topology_core_cpumask(this_cpu));

                for_each_cpu(i, mask) {
                        if (topology_get_cpu_scale(NULL, i) ==
                            SCHED_CAPACITY_SCALE) {
                                highest_score_cpu = 1;
                                break;
                        }
                }

                free_cpumask_var(mask);

                if (!highest_score_cpu) {
                        mutex_unlock(&cpu_scale_mutex);
                        return -EINVAL;
                }
        }

        for_each_cpu(i, topology_core_cpumask(this_cpu))
                topology_set_cpu_scale(i, new_capacity);
        mutex_unlock(&cpu_scale_mutex);

        if (topology_detect_flags())
                schedule_work(&update_topology_flags_work);

        return count;
}

static DEVICE_ATTR_RW(cpu_capacity);

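/* Create the cpu_capacity sysfs attribute for each possible CPU at boot. */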
static int register_cpu_capacity_sysctl(void)
{
        int i;
        struct device *cpu;

        for_each_possible_cpu(i) {
                cpu = get_cpu_device(i);
                if (!cpu) {
                        pr_err("%s: too early to get CPU%d device!\n",
                               __func__, i);
                        continue;
                }
                device_create_file(cpu, &dev_attr_cpu_capacity);
        }

        return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

enum asym_cpucap_type { no_asym, asym_thread, asym_core, asym_die };
static enum asym_cpucap_type asym_cpucap = no_asym;
enum share_cap_type { no_share_cap, share_cap_thread, share_cap_core, share_cap_die };
static enum share_cap_type share_cap = no_share_cap;

#ifdef CONFIG_CPU_FREQ
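/*
 * Determine at which topology level the CPUs of each cpufreq policy share
 * their capacity (performance) states, by comparing the policy's
 * related_cpus with the thread, core and die sibling masks. Returns 1 if
 * the cached share_cap level changed, 0 otherwise (including when some
 * CPU has no policy yet).
 */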
int detect_share_cap_flag(void)
{
        int cpu;
        enum share_cap_type share_cap_level = no_share_cap;
        struct cpufreq_policy *policy;

        for_each_possible_cpu(cpu) {
                policy = cpufreq_cpu_get(cpu);

                if (!policy)
                        return 0;

                if (cpumask_equal(topology_sibling_cpumask(cpu),
                                  policy->related_cpus))
                        share_cap_level = share_cap_thread;
                else if (cpumask_equal(topology_core_cpumask(cpu),
                                       policy->related_cpus))
                        share_cap_level = share_cap_core;
                else if (cpumask_equal(cpu_cpu_mask(cpu),
                                       policy->related_cpus))
                        share_cap_level = share_cap_die;

                /* Drop the reference taken by cpufreq_cpu_get(). */
                cpufreq_cpu_put(policy);
        }

        if (share_cap != share_cap_level) {
                share_cap = share_cap_level;
                return 1;
        }

        return 0;
}
#else
int detect_share_cap_flag(void) { return 0; }
#endif

/*
 * Walk the cpu topology to determine sched_domain flags.
 *
 * SD_ASYM_CPUCAPACITY: Indicates the lowest level that spans all cpu
 * capacities found in the system, i.e. the flag is set at the same
 * level for all cpus. The current algorithm implements this by looking
 * for higher capacities, which doesn't work for all conceivable
 * topologies, but don't complicate things until it is necessary.
 */
int topology_detect_flags(void)
{
        unsigned long max_capacity, capacity;
        enum asym_cpucap_type asym_level = no_asym;
        int cpu, die_cpu, core, thread, flags_changed = 0;

        for_each_possible_cpu(cpu) {
                max_capacity = 0;

                if (asym_level >= asym_thread)
                        goto check_core;

                for_each_cpu(thread, topology_sibling_cpumask(cpu)) {
                        capacity = topology_get_cpu_scale(NULL, thread);

                        if (capacity > max_capacity) {
                                if (max_capacity != 0)
                                        asym_level = asym_thread;

                                max_capacity = capacity;
                        }
                }

check_core:
                if (asym_level >= asym_core)
                        goto check_die;

                for_each_cpu(core, topology_core_cpumask(cpu)) {
                        capacity = topology_get_cpu_scale(NULL, core);

                        if (capacity > max_capacity) {
                                if (max_capacity != 0)
                                        asym_level = asym_core;

                                max_capacity = capacity;
                        }
                }
check_die:
                for_each_possible_cpu(die_cpu) {
                        capacity = topology_get_cpu_scale(NULL, die_cpu);

                        if (capacity > max_capacity) {
                                if (max_capacity != 0) {
                                        asym_level = asym_die;
                                        goto done;
                                }
                        }
                }
        }

done:
        if (asym_cpucap != asym_level) {
                asym_cpucap = asym_level;
                flags_changed = 1;
                pr_debug("topology flag change detected\n");
        }

        if (detect_share_cap_flag())
                flags_changed = 1;

        return flags_changed;
}

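/*
 * sched_domain flag getters for the SMT, MC (core) and DIE (cpu) topology
 * levels: report SD_ASYM_CPUCAPACITY and SD_SHARE_CAP_STATES at the level
 * where the detection above found them.
 */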
int topology_smt_flags(void)
{
        int flags = 0;

        if (asym_cpucap == asym_thread)
                flags |= SD_ASYM_CPUCAPACITY;

        if (share_cap == share_cap_thread)
                flags |= SD_SHARE_CAP_STATES;

        return flags;
}

int topology_core_flags(void)
{
        int flags = 0;

        if (asym_cpucap == asym_core)
                flags |= SD_ASYM_CPUCAPACITY;

        if (share_cap == share_cap_core)
                flags |= SD_SHARE_CAP_STATES;

        return flags;
}

int topology_cpu_flags(void)
{
        int flags = 0;

        if (asym_cpucap == asym_die)
                flags |= SD_ASYM_CPUCAPACITY;

        if (share_cap == share_cap_die)
                flags |= SD_SHARE_CAP_STATES;

        return flags;
}

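/*
 * update_topology is set around rebuild_sched_domains() so that
 * topology_update_cpu_topology() reports a pending topology update (e.g.
 * via the architecture's arch_update_cpu_topology()) while the rebuild
 * in the work function below is running.
 */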
static int update_topology;

int topology_update_cpu_topology(void)
{
        return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
        update_topology = 1;
        rebuild_sched_domains();
        pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
        update_topology = 0;
}

static u32 capacity_scale;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
        kfree(raw_capacity);
        raw_capacity = NULL;

        return 0;
}

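/*
 * Normalize the raw capacities so that the biggest CPU in the system ends
 * up at SCHED_CAPACITY_SCALE:
 *
 *      cpu_scale = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) / capacity_scale
 *
 * e.g. raw capacities of 1024 and 578 with capacity_scale = 1024 normalize
 * to cpu_scale values of 1024 and 578.
 */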
void topology_normalize_cpu_scale(void)
{
        u64 capacity;
        int cpu;

        if (!raw_capacity)
                return;

        pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
        mutex_lock(&cpu_scale_mutex);
        for_each_possible_cpu(cpu) {
                capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
                           / capacity_scale;
                topology_set_cpu_scale(cpu, capacity);
                pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
                         cpu, topology_get_cpu_scale(NULL, cpu),
                         raw_capacity[cpu]);
        }
        mutex_unlock(&cpu_scale_mutex);
}

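/*
 * Parse the optional "capacity-dmips-mhz" property of a DT cpu node, for
 * example (the value is illustrative and only meaningful relative to the
 * other cpu nodes):
 *
 *      cpu@0 {
 *              ...
 *              capacity-dmips-mhz = <578>;
 *      };
 *
 * If any cpu node lacks the property, parsing is abandoned and all CPUs
 * fall back to the default capacity of 1024.
 */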
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
        static bool cap_parsing_failed;
        int ret;
        u32 cpu_capacity;

        if (cap_parsing_failed)
                return false;

        ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
                                   &cpu_capacity);
        if (!ret) {
                if (!raw_capacity) {
                        raw_capacity = kcalloc(num_possible_cpus(),
                                               sizeof(*raw_capacity),
                                               GFP_KERNEL);
                        if (!raw_capacity) {
                                pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
                                cap_parsing_failed = true;
                                return false;
                        }
                }
                capacity_scale = max(cpu_capacity, capacity_scale);
                raw_capacity[cpu] = cpu_capacity;
                pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
                         cpu_node, raw_capacity[cpu]);
        } else {
                if (raw_capacity) {
                        pr_err("cpu_capacity: missing %pOF raw capacity\n",
                               cpu_node);
                        pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
                }
                cap_parsing_failed = true;
                free_raw_capacity();
        }

        return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

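/*
 * cpufreq policy notifier: once the policy for a set of CPUs is known,
 * scale their raw DT capacities by the maximum supported frequency,
 *
 *      raw_capacity[cpu] = cpu_scale * cpuinfo.max_freq / 1000
 *
 * and, once every possible CPU has been visited, re-normalize the
 * capacities, initialize the energy model costs and update the
 * sched_domain flags.
 */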
static int
init_cpu_capacity_callback(struct notifier_block *nb,
                           unsigned long val,
                           void *data)
{
        struct cpufreq_policy *policy = data;
        int cpu;

        if (!raw_capacity)
                return 0;

        if (val != CPUFREQ_NOTIFY)
                return 0;

        pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
                 cpumask_pr_args(policy->related_cpus),
                 cpumask_pr_args(cpus_to_visit));

        cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

        for_each_cpu(cpu, policy->related_cpus) {
                raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
                                    policy->cpuinfo.max_freq / 1000UL;
                capacity_scale = max(raw_capacity[cpu], capacity_scale);
        }

        if (cpumask_empty(cpus_to_visit)) {
                topology_normalize_cpu_scale();
                init_sched_energy_costs();
                if (topology_detect_flags())
                        schedule_work(&update_topology_flags_work);
                free_raw_capacity();
                pr_debug("cpu_capacity: parsing done\n");
                schedule_work(&parsing_done_work);
        }

        return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
        .notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
        int ret;

        /*
         * On ACPI-based systems we need to use the default cpu capacity
         * until we have the necessary code to parse the cpu capacity, so
         * skip registering the cpufreq notifier.
         */
        if (!acpi_disabled || !raw_capacity)
                return -EINVAL;

        if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
                pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
                return -ENOMEM;
        }

        cpumask_copy(cpus_to_visit, cpu_possible_mask);

        ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
                                        CPUFREQ_POLICY_NOTIFIER);

        if (ret)
                free_cpumask_var(cpus_to_visit);

        return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
        cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
                                    CPUFREQ_POLICY_NOTIFIER);
        free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif