Blame - src/kernel/linux/v4.14/drivers/base/arch_topology.c - T103

blob: 3b4936c93a516e416e1a9cfb43a554b2ef56ff8c [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	/*
				2	* Arch specific cpu topology information
				3	*
				4	* Copyright (C) 2016, ARM Ltd.
				5	* Written by: Juri Lelli, ARM Ltd.
				6	*
				7	* This file is subject to the terms and conditions of the GNU General Public
				8	* License. See the file "COPYING" in the main directory of this archive
				9	* for more details.
				10	*
				11	* Released under the GPLv2 only.
				12	* SPDX-License-Identifier: GPL-2.0
				13	*/
				14
#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/math64.h>
#include <linux/of.h>
#include <linux/sched/energy.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>
				26
/*
 * Per-CPU frequency scaling state, written by arch_set_freq_scale() and
 * arch_set_max_freq_scale() below.
 */
/* cur_freq / max_freq in SCHED_CAPACITY_SHIFT fixed point. */
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
/* max_freq last passed to arch_set_freq_scale(); zero until the first call. */
DEFINE_PER_CPU(unsigned long, max_cpu_freq);
/* policy_max_freq / max_cpu_freq in SCHED_CAPACITY_SHIFT fixed point. */
DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
				30
				31	void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
				32	unsigned long max_freq)
				33	{
				34	unsigned long scale;
				35	int i;
				36
				37	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
				38
				39	for_each_cpu(i, cpus) {
				40	per_cpu(freq_scale, i) = scale;
				41	per_cpu(max_cpu_freq, i) = max_freq;
				42	}
				43	}
				44
				45	void arch_set_max_freq_scale(struct cpumask *cpus,
				46	unsigned long policy_max_freq)
				47	{
				48	unsigned long scale, max_freq;
				49	int cpu = cpumask_first(cpus);
				50
				51	if (cpu > nr_cpu_ids)
				52	return;
				53
				54	max_freq = per_cpu(max_cpu_freq, cpu);
				55	if (!max_freq)
				56	return;
				57
				58	scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
				59
				60	for_each_cpu(cpu, cpus)
				61	per_cpu(max_freq_scale, cpu) = scale;
				62	}
				63
/* Held while cpu_scale is rewritten by the sysfs store and by normalisation. */
static DEFINE_MUTEX(cpu_scale_mutex);
/* Per-CPU capacity; every CPU starts at SCHED_CAPACITY_SCALE. */
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
				66
				67	void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
				68	{
				69	per_cpu(cpu_scale, cpu) = capacity;
				70	}
				71
				72	static ssize_t cpu_capacity_show(struct device *dev,
				73	struct device_attribute *attr,
				74	char *buf)
				75	{
				76	struct cpu *cpu = container_of(dev, struct cpu, dev);
				77
				78	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
				79	}
				80
/*
 * Work item that runs update_topology_flags_workfn(); it is queued with
 * schedule_work() when topology_detect_flags() returns non-zero.
 */
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
				83
				84	static ssize_t cpu_capacity_store(struct device *dev,
				85	struct device_attribute *attr,
				86	const char *buf,
				87	size_t count)
				88	{
				89	struct cpu *cpu = container_of(dev, struct cpu, dev);
				90	int this_cpu = cpu->dev.id;
				91	int i;
				92	unsigned long new_capacity;
				93	ssize_t ret;
				94	cpumask_var_t mask;
				95
				96	if (!count)
				97	return 0;
				98
				99	ret = kstrtoul(buf, 0, &new_capacity);
				100	if (ret)
				101	return ret;
				102	if (new_capacity > SCHED_CAPACITY_SCALE)
				103	return -EINVAL;
				104
				105	mutex_lock(&cpu_scale_mutex);
				106
				107	if (new_capacity < SCHED_CAPACITY_SCALE) {
				108	int highest_score_cpu = 0;
				109
				110	if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
				111	mutex_unlock(&cpu_scale_mutex);
				112	return -ENOMEM;
				113	}
				114
				115	cpumask_andnot(mask, cpu_online_mask,
				116	topology_core_cpumask(this_cpu));
				117
				118	for_each_cpu(i, mask) {
				119	if (topology_get_cpu_scale(NULL, i) ==
				120	SCHED_CAPACITY_SCALE) {
				121	highest_score_cpu = 1;
				122	break;
				123	}
				124	}
				125
				126	free_cpumask_var(mask);
				127
				128	if (!highest_score_cpu) {
				129	mutex_unlock(&cpu_scale_mutex);
				130	return -EINVAL;
				131	}
				132	}
				133
				134	for_each_cpu(i, topology_core_cpumask(this_cpu))
				135	topology_set_cpu_scale(i, new_capacity);
				136	mutex_unlock(&cpu_scale_mutex);
				137
				138	if (topology_detect_flags())
				139	schedule_work(&update_topology_flags_work);
				140
				141	return count;
				142	}
				143
/*
 * Read/write "cpu_capacity" device attribute; the resulting
 * dev_attr_cpu_capacity is attached to each CPU device by
 * register_cpu_capacity_sysctl().
 */
static DEVICE_ATTR_RW(cpu_capacity);
				145
				146	static int register_cpu_capacity_sysctl(void)
				147	{
				148	int i;
				149	struct device *cpu;
				150
				151	for_each_possible_cpu(i) {
				152	cpu = get_cpu_device(i);
				153	if (!cpu) {
				154	pr_err("%s: too early to get CPU%d device!\n",
				155	__func__, i);
				156	continue;
				157	}
				158	device_create_file(cpu, &dev_attr_cpu_capacity);
				159	}
				160
				161	return 0;
				162	}
				163	subsys_initcall(register_cpu_capacity_sysctl);
				164
/* Topology level at which differing cpu capacities were detected. */
enum asym_cpucap_type { no_asym, asym_thread, asym_core, asym_die };
/* Level most recently computed by topology_detect_flags(). */
static enum asym_cpucap_type asym_cpucap = no_asym;
/* Topology level whose cpumask equals a cpufreq policy's related_cpus. */
enum share_cap_type { no_share_cap, share_cap_thread, share_cap_core, share_cap_die};
/* Level most recently computed by detect_share_cap_flag(). */
static enum share_cap_type share_cap = no_share_cap;
				169
				170	#ifdef CONFIG_CPU_FREQ
				171	int detect_share_cap_flag(void)
				172	{
				173	int cpu;
				174	enum share_cap_type share_cap_level = no_share_cap;
				175	struct cpufreq_policy *policy;
				176
				177	for_each_possible_cpu(cpu) {
				178	policy = cpufreq_cpu_get(cpu);
				179
				180	if (!policy)
				181	return 0;
				182
				183	if (cpumask_equal(topology_sibling_cpumask(cpu),
				184	policy->related_cpus)) {
				185	share_cap_level = share_cap_thread;
				186	continue;
				187	}
				188
				189	if (cpumask_equal(topology_core_cpumask(cpu),
				190	policy->related_cpus)) {
				191	share_cap_level = share_cap_core;
				192	continue;
				193	}
				194
				195	if (cpumask_equal(cpu_cpu_mask(cpu),
				196	policy->related_cpus)) {
				197	share_cap_level = share_cap_die;
				198	continue;
				199	}
				200	}
				201
				202	if (share_cap != share_cap_level) {
				203	share_cap = share_cap_level;
				204	return 1;
				205	}
				206
				207	return 0;
				208	}
				209	#else
				210	int detect_share_cap_flag(void) { return 0; }
				211	#endif
				212
				213	/*
				214	* Walk cpu topology to determine sched_domain flags.
				215	*
				216	* SD_ASYM_CPUCAPACITY: Indicates the lowest level that spans all cpu
				217	* capacities found in the system for all cpus, i.e. the flag is set
				218	* at the same level for all systems. The current algorithm implements
				219	* this by looking for higher capacities, which doesn't work for all
				220	* conceivable topology, but don't complicate things until it is
				221	* necessary.
				222	*/
				223	int topology_detect_flags(void)
				224	{
				225	unsigned long max_capacity, capacity;
				226	enum asym_cpucap_type asym_level = no_asym;
				227	int cpu, die_cpu, core, thread, flags_changed = 0;
				228
				229	for_each_possible_cpu(cpu) {
				230	max_capacity = 0;
				231
				232	if (asym_level >= asym_thread)
				233	goto check_core;
				234
				235	for_each_cpu(thread, topology_sibling_cpumask(cpu)) {
				236	capacity = topology_get_cpu_scale(NULL, thread);
				237
				238	if (capacity > max_capacity) {
				239	if (max_capacity != 0)
				240	asym_level = asym_thread;
				241
				242	max_capacity = capacity;
				243	}
				244	}
				245
				246	check_core:
				247	if (asym_level >= asym_core)
				248	goto check_die;
				249
				250	for_each_cpu(core, topology_core_cpumask(cpu)) {
				251	capacity = topology_get_cpu_scale(NULL, core);
				252
				253	if (capacity > max_capacity) {
				254	if (max_capacity != 0)
				255	asym_level = asym_core;
				256
				257	max_capacity = capacity;
				258	}
				259	}
				260	check_die:
				261	for_each_possible_cpu(die_cpu) {
				262	capacity = topology_get_cpu_scale(NULL, die_cpu);
				263
				264	if (capacity > max_capacity) {
				265	if (max_capacity != 0) {
				266	asym_level = asym_die;
				267	goto done;
				268	}
				269	}
				270	}
				271	}
				272
				273	done:
				274	if (asym_cpucap != asym_level) {
				275	asym_cpucap = asym_level;
				276	flags_changed = 1;
				277	pr_debug("topology flag change detected\n");
				278	}
				279
				280	if (detect_share_cap_flag())
				281	flags_changed = 1;
				282
				283	return flags_changed;
				284	}
				285
				286	int topology_smt_flags(void)
				287	{
				288	int flags = 0;
				289
				290	if (asym_cpucap == asym_thread)
				291	flags \|= SD_ASYM_CPUCAPACITY;
				292
				293	if (share_cap == share_cap_thread)
				294	flags \|= SD_SHARE_CAP_STATES;
				295
				296	return flags;
				297	}
				298
				299	int topology_core_flags(void)
				300	{
				301	int flags = 0;
				302
				303	if (asym_cpucap == asym_core)
				304	flags \|= SD_ASYM_CPUCAPACITY;
				305
				306	if (share_cap == share_cap_core)
				307	flags \|= SD_SHARE_CAP_STATES;
				308
				309	return flags;
				310	}
				311
				312	int topology_cpu_flags(void)
				313	{
				314	int flags = 0;
				315
				316	if (asym_cpucap == asym_die)
				317	flags \|= SD_ASYM_CPUCAPACITY;
				318
				319	if (share_cap == share_cap_die)
				320	flags \|= SD_SHARE_CAP_STATES;
				321
				322	return flags;
				323	}
				324
				325	static int update_topology = 0;
				326
				327	int topology_update_cpu_topology(void)
				328	{
				329	return update_topology;
				330	}
				331
/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 *
 * update_topology is raised only for the duration of the rebuild, so
 * topology_update_cpu_topology() returns 1 while rebuild_sched_domains()
 * runs and 0 again afterwards.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}
				343
/* Largest raw capacity seen so far; divisor used when normalising. */
static u32 capacity_scale;
/*
 * Raw per-CPU capacities indexed by cpu id.  Allocated by
 * topology_parse_cpu_capacity() and released by free_raw_capacity();
 * NULL whenever no raw data is available.
 */
static u32 *raw_capacity;
				346
/*
 * free_raw_capacity() - release the raw capacity array
 *
 * Frees raw_capacity and resets the pointer to NULL so later users can tell
 * that no raw data is available.  Returns int so it can also be used
 * directly as an initcall.
 *
 * Return: always 0.
 */
static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}
				354
				355	void topology_normalize_cpu_scale(void)
				356	{
				357	u64 capacity;
				358	int cpu;
				359
				360	if (!raw_capacity)
				361	return;
				362
				363	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
				364	mutex_lock(&cpu_scale_mutex);
				365	for_each_possible_cpu(cpu) {
				366	capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
				367	/ capacity_scale;
				368	topology_set_cpu_scale(cpu, capacity);
				369	pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
				370	cpu, topology_get_cpu_scale(NULL, cpu),
				371	raw_capacity[cpu]);
				372	}
				373	mutex_unlock(&cpu_scale_mutex);
				374	}
				375
				376	bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
				377	{
				378	static bool cap_parsing_failed;
				379	int ret;
				380	u32 cpu_capacity;
				381
				382	if (cap_parsing_failed)
				383	return false;
				384
				385	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				386	&cpu_capacity);
				387	if (!ret) {
				388	if (!raw_capacity) {
				389	raw_capacity = kcalloc(num_possible_cpus(),
				390	sizeof(*raw_capacity),
				391	GFP_KERNEL);
				392	if (!raw_capacity) {
				393	pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
				394	cap_parsing_failed = true;
				395	return false;
				396	}
				397	}
				398	capacity_scale = max(cpu_capacity, capacity_scale);
				399	raw_capacity[cpu] = cpu_capacity;
				400	pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
				401	cpu_node, raw_capacity[cpu]);
				402	} else {
				403	if (raw_capacity) {
				404	pr_err("cpu_capacity: missing %pOF raw capacity\n",
				405	cpu_node);
				406	pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
				407	}
				408	cap_parsing_failed = true;
				409	free_raw_capacity();
				410	}
				411
				412	return !ret;
				413	}
				414
				415	#ifdef CONFIG_CPU_FREQ
/* CPUs whose cpufreq policy has not yet been seen by the notifier below. */
static cpumask_var_t cpus_to_visit;
/* Deferred teardown of the notifier once every CPU has been visited. */
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
				419
				420	static int
				421	init_cpu_capacity_callback(struct notifier_block *nb,
				422	unsigned long val,
				423	void *data)
				424	{
				425	struct cpufreq_policy *policy = data;
				426	int cpu;
				427
				428	if (!raw_capacity)
				429	return 0;
				430
				431	if (val != CPUFREQ_NOTIFY)
				432	return 0;
				433
				434	pr_debug("cpu_capacity: init cpu capacity for CPUs [%pbl] (to_visit=%pbl)\n",
				435	cpumask_pr_args(policy->related_cpus),
				436	cpumask_pr_args(cpus_to_visit));
				437
				438	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
				439
				440	for_each_cpu(cpu, policy->related_cpus) {
				441	raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
				442	policy->cpuinfo.max_freq / 1000UL;
				443	capacity_scale = max(raw_capacity[cpu], capacity_scale);
				444	}
				445
				446	if (cpumask_empty(cpus_to_visit)) {
				447	topology_normalize_cpu_scale();
				448	init_sched_energy_costs();
				449	if (topology_detect_flags())
				450	schedule_work(&update_topology_flags_work);
				451	free_raw_capacity();
				452	pr_debug("cpu_capacity: parsing done\n");
				453	schedule_work(&parsing_done_work);
				454	}
				455
				456	return 0;
				457	}
				458
/* Notifier block that routes cpufreq policy events to the callback above. */
static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};
				462
				463	static int __init register_cpufreq_notifier(void)
				464	{
				465	int ret;
				466
				467	/*
				468	* on ACPI-based systems we need to use the default cpu capacity
				469	* until we have the necessary code to parse the cpu capacity, so
				470	* skip registering cpufreq notifier.
				471	*/
				472	if (!acpi_disabled \|\| !raw_capacity)
				473	return -EINVAL;
				474
				475	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
				476	pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
				477	return -ENOMEM;
				478	}
				479
				480	cpumask_copy(cpus_to_visit, cpu_possible_mask);
				481
				482	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
				483	CPUFREQ_POLICY_NOTIFIER);
				484
				485	if (ret)
				486	free_cpumask_var(cpus_to_visit);
				487
				488	return ret;
				489	}
				490	core_initcall(register_cpufreq_notifier);
				491
/*
 * parsing_done_workfn() - tear down the capacity initialisation notifier
 * @work: work item (unused)
 *
 * Queued by init_cpu_capacity_callback() once all CPUs have been visited;
 * unregisters the policy notifier and frees cpus_to_visit.
 */
static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}
				498
				499	#else
/*
 * Without CONFIG_CPU_FREQ the notifier path that normally frees the raw
 * capacities is not built, so release them at core_initcall time instead.
 */
core_initcall(free_raw_capacity);
				501	#endif