blob: 73986dd4a1ccacb66b74728162437e42be6e3d6d [file] [log] [blame]
b.liue9582032025-04-17 19:18:16 +08001/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Scheduler internal types and methods:
4 */
5#include <linux/sched.h>
6
7#include <linux/sched/autogroup.h>
8#include <linux/sched/clock.h>
9#include <linux/sched/coredump.h>
10#include <linux/sched/cpufreq.h>
11#include <linux/sched/cputime.h>
12#include <linux/sched/deadline.h>
13#include <linux/sched/debug.h>
14#include <linux/sched/hotplug.h>
15#include <linux/sched/idle.h>
16#include <linux/sched/init.h>
17#include <linux/sched/isolation.h>
18#include <linux/sched/jobctl.h>
19#include <linux/sched/loadavg.h>
20#include <linux/sched/mm.h>
21#include <linux/sched/nohz.h>
22#include <linux/sched/numa_balancing.h>
23#include <linux/sched/prio.h>
24#include <linux/sched/rt.h>
25#include <linux/sched/signal.h>
26#include <linux/sched/smt.h>
27#include <linux/sched/stat.h>
28#include <linux/sched/sysctl.h>
29#include <linux/sched/task.h>
30#include <linux/sched/task_stack.h>
31#include <linux/sched/topology.h>
32#include <linux/sched/user.h>
33#include <linux/sched/wake_q.h>
34#include <linux/sched/xacct.h>
35
36#include <uapi/linux/sched/types.h>
37
38#include <linux/binfmts.h>
39#include <linux/blkdev.h>
40#include <linux/compat.h>
41#include <linux/context_tracking.h>
42#include <linux/cpufreq.h>
43#include <linux/cpuidle.h>
44#include <linux/cpuset.h>
45#include <linux/ctype.h>
46#include <linux/debugfs.h>
47#include <linux/delayacct.h>
48#include <linux/energy_model.h>
49#include <linux/init_task.h>
50#include <linux/kprobes.h>
51#include <linux/kthread.h>
52#include <linux/membarrier.h>
53#include <linux/migrate.h>
54#include <linux/mmu_context.h>
55#include <linux/nmi.h>
56#include <linux/proc_fs.h>
57#include <linux/prefetch.h>
58#include <linux/profile.h>
59#include <linux/psi.h>
60#include <linux/rcupdate_wait.h>
61#include <linux/security.h>
62#include <linux/stop_machine.h>
63#include <linux/suspend.h>
64#include <linux/swait.h>
65#include <linux/syscalls.h>
66#include <linux/task_work.h>
67#include <linux/tsacct_kern.h>
68#include <linux/android_kabi.h>
69
70#include <asm/tlb.h>
71
72#ifdef CONFIG_PARAVIRT
73# include <asm/paravirt.h>
74#endif
75
76#include "cpupri.h"
77#include "cpudeadline.h"
78
79#ifdef CONFIG_SCHED_DEBUG
80# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
81#else
82# define SCHED_WARN_ON(x) ({ (void)(x), 0; })
83#endif
84
85struct rq;
86struct cpuidle_state;
87
88/* task_struct::on_rq states: */
89#define TASK_ON_RQ_QUEUED 1
90#define TASK_ON_RQ_MIGRATING 2
91
92extern __read_mostly int scheduler_running;
93
94extern unsigned long calc_load_update;
95extern atomic_long_t calc_load_tasks;
96
97extern void calc_global_load_tick(struct rq *this_rq);
98extern long calc_load_fold_active(struct rq *this_rq, long adjust);
99
100/*
101 * Helpers for converting nanosecond timing to jiffy resolution
102 */
103#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
104
105/*
106 * Increase resolution of nice-level calculations for 64-bit architectures.
107 * The extra resolution improves shares distribution and load balancing of
108 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
109 * hierarchies, especially on larger systems. This is not a user-visible change
110 * and does not change the user-interface for setting shares/weights.
111 *
112 * We increase resolution only if we have enough bits to allow this increased
113 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
114 * are pretty high and the returns do not justify the increased costs.
115 *
116 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
117 * increase coverage and consistency always enable it on 64-bit platforms.
118 */
119#ifdef CONFIG_64BIT
120# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
121# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
122# define scale_load_down(w) \
123({ \
124 unsigned long __w = (w); \
125 if (__w) \
126 __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
127 __w; \
128})
129#else
130# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
131# define scale_load(w) (w)
132# define scale_load_down(w) (w)
133#endif
134
135/*
136 * Task weight (visible to users) and its load (invisible to users) have
137 * independent resolution, but they should be well calibrated. We use
138 * scale_load() and scale_load_down(w) to convert between them. The
139 * following must be true:
140 *
141 * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
142 *
143 */
144#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
145
146/*
147 * Single value that decides SCHED_DEADLINE internal math precision.
148 * 10 -> just above 1us
149 * 9 -> just above 0.5us
150 */
151#define DL_SCALE 10
152
153/*
154 * Single value that denotes runtime == period, ie unlimited time.
155 */
156#define RUNTIME_INF ((u64)~0ULL)
157
158static inline int idle_policy(int policy)
159{
160 return policy == SCHED_IDLE;
161}
162static inline int fair_policy(int policy)
163{
164 return policy == SCHED_NORMAL || policy == SCHED_BATCH;
165}
166
167static inline int rt_policy(int policy)
168{
169 return policy == SCHED_FIFO || policy == SCHED_RR;
170}
171
172static inline int dl_policy(int policy)
173{
174 return policy == SCHED_DEADLINE;
175}
176static inline bool valid_policy(int policy)
177{
178 return idle_policy(policy) || fair_policy(policy) ||
179 rt_policy(policy) || dl_policy(policy);
180}
181
182static inline int task_has_idle_policy(struct task_struct *p)
183{
184 return idle_policy(p->policy);
185}
186
187static inline int task_has_rt_policy(struct task_struct *p)
188{
189 return rt_policy(p->policy);
190}
191
192static inline int task_has_dl_policy(struct task_struct *p)
193{
194 return dl_policy(p->policy);
195}
196
197#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
198
199/*
200 * !! For sched_setattr_nocheck() (kernel) only !!
201 *
202 * This is actually gross. :(
203 *
204 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
205 * tasks, but still be able to sleep. We need this on platforms that cannot
206 * atomically change clock frequency. Remove once fast switching will be
207 * available on such platforms.
208 *
209 * SUGOV stands for SchedUtil GOVernor.
210 */
211#define SCHED_FLAG_SUGOV 0x10000000
212
213#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
214
215static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
216{
217#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
218 return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
219#else
220 return false;
221#endif
222}
223
224/*
225 * Tells if entity @a should preempt entity @b.
226 */
227static inline bool
228dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
229{
230 return dl_entity_is_special(a) ||
231 dl_time_before(a->deadline, b->deadline);
232}
233
234/*
235 * This is the priority-queue data structure of the RT scheduling class:
236 */
237struct rt_prio_array {
238 DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
239 struct list_head queue[MAX_RT_PRIO];
240};
241
242struct rt_bandwidth {
243 /* nests inside the rq lock: */
244 raw_spinlock_t rt_runtime_lock;
245 ktime_t rt_period;
246 u64 rt_runtime;
247 struct hrtimer rt_period_timer;
248 unsigned int rt_period_active;
249};
250
251void __dl_clear_params(struct task_struct *p);
252
253struct dl_bandwidth {
254 raw_spinlock_t dl_runtime_lock;
255 u64 dl_runtime;
256 u64 dl_period;
257};
258
259static inline int dl_bandwidth_enabled(void)
260{
261 return sysctl_sched_rt_runtime >= 0;
262}
263
264/*
265 * To keep the bandwidth of -deadline tasks under control
266 * we need some place where:
267 * - store the maximum -deadline bandwidth of each cpu;
268 * - cache the fraction of bandwidth that is currently allocated in
269 * each root domain;
270 *
271 * This is all done in the data structure below. It is similar to the
272 * one used for RT-throttling (rt_bandwidth), with the main difference
273 * that, since here we are only interested in admission control, we
274 * do not decrease any runtime while the group "executes", neither we
275 * need a timer to replenish it.
276 *
277 * With respect to SMP, bandwidth is given on a per root domain basis,
278 * meaning that:
279 * - bw (< 100%) is the deadline bandwidth of each CPU;
280 * - total_bw is the currently allocated bandwidth in each root domain;
281 */
282struct dl_bw {
283 raw_spinlock_t lock;
284 u64 bw;
285 u64 total_bw;
286};
287
288static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
289
290static inline
291void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
292{
293 dl_b->total_bw -= tsk_bw;
294 __dl_update(dl_b, (s32)tsk_bw / cpus);
295}
296
297static inline
298void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
299{
300 dl_b->total_bw += tsk_bw;
301 __dl_update(dl_b, -((s32)tsk_bw / cpus));
302}
303
304static inline
305bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
306{
307 return dl_b->bw != -1 &&
308 dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
309}
310
311extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
312extern void init_dl_bw(struct dl_bw *dl_b);
313extern int sched_dl_global_validate(void);
314extern void sched_dl_do_global(void);
315extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
316extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
317extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
318extern bool __checkparam_dl(const struct sched_attr *attr);
319extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
320extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
321extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
322extern bool dl_cpu_busy(unsigned int cpu);
323
324#ifdef CONFIG_CGROUP_SCHED
325
326#include <linux/cgroup.h>
327#include <linux/psi.h>
328
329struct cfs_rq;
330struct rt_rq;
331
332extern struct list_head task_groups;
333
334struct cfs_bandwidth {
335#ifdef CONFIG_CFS_BANDWIDTH
336 raw_spinlock_t lock;
337 ktime_t period;
338 u64 quota;
339 u64 runtime;
340 s64 hierarchical_quota;
341
342 u8 idle;
343 u8 period_active;
344 u8 distribute_running;
345 u8 slack_started;
346 struct hrtimer period_timer;
347 struct hrtimer slack_timer;
348 struct list_head throttled_cfs_rq;
349
350 /* Statistics: */
351 int nr_periods;
352 int nr_throttled;
353 u64 throttled_time;
354#endif
355};
356
357/* Task group related information */
358struct task_group {
359 struct cgroup_subsys_state css;
360
361#ifdef CONFIG_FAIR_GROUP_SCHED
362 /* schedulable entities of this group on each CPU */
363 struct sched_entity **se;
364 /* runqueue "owned" by this group on each CPU */
365 struct cfs_rq **cfs_rq;
366 unsigned long shares;
367
368#ifdef CONFIG_SMP
369 /*
370 * load_avg can be heavily contended at clock tick time, so put
371 * it in its own cacheline separated from the fields above which
372 * will also be accessed at each tick.
373 */
374 atomic_long_t load_avg ____cacheline_aligned;
375#endif
376#endif
377
378#ifdef CONFIG_RT_GROUP_SCHED
379 struct sched_rt_entity **rt_se;
380 struct rt_rq **rt_rq;
381
382 struct rt_bandwidth rt_bandwidth;
383#endif
384
385 struct rcu_head rcu;
386 struct list_head list;
387
388 struct task_group *parent;
389 struct list_head siblings;
390 struct list_head children;
391
392#ifdef CONFIG_SCHED_AUTOGROUP
393 struct autogroup *autogroup;
394#endif
395
396 struct cfs_bandwidth cfs_bandwidth;
397
398#ifdef CONFIG_UCLAMP_TASK_GROUP
399 /* The two decimal precision [%] value requested from user-space */
400 unsigned int uclamp_pct[UCLAMP_CNT];
401 /* Clamp values requested for a task group */
402 struct uclamp_se uclamp_req[UCLAMP_CNT];
403 /* Effective clamp values used for a task group */
404 struct uclamp_se uclamp[UCLAMP_CNT];
405 /* Latency-sensitive flag used for a task group */
406 unsigned int latency_sensitive;
407#endif
408
409 ANDROID_KABI_RESERVE(1);
410 ANDROID_KABI_RESERVE(2);
411 ANDROID_KABI_RESERVE(3);
412 ANDROID_KABI_RESERVE(4);
413};
414
415#ifdef CONFIG_FAIR_GROUP_SCHED
416#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
417
418/*
419 * A weight of 0 or 1 can cause arithmetics problems.
420 * A weight of a cfs_rq is the sum of weights of which entities
421 * are queued on this cfs_rq, so a weight of a entity should not be
422 * too large, so as the shares value of a task group.
423 * (The default weight is 1024 - so there's no practical
424 * limitation from this.)
425 */
426#define MIN_SHARES (1UL << 1)
427#define MAX_SHARES (1UL << 18)
428#endif
429
430typedef int (*tg_visitor)(struct task_group *, void *);
431
432extern int walk_tg_tree_from(struct task_group *from,
433 tg_visitor down, tg_visitor up, void *data);
434
435/*
436 * Iterate the full tree, calling @down when first entering a node and @up when
437 * leaving it for the final time.
438 *
439 * Caller must hold rcu_lock or sufficient equivalent.
440 */
441static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
442{
443 return walk_tg_tree_from(&root_task_group, down, up, data);
444}
445
446extern int tg_nop(struct task_group *tg, void *data);
447
448extern void free_fair_sched_group(struct task_group *tg);
449extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
450extern void online_fair_sched_group(struct task_group *tg);
451extern void unregister_fair_sched_group(struct task_group *tg);
452extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
453 struct sched_entity *se, int cpu,
454 struct sched_entity *parent);
455extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
456
457extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
458extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
459extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
460
461extern void free_rt_sched_group(struct task_group *tg);
462extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
463extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
464 struct sched_rt_entity *rt_se, int cpu,
465 struct sched_rt_entity *parent);
466extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
467extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
468extern long sched_group_rt_runtime(struct task_group *tg);
469extern long sched_group_rt_period(struct task_group *tg);
470extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
471
472extern struct task_group *sched_create_group(struct task_group *parent);
473extern void sched_online_group(struct task_group *tg,
474 struct task_group *parent);
475extern void sched_destroy_group(struct task_group *tg);
476extern void sched_offline_group(struct task_group *tg);
477
478extern void sched_move_task(struct task_struct *tsk);
479
480#ifdef CONFIG_FAIR_GROUP_SCHED
481extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
482
483#ifdef CONFIG_SMP
484extern void set_task_rq_fair(struct sched_entity *se,
485 struct cfs_rq *prev, struct cfs_rq *next);
486#else /* !CONFIG_SMP */
487static inline void set_task_rq_fair(struct sched_entity *se,
488 struct cfs_rq *prev, struct cfs_rq *next) { }
489#endif /* CONFIG_SMP */
490#endif /* CONFIG_FAIR_GROUP_SCHED */
491
492#else /* CONFIG_CGROUP_SCHED */
493
494struct cfs_bandwidth { };
495
496#endif /* CONFIG_CGROUP_SCHED */
497
498/* CFS-related fields in a runqueue */
499struct cfs_rq {
500 struct load_weight load;
501 unsigned long runnable_weight;
502 unsigned int nr_running;
503 unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
504 unsigned int idle_h_nr_running; /* SCHED_IDLE */
505
506 u64 exec_clock;
507 u64 min_vruntime;
508#ifndef CONFIG_64BIT
509 u64 min_vruntime_copy;
510#endif
511
512 struct rb_root_cached tasks_timeline;
513
514 /*
515 * 'curr' points to currently running entity on this cfs_rq.
516 * It is set to NULL otherwise (i.e when none are currently running).
517 */
518 struct sched_entity *curr;
519 struct sched_entity *next;
520 struct sched_entity *last;
521 struct sched_entity *skip;
522
523#ifdef CONFIG_SCHED_DEBUG
524 unsigned int nr_spread_over;
525#endif
526
527#ifdef CONFIG_SMP
528 /*
529 * CFS load tracking
530 */
531 struct sched_avg avg;
532#ifndef CONFIG_64BIT
533 u64 load_last_update_time_copy;
534#endif
535 struct {
536 raw_spinlock_t lock ____cacheline_aligned;
537 int nr;
538 unsigned long load_avg;
539 unsigned long util_avg;
540 unsigned long runnable_sum;
541 } removed;
542
543#ifdef CONFIG_FAIR_GROUP_SCHED
544 unsigned long tg_load_avg_contrib;
545 long propagate;
546 long prop_runnable_sum;
547
548 /*
549 * h_load = weight * f(tg)
550 *
551 * Where f(tg) is the recursive weight fraction assigned to
552 * this group.
553 */
554 unsigned long h_load;
555 u64 last_h_load_update;
556 struct sched_entity *h_load_next;
557#endif /* CONFIG_FAIR_GROUP_SCHED */
558#endif /* CONFIG_SMP */
559
560#ifdef CONFIG_FAIR_GROUP_SCHED
561 struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
562
563 /*
564 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
565 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
566 * (like users, containers etc.)
567 *
568 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
569 * This list is used during load balance.
570 */
571 int on_list;
572 struct list_head leaf_cfs_rq_list;
573 struct task_group *tg; /* group that "owns" this runqueue */
574
575#ifdef CONFIG_CFS_BANDWIDTH
576 int runtime_enabled;
577 s64 runtime_remaining;
578
579 u64 throttled_clock;
580 u64 throttled_clock_pelt;
581 u64 throttled_clock_pelt_time;
582 int throttled;
583 int throttle_count;
584 struct list_head throttled_list;
585#endif /* CONFIG_CFS_BANDWIDTH */
586#endif /* CONFIG_FAIR_GROUP_SCHED */
587};
588
589static inline int rt_bandwidth_enabled(void)
590{
591 return sysctl_sched_rt_runtime >= 0;
592}
593
594/* RT IPI pull logic requires IRQ_WORK */
595#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
596# define HAVE_RT_PUSH_IPI
597#endif
598
599/* Real-Time classes' related field in a runqueue: */
600struct rt_rq {
601 struct rt_prio_array active;
602 unsigned int rt_nr_running;
603 unsigned int rr_nr_running;
604#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
605 struct {
606 int curr; /* highest queued rt task prio */
607#ifdef CONFIG_SMP
608 int next; /* next highest */
609#endif
610 } highest_prio;
611#endif
612#ifdef CONFIG_SMP
613 unsigned long rt_nr_migratory;
614 unsigned long rt_nr_total;
615 int overloaded;
616 struct plist_head pushable_tasks;
617
618#endif /* CONFIG_SMP */
619 int rt_queued;
620
621 int rt_throttled;
622 u64 rt_time;
623 u64 rt_runtime;
624 /* Nests inside the rq lock: */
625 raw_spinlock_t rt_runtime_lock;
626
627#ifdef CONFIG_RT_GROUP_SCHED
628 unsigned long rt_nr_boosted;
629
630 struct rq *rq;
631 struct task_group *tg;
632#endif
633};
634
635static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
636{
637 return rt_rq->rt_queued && rt_rq->rt_nr_running;
638}
639
640/* Deadline class' related fields in a runqueue */
641struct dl_rq {
642 /* runqueue is an rbtree, ordered by deadline */
643 struct rb_root_cached root;
644
645 unsigned long dl_nr_running;
646
647#ifdef CONFIG_SMP
648 /*
649 * Deadline values of the currently executing and the
650 * earliest ready task on this rq. Caching these facilitates
651 * the decision whether or not a ready but not running task
652 * should migrate somewhere else.
653 */
654 struct {
655 u64 curr;
656 u64 next;
657 } earliest_dl;
658
659 unsigned long dl_nr_migratory;
660 int overloaded;
661
662 /*
663 * Tasks on this rq that can be pushed away. They are kept in
664 * an rb-tree, ordered by tasks' deadlines, with caching
665 * of the leftmost (earliest deadline) element.
666 */
667 struct rb_root_cached pushable_dl_tasks_root;
668#else
669 struct dl_bw dl_bw;
670#endif
671 /*
672 * "Active utilization" for this runqueue: increased when a
673 * task wakes up (becomes TASK_RUNNING) and decreased when a
674 * task blocks
675 */
676 u64 running_bw;
677
678 /*
679 * Utilization of the tasks "assigned" to this runqueue (including
680 * the tasks that are in runqueue and the tasks that executed on this
681 * CPU and blocked). Increased when a task moves to this runqueue, and
682 * decreased when the task moves away (migrates, changes scheduling
683 * policy, or terminates).
684 * This is needed to compute the "inactive utilization" for the
685 * runqueue (inactive utilization = this_bw - running_bw).
686 */
687 u64 this_bw;
688 u64 extra_bw;
689
690 /*
691 * Inverse of the fraction of CPU utilization that can be reclaimed
692 * by the GRUB algorithm.
693 */
694 u64 bw_ratio;
695};
696
697#ifdef CONFIG_FAIR_GROUP_SCHED
698/* An entity is a task if it doesn't "own" a runqueue */
699#define entity_is_task(se) (!se->my_q)
700#else
701#define entity_is_task(se) 1
702#endif
703
704#ifdef CONFIG_SMP
705/*
706 * XXX we want to get rid of these helpers and use the full load resolution.
707 */
708static inline long se_weight(struct sched_entity *se)
709{
710 return scale_load_down(se->load.weight);
711}
712
713static inline long se_runnable(struct sched_entity *se)
714{
715 return scale_load_down(se->runnable_weight);
716}
717
718static inline bool sched_asym_prefer(int a, int b)
719{
720 return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
721}
722
723struct perf_domain {
724 struct em_perf_domain *em_pd;
725 struct perf_domain *next;
726 struct rcu_head rcu;
727};
728
729struct max_cpu_capacity {
730 raw_spinlock_t lock;
731 unsigned long val;
732 int cpu;
733};
734
735/* Scheduling group status flags */
736#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
737#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
738
739/*
740 * We add the notion of a root-domain which will be used to define per-domain
741 * variables. Each exclusive cpuset essentially defines an island domain by
742 * fully partitioning the member CPUs from any other cpuset. Whenever a new
743 * exclusive cpuset is created, we also create and attach a new root-domain
744 * object.
745 *
746 */
747struct root_domain {
748 atomic_t refcount;
749 atomic_t rto_count;
750 struct rcu_head rcu;
751 cpumask_var_t span;
752 cpumask_var_t online;
753
754 /*
755 * Indicate pullable load on at least one CPU, e.g:
756 * - More than one runnable task
757 * - Running task is misfit
758 */
759 int overload;
760
761 /* Indicate one or more cpus over-utilized (tipping point) */
762 int overutilized;
763
764 /*
765 * The bit corresponding to a CPU gets set here if such CPU has more
766 * than one runnable -deadline task (as it is below for RT tasks).
767 */
768 cpumask_var_t dlo_mask;
769 atomic_t dlo_count;
770 struct dl_bw dl_bw;
771 struct cpudl cpudl;
772
773#ifdef HAVE_RT_PUSH_IPI
774 /*
775 * For IPI pull requests, loop across the rto_mask.
776 */
777 struct irq_work rto_push_work;
778 raw_spinlock_t rto_lock;
779 /* These are only updated and read within rto_lock */
780 int rto_loop;
781 int rto_cpu;
782 /* These atomics are updated outside of a lock */
783 atomic_t rto_loop_next;
784 atomic_t rto_loop_start;
785#endif
786 /*
787 * The "RT overload" flag: it gets set if a CPU has more than
788 * one runnable RT task.
789 */
790 cpumask_var_t rto_mask;
791 struct cpupri cpupri;
792
793 /* Maximum cpu capacity in the system. */
794 struct max_cpu_capacity max_cpu_capacity;
795
796 /*
797 * NULL-terminated list of performance domains intersecting with the
798 * CPUs of the rd. Protected by RCU.
799 */
800 struct perf_domain __rcu *pd;
801
802 ANDROID_KABI_RESERVE(1);
803 ANDROID_KABI_RESERVE(2);
804 ANDROID_KABI_RESERVE(3);
805 ANDROID_KABI_RESERVE(4);
806};
807
808extern void init_defrootdomain(void);
809extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
810extern int sched_init_domains(const struct cpumask *cpu_map);
811extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
812extern void sched_get_rd(struct root_domain *rd);
813extern void sched_put_rd(struct root_domain *rd);
814
815#ifdef HAVE_RT_PUSH_IPI
816extern void rto_push_irq_work_func(struct irq_work *work);
817#endif
818#endif /* CONFIG_SMP */
819
820#ifdef CONFIG_UCLAMP_TASK
821/*
822 * struct uclamp_bucket - Utilization clamp bucket
823 * @value: utilization clamp value for tasks on this clamp bucket
824 * @tasks: number of RUNNABLE tasks on this clamp bucket
825 *
826 * Keep track of how many tasks are RUNNABLE for a given utilization
827 * clamp value.
828 */
829struct uclamp_bucket {
830 unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
831 unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
832};
833
834/*
835 * struct uclamp_rq - rq's utilization clamp
836 * @value: currently active clamp values for a rq
837 * @bucket: utilization clamp buckets affecting a rq
838 *
839 * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
840 * A clamp value is affecting a rq when there is at least one task RUNNABLE
841 * (or actually running) with that value.
842 *
843 * There are up to UCLAMP_CNT possible different clamp values, currently there
844 * are only two: minimum utilization and maximum utilization.
845 *
846 * All utilization clamping values are MAX aggregated, since:
847 * - for util_min: we want to run the CPU at least at the max of the minimum
848 * utilization required by its currently RUNNABLE tasks.
849 * - for util_max: we want to allow the CPU to run up to the max of the
850 * maximum utilization allowed by its currently RUNNABLE tasks.
851 *
852 * Since on each system we expect only a limited number of different
853 * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
854 * the metrics required to compute all the per-rq utilization clamp values.
855 */
856struct uclamp_rq {
857 unsigned int value;
858 struct uclamp_bucket bucket[UCLAMP_BUCKETS];
859};
860
861DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
862#endif /* CONFIG_UCLAMP_TASK */
863
864/*
865 * This is the main, per-CPU runqueue data structure.
866 *
867 * Locking rule: those places that want to lock multiple runqueues
868 * (such as the load balancing or the thread migration code), lock
869 * acquire operations must be ordered by ascending &runqueue.
870 */
871struct rq {
872 /* runqueue lock: */
873 raw_spinlock_t lock;
874
875 /*
876 * nr_running and cpu_load should be in the same cacheline because
877 * remote CPUs use both these fields when doing load calculation.
878 */
879 unsigned int nr_running;
880#ifdef CONFIG_NUMA_BALANCING
881 unsigned int nr_numa_running;
882 unsigned int nr_preferred_running;
883 unsigned int numa_migrate_on;
884#endif
885#ifdef CONFIG_NO_HZ_COMMON
886#ifdef CONFIG_SMP
887 unsigned long last_load_update_tick;
888 unsigned long last_blocked_load_update_tick;
889 unsigned int has_blocked_load;
890#endif /* CONFIG_SMP */
891 unsigned int nohz_tick_stopped;
892 atomic_t nohz_flags;
893#endif /* CONFIG_NO_HZ_COMMON */
894
895 unsigned long nr_load_updates;
896 u64 nr_switches;
897
898#ifdef CONFIG_UCLAMP_TASK
899 /* Utilization clamp values based on CPU's RUNNABLE tasks */
900 struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
901 unsigned int uclamp_flags;
902#define UCLAMP_FLAG_IDLE 0x01
903#endif
904
905 struct cfs_rq cfs;
906 struct rt_rq rt;
907 struct dl_rq dl;
908
909#ifdef CONFIG_FAIR_GROUP_SCHED
910 /* list of leaf cfs_rq on this CPU: */
911 struct list_head leaf_cfs_rq_list;
912 struct list_head *tmp_alone_branch;
913#endif /* CONFIG_FAIR_GROUP_SCHED */
914
915 /*
916 * This is part of a global counter where only the total sum
917 * over all CPUs matters. A task can increase this counter on
918 * one CPU and if it got migrated afterwards it may decrease
919 * it on another CPU. Always updated under the runqueue lock:
920 */
921 unsigned long nr_uninterruptible;
922
923 struct task_struct *curr;
924 struct task_struct *idle;
925 struct task_struct *stop;
926 unsigned long next_balance;
927 struct mm_struct *prev_mm;
928
929 unsigned int clock_update_flags;
930 u64 clock;
931 /* Ensure that all clocks are in the same cache line */
932 u64 clock_task ____cacheline_aligned;
933 u64 clock_pelt;
934 unsigned long lost_idle_time;
935
936 atomic_t nr_iowait;
937
938#ifdef CONFIG_MEMBARRIER
939 int membarrier_state;
940#endif
941
942#ifdef CONFIG_SMP
943 struct root_domain *rd;
944 struct sched_domain __rcu *sd;
945
946 unsigned long cpu_capacity;
947 unsigned long cpu_capacity_orig;
948
949 struct callback_head *balance_callback;
950
951 unsigned char idle_balance;
952
953 unsigned long misfit_task_load;
954
955 /* For active balancing */
956 int active_balance;
957 int push_cpu;
958 struct cpu_stop_work active_balance_work;
959
960 /* CPU of this runqueue: */
961 int cpu;
962 int online;
963
964 struct list_head cfs_tasks;
965
966 struct sched_avg avg_rt;
967 struct sched_avg avg_dl;
968#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
969 struct sched_avg avg_irq;
970#endif
971 u64 idle_stamp;
972 u64 avg_idle;
973
974 /* This is used to determine avg_idle's max value */
975 u64 max_idle_balance_cost;
976#endif
977
978#ifdef CONFIG_IRQ_TIME_ACCOUNTING
979 u64 prev_irq_time;
980#endif
981#ifdef CONFIG_PARAVIRT
982 u64 prev_steal_time;
983#endif
984#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
985 u64 prev_steal_time_rq;
986#endif
987
988 /* calc_load related fields */
989 unsigned long calc_load_update;
990 long calc_load_active;
991
992#ifdef CONFIG_SCHED_HRTICK
993#ifdef CONFIG_SMP
994 int hrtick_csd_pending;
995 call_single_data_t hrtick_csd;
996#endif
997 struct hrtimer hrtick_timer;
998 ktime_t hrtick_time;
999#endif
1000
1001#ifdef CONFIG_SCHEDSTATS
1002 /* latency stats */
1003 struct sched_info rq_sched_info;
1004 unsigned long long rq_cpu_time;
1005 /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
1006
1007 /* sys_sched_yield() stats */
1008 unsigned int yld_count;
1009
1010 /* schedule() stats */
1011 unsigned int sched_count;
1012 unsigned int sched_goidle;
1013
1014 /* try_to_wake_up() stats */
1015 unsigned int ttwu_count;
1016 unsigned int ttwu_local;
1017#endif
1018
1019#ifdef CONFIG_SMP
1020 struct llist_head wake_list;
1021#endif
1022
1023#ifdef CONFIG_CPU_IDLE
1024 /* Must be inspected within a rcu lock section */
1025 struct cpuidle_state *idle_state;
1026#endif
1027
1028 ANDROID_KABI_RESERVE(1);
1029 ANDROID_KABI_RESERVE(2);
1030 ANDROID_KABI_RESERVE(3);
1031 ANDROID_KABI_RESERVE(4);
1032};
1033
1034#ifdef CONFIG_FAIR_GROUP_SCHED
1035
1036/* CPU runqueue to which this cfs_rq is attached */
1037static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1038{
1039 return cfs_rq->rq;
1040}
1041
1042#else
1043
1044static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1045{
1046 return container_of(cfs_rq, struct rq, cfs);
1047}
1048#endif
1049
1050static inline int cpu_of(struct rq *rq)
1051{
1052#ifdef CONFIG_SMP
1053 return rq->cpu;
1054#else
1055 return 0;
1056#endif
1057}
1058
1059
1060#ifdef CONFIG_SCHED_SMT
1061extern void __update_idle_core(struct rq *rq);
1062
1063static inline void update_idle_core(struct rq *rq)
1064{
1065 if (static_branch_unlikely(&sched_smt_present))
1066 __update_idle_core(rq);
1067}
1068
1069#else
1070static inline void update_idle_core(struct rq *rq) { }
1071#endif
1072
1073DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
1074
1075#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
1076#define this_rq() this_cpu_ptr(&runqueues)
1077#define task_rq(p) cpu_rq(task_cpu(p))
1078#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1079#define raw_rq() raw_cpu_ptr(&runqueues)
1080
1081extern void update_rq_clock(struct rq *rq);
1082
1083static inline u64 __rq_clock_broken(struct rq *rq)
1084{
1085 return READ_ONCE(rq->clock);
1086}
1087
1088/*
1089 * rq::clock_update_flags bits
1090 *
1091 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1092 * call to __schedule(). This is an optimisation to avoid
1093 * neighbouring rq clock updates.
1094 *
1095 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1096 * in effect and calls to update_rq_clock() are being ignored.
1097 *
1098 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1099 * made to update_rq_clock() since the last time rq::lock was pinned.
1100 *
1101 * If inside of __schedule(), clock_update_flags will have been
1102 * shifted left (a left shift is a cheap operation for the fast path
1103 * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
1104 *
1105 * if (rq-clock_update_flags >= RQCF_UPDATED)
1106 *
1107 * to check if %RQCF_UPADTED is set. It'll never be shifted more than
1108 * one position though, because the next rq_unpin_lock() will shift it
1109 * back.
1110 */
1111#define RQCF_REQ_SKIP 0x01
1112#define RQCF_ACT_SKIP 0x02
1113#define RQCF_UPDATED 0x04
1114
1115static inline void assert_clock_updated(struct rq *rq)
1116{
1117 /*
1118 * The only reason for not seeing a clock update since the
1119 * last rq_pin_lock() is if we're currently skipping updates.
1120 */
1121 SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
1122}
1123
1124static inline u64 rq_clock(struct rq *rq)
1125{
1126 lockdep_assert_held(&rq->lock);
1127 assert_clock_updated(rq);
1128
1129 return rq->clock;
1130}
1131
1132static inline u64 rq_clock_task(struct rq *rq)
1133{
1134 lockdep_assert_held(&rq->lock);
1135 assert_clock_updated(rq);
1136
1137 return rq->clock_task;
1138}
1139
1140static inline void rq_clock_skip_update(struct rq *rq)
1141{
1142 lockdep_assert_held(&rq->lock);
1143 rq->clock_update_flags |= RQCF_REQ_SKIP;
1144}
1145
1146/*
1147 * See rt task throttling, which is the only time a skip
1148 * request is cancelled.
1149 */
1150static inline void rq_clock_cancel_skipupdate(struct rq *rq)
1151{
1152 lockdep_assert_held(&rq->lock);
1153 rq->clock_update_flags &= ~RQCF_REQ_SKIP;
1154}
1155
1156struct rq_flags {
1157 unsigned long flags;
1158 struct pin_cookie cookie;
1159#ifdef CONFIG_SCHED_DEBUG
1160 /*
1161 * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
1162 * current pin context is stashed here in case it needs to be
1163 * restored in rq_repin_lock().
1164 */
1165 unsigned int clock_update_flags;
1166#endif
1167};
1168
1169static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
1170{
1171 rf->cookie = lockdep_pin_lock(&rq->lock);
1172
1173#ifdef CONFIG_SCHED_DEBUG
1174 rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
1175 rf->clock_update_flags = 0;
1176#endif
1177}
1178
1179static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
1180{
1181#ifdef CONFIG_SCHED_DEBUG
1182 if (rq->clock_update_flags > RQCF_ACT_SKIP)
1183 rf->clock_update_flags = RQCF_UPDATED;
1184#endif
1185
1186 lockdep_unpin_lock(&rq->lock, rf->cookie);
1187}
1188
1189static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
1190{
1191 lockdep_repin_lock(&rq->lock, rf->cookie);
1192
1193#ifdef CONFIG_SCHED_DEBUG
1194 /*
1195 * Restore the value we stashed in @rf for this pin context.
1196 */
1197 rq->clock_update_flags |= rf->clock_update_flags;
1198#endif
1199}
1200
1201struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1202 __acquires(rq->lock);
1203
1204struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1205 __acquires(p->pi_lock)
1206 __acquires(rq->lock);
1207
1208static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
1209 __releases(rq->lock)
1210{
1211 rq_unpin_lock(rq, rf);
1212 raw_spin_unlock(&rq->lock);
1213}
1214
1215static inline void
1216task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1217 __releases(rq->lock)
1218 __releases(p->pi_lock)
1219{
1220 rq_unpin_lock(rq, rf);
1221 raw_spin_unlock(&rq->lock);
1222 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
1223}
1224
1225static inline void
1226rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
1227 __acquires(rq->lock)
1228{
1229 raw_spin_lock_irqsave(&rq->lock, rf->flags);
1230 rq_pin_lock(rq, rf);
1231}
1232
1233static inline void
1234rq_lock_irq(struct rq *rq, struct rq_flags *rf)
1235 __acquires(rq->lock)
1236{
1237 raw_spin_lock_irq(&rq->lock);
1238 rq_pin_lock(rq, rf);
1239}
1240
1241static inline void
1242rq_lock(struct rq *rq, struct rq_flags *rf)
1243 __acquires(rq->lock)
1244{
1245 raw_spin_lock(&rq->lock);
1246 rq_pin_lock(rq, rf);
1247}
1248
1249static inline void
1250rq_relock(struct rq *rq, struct rq_flags *rf)
1251 __acquires(rq->lock)
1252{
1253 raw_spin_lock(&rq->lock);
1254 rq_repin_lock(rq, rf);
1255}
1256
1257static inline void
1258rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
1259 __releases(rq->lock)
1260{
1261 rq_unpin_lock(rq, rf);
1262 raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
1263}
1264
1265static inline void
1266rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
1267 __releases(rq->lock)
1268{
1269 rq_unpin_lock(rq, rf);
1270 raw_spin_unlock_irq(&rq->lock);
1271}
1272
1273static inline void
1274rq_unlock(struct rq *rq, struct rq_flags *rf)
1275 __releases(rq->lock)
1276{
1277 rq_unpin_lock(rq, rf);
1278 raw_spin_unlock(&rq->lock);
1279}
1280
1281static inline struct rq *
1282this_rq_lock_irq(struct rq_flags *rf)
1283 __acquires(rq->lock)
1284{
1285 struct rq *rq;
1286
1287 local_irq_disable();
1288 rq = this_rq();
1289 rq_lock(rq, rf);
1290 return rq;
1291}
1292
1293#ifdef CONFIG_NUMA
1294enum numa_topology_type {
1295 NUMA_DIRECT,
1296 NUMA_GLUELESS_MESH,
1297 NUMA_BACKPLANE,
1298};
1299extern enum numa_topology_type sched_numa_topology_type;
1300extern int sched_max_numa_distance;
1301extern bool find_numa_distance(int distance);
1302extern void sched_init_numa(void);
1303extern void sched_domains_numa_masks_set(unsigned int cpu);
1304extern void sched_domains_numa_masks_clear(unsigned int cpu);
1305extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
1306#else
1307static inline void sched_init_numa(void) { }
1308static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
1309static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
1310static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
1311{
1312 return nr_cpu_ids;
1313}
1314#endif
1315
1316#ifdef CONFIG_NUMA_BALANCING
1317/* The regions in numa_faults array from task_struct */
1318enum numa_faults_stats {
1319 NUMA_MEM = 0,
1320 NUMA_CPU,
1321 NUMA_MEMBUF,
1322 NUMA_CPUBUF
1323};
1324extern void sched_setnuma(struct task_struct *p, int node);
1325extern int migrate_task_to(struct task_struct *p, int cpu);
1326extern int migrate_swap(struct task_struct *p, struct task_struct *t,
1327 int cpu, int scpu);
1328extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
1329#else
1330static inline void
1331init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1332{
1333}
1334#endif /* CONFIG_NUMA_BALANCING */
1335
1336#ifdef CONFIG_SMP
1337
1338static inline void
1339queue_balance_callback(struct rq *rq,
1340 struct callback_head *head,
1341 void (*func)(struct rq *rq))
1342{
1343 lockdep_assert_held(&rq->lock);
1344
1345 if (unlikely(head->next))
1346 return;
1347
1348 head->func = (void (*)(struct callback_head *))func;
1349 head->next = rq->balance_callback;
1350 rq->balance_callback = head;
1351}
1352
1353extern void sched_ttwu_pending(void);
1354
1355#define rcu_dereference_check_sched_domain(p) \
1356 rcu_dereference_check((p), \
1357 lockdep_is_held(&sched_domains_mutex))
1358
1359/*
1360 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
1361 * See destroy_sched_domains: call_rcu for details.
1362 *
1363 * The domain tree of any CPU may only be accessed from within
1364 * preempt-disabled sections.
1365 */
1366#define for_each_domain(cpu, __sd) \
1367 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1368 __sd; __sd = __sd->parent)
1369
1370/**
1371 * highest_flag_domain - Return highest sched_domain containing flag.
1372 * @cpu: The CPU whose highest level of sched domain is to
1373 * be returned.
1374 * @flag: The flag to check for the highest sched_domain
1375 * for the given CPU.
1376 *
1377 * Returns the highest sched_domain of a CPU which contains the given flag.
1378 */
1379static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
1380{
1381 struct sched_domain *sd, *hsd = NULL;
1382
1383 for_each_domain(cpu, sd) {
1384 if (!(sd->flags & flag))
1385 break;
1386 hsd = sd;
1387 }
1388
1389 return hsd;
1390}
1391
1392static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
1393{
1394 struct sched_domain *sd;
1395
1396 for_each_domain(cpu, sd) {
1397 if (sd->flags & flag)
1398 break;
1399 }
1400
1401 return sd;
1402}
1403
1404DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
1405DECLARE_PER_CPU(int, sd_llc_size);
1406DECLARE_PER_CPU(int, sd_llc_id);
1407DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
1408DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
1409DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
1410DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
1411extern struct static_key_false sched_asym_cpucapacity;
1412
1413struct sched_group_capacity {
1414 atomic_t ref;
1415 /*
1416 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
1417 * for a single CPU.
1418 */
1419 unsigned long capacity;
1420 unsigned long min_capacity; /* Min per-CPU capacity in group */
1421 unsigned long max_capacity; /* Max per-CPU capacity in group */
1422 unsigned long next_update;
1423 int imbalance; /* XXX unrelated to capacity but shared group state */
1424
1425#ifdef CONFIG_SCHED_DEBUG
1426 int id;
1427#endif
1428
1429 unsigned long cpumask[0]; /* Balance mask */
1430};
1431
1432struct sched_group {
1433 struct sched_group *next; /* Must be a circular list */
1434 atomic_t ref;
1435
1436 unsigned int group_weight;
1437 struct sched_group_capacity *sgc;
1438 int asym_prefer_cpu; /* CPU of highest priority in group */
1439
1440 /*
1441 * The CPUs this group covers.
1442 *
1443 * NOTE: this field is variable length. (Allocated dynamically
1444 * by attaching extra space to the end of the structure,
1445 * depending on how many CPUs the kernel has booted up with)
1446 */
1447 unsigned long cpumask[0];
1448};
1449
1450static inline struct cpumask *sched_group_span(struct sched_group *sg)
1451{
1452 return to_cpumask(sg->cpumask);
1453}
1454
1455/*
1456 * See build_balance_mask().
1457 */
1458static inline struct cpumask *group_balance_mask(struct sched_group *sg)
1459{
1460 return to_cpumask(sg->sgc->cpumask);
1461}
1462
1463/**
1464 * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
1465 * @group: The group whose first CPU is to be returned.
1466 */
1467static inline unsigned int group_first_cpu(struct sched_group *group)
1468{
1469 return cpumask_first(sched_group_span(group));
1470}
1471
1472extern int group_balance_cpu(struct sched_group *sg);
1473
1474#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
1475void register_sched_domain_sysctl(void);
1476void dirty_sched_domain_sysctl(int cpu);
1477void unregister_sched_domain_sysctl(void);
1478#else
1479static inline void register_sched_domain_sysctl(void)
1480{
1481}
1482static inline void dirty_sched_domain_sysctl(int cpu)
1483{
1484}
1485static inline void unregister_sched_domain_sysctl(void)
1486{
1487}
1488#endif
1489
1490extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
1491
1492#else
1493
1494static inline void sched_ttwu_pending(void) { }
1495
1496static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
1497
1498#endif /* CONFIG_SMP */
1499
1500#include "stats.h"
1501#include "autogroup.h"
1502
1503#ifdef CONFIG_CGROUP_SCHED
1504
1505/*
1506 * Return the group to which this tasks belongs.
1507 *
1508 * We cannot use task_css() and friends because the cgroup subsystem
1509 * changes that value before the cgroup_subsys::attach() method is called,
1510 * therefore we cannot pin it and might observe the wrong value.
1511 *
1512 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
1513 * core changes this before calling sched_move_task().
1514 *
1515 * Instead we use a 'copy' which is updated from sched_move_task() while
1516 * holding both task_struct::pi_lock and rq::lock.
1517 */
1518static inline struct task_group *task_group(struct task_struct *p)
1519{
1520 return p->sched_task_group;
1521}
1522
1523/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
1524static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
1525{
1526#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
1527 struct task_group *tg = task_group(p);
1528#endif
1529
1530#ifdef CONFIG_FAIR_GROUP_SCHED
1531 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
1532 p->se.cfs_rq = tg->cfs_rq[cpu];
1533 p->se.parent = tg->se[cpu];
1534#endif
1535
1536#ifdef CONFIG_RT_GROUP_SCHED
1537 p->rt.rt_rq = tg->rt_rq[cpu];
1538 p->rt.parent = tg->rt_se[cpu];
1539#endif
1540}
1541
1542#else /* CONFIG_CGROUP_SCHED */
1543
1544static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
1545static inline struct task_group *task_group(struct task_struct *p)
1546{
1547 return NULL;
1548}
1549
1550#endif /* CONFIG_CGROUP_SCHED */
1551
1552static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1553{
1554 set_task_rq(p, cpu);
1555#ifdef CONFIG_SMP
1556 /*
1557 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1558 * successfully executed on another CPU. We must ensure that updates of
1559 * per-task data have been completed by this moment.
1560 */
1561 smp_wmb();
1562#ifdef CONFIG_THREAD_INFO_IN_TASK
1563 WRITE_ONCE(p->cpu, cpu);
1564#else
1565 WRITE_ONCE(task_thread_info(p)->cpu, cpu);
1566#endif
1567 p->wake_cpu = cpu;
1568#endif
1569}
1570
1571/*
1572 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
1573 */
1574#ifdef CONFIG_SCHED_DEBUG
1575# include <linux/static_key.h>
1576# define const_debug __read_mostly
1577#else
1578# define const_debug const
1579#endif
1580
1581#define SCHED_FEAT(name, enabled) \
1582 __SCHED_FEAT_##name ,
1583
1584enum {
1585#include "features.h"
1586 __SCHED_FEAT_NR,
1587};
1588
1589#undef SCHED_FEAT
1590
1591#ifdef CONFIG_SCHED_DEBUG
1592
1593/*
1594 * To support run-time toggling of sched features, all the translation units
1595 * (but core.c) reference the sysctl_sched_features defined in core.c.
1596 */
1597extern const_debug unsigned int sysctl_sched_features;
1598
1599#ifdef CONFIG_JUMP_LABEL
1600#define SCHED_FEAT(name, enabled) \
1601static __always_inline bool static_branch_##name(struct static_key *key) \
1602{ \
1603 return static_key_##enabled(key); \
1604}
1605
1606#include "features.h"
1607#undef SCHED_FEAT
1608
1609extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
1610#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
1611
1612#else /* !CONFIG_JUMP_LABEL */
1613
1614#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1615
1616#endif /* CONFIG_JUMP_LABEL */
1617
1618#else /* !SCHED_DEBUG */
1619
1620/*
1621 * Each translation unit has its own copy of sysctl_sched_features to allow
1622 * constants propagation at compile time and compiler optimization based on
1623 * features default.
1624 */
1625#define SCHED_FEAT(name, enabled) \
1626 (1UL << __SCHED_FEAT_##name) * enabled |
1627static const_debug __maybe_unused unsigned int sysctl_sched_features =
1628#include "features.h"
1629 0;
1630#undef SCHED_FEAT
1631
1632#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1633
1634#endif /* SCHED_DEBUG */
1635
1636extern struct static_key_false sched_numa_balancing;
1637extern struct static_key_false sched_schedstats;
1638
1639static inline u64 global_rt_period(void)
1640{
1641 return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
1642}
1643
1644static inline u64 global_rt_runtime(void)
1645{
1646 if (sysctl_sched_rt_runtime < 0)
1647 return RUNTIME_INF;
1648
1649 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
1650}
1651
1652static inline int task_current(struct rq *rq, struct task_struct *p)
1653{
1654 return rq->curr == p;
1655}
1656
1657static inline int task_running(struct rq *rq, struct task_struct *p)
1658{
1659#ifdef CONFIG_SMP
1660 return p->on_cpu;
1661#else
1662 return task_current(rq, p);
1663#endif
1664}
1665
1666static inline int task_on_rq_queued(struct task_struct *p)
1667{
1668 return p->on_rq == TASK_ON_RQ_QUEUED;
1669}
1670
1671static inline int task_on_rq_migrating(struct task_struct *p)
1672{
1673 return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
1674}
1675
1676/*
1677 * wake flags
1678 */
1679#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
1680#define WF_FORK 0x02 /* Child wakeup after fork */
1681#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
1682
1683/*
1684 * To aid in avoiding the subversion of "niceness" due to uneven distribution
1685 * of tasks with abnormal "nice" values across CPUs the contribution that
1686 * each task makes to its run queue's load is weighted according to its
1687 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
1688 * scaled version of the new time slice allocation that they receive on time
1689 * slice expiry etc.
1690 */
1691
1692#define WEIGHT_IDLEPRIO 3
1693#define WMULT_IDLEPRIO 1431655765
1694
1695extern const int sched_prio_to_weight[40];
1696extern const u32 sched_prio_to_wmult[40];
1697
1698/*
1699 * {de,en}queue flags:
1700 *
1701 * DEQUEUE_SLEEP - task is no longer runnable
1702 * ENQUEUE_WAKEUP - task just became runnable
1703 *
1704 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
1705 * are in a known state which allows modification. Such pairs
1706 * should preserve as much state as possible.
1707 *
1708 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
1709 * in the runqueue.
1710 *
1711 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
1712 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
1713 * ENQUEUE_MIGRATED - the task was migrated during wakeup
1714 *
1715 */
1716
1717#define DEQUEUE_SLEEP 0x01
1718#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
1719#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
1720#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
1721
1722#define ENQUEUE_WAKEUP 0x01
1723#define ENQUEUE_RESTORE 0x02
1724#define ENQUEUE_MOVE 0x04
1725#define ENQUEUE_NOCLOCK 0x08
1726
1727#define ENQUEUE_HEAD 0x10
1728#define ENQUEUE_REPLENISH 0x20
1729#ifdef CONFIG_SMP
1730#define ENQUEUE_MIGRATED 0x40
1731#else
1732#define ENQUEUE_MIGRATED 0x00
1733#endif
1734
1735#define RETRY_TASK ((void *)-1UL)
1736
1737struct sched_class {
1738 const struct sched_class *next;
1739
1740#ifdef CONFIG_UCLAMP_TASK
1741 int uclamp_enabled;
1742#endif
1743
1744 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1745 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1746 void (*yield_task) (struct rq *rq);
1747 bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
1748
1749 void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
1750
1751 /*
1752 * Both @prev and @rf are optional and may be NULL, in which case the
1753 * caller must already have invoked put_prev_task(rq, prev, rf).
1754 *
1755 * Otherwise it is the responsibility of the pick_next_task() to call
1756 * put_prev_task() on the @prev task or something equivalent, IFF it
1757 * returns a next task.
1758 *
1759 * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
1760 * higher prio class has runnable tasks.
1761 */
1762 struct task_struct * (*pick_next_task)(struct rq *rq,
1763 struct task_struct *prev,
1764 struct rq_flags *rf);
1765 void (*put_prev_task)(struct rq *rq, struct task_struct *p);
1766 void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
1767
1768#ifdef CONFIG_SMP
1769 int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
1770 int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
1771 void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
1772
1773 void (*task_woken)(struct rq *this_rq, struct task_struct *task);
1774
1775 void (*set_cpus_allowed)(struct task_struct *p,
1776 const struct cpumask *newmask);
1777
1778 void (*rq_online)(struct rq *rq);
1779 void (*rq_offline)(struct rq *rq);
1780#endif
1781
1782 void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
1783 void (*task_fork)(struct task_struct *p);
1784 void (*task_dead)(struct task_struct *p);
1785
1786 /*
1787 * The switched_from() call is allowed to drop rq->lock, therefore we
1788 * cannot assume the switched_from/switched_to pair is serliazed by
1789 * rq->lock. They are however serialized by p->pi_lock.
1790 */
1791 void (*switched_from)(struct rq *this_rq, struct task_struct *task);
1792 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1793 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1794 int oldprio);
1795
1796 unsigned int (*get_rr_interval)(struct rq *rq,
1797 struct task_struct *task);
1798
1799 void (*update_curr)(struct rq *rq);
1800
1801#define TASK_SET_GROUP 0
1802#define TASK_MOVE_GROUP 1
1803
1804#ifdef CONFIG_FAIR_GROUP_SCHED
1805 void (*task_change_group)(struct task_struct *p, int type);
1806#endif
1807};
1808
1809static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
1810{
1811 WARN_ON_ONCE(rq->curr != prev);
1812 prev->sched_class->put_prev_task(rq, prev);
1813}
1814
1815static inline void set_next_task(struct rq *rq, struct task_struct *next)
1816{
1817 WARN_ON_ONCE(rq->curr != next);
1818 next->sched_class->set_next_task(rq, next, false);
1819}
1820
1821#ifdef CONFIG_SMP
1822#define sched_class_highest (&stop_sched_class)
1823#else
1824#define sched_class_highest (&dl_sched_class)
1825#endif
1826
1827#define for_class_range(class, _from, _to) \
1828 for (class = (_from); class != (_to); class = class->next)
1829
1830#define for_each_class(class) \
1831 for_class_range(class, sched_class_highest, NULL)
1832
1833extern const struct sched_class stop_sched_class;
1834extern const struct sched_class dl_sched_class;
1835extern const struct sched_class rt_sched_class;
1836extern const struct sched_class fair_sched_class;
1837extern const struct sched_class idle_sched_class;
1838
1839static inline bool sched_stop_runnable(struct rq *rq)
1840{
1841 return rq->stop && task_on_rq_queued(rq->stop);
1842}
1843
1844static inline bool sched_dl_runnable(struct rq *rq)
1845{
1846 return rq->dl.dl_nr_running > 0;
1847}
1848
1849static inline bool sched_rt_runnable(struct rq *rq)
1850{
1851 return rq->rt.rt_queued > 0;
1852}
1853
1854static inline bool sched_fair_runnable(struct rq *rq)
1855{
1856 return rq->cfs.nr_running > 0;
1857}
1858
1859#ifdef CONFIG_SMP
1860
1861extern void update_group_capacity(struct sched_domain *sd, int cpu);
1862
1863extern void trigger_load_balance(struct rq *rq);
1864
1865extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
1866
1867#endif
1868
1869#ifdef CONFIG_CPU_IDLE
1870static inline void idle_set_state(struct rq *rq,
1871 struct cpuidle_state *idle_state)
1872{
1873 rq->idle_state = idle_state;
1874}
1875
1876static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1877{
1878 SCHED_WARN_ON(!rcu_read_lock_held());
1879
1880 return rq->idle_state;
1881}
1882#else
1883static inline void idle_set_state(struct rq *rq,
1884 struct cpuidle_state *idle_state)
1885{
1886}
1887
1888static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1889{
1890 return NULL;
1891}
1892#endif
1893
1894extern void schedule_idle(void);
1895
1896extern void sysrq_sched_debug_show(void);
1897extern void sched_init_granularity(void);
1898extern void update_max_interval(void);
1899
1900extern void init_sched_dl_class(void);
1901extern void init_sched_rt_class(void);
1902extern void init_sched_fair_class(void);
1903
1904extern void reweight_task(struct task_struct *p, int prio);
1905
1906extern void resched_curr(struct rq *rq);
1907extern void resched_cpu(int cpu);
1908
1909extern struct rt_bandwidth def_rt_bandwidth;
1910extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
1911
1912extern struct dl_bandwidth def_dl_bandwidth;
1913extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
1914extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
1915extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
1916extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
1917
1918#define BW_SHIFT 20
1919#define BW_UNIT (1 << BW_SHIFT)
1920#define RATIO_SHIFT 8
1921#define MAX_BW_BITS (64 - BW_SHIFT)
1922#define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
1923unsigned long to_ratio(u64 period, u64 runtime);
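/*
 * Worked example of the fixed point format above: a reservation of
 * runtime = 10 ms every period = 100 ms becomes
 *
 *	to_ratio(100 * NSEC_PER_MSEC, 10 * NSEC_PER_MSEC)
 *		= (10 << BW_SHIFT) / 100 = 104857 =~ 0.1 * BW_UNIT
 */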
1924
1925extern void init_entity_runnable_average(struct sched_entity *se);
1926extern void post_init_entity_util_avg(struct task_struct *p);
1927
1928#ifdef CONFIG_NO_HZ_FULL
1929extern bool sched_can_stop_tick(struct rq *rq);
1930extern int __init sched_tick_offload_init(void);
1931
1932/*
1933 * The tick may be needed by tasks in the runqueue depending on their policy
1934 * and requirements. If the tick is needed, send the target CPU an IPI to kick
1935 * it out of NOHZ mode if necessary.
1936 */
1937static inline void sched_update_tick_dependency(struct rq *rq)
1938{
1939 int cpu;
1940
1941 if (!tick_nohz_full_enabled())
1942 return;
1943
1944 cpu = cpu_of(rq);
1945
1946 if (!tick_nohz_full_cpu(cpu))
1947 return;
1948
1949 if (sched_can_stop_tick(rq))
1950 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
1951 else
1952 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
1953}
1954#else
1955static inline int sched_tick_offload_init(void) { return 0; }
1956static inline void sched_update_tick_dependency(struct rq *rq) { }
1957#endif
1958
1959static inline void add_nr_running(struct rq *rq, unsigned count)
1960{
1961 unsigned prev_nr = rq->nr_running;
1962
1963 rq->nr_running = prev_nr + count;
1964
1965#ifdef CONFIG_SMP
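	/*
	 * Transitioning from a single runnable task to two or more: flag
	 * the root domain as overloaded so that idle CPUs doing
	 * (newly-)idle load balancing consider pulling from this rq.
	 */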
1966 if (prev_nr < 2 && rq->nr_running >= 2) {
1967 if (!READ_ONCE(rq->rd->overload))
1968 WRITE_ONCE(rq->rd->overload, 1);
1969 }
1970#endif
1971
1972 sched_update_tick_dependency(rq);
1973}
1974
1975static inline void sub_nr_running(struct rq *rq, unsigned count)
1976{
1977 rq->nr_running -= count;
1978	/* Check whether the tick is still needed for preemption */
1979 sched_update_tick_dependency(rq);
1980}
1981
1982extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
1983extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
1984
1985extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
1986
1987extern const_debug unsigned int sysctl_sched_nr_migrate;
1988extern const_debug unsigned int sysctl_sched_migration_cost;
1989
1990#ifdef CONFIG_SCHED_HRTICK
1991
1992/*
1993 * Use hrtick when:
1994 * - enabled by features
1995 * - hrtimer is actually high res and the CPU is active
1996 */
1997static inline int hrtick_enabled(struct rq *rq)
1998{
1999 if (!sched_feat(HRTICK))
2000 return 0;
2001 if (!cpu_active(cpu_of(rq)))
2002 return 0;
2003 return hrtimer_is_hres_active(&rq->hrtick_timer);
2004}
2005
2006void hrtick_start(struct rq *rq, u64 delay);
2007
2008#else
2009
2010static inline int hrtick_enabled(struct rq *rq)
2011{
2012 return 0;
2013}
2014
2015#endif /* CONFIG_SCHED_HRTICK */
2016
2017#ifndef arch_scale_freq_capacity
2018static __always_inline
2019unsigned long arch_scale_freq_capacity(int cpu)
2020{
2021 return SCHED_CAPACITY_SCALE;
2022}
2023#endif
2024
2025#ifndef arch_scale_max_freq_capacity
2026struct sched_domain;
2027static __always_inline
2028unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
2029{
2030 return SCHED_CAPACITY_SCALE;
2031}
2032#endif
2033
2034#ifdef CONFIG_SMP
2035#ifdef CONFIG_PREEMPTION
2036
2037static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
2038
2039/*
2040 * fair double_lock_balance: Safely acquires both rq->locks in a fair
2041 * way at the expense of forcing extra atomic operations in all
2042 * invocations. This assures that the double_lock is acquired using the
2043 * same underlying policy as the spinlock_t on this architecture, which
2044 * reduces latency compared to the unfair variant below. However, it
2045 * also adds more overhead and therefore may reduce throughput.
2046 */
2047static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2048 __releases(this_rq->lock)
2049 __acquires(busiest->lock)
2050 __acquires(this_rq->lock)
2051{
2052 raw_spin_unlock(&this_rq->lock);
2053 double_rq_lock(this_rq, busiest);
2054
2055 return 1;
2056}
2057
2058#else
2059/*
2060 * Unfair double_lock_balance: Optimizes throughput at the expense of
2061 * latency by eliminating extra atomic operations when the locks are
2062 * already in proper order on entry. This favors lower CPU-ids and will
2063 * grant the double lock to lower CPUs over higher ids under contention,
2064 * regardless of entry order into the function.
2065 */
2066static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2067 __releases(this_rq->lock)
2068 __acquires(busiest->lock)
2069 __acquires(this_rq->lock)
2070{
2071 int ret = 0;
2072
2073 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
2074 if (busiest < this_rq) {
2075 raw_spin_unlock(&this_rq->lock);
2076 raw_spin_lock(&busiest->lock);
2077 raw_spin_lock_nested(&this_rq->lock,
2078 SINGLE_DEPTH_NESTING);
2079 ret = 1;
2080 } else
2081 raw_spin_lock_nested(&busiest->lock,
2082 SINGLE_DEPTH_NESTING);
2083 }
2084 return ret;
2085}
2086
2087#endif /* CONFIG_PREEMPTION */
2088
2089/*
2090 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2091 */
2092static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2093{
2094 if (unlikely(!irqs_disabled())) {
2095 /* printk() doesn't work well under rq->lock */
2096 raw_spin_unlock(&this_rq->lock);
2097 BUG_ON(1);
2098 }
2099
2100 return _double_lock_balance(this_rq, busiest);
2101}
2102
2103static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2104 __releases(busiest->lock)
2105{
2106 raw_spin_unlock(&busiest->lock);
2107 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2108}
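/*
 * Sketch of the expected calling pattern (illustrative only): with this_rq
 * already locked, a balance path does roughly
 *
 *	double_lock_balance(this_rq, busiest);
 *	... move tasks, revalidating state since this_rq->lock may have
 *	    been dropped and re-taken above ...
 *	double_unlock_balance(this_rq, busiest);
 */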
2109
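/*
 * The helpers below always take the two locks in address order, so that two
 * CPUs locking the same pair concurrently agree on the order and cannot
 * ABBA-deadlock.
 */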
2110static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
2111{
2112 if (l1 > l2)
2113 swap(l1, l2);
2114
2115 spin_lock(l1);
2116 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2117}
2118
2119static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
2120{
2121 if (l1 > l2)
2122 swap(l1, l2);
2123
2124 spin_lock_irq(l1);
2125 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2126}
2127
2128static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
2129{
2130 if (l1 > l2)
2131 swap(l1, l2);
2132
2133 raw_spin_lock(l1);
2134 raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2135}
2136
2137/*
2138 * double_rq_lock - safely lock two runqueues
2139 *
2140 * Note this does not disable interrupts like task_rq_lock();
2141 * you need to do so manually before calling.
2142 */
2143static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2144 __acquires(rq1->lock)
2145 __acquires(rq2->lock)
2146{
2147 BUG_ON(!irqs_disabled());
2148 if (rq1 == rq2) {
2149 raw_spin_lock(&rq1->lock);
2150 __acquire(rq2->lock); /* Fake it out ;) */
2151 } else {
2152 if (rq1 < rq2) {
2153 raw_spin_lock(&rq1->lock);
2154 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2155 } else {
2156 raw_spin_lock(&rq2->lock);
2157 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2158 }
2159 }
2160}
2161
2162/*
2163 * double_rq_unlock - safely unlock two runqueues
2164 *
2165 * Note this does not restore interrupts like task_rq_unlock();
2166 * you need to do so manually after calling.
2167 */
2168static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2169 __releases(rq1->lock)
2170 __releases(rq2->lock)
2171{
2172 raw_spin_unlock(&rq1->lock);
2173 if (rq1 != rq2)
2174 raw_spin_unlock(&rq2->lock);
2175 else
2176 __release(rq2->lock);
2177}
2178
2179extern void set_rq_online (struct rq *rq);
2180extern void set_rq_offline(struct rq *rq);
2181extern bool sched_smp_initialized;
2182
2183#else /* CONFIG_SMP */
2184
2185/*
2186 * double_rq_lock - safely lock two runqueues
2187 *
2188 * Note this does not disable interrupts like task_rq_lock();
2189 * you need to do so manually before calling.
2190 */
2191static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2192 __acquires(rq1->lock)
2193 __acquires(rq2->lock)
2194{
2195 BUG_ON(!irqs_disabled());
2196 BUG_ON(rq1 != rq2);
2197 raw_spin_lock(&rq1->lock);
2198 __acquire(rq2->lock); /* Fake it out ;) */
2199}
2200
2201/*
2202 * double_rq_unlock - safely unlock two runqueues
2203 *
2204 * Note this does not restore interrupts like task_rq_unlock();
2205 * you need to do so manually after calling.
2206 */
2207static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2208 __releases(rq1->lock)
2209 __releases(rq2->lock)
2210{
2211 BUG_ON(rq1 != rq2);
2212 raw_spin_unlock(&rq1->lock);
2213 __release(rq2->lock);
2214}
2215
2216#endif
2217
2218extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
2219extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
2220
2221#ifdef CONFIG_SCHED_DEBUG
2222extern bool sched_debug_enabled;
2223
2224extern void print_cfs_stats(struct seq_file *m, int cpu);
2225extern void print_rt_stats(struct seq_file *m, int cpu);
2226extern void print_dl_stats(struct seq_file *m, int cpu);
2227extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
2228extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2229extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
2230#ifdef CONFIG_NUMA_BALANCING
2231extern void
2232show_numa_stats(struct task_struct *p, struct seq_file *m);
2233extern void
2234print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
2235 unsigned long tpf, unsigned long gsf, unsigned long gpf);
2236#endif /* CONFIG_NUMA_BALANCING */
2237#endif /* CONFIG_SCHED_DEBUG */
2238
2239extern void init_cfs_rq(struct cfs_rq *cfs_rq);
2240extern void init_rt_rq(struct rt_rq *rt_rq);
2241extern void init_dl_rq(struct dl_rq *dl_rq);
2242
2243extern void cfs_bandwidth_usage_inc(void);
2244extern void cfs_bandwidth_usage_dec(void);
2245
2246#ifdef CONFIG_NO_HZ_COMMON
2247#define NOHZ_BALANCE_KICK_BIT 0
2248#define NOHZ_STATS_KICK_BIT 1
2249
2250#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
2251#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
2252
2253#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
2254
2255#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
2256
2257extern void nohz_balance_exit_idle(struct rq *rq);
2258#else
2259static inline void nohz_balance_exit_idle(struct rq *rq) { }
2260#endif
2261
2262
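/*
 * __dl_update() applies a deadline bandwidth change of @bw to every active
 * CPU spanned by the root domain that owns @dl_b (or to the single dl_rq on
 * !SMP builds), by adjusting each dl_rq's extra_bw accordingly.
 */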
2263#ifdef CONFIG_SMP
2264static inline
2265void __dl_update(struct dl_bw *dl_b, s64 bw)
2266{
2267 struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
2268 int i;
2269
2270 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
2271 "sched RCU must be held");
2272 for_each_cpu_and(i, rd->span, cpu_active_mask) {
2273 struct rq *rq = cpu_rq(i);
2274
2275 rq->dl.extra_bw += bw;
2276 }
2277}
2278#else
2279static inline
2280void __dl_update(struct dl_bw *dl_b, s64 bw)
2281{
2282 struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
2283
2284 dl->extra_bw += bw;
2285}
2286#endif
2287
2288
2289#ifdef CONFIG_IRQ_TIME_ACCOUNTING
2290struct irqtime {
2291 u64 total;
2292 u64 tick_delta;
2293 u64 irq_start_time;
2294 struct u64_stats_sync sync;
2295};
2296
2297DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
2298
2299/*
2300 * Returns the irqtime minus the softirq time computed by ksoftirqd.
2301 * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
2302 * subtracted from it and never move forward.
2303 */
2304static inline u64 irq_time_read(int cpu)
2305{
2306 struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
2307 unsigned int seq;
2308 u64 total;
2309
2310 do {
2311 seq = __u64_stats_fetch_begin(&irqtime->sync);
2312 total = irqtime->total;
2313 } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
2314
2315 return total;
2316}
2317#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2318
2319#ifdef CONFIG_CPU_FREQ
2320DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
2321
2322/**
2323 * cpufreq_update_util - Take a note about CPU utilization changes.
2324 * @rq: Runqueue to carry out the update for.
2325 * @flags: Update reason flags.
2326 *
2327 * This function is called by the scheduler on the CPU whose utilization is
2328 * being updated.
2329 *
2330 * It can only be called from RCU-sched read-side critical sections.
2331 *
2332 * The way cpufreq is currently arranged requires it to evaluate the CPU
2333 * performance state (frequency/voltage) on a regular basis to prevent it from
2334 * being stuck in a completely inadequate performance level for too long.
2335 * That is not guaranteed to happen if the updates are only triggered from CFS
2336 * and DL, though, because those updates may not come in at all if RT tasks
2337 * are active all the time (i.e. the CPU runs RT tasks only).
2338 *
2339 * As a workaround for that issue, this function is called periodically by the
2340 * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
2341 * but that really is a band-aid. Going forward it should be replaced with
2342 * solutions targeted more specifically at RT tasks.
2343 */
2344static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
2345{
2346 struct update_util_data *data;
2347
2348 data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
2349 cpu_of(rq)));
2350 if (data)
2351 data->func(data, rq_clock(rq), flags);
2352}
2353#else
2354static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
2355#endif /* CONFIG_CPU_FREQ */
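/*
 * Illustrative call site (sketch, not an additional hook): after updating a
 * CPU's utilization an enqueue path may nudge the cpufreq governor, e.g.
 *
 *	cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
 *
 * where the SCHED_CPUFREQ_IOWAIT flag hints that a task woken from I/O wait
 * was just enqueued.
 */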
2356
2357#ifdef CONFIG_UCLAMP_TASK
2358unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
2359
2360/**
2361 * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
2362 * @rq: The rq to clamp against. Must not be NULL.
2363 * @util: The util value to clamp.
2364 * @p: The task to clamp against. Can be NULL if you want to clamp
2365 * against @rq only.
2366 *
2367 * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
2368 *
2369 * If sched_uclamp_used static key is disabled, then just return the util
2370 * without any clamping since uclamp aggregation at the rq level in the fast
2371 * path is disabled, rendering this operation a NOP.
2372 *
2373 * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
2374 * will return the correct effective uclamp value of the task even if the
2375 * static key is disabled.
2376 */
2377static __always_inline
2378unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2379 struct task_struct *p)
2380{
2381 unsigned long min_util;
2382 unsigned long max_util;
2383
2384 if (!static_branch_likely(&sched_uclamp_used))
2385 return util;
2386
2387 min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
2388 max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
2389
2390 if (p) {
2391 min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
2392 max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
2393 }
2394
2395 /*
2396 * Since CPU's {min,max}_util clamps are MAX aggregated considering
2397 * RUNNABLE tasks with _different_ clamps, we can end up with an
2398 * inversion. Fix it now when the clamps are applied.
2399 */
2400 if (unlikely(min_util >= max_util))
2401 return min_util;
2402
2403 return clamp(util, min_util, max_util);
2404}
2405
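/*
 * Typical (sketched) usage of uclamp_rq_util_with(): clamp a utilization
 * value before it is used for frequency selection, e.g.
 *
 *	util = uclamp_rq_util_with(rq, util, p);
 *
 * passing p == NULL when only the rq-wide clamps matter.
 */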
2406static inline bool uclamp_boosted(struct task_struct *p)
2407{
2408 return uclamp_eff_value(p, UCLAMP_MIN) > 0;
2409}
2410
2411/*
2412 * When uclamp is compiled in, the aggregation at rq level is 'turned off'
2413 * by default in the fast path and only gets turned on once userspace performs
2414 * an operation that requires it.
2415 *
2416 * Returns true if userspace has opted in to use uclamp, and hence rq-level
2417 * aggregation is active.
2418 */
2419static inline bool uclamp_is_used(void)
2420{
2421 return static_branch_likely(&sched_uclamp_used);
2422}
2423#else /* CONFIG_UCLAMP_TASK */
2424static inline
2425unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2426 struct task_struct *p)
2427{
2428 return util;
2429}
2430static inline bool uclamp_boosted(struct task_struct *p)
2431{
2432 return false;
2433}
2434
2435static inline bool uclamp_is_used(void)
2436{
2437 return false;
2438}
2439#endif /* CONFIG_UCLAMP_TASK */
2440
2441#ifdef CONFIG_UCLAMP_TASK_GROUP
2442static inline bool uclamp_latency_sensitive(struct task_struct *p)
2443{
2444 struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id);
2445 struct task_group *tg;
2446
2447 if (!css)
2448 return false;
2449 tg = container_of(css, struct task_group, css);
2450
2451 return tg->latency_sensitive;
2452}
2453#else
2454static inline bool uclamp_latency_sensitive(struct task_struct *p)
2455{
2456 return false;
2457}
2458#endif /* CONFIG_UCLAMP_TASK_GROUP */
2459
2460#ifdef arch_scale_freq_capacity
2461# ifndef arch_scale_freq_invariant
2462# define arch_scale_freq_invariant() true
2463# endif
2464#else
2465# define arch_scale_freq_invariant() false
2466#endif
2467
2468#ifdef CONFIG_SMP
2469static inline unsigned long capacity_orig_of(int cpu)
2470{
2471 return cpu_rq(cpu)->cpu_capacity_orig;
2472}
2473#endif
2474
2475/**
2476 * enum schedutil_type - CPU utilization type
2477 * @FREQUENCY_UTIL: Utilization used to select frequency
2478 * @ENERGY_UTIL: Utilization used during energy calculation
2479 *
2480 * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
2481 * need to be aggregated differently depending on the usage made of them. This
2482 * enum is used within schedutil_cpu_util() to differentiate the types of
2483 * utilization expected by the callers, and adjust the aggregation accordingly.
2484 */
2485enum schedutil_type {
2486 FREQUENCY_UTIL,
2487 ENERGY_UTIL,
2488};
2489
2490#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
2491
2492unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
2493 unsigned long max, enum schedutil_type type,
2494 struct task_struct *p);
2495
2496static inline unsigned long cpu_bw_dl(struct rq *rq)
2497{
2498 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
2499}
2500
2501static inline unsigned long cpu_util_dl(struct rq *rq)
2502{
2503 return READ_ONCE(rq->avg_dl.util_avg);
2504}
2505
2506static inline unsigned long cpu_util_cfs(struct rq *rq)
2507{
2508 unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
2509
2510 if (sched_feat(UTIL_EST)) {
2511 util = max_t(unsigned long, util,
2512 READ_ONCE(rq->cfs.avg.util_est.enqueued));
2513 }
2514
2515 return util;
2516}
2517
2518static inline unsigned long cpu_util_rt(struct rq *rq)
2519{
2520 return READ_ONCE(rq->avg_rt.util_avg);
2521}
2522#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
2523static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
2524 unsigned long max, enum schedutil_type type,
2525 struct task_struct *p)
2526{
2527 return 0;
2528}
2529#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
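/*
 * Rough shape of the aggregation performed by schedutil_cpu_util() (sketch
 * only; uclamp, the DL utilization contribution and saturation handling are
 * omitted):
 *
 *	util  = cpu_util_cfs(rq) + cpu_util_rt(rq);
 *	util  = scale_irq_capacity(util, cpu_util_irq(rq), max);
 *	util += cpu_util_irq(rq);
 *	if (type == FREQUENCY_UTIL)
 *		util += cpu_bw_dl(rq);
 *	return min(util, max);
 */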
2530
2531#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
2532static inline unsigned long cpu_util_irq(struct rq *rq)
2533{
2534 return rq->avg_irq.util_avg;
2535}
2536
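/*
 * Scale @util to account for the CPU time consumed by IRQ (and paravirt
 * steal) handling. That time is hidden from the task clock the other
 * utilization signals are tracked against, so:
 *
 *		       max - irq
 *	util' = util * ---------
 *			  max
 */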
2537static inline
2538unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2539{
2540 util *= (max - irq);
2541 util /= max;
2542
2543 return util;
2545}
2546#else
2547static inline unsigned long cpu_util_irq(struct rq *rq)
2548{
2549 return 0;
2550}
2551
2552static inline
2553unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2554{
2555 return util;
2556}
2557#endif
2558
2559#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
2560
2561#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
2562
2563DECLARE_STATIC_KEY_FALSE(sched_energy_present);
2564
2565static inline bool sched_energy_enabled(void)
2566{
2567 return static_branch_unlikely(&sched_energy_present);
2568}
2569
2570#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
2571
2572#define perf_domain_span(pd) NULL
2573static inline bool sched_energy_enabled(void) { return false; }
2574
2575#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
2576
2577#ifdef CONFIG_MEMBARRIER
2578/*
2579 * The scheduler provides memory barriers required by membarrier between:
2580 * - prior user-space memory accesses and store to rq->membarrier_state,
2581 * - store to rq->membarrier_state and following user-space memory accesses.
2582 * In the same way it provides those guarantees around store to rq->curr.
2583 */
2584static inline void membarrier_switch_mm(struct rq *rq,
2585 struct mm_struct *prev_mm,
2586 struct mm_struct *next_mm)
2587{
2588 int membarrier_state;
2589
2590 if (prev_mm == next_mm)
2591 return;
2592
2593 membarrier_state = atomic_read(&next_mm->membarrier_state);
2594 if (READ_ONCE(rq->membarrier_state) == membarrier_state)
2595 return;
2596
2597 WRITE_ONCE(rq->membarrier_state, membarrier_state);
2598}
2599#else
2600static inline void membarrier_switch_mm(struct rq *rq,
2601 struct mm_struct *prev_mm,
2602 struct mm_struct *next_mm)
2603{
2604}
2605#endif