Blame - ap/os/linux/linux-3.4.x/kernel/sched/rt.c - T106_DC

blob: 89a985a89bcba6b7e53e50e9d19be7bfe141b5f4 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
				3	* policies)
				4	*/
				5
				6	#include "sched.h"
				7
				8	#include <linux/slab.h>
				9
				10	static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
				11
				12	struct rt_bandwidth def_rt_bandwidth;
				13
				14	static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
				15	{
				16	struct rt_bandwidth *rt_b =
				17	container_of(timer, struct rt_bandwidth, rt_period_timer);
				18	ktime_t now;
				19	int overrun;
				20	int idle = 0;
				21
				22	for (;;) {
				23	now = hrtimer_cb_get_time(timer);
				24	overrun = hrtimer_forward(timer, now, rt_b->rt_period);
				25
				26	if (!overrun)
				27	break;
				28
				29	idle = do_sched_rt_period_timer(rt_b, overrun);
				30	}
				31
				32	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
				33	}
				34
				35	void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
				36	{
				37	rt_b->rt_period = ns_to_ktime(period);
				38	rt_b->rt_runtime = runtime;
				39
				40	raw_spin_lock_init(&rt_b->rt_runtime_lock);
				41
				42	hrtimer_init(&rt_b->rt_period_timer,
				43	CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				44	rt_b->rt_period_timer.irqsafe = 1;
				45	rt_b->rt_period_timer.function = sched_rt_period_timer;
				46	}
				47
				48	static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
				49	{
				50	if (!rt_bandwidth_enabled() \|\| rt_b->rt_runtime == RUNTIME_INF)
				51	return;
				52
				53	if (hrtimer_active(&rt_b->rt_period_timer))
				54	return;
				55
				56	raw_spin_lock(&rt_b->rt_runtime_lock);
				57	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
				58	raw_spin_unlock(&rt_b->rt_runtime_lock);
				59	}
				60
				61	void init_rt_rq(struct rt_rq rt_rq, struct rq rq)
				62	{
				63	struct rt_prio_array *array;
				64	int i;
				65
				66	array = &rt_rq->active;
				67	for (i = 0; i < MAX_RT_PRIO; i++) {
				68	INIT_LIST_HEAD(array->queue + i);
				69	__clear_bit(i, array->bitmap);
				70	}
				71	/* delimiter for bitsearch: */
				72	__set_bit(MAX_RT_PRIO, array->bitmap);
				73
				74	#if defined CONFIG_SMP
				75	rt_rq->highest_prio.curr = MAX_RT_PRIO;
				76	rt_rq->highest_prio.next = MAX_RT_PRIO;
				77	rt_rq->rt_nr_migratory = 0;
				78	rt_rq->overloaded = 0;
				79	plist_head_init(&rt_rq->pushable_tasks);
				80	#endif
				81
				82	rt_rq->rt_time = 0;
				83	rt_rq->rt_throttled = 0;
				84	rt_rq->rt_runtime = 0;
				85	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
				86	}
				87
				88	#ifdef CONFIG_RT_GROUP_SCHED
				89	static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
				90	{
				91	hrtimer_cancel(&rt_b->rt_period_timer);
				92	}
				93
				94	#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
				95
				96	static inline struct task_struct rt_task_of(struct sched_rt_entity rt_se)
				97	{
				98	#ifdef CONFIG_SCHED_DEBUG
				99	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
				100	#endif
				101	return container_of(rt_se, struct task_struct, rt);
				102	}
				103
				104	static inline struct rq rq_of_rt_rq(struct rt_rq rt_rq)
				105	{
				106	return rt_rq->rq;
				107	}
				108
				109	static inline struct rt_rq rt_rq_of_se(struct sched_rt_entity rt_se)
				110	{
				111	return rt_se->rt_rq;
				112	}
				113
				114	void free_rt_sched_group(struct task_group *tg)
				115	{
				116	int i;
				117
				118	if (tg->rt_se)
				119	destroy_rt_bandwidth(&tg->rt_bandwidth);
				120
				121	for_each_possible_cpu(i) {
				122	if (tg->rt_rq)
				123	kfree(tg->rt_rq[i]);
				124	if (tg->rt_se)
				125	kfree(tg->rt_se[i]);
				126	}
				127
				128	kfree(tg->rt_rq);
				129	kfree(tg->rt_se);
				130	}
				131
				132	void init_tg_rt_entry(struct task_group tg, struct rt_rq rt_rq,
				133	struct sched_rt_entity *rt_se, int cpu,
				134	struct sched_rt_entity *parent)
				135	{
				136	struct rq *rq = cpu_rq(cpu);
				137
				138	rt_rq->highest_prio.curr = MAX_RT_PRIO;
				139	rt_rq->rt_nr_boosted = 0;
				140	rt_rq->rq = rq;
				141	rt_rq->tg = tg;
				142
				143	tg->rt_rq[cpu] = rt_rq;
				144	tg->rt_se[cpu] = rt_se;
				145
				146	if (!rt_se)
				147	return;
				148
				149	if (!parent)
				150	rt_se->rt_rq = &rq->rt;
				151	else
				152	rt_se->rt_rq = parent->my_q;
				153
				154	rt_se->my_q = rt_rq;
				155	rt_se->parent = parent;
				156	INIT_LIST_HEAD(&rt_se->run_list);
				157	}
				158
				159	int alloc_rt_sched_group(struct task_group tg, struct task_group parent)
				160	{
				161	struct rt_rq *rt_rq;
				162	struct sched_rt_entity *rt_se;
				163	int i;
				164
				165	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
				166	if (!tg->rt_rq)
				167	goto err;
				168	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
				169	if (!tg->rt_se)
				170	goto err;
				171
				172	init_rt_bandwidth(&tg->rt_bandwidth,
				173	ktime_to_ns(def_rt_bandwidth.rt_period), 0);
				174
				175	for_each_possible_cpu(i) {
				176	rt_rq = kzalloc_node(sizeof(struct rt_rq),
				177	GFP_KERNEL, cpu_to_node(i));
				178	if (!rt_rq)
				179	goto err;
				180
				181	rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				182	GFP_KERNEL, cpu_to_node(i));
				183	if (!rt_se)
				184	goto err_free_rq;
				185
				186	init_rt_rq(rt_rq, cpu_rq(i));
				187	rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
				188	init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
				189	}
				190
				191	return 1;
				192
				193	err_free_rq:
				194	kfree(rt_rq);
				195	err:
				196	return 0;
				197	}
				198
				199	#else /* CONFIG_RT_GROUP_SCHED */
				200
				201	#define rt_entity_is_task(rt_se) (1)
				202
				203	static inline struct task_struct rt_task_of(struct sched_rt_entity rt_se)
				204	{
				205	return container_of(rt_se, struct task_struct, rt);
				206	}
				207
				208	static inline struct rq rq_of_rt_rq(struct rt_rq rt_rq)
				209	{
				210	return container_of(rt_rq, struct rq, rt);
				211	}
				212
				213	static inline struct rt_rq rt_rq_of_se(struct sched_rt_entity rt_se)
				214	{
				215	struct task_struct *p = rt_task_of(rt_se);
				216	struct rq *rq = task_rq(p);
				217
				218	return &rq->rt;
				219	}
				220
				221	void free_rt_sched_group(struct task_group *tg) { }
				222
				223	int alloc_rt_sched_group(struct task_group tg, struct task_group parent)
				224	{
				225	return 1;
				226	}
				227	#endif /* CONFIG_RT_GROUP_SCHED */
				228
				229	#ifdef CONFIG_SMP
				230
				231	static inline int rt_overloaded(struct rq *rq)
				232	{
				233	return atomic_read(&rq->rd->rto_count);
				234	}
				235
				236	static inline void rt_set_overload(struct rq *rq)
				237	{
				238	if (!rq->online)
				239	return;
				240
				241	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
				242	/*
				243	* Make sure the mask is visible before we set
				244	* the overload count. That is checked to determine
				245	* if we should look at the mask. It would be a shame
				246	* if we looked at the mask, but the mask was not
				247	* updated yet.
				248	*/
				249	wmb();
				250	atomic_inc(&rq->rd->rto_count);
				251	}
				252
				253	static inline void rt_clear_overload(struct rq *rq)
				254	{
				255	if (!rq->online)
				256	return;
				257
				258	/* the order here really doesn't matter */
				259	atomic_dec(&rq->rd->rto_count);
				260	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
				261	}
				262
				263	static void update_rt_migration(struct rt_rq *rt_rq)
				264	{
				265	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
				266	if (!rt_rq->overloaded) {
				267	rt_set_overload(rq_of_rt_rq(rt_rq));
				268	rt_rq->overloaded = 1;
				269	}
				270	} else if (rt_rq->overloaded) {
				271	rt_clear_overload(rq_of_rt_rq(rt_rq));
				272	rt_rq->overloaded = 0;
				273	}
				274	}
				275
				276	static void inc_rt_migration(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				277	{
				278	if (!rt_entity_is_task(rt_se))
				279	return;
				280
				281	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
				282
				283	rt_rq->rt_nr_total++;
				284	if (rt_se->nr_cpus_allowed > 1)
				285	rt_rq->rt_nr_migratory++;
				286
				287	update_rt_migration(rt_rq);
				288	}
				289
				290	static void dec_rt_migration(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				291	{
				292	if (!rt_entity_is_task(rt_se))
				293	return;
				294
				295	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
				296
				297	rt_rq->rt_nr_total--;
				298	if (rt_se->nr_cpus_allowed > 1)
				299	rt_rq->rt_nr_migratory--;
				300
				301	update_rt_migration(rt_rq);
				302	}
				303
				304	static inline int has_pushable_tasks(struct rq *rq)
				305	{
				306	return !plist_head_empty(&rq->rt.pushable_tasks);
				307	}
				308
				309	static void enqueue_pushable_task(struct rq rq, struct task_struct p)
				310	{
				311	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
				312	plist_node_init(&p->pushable_tasks, p->prio);
				313	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
				314
				315	/* Update the highest prio pushable task */
				316	if (p->prio < rq->rt.highest_prio.next)
				317	rq->rt.highest_prio.next = p->prio;
				318	}
				319
				320	static void dequeue_pushable_task(struct rq rq, struct task_struct p)
				321	{
				322	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
				323
				324	/* Update the new highest prio pushable task */
				325	if (has_pushable_tasks(rq)) {
				326	p = plist_first_entry(&rq->rt.pushable_tasks,
				327	struct task_struct, pushable_tasks);
				328	rq->rt.highest_prio.next = p->prio;
				329	} else
				330	rq->rt.highest_prio.next = MAX_RT_PRIO;
				331	}
				332
				333	#else
				334
				335	static inline void enqueue_pushable_task(struct rq rq, struct task_struct p)
				336	{
				337	}
				338
				339	static inline void dequeue_pushable_task(struct rq rq, struct task_struct p)
				340	{
				341	}
				342
				343	static inline
				344	void inc_rt_migration(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				345	{
				346	}
				347
				348	static inline
				349	void dec_rt_migration(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				350	{
				351	}
				352
				353	#endif /* CONFIG_SMP */
				354
				355	static inline int on_rt_rq(struct sched_rt_entity *rt_se)
				356	{
				357	return !list_empty(&rt_se->run_list);
				358	}
				359
				360	#ifdef CONFIG_RT_GROUP_SCHED
				361
				362	static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
				363	{
				364	if (!rt_rq->tg)
				365	return RUNTIME_INF;
				366
				367	return rt_rq->rt_runtime;
				368	}
				369
				370	static inline u64 sched_rt_period(struct rt_rq *rt_rq)
				371	{
				372	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
				373	}
				374
				375	typedef struct task_group *rt_rq_iter_t;
				376
				377	static inline struct task_group next_task_group(struct task_group tg)
				378	{
				379	do {
				380	tg = list_entry_rcu(tg->list.next,
				381	typeof(struct task_group), list);
				382	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));
				383
				384	if (&tg->list == &task_groups)
				385	tg = NULL;
				386
				387	return tg;
				388	}
				389
				390	#define for_each_rt_rq(rt_rq, iter, rq) \
				391	for (iter = container_of(&task_groups, typeof(*iter), list); \
				392	(iter = next_task_group(iter)) && \
				393	(rt_rq = iter->rt_rq[cpu_of(rq)]);)
				394
				395	static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
				396	{
				397	list_add_rcu(&rt_rq->leaf_rt_rq_list,
				398	&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
				399	}
				400
				401	static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
				402	{
				403	list_del_rcu(&rt_rq->leaf_rt_rq_list);
				404	}
				405
				406	#define for_each_leaf_rt_rq(rt_rq, rq) \
				407	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
				408
				409	#define for_each_sched_rt_entity(rt_se) \
				410	for (; rt_se; rt_se = rt_se->parent)
				411
				412	static inline struct rt_rq group_rt_rq(struct sched_rt_entity rt_se)
				413	{
				414	return rt_se->my_q;
				415	}
				416
				417	static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
				418	static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
				419
				420	static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
				421	{
				422	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
				423	struct sched_rt_entity *rt_se;
				424
				425	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
				426
				427	rt_se = rt_rq->tg->rt_se[cpu];
				428
				429	if (rt_rq->rt_nr_running) {
				430	if (rt_se && !on_rt_rq(rt_se))
				431	enqueue_rt_entity(rt_se, false);
				432	if (rt_rq->highest_prio.curr < curr->prio)
				433	resched_task(curr);
				434	}
				435	}
				436
				437	static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
				438	{
				439	struct sched_rt_entity *rt_se;
				440	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
				441
				442	rt_se = rt_rq->tg->rt_se[cpu];
				443
				444	if (rt_se && on_rt_rq(rt_se))
				445	dequeue_rt_entity(rt_se);
				446	}
				447
				448	static inline int rt_rq_throttled(struct rt_rq *rt_rq)
				449	{
				450	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
				451	}
				452
				453	static int rt_se_boosted(struct sched_rt_entity *rt_se)
				454	{
				455	struct rt_rq *rt_rq = group_rt_rq(rt_se);
				456	struct task_struct *p;
				457
				458	if (rt_rq)
				459	return !!rt_rq->rt_nr_boosted;
				460
				461	p = rt_task_of(rt_se);
				462	return p->prio != p->normal_prio;
				463	}
				464
				465	#ifdef CONFIG_SMP
				466	static inline const struct cpumask *sched_rt_period_mask(void)
				467	{
				468	return cpu_rq(smp_processor_id())->rd->span;
				469	}
				470	#else
				471	static inline const struct cpumask *sched_rt_period_mask(void)
				472	{
				473	return cpu_online_mask;
				474	}
				475	#endif
				476
				477	static inline
				478	struct rt_rq sched_rt_period_rt_rq(struct rt_bandwidth rt_b, int cpu)
				479	{
				480	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
				481	}
				482
				483	static inline struct rt_bandwidth sched_rt_bandwidth(struct rt_rq rt_rq)
				484	{
				485	return &rt_rq->tg->rt_bandwidth;
				486	}
				487
				488	#else /* !CONFIG_RT_GROUP_SCHED */
				489
				490	static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
				491	{
				492	return rt_rq->rt_runtime;
				493	}
				494
				495	static inline u64 sched_rt_period(struct rt_rq *rt_rq)
				496	{
				497	return ktime_to_ns(def_rt_bandwidth.rt_period);
				498	}
				499
				500	typedef struct rt_rq *rt_rq_iter_t;
				501
				502	#define for_each_rt_rq(rt_rq, iter, rq) \
				503	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
				504
				505	static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
				506	{
				507	}
				508
				509	static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
				510	{
				511	}
				512
				513	#define for_each_leaf_rt_rq(rt_rq, rq) \
				514	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
				515
				516	#define for_each_sched_rt_entity(rt_se) \
				517	for (; rt_se; rt_se = NULL)
				518
				519	static inline struct rt_rq group_rt_rq(struct sched_rt_entity rt_se)
				520	{
				521	return NULL;
				522	}
				523
				524	static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
				525	{
				526	if (rt_rq->rt_nr_running)
				527	resched_task(rq_of_rt_rq(rt_rq)->curr);
				528	}
				529
				530	static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
				531	{
				532	}
				533
				534	static inline int rt_rq_throttled(struct rt_rq *rt_rq)
				535	{
				536	return rt_rq->rt_throttled;
				537	}
				538
				539	static inline const struct cpumask *sched_rt_period_mask(void)
				540	{
				541	return cpu_online_mask;
				542	}
				543
				544	static inline
				545	struct rt_rq sched_rt_period_rt_rq(struct rt_bandwidth rt_b, int cpu)
				546	{
				547	return &cpu_rq(cpu)->rt;
				548	}
				549
				550	static inline struct rt_bandwidth sched_rt_bandwidth(struct rt_rq rt_rq)
				551	{
				552	return &def_rt_bandwidth;
				553	}
				554
				555	#endif /* CONFIG_RT_GROUP_SCHED */
				556
				557	#ifdef CONFIG_SMP
				558	/*
				559	* We ran out of runtime, see if we can borrow some from our neighbours.
				560	*/
				561	static int do_balance_runtime(struct rt_rq *rt_rq)
				562	{
				563	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
				564	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
				565	int i, weight, more = 0;
				566	u64 rt_period;
				567
				568	weight = cpumask_weight(rd->span);
				569
				570	raw_spin_lock(&rt_b->rt_runtime_lock);
				571	rt_period = ktime_to_ns(rt_b->rt_period);
				572	for_each_cpu(i, rd->span) {
				573	struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
				574	s64 diff;
				575
				576	if (iter == rt_rq)
				577	continue;
				578
				579	raw_spin_lock(&iter->rt_runtime_lock);
				580	/*
				581	* Either all rqs have inf runtime and there's nothing to steal
				582	* or __disable_runtime() below sets a specific rq to inf to
				583	* indicate its been disabled and disalow stealing.
				584	*/
				585	if (iter->rt_runtime == RUNTIME_INF)
				586	goto next;
				587
				588	/*
				589	* From runqueues with spare time, take 1/n part of their
				590	* spare time, but no more than our period.
				591	*/
				592	diff = iter->rt_runtime - iter->rt_time;
				593	if (diff > 0) {
				594	diff = div_u64((u64)diff, weight);
				595	if (rt_rq->rt_runtime + diff > rt_period)
				596	diff = rt_period - rt_rq->rt_runtime;
				597	iter->rt_runtime -= diff;
				598	rt_rq->rt_runtime += diff;
				599	more = 1;
				600	if (rt_rq->rt_runtime == rt_period) {
				601	raw_spin_unlock(&iter->rt_runtime_lock);
				602	break;
				603	}
				604	}
				605	next:
				606	raw_spin_unlock(&iter->rt_runtime_lock);
				607	}
				608	raw_spin_unlock(&rt_b->rt_runtime_lock);
				609
				610	return more;
				611	}
				612
				613	/*
				614	* Ensure this RQ takes back all the runtime it lend to its neighbours.
				615	*/
				616	static void __disable_runtime(struct rq *rq)
				617	{
				618	struct root_domain *rd = rq->rd;
				619	rt_rq_iter_t iter;
				620	struct rt_rq *rt_rq;
				621
				622	if (unlikely(!scheduler_running))
				623	return;
				624
				625	for_each_rt_rq(rt_rq, iter, rq) {
				626	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
				627	s64 want;
				628	int i;
				629
				630	raw_spin_lock(&rt_b->rt_runtime_lock);
				631	raw_spin_lock(&rt_rq->rt_runtime_lock);
				632	/*
				633	* Either we're all inf and nobody needs to borrow, or we're
				634	* already disabled and thus have nothing to do, or we have
				635	* exactly the right amount of runtime to take out.
				636	*/
				637	if (rt_rq->rt_runtime == RUNTIME_INF \|\|
				638	rt_rq->rt_runtime == rt_b->rt_runtime)
				639	goto balanced;
				640	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				641
				642	/*
				643	* Calculate the difference between what we started out with
				644	* and what we current have, that's the amount of runtime
				645	* we lend and now have to reclaim.
				646	*/
				647	want = rt_b->rt_runtime - rt_rq->rt_runtime;
				648
				649	/*
				650	* Greedy reclaim, take back as much as we can.
				651	*/
				652	for_each_cpu(i, rd->span) {
				653	struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
				654	s64 diff;
				655
				656	/*
				657	* Can't reclaim from ourselves or disabled runqueues.
				658	*/
				659	if (iter == rt_rq \|\| iter->rt_runtime == RUNTIME_INF)
				660	continue;
				661
				662	raw_spin_lock(&iter->rt_runtime_lock);
				663	if (want > 0) {
				664	diff = min_t(s64, iter->rt_runtime, want);
				665	iter->rt_runtime -= diff;
				666	want -= diff;
				667	} else {
				668	iter->rt_runtime -= want;
				669	want -= want;
				670	}
				671	raw_spin_unlock(&iter->rt_runtime_lock);
				672
				673	if (!want)
				674	break;
				675	}
				676
				677	raw_spin_lock(&rt_rq->rt_runtime_lock);
				678	/*
				679	* We cannot be left wanting - that would mean some runtime
				680	* leaked out of the system.
				681	*/
				682	BUG_ON(want);
				683	balanced:
				684	/*
				685	* Disable all the borrow logic by pretending we have inf
				686	* runtime - in which case borrowing doesn't make sense.
				687	*/
				688	rt_rq->rt_runtime = RUNTIME_INF;
				689	rt_rq->rt_throttled = 0;
				690	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				691	raw_spin_unlock(&rt_b->rt_runtime_lock);
				692	}
				693	}
				694
				695	static void disable_runtime(struct rq *rq)
				696	{
				697	unsigned long flags;
				698
				699	raw_spin_lock_irqsave(&rq->lock, flags);
				700	__disable_runtime(rq);
				701	raw_spin_unlock_irqrestore(&rq->lock, flags);
				702	}
				703
				704	static void __enable_runtime(struct rq *rq)
				705	{
				706	rt_rq_iter_t iter;
				707	struct rt_rq *rt_rq;
				708
				709	if (unlikely(!scheduler_running))
				710	return;
				711
				712	/*
				713	* Reset each runqueue's bandwidth settings
				714	*/
				715	for_each_rt_rq(rt_rq, iter, rq) {
				716	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
				717
				718	raw_spin_lock(&rt_b->rt_runtime_lock);
				719	raw_spin_lock(&rt_rq->rt_runtime_lock);
				720	rt_rq->rt_runtime = rt_b->rt_runtime;
				721	rt_rq->rt_time = 0;
				722	rt_rq->rt_throttled = 0;
				723	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				724	raw_spin_unlock(&rt_b->rt_runtime_lock);
				725	}
				726	}
				727
				728	static void enable_runtime(struct rq *rq)
				729	{
				730	unsigned long flags;
				731
				732	raw_spin_lock_irqsave(&rq->lock, flags);
				733	__enable_runtime(rq);
				734	raw_spin_unlock_irqrestore(&rq->lock, flags);
				735	}
				736
				737	int update_runtime(struct notifier_block nfb, unsigned long action, void hcpu)
				738	{
				739	int cpu = (int)(long)hcpu;
				740
				741	switch (action) {
				742	case CPU_DOWN_PREPARE:
				743	case CPU_DOWN_PREPARE_FROZEN:
				744	disable_runtime(cpu_rq(cpu));
				745	return NOTIFY_OK;
				746
				747	case CPU_DOWN_FAILED:
				748	case CPU_DOWN_FAILED_FROZEN:
				749	case CPU_ONLINE:
				750	case CPU_ONLINE_FROZEN:
				751	enable_runtime(cpu_rq(cpu));
				752	return NOTIFY_OK;
				753
				754	default:
				755	return NOTIFY_DONE;
				756	}
				757	}
				758
				759	static int balance_runtime(struct rt_rq *rt_rq)
				760	{
				761	int more = 0;
				762
				763	if (!sched_feat(RT_RUNTIME_SHARE))
				764	return more;
				765
				766	if (rt_rq->rt_time > rt_rq->rt_runtime) {
				767	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				768	more = do_balance_runtime(rt_rq);
				769	raw_spin_lock(&rt_rq->rt_runtime_lock);
				770	}
				771
				772	return more;
				773	}
				774	#else /* !CONFIG_SMP */
				775	static inline int balance_runtime(struct rt_rq *rt_rq)
				776	{
				777	return 0;
				778	}
				779	#endif /* CONFIG_SMP */
				780
				781	static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
				782	{
				783	int i, idle = 1, throttled = 0;
				784	const struct cpumask *span;
				785
				786	span = sched_rt_period_mask();
				787	#ifdef CONFIG_RT_GROUP_SCHED
				788	/*
				789	* FIXME: isolated CPUs should really leave the root task group,
				790	* whether they are isolcpus or were isolated via cpusets, lest
				791	* the timer run on a CPU which does not service all runqueues,
				792	* potentially leaving other CPUs indefinitely throttled. If
				793	* isolation is really required, the user will turn the throttle
				794	* off to kill the perturbations it causes anyway. Meanwhile,
				795	* this maintains functionality for boot and/or troubleshooting.
				796	*/
				797	if (rt_b == &root_task_group.rt_bandwidth)
				798	span = cpu_online_mask;
				799	#endif
				800	for_each_cpu(i, span) {
				801	int enqueue = 0;
				802	struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
				803	struct rq *rq = rq_of_rt_rq(rt_rq);
				804
				805	raw_spin_lock(&rq->lock);
				806	if (rt_rq->rt_time) {
				807	u64 runtime;
				808
				809	raw_spin_lock(&rt_rq->rt_runtime_lock);
				810	if (rt_rq->rt_throttled)
				811	balance_runtime(rt_rq);
				812	runtime = rt_rq->rt_runtime;
				813	rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
				814	if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				815	rt_rq->rt_throttled = 0;
				816	enqueue = 1;
				817
				818	/*
				819	* Force a clock update if the CPU was idle,
				820	* lest wakeup -> unthrottle time accumulate.
				821	*/
				822	if (rt_rq->rt_nr_running && rq->curr == rq->idle)
				823	rq->skip_clock_update = -1;
				824	}
				825	if (rt_rq->rt_time \|\| rt_rq->rt_nr_running)
				826	idle = 0;
				827	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				828	} else if (rt_rq->rt_nr_running) {
				829	idle = 0;
				830	if (!rt_rq_throttled(rt_rq))
				831	enqueue = 1;
				832	}
				833	if (rt_rq->rt_throttled)
				834	throttled = 1;
				835
				836	if (enqueue)
				837	sched_rt_rq_enqueue(rt_rq);
				838	raw_spin_unlock(&rq->lock);
				839	}
				840
				841	if (!throttled && (!rt_bandwidth_enabled() \|\| rt_b->rt_runtime == RUNTIME_INF))
				842	return 1;
				843
				844	return idle;
				845	}
				846
				847	static inline int rt_se_prio(struct sched_rt_entity *rt_se)
				848	{
				849	#ifdef CONFIG_RT_GROUP_SCHED
				850	struct rt_rq *rt_rq = group_rt_rq(rt_se);
				851
				852	if (rt_rq)
				853	return rt_rq->highest_prio.curr;
				854	#endif
				855
				856	return rt_task_of(rt_se)->prio;
				857	}
				858
				859	static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
				860	{
				861	u64 runtime = sched_rt_runtime(rt_rq);
				862
				863	if (rt_rq->rt_throttled)
				864	return rt_rq_throttled(rt_rq);
				865
				866	if (runtime >= sched_rt_period(rt_rq))
				867	return 0;
				868
				869	balance_runtime(rt_rq);
				870	runtime = sched_rt_runtime(rt_rq);
				871	if (runtime == RUNTIME_INF)
				872	return 0;
				873
				874	if (rt_rq->rt_time > runtime) {
				875	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
				876
				877	/*
				878	* Don't actually throttle groups that have no runtime assigned
				879	* but accrue some time due to boosting.
				880	*/
				881	if (likely(rt_b->rt_runtime)) {
				882	static bool once = false;
				883
				884	rt_rq->rt_throttled = 1;
				885
				886	if (!once) {
				887	once = true;
				888	printk_deferred("sched: RT throttling activated\n");
				889	}
				890	} else {
				891	/*
				892	* In case we did anyway, make it go away,
				893	* replenishment is a joke, since it will replenish us
				894	* with exactly 0 ns.
				895	*/
				896	rt_rq->rt_time = 0;
				897	}
				898
				899	if (rt_rq_throttled(rt_rq)) {
				900	sched_rt_rq_dequeue(rt_rq);
				901	return 1;
				902	}
				903	}
				904
				905	return 0;
				906	}
				907
				908	/*
				909	* Update the current task's runtime statistics. Skip current tasks that
				910	* are not in our scheduling class.
				911	*/
				912	static void update_curr_rt(struct rq *rq)
				913	{
				914	struct task_struct *curr = rq->curr;
				915	struct sched_rt_entity *rt_se = &curr->rt;
				916	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
				917	u64 delta_exec;
				918
				919	if (curr->sched_class != &rt_sched_class)
				920	return;
				921
				922	delta_exec = rq->clock_task - curr->se.exec_start;
				923	if (unlikely((s64)delta_exec < 0))
				924	delta_exec = 0;
				925
				926	schedstat_set(curr->se.statistics.exec_max,
				927	max(curr->se.statistics.exec_max, delta_exec));
				928
				929	curr->se.sum_exec_runtime += delta_exec;
				930	account_group_exec_runtime(curr, delta_exec);
				931
				932	curr->se.exec_start = rq->clock_task;
				933	cpuacct_charge(curr, delta_exec);
				934
				935	sched_rt_avg_update(rq, delta_exec);
				936
				937	if (!rt_bandwidth_enabled())
				938	return;
				939
				940	for_each_sched_rt_entity(rt_se) {
				941	rt_rq = rt_rq_of_se(rt_se);
				942
				943	if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
				944	raw_spin_lock(&rt_rq->rt_runtime_lock);
				945	rt_rq->rt_time += delta_exec;
				946	if (sched_rt_runtime_exceeded(rt_rq))
				947	resched_task(curr);
				948	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				949	}
				950	}
				951	}
				952
				953	#if defined CONFIG_SMP
				954
				955	static void
				956	inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
				957	{
				958	struct rq *rq = rq_of_rt_rq(rt_rq);
				959
				960	#ifdef CONFIG_RT_GROUP_SCHED
				961	/*
				962	* Change rq's cpupri only if rt_rq is the top queue.
				963	*/
				964	if (&rq->rt != rt_rq)
				965	return;
				966	#endif
				967	if (rq->online && prio < prev_prio)
				968	cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
				969	}
				970
				971	static void
				972	dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
				973	{
				974	struct rq *rq = rq_of_rt_rq(rt_rq);
				975
				976	#ifdef CONFIG_RT_GROUP_SCHED
				977	/*
				978	* Change rq's cpupri only if rt_rq is the top queue.
				979	*/
				980	if (&rq->rt != rt_rq)
				981	return;
				982	#endif
				983	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
				984	cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
				985	}
				986
				987	#else /* CONFIG_SMP */
				988
				989	static inline
				990	void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
				991	static inline
				992	void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
				993
				994	#endif /* CONFIG_SMP */
				995
				996	#if defined CONFIG_SMP \|\| defined CONFIG_RT_GROUP_SCHED
				997	static void
				998	inc_rt_prio(struct rt_rq *rt_rq, int prio)
				999	{
				1000	int prev_prio = rt_rq->highest_prio.curr;
				1001
				1002	if (prio < prev_prio)
				1003	rt_rq->highest_prio.curr = prio;
				1004
				1005	inc_rt_prio_smp(rt_rq, prio, prev_prio);
				1006	}
				1007
				1008	static void
				1009	dec_rt_prio(struct rt_rq *rt_rq, int prio)
				1010	{
				1011	int prev_prio = rt_rq->highest_prio.curr;
				1012
				1013	if (rt_rq->rt_nr_running) {
				1014
				1015	WARN_ON(prio < prev_prio);
				1016
				1017	/*
				1018	* This may have been our highest task, and therefore
				1019	* we may have some recomputation to do
				1020	*/
				1021	if (prio == prev_prio) {
				1022	struct rt_prio_array *array = &rt_rq->active;
				1023
				1024	rt_rq->highest_prio.curr =
				1025	sched_find_first_bit(array->bitmap);
				1026	}
				1027
				1028	} else
				1029	rt_rq->highest_prio.curr = MAX_RT_PRIO;
				1030
				1031	dec_rt_prio_smp(rt_rq, prio, prev_prio);
				1032	}
				1033
				1034	#else
				1035
				1036	static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
				1037	static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
				1038
				1039	#endif /* CONFIG_SMP \|\| CONFIG_RT_GROUP_SCHED */
				1040
				1041	#ifdef CONFIG_RT_GROUP_SCHED
				1042
				1043	static void
				1044	inc_rt_group(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				1045	{
				1046	if (rt_se_boosted(rt_se))
				1047	rt_rq->rt_nr_boosted++;
				1048
				1049	if (rt_rq->tg)
				1050	start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
				1051	}
				1052
				1053	static void
				1054	dec_rt_group(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				1055	{
				1056	if (rt_se_boosted(rt_se))
				1057	rt_rq->rt_nr_boosted--;
				1058
				1059	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
				1060	}
				1061
				1062	#else /* CONFIG_RT_GROUP_SCHED */
				1063
				1064	static void
				1065	inc_rt_group(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				1066	{
				1067	start_rt_bandwidth(&def_rt_bandwidth);
				1068	}
				1069
				1070	static inline
				1071	void dec_rt_group(struct sched_rt_entity rt_se, struct rt_rq rt_rq) {}
				1072
				1073	#endif /* CONFIG_RT_GROUP_SCHED */
				1074
				1075	static inline
				1076	void inc_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				1077	{
				1078	int prio = rt_se_prio(rt_se);
				1079
				1080	WARN_ON(!rt_prio(prio));
				1081	rt_rq->rt_nr_running++;
				1082
				1083	inc_rt_prio(rt_rq, prio);
				1084	inc_rt_migration(rt_se, rt_rq);
				1085	inc_rt_group(rt_se, rt_rq);
				1086	}
				1087
				1088	static inline
				1089	void dec_rt_tasks(struct sched_rt_entity rt_se, struct rt_rq rt_rq)
				1090	{
				1091	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
				1092	WARN_ON(!rt_rq->rt_nr_running);
				1093	rt_rq->rt_nr_running--;
				1094
				1095	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
				1096	dec_rt_migration(rt_se, rt_rq);
				1097	dec_rt_group(rt_se, rt_rq);
				1098	}
				1099
				1100	static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
				1101	{
				1102	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
				1103	struct rt_prio_array *array = &rt_rq->active;
				1104	struct rt_rq *group_rq = group_rt_rq(rt_se);
				1105	struct list_head *queue = array->queue + rt_se_prio(rt_se);
				1106
				1107	/*
				1108	* Don't enqueue the group if its throttled, or when empty.
				1109	* The latter is a consequence of the former when a child group
				1110	* get throttled and the current group doesn't have any other
				1111	* active members.
				1112	*/
				1113	if (group_rq && (rt_rq_throttled(group_rq) \|\| !group_rq->rt_nr_running))
				1114	return;
				1115
				1116	if (!rt_rq->rt_nr_running)
				1117	list_add_leaf_rt_rq(rt_rq);
				1118
				1119	if (head)
				1120	list_add(&rt_se->run_list, queue);
				1121	else
				1122	list_add_tail(&rt_se->run_list, queue);
				1123	__set_bit(rt_se_prio(rt_se), array->bitmap);
				1124
				1125	inc_rt_tasks(rt_se, rt_rq);
				1126	}
				1127
				1128	static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
				1129	{
				1130	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
				1131	struct rt_prio_array *array = &rt_rq->active;
				1132
				1133	list_del_init(&rt_se->run_list);
				1134	if (list_empty(array->queue + rt_se_prio(rt_se)))
				1135	__clear_bit(rt_se_prio(rt_se), array->bitmap);
				1136
				1137	dec_rt_tasks(rt_se, rt_rq);
				1138	if (!rt_rq->rt_nr_running)
				1139	list_del_leaf_rt_rq(rt_rq);
				1140	}
				1141
				1142	/*
				1143	* Because the prio of an upper entry depends on the lower
				1144	* entries, we must remove entries top - down.
				1145	*/
				1146	static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
				1147	{
				1148	struct sched_rt_entity *back = NULL;
				1149
				1150	for_each_sched_rt_entity(rt_se) {
				1151	rt_se->back = back;
				1152	back = rt_se;
				1153	}
				1154
				1155	for (rt_se = back; rt_se; rt_se = rt_se->back) {
				1156	if (on_rt_rq(rt_se))
				1157	__dequeue_rt_entity(rt_se);
				1158	}
				1159	}
				1160
				1161	static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
				1162	{
				1163	dequeue_rt_stack(rt_se);
				1164	for_each_sched_rt_entity(rt_se)
				1165	__enqueue_rt_entity(rt_se, head);
				1166	}
				1167
				1168	static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
				1169	{
				1170	dequeue_rt_stack(rt_se);
				1171
				1172	for_each_sched_rt_entity(rt_se) {
				1173	struct rt_rq *rt_rq = group_rt_rq(rt_se);
				1174
				1175	if (rt_rq && rt_rq->rt_nr_running)
				1176	__enqueue_rt_entity(rt_se, false);
				1177	}
				1178	}
				1179
				1180	/*
				1181	* Adding/removing a task to/from a priority array:
				1182	*/
				1183	static void
				1184	enqueue_task_rt(struct rq rq, struct task_struct p, int flags)
				1185	{
				1186	struct sched_rt_entity *rt_se = &p->rt;
				1187
				1188	if (flags & ENQUEUE_WAKEUP)
				1189	rt_se->timeout = 0;
				1190
				1191	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
				1192
				1193	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
				1194	enqueue_pushable_task(rq, p);
				1195
				1196	inc_nr_running(rq);
				1197	}
				1198
				1199	static void dequeue_task_rt(struct rq rq, struct task_struct p, int flags)
				1200	{
				1201	struct sched_rt_entity *rt_se = &p->rt;
				1202
				1203	update_curr_rt(rq);
				1204	dequeue_rt_entity(rt_se);
				1205
				1206	dequeue_pushable_task(rq, p);
				1207
				1208	dec_nr_running(rq);
				1209	}
				1210
				1211	/*
				1212	* Put task to the head or the end of the run list without the overhead of
				1213	* dequeue followed by enqueue.
				1214	*/
				1215	static void
				1216	requeue_rt_entity(struct rt_rq rt_rq, struct sched_rt_entity rt_se, int head)
				1217	{
				1218	if (on_rt_rq(rt_se)) {
				1219	struct rt_prio_array *array = &rt_rq->active;
				1220	struct list_head *queue = array->queue + rt_se_prio(rt_se);
				1221
				1222	if (head)
				1223	list_move(&rt_se->run_list, queue);
				1224	else
				1225	list_move_tail(&rt_se->run_list, queue);
				1226	}
				1227	}
				1228
				1229	static void requeue_task_rt(struct rq rq, struct task_struct p, int head)
				1230	{
				1231	struct sched_rt_entity *rt_se = &p->rt;
				1232	struct rt_rq *rt_rq;
				1233
				1234	for_each_sched_rt_entity(rt_se) {
				1235	rt_rq = rt_rq_of_se(rt_se);
				1236	requeue_rt_entity(rt_rq, rt_se, head);
				1237	}
				1238	}
				1239
				1240	static void yield_task_rt(struct rq *rq)
				1241	{
				1242	requeue_task_rt(rq, rq->curr, 0);
				1243	}
				1244
				1245	#ifdef CONFIG_SMP
				1246	static int find_lowest_rq(struct task_struct *task);
				1247
				1248	static int
				1249	select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
				1250	{
				1251	struct task_struct *curr;
				1252	struct rq *rq;
				1253	int cpu;
				1254
				1255	cpu = task_cpu(p);
				1256
				1257	if (p->rt.nr_cpus_allowed == 1)
				1258	goto out;
				1259
				1260	/* For anything but wake ups, just return the task_cpu */
				1261	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
				1262	goto out;
				1263
				1264	rq = cpu_rq(cpu);
				1265
				1266	rcu_read_lock();
				1267	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
				1268
				1269	/*
				1270	* If the current task on @p's runqueue is an RT task, then
				1271	* try to see if we can wake this RT task up on another
				1272	* runqueue. Otherwise simply start this RT task
				1273	* on its current runqueue.
				1274	*
				1275	* We want to avoid overloading runqueues. If the woken
				1276	* task is a higher priority, then it will stay on this CPU
				1277	* and the lower prio task should be moved to another CPU.
				1278	* Even though this will probably make the lower prio task
				1279	* lose its cache, we do not want to bounce a higher task
				1280	* around just because it gave up its CPU, perhaps for a
				1281	* lock?
				1282	*
				1283	* For equal prio tasks, we just let the scheduler sort it out.
				1284	*
				1285	* Otherwise, just let it ride on the affined RQ and the
				1286	* post-schedule router will push the preempted task away
				1287	*
				1288	* This test is optimistic, if we get it wrong the load-balancer
				1289	* will have to sort it out.
				1290	*/
				1291	if (curr && unlikely(rt_task(curr)) &&
				1292	(curr->rt.nr_cpus_allowed < 2 \|\|
				1293	curr->prio <= p->prio) &&
				1294	(p->rt.nr_cpus_allowed > 1)) {
				1295	int target = find_lowest_rq(p);
				1296
				1297	/*
				1298	* Don't bother moving it if the destination CPU is
				1299	* not running a lower priority task.
				1300	*/
				1301	if (target != -1 &&
				1302	p->prio < cpu_rq(target)->rt.highest_prio.curr)
				1303	cpu = target;
				1304	}
				1305	rcu_read_unlock();
				1306
				1307	out:
				1308	return cpu;
				1309	}
				1310
				1311	static void check_preempt_equal_prio(struct rq rq, struct task_struct p)
				1312	{
				1313	if (rq->curr->rt.nr_cpus_allowed == 1)
				1314	return;
				1315
				1316	if (p->rt.nr_cpus_allowed != 1
				1317	&& cpupri_find(&rq->rd->cpupri, p, NULL))
				1318	return;
				1319
				1320	if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
				1321	return;
				1322
				1323	/*
				1324	* There appears to be other cpus that can accept
				1325	* current and none to run 'p', so lets reschedule
				1326	* to try and push current away:
				1327	*/
				1328	requeue_task_rt(rq, p, 1);
				1329	resched_task(rq->curr);
				1330	}
				1331
				1332	#endif /* CONFIG_SMP */
				1333
				1334	/*
				1335	* Preempt the current task with a newly woken task if needed:
				1336	*/
				1337	static void check_preempt_curr_rt(struct rq rq, struct task_struct p, int flags)
				1338	{
				1339	if (p->prio < rq->curr->prio) {
				1340	resched_task(rq->curr);
				1341	return;
				1342	}
				1343
				1344	#ifdef CONFIG_SMP
				1345	/*
				1346	* If:
				1347	*
				1348	* - the newly woken task is of equal priority to the current task
				1349	* - the newly woken task is non-migratable while current is migratable
				1350	* - current will be preempted on the next reschedule
				1351	*
				1352	* we should check to see if current can readily move to a different
				1353	* cpu. If so, we will reschedule to allow the push logic to try
				1354	* to move current somewhere else, making room for our non-migratable
				1355	* task.
				1356	*/
				1357	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
				1358	check_preempt_equal_prio(rq, p);
				1359	#endif
				1360	}
				1361
				1362	static struct sched_rt_entity pick_next_rt_entity(struct rq rq,
				1363	struct rt_rq *rt_rq)
				1364	{
				1365	struct rt_prio_array *array = &rt_rq->active;
				1366	struct sched_rt_entity *next = NULL;
				1367	struct list_head *queue;
				1368	int idx;
				1369
				1370	idx = sched_find_first_bit(array->bitmap);
				1371	BUG_ON(idx >= MAX_RT_PRIO);
				1372
				1373	queue = array->queue + idx;
				1374	next = list_entry(queue->next, struct sched_rt_entity, run_list);
				1375
				1376	return next;
				1377	}
				1378
				1379	static struct task_struct _pick_next_task_rt(struct rq rq)
				1380	{
				1381	struct sched_rt_entity *rt_se;
				1382	struct task_struct *p;
				1383	struct rt_rq *rt_rq;
				1384
				1385	rt_rq = &rq->rt;
				1386
				1387	if (!rt_rq->rt_nr_running)
				1388	return NULL;
				1389
				1390	if (rt_rq_throttled(rt_rq))
				1391	return NULL;
				1392
				1393	do {
				1394	rt_se = pick_next_rt_entity(rq, rt_rq);
				1395	BUG_ON(!rt_se);
				1396	rt_rq = group_rt_rq(rt_se);
				1397	} while (rt_rq);
				1398
				1399	p = rt_task_of(rt_se);
				1400	p->se.exec_start = rq->clock_task;
				1401
				1402	return p;
				1403	}
				1404
				1405	static struct task_struct pick_next_task_rt(struct rq rq)
				1406	{
				1407	struct task_struct *p = _pick_next_task_rt(rq);
				1408
				1409	/* The running task is never eligible for pushing */
				1410	if (p)
				1411	dequeue_pushable_task(rq, p);
				1412
				1413	#ifdef CONFIG_SMP
				1414	/*
				1415	* We detect this state here so that we can avoid taking the RQ
				1416	* lock again later if there is no need to push
				1417	*/
				1418	rq->post_schedule = has_pushable_tasks(rq);
				1419	#endif
				1420
				1421	return p;
				1422	}
				1423
				1424	static void put_prev_task_rt(struct rq rq, struct task_struct p)
				1425	{
				1426	update_curr_rt(rq);
				1427
				1428	/*
				1429	* The previous task needs to be made eligible for pushing
				1430	* if it is still active
				1431	*/
				1432	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
				1433	enqueue_pushable_task(rq, p);
				1434	}
				1435
				1436	#ifdef CONFIG_SMP
				1437
				1438	/* Only try algorithms three times */
				1439	#define RT_MAX_TRIES 3
				1440
				1441	static int pick_rt_task(struct rq rq, struct task_struct p, int cpu)
				1442	{
				1443	if (!task_running(rq, p) &&
				1444	(cpu < 0 \|\| cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
				1445	(p->rt.nr_cpus_allowed > 1))
				1446	return 1;
				1447	return 0;
				1448	}
				1449
				1450	/* Return the second highest RT task, NULL otherwise */
				1451	static struct task_struct pick_next_highest_task_rt(struct rq rq, int cpu)
				1452	{
				1453	struct task_struct *next = NULL;
				1454	struct sched_rt_entity *rt_se;
				1455	struct rt_prio_array *array;
				1456	struct rt_rq *rt_rq;
				1457	int idx;
				1458
				1459	for_each_leaf_rt_rq(rt_rq, rq) {
				1460	array = &rt_rq->active;
				1461	idx = sched_find_first_bit(array->bitmap);
				1462	next_idx:
				1463	if (idx >= MAX_RT_PRIO)
				1464	continue;
				1465	if (next && next->prio <= idx)
				1466	continue;
				1467	list_for_each_entry(rt_se, array->queue + idx, run_list) {
				1468	struct task_struct *p;
				1469
				1470	if (!rt_entity_is_task(rt_se))
				1471	continue;
				1472
				1473	p = rt_task_of(rt_se);
				1474	if (pick_rt_task(rq, p, cpu)) {
				1475	next = p;
				1476	break;
				1477	}
				1478	}
				1479	if (!next) {
				1480	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
				1481	goto next_idx;
				1482	}
				1483	}
				1484
				1485	return next;
				1486	}
				1487
				1488	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
				1489
				1490	static int find_lowest_rq(struct task_struct *task)
				1491	{
				1492	struct sched_domain *sd;
				1493	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
				1494	int this_cpu = smp_processor_id();
				1495	int cpu = task_cpu(task);
				1496
				1497	/* Make sure the mask is initialized first */
				1498	if (unlikely(!lowest_mask))
				1499	return -1;
				1500
				1501	if (task->rt.nr_cpus_allowed == 1)
				1502	return -1; /* No other targets possible */
				1503
				1504	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
				1505	return -1; /* No targets found */
				1506
				1507	/*
				1508	* At this point we have built a mask of cpus representing the
				1509	* lowest priority tasks in the system. Now we want to elect
				1510	* the best one based on our affinity and topology.
				1511	*
				1512	* We prioritize the last cpu that the task executed on since
				1513	* it is most likely cache-hot in that location.
				1514	*/
				1515	if (cpumask_test_cpu(cpu, lowest_mask))
				1516	return cpu;
				1517
				1518	/*
				1519	* Otherwise, we consult the sched_domains span maps to figure
				1520	* out which cpu is logically closest to our hot cache data.
				1521	*/
				1522	if (!cpumask_test_cpu(this_cpu, lowest_mask))
				1523	this_cpu = -1; /* Skip this_cpu opt if not among lowest */
				1524
				1525	rcu_read_lock();
				1526	for_each_domain(cpu, sd) {
				1527	if (sd->flags & SD_WAKE_AFFINE) {
				1528	int best_cpu;
				1529
				1530	/*
				1531	* "this_cpu" is cheaper to preempt than a
				1532	* remote processor.
				1533	*/
				1534	if (this_cpu != -1 &&
				1535	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1536	rcu_read_unlock();
				1537	return this_cpu;
				1538	}
				1539
				1540	best_cpu = cpumask_first_and(lowest_mask,
				1541	sched_domain_span(sd));
				1542	if (best_cpu < nr_cpu_ids) {
				1543	rcu_read_unlock();
				1544	return best_cpu;
				1545	}
				1546	}
				1547	}
				1548	rcu_read_unlock();
				1549
				1550	/*
				1551	* And finally, if there were no matches within the domains
				1552	* just give the caller something to work with from the compatible
				1553	* locations.
				1554	*/
				1555	if (this_cpu != -1)
				1556	return this_cpu;
				1557
				1558	cpu = cpumask_any(lowest_mask);
				1559	if (cpu < nr_cpu_ids)
				1560	return cpu;
				1561	return -1;
				1562	}
				1563
				1564	/* Will lock the rq it finds */
				1565	static struct rq find_lock_lowest_rq(struct task_struct task, struct rq *rq)
				1566	{
				1567	struct rq *lowest_rq = NULL;
				1568	int tries;
				1569	int cpu;
				1570
				1571	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
				1572	cpu = find_lowest_rq(task);
				1573
				1574	if ((cpu == -1) \|\| (cpu == rq->cpu))
				1575	break;
				1576
				1577	lowest_rq = cpu_rq(cpu);
				1578
				1579	if (lowest_rq->rt.highest_prio.curr <= task->prio) {
				1580	/*
				1581	* Target rq has tasks of equal or higher priority,
				1582	* retrying does not release any lock and is unlikely
				1583	* to yield a different result.
				1584	*/
				1585	lowest_rq = NULL;
				1586	break;
				1587	}
				1588
				1589	/* if the prio of this runqueue changed, try again */
				1590	if (double_lock_balance(rq, lowest_rq)) {
				1591	/*
				1592	* We had to unlock the run queue. In
				1593	* the mean time, task could have
				1594	* migrated already or had its affinity changed.
				1595	* Also make sure that it wasn't scheduled on its rq.
				1596	*/
				1597	if (unlikely(task_rq(task) != rq \|\|
				1598	!cpumask_test_cpu(lowest_rq->cpu,
				1599	tsk_cpus_allowed(task)) \|\|
				1600	task_running(rq, task) \|\|
				1601	!task->on_rq)) {
				1602
				1603	raw_spin_unlock(&lowest_rq->lock);
				1604	lowest_rq = NULL;
				1605	break;
				1606	}
				1607	}
				1608
				1609	/* If this rq is still suitable use it. */
				1610	if (lowest_rq->rt.highest_prio.curr > task->prio)
				1611	break;
				1612
				1613	/* try again */
				1614	double_unlock_balance(rq, lowest_rq);
				1615	lowest_rq = NULL;
				1616	}
				1617
				1618	return lowest_rq;
				1619	}
				1620
				1621	static struct task_struct pick_next_pushable_task(struct rq rq)
				1622	{
				1623	struct task_struct *p;
				1624
				1625	if (!has_pushable_tasks(rq))
				1626	return NULL;
				1627
				1628	p = plist_first_entry(&rq->rt.pushable_tasks,
				1629	struct task_struct, pushable_tasks);
				1630
				1631	BUG_ON(rq->cpu != task_cpu(p));
				1632	BUG_ON(task_current(rq, p));
				1633	BUG_ON(p->rt.nr_cpus_allowed <= 1);
				1634
				1635	BUG_ON(!p->on_rq);
				1636	BUG_ON(!rt_task(p));
				1637
				1638	return p;
				1639	}
				1640
				1641	/*
				1642	* If the current CPU has more than one RT task, see if the non
				1643	* running task can migrate over to a CPU that is running a task
				1644	* of lesser priority.
				1645	*/
				1646	static int push_rt_task(struct rq *rq)
				1647	{
				1648	struct task_struct *next_task;
				1649	struct rq *lowest_rq;
				1650	int ret = 0;
				1651
				1652	if (!rq->rt.overloaded)
				1653	return 0;
				1654
				1655	next_task = pick_next_pushable_task(rq);
				1656	if (!next_task)
				1657	return 0;
				1658
				1659	#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
				1660	if (unlikely(task_running(rq, next_task)))
				1661	return 0;
				1662	#endif
				1663
				1664	retry:
				1665	if (unlikely(next_task == rq->curr)) {
				1666	WARN_ON(1);
				1667	return 0;
				1668	}
				1669
				1670	/*
				1671	* It's possible that the next_task slipped in of
				1672	* higher priority than current. If that's the case
				1673	* just reschedule current.
				1674	*/
				1675	if (unlikely(next_task->prio < rq->curr->prio)) {
				1676	resched_task(rq->curr);
				1677	return 0;
				1678	}
				1679
				1680	/* We might release rq lock */
				1681	get_task_struct(next_task);
				1682
				1683	/* find_lock_lowest_rq locks the rq if found */
				1684	lowest_rq = find_lock_lowest_rq(next_task, rq);
				1685	if (!lowest_rq) {
				1686	struct task_struct *task;
				1687	/*
				1688	* find_lock_lowest_rq releases rq->lock
				1689	* so it is possible that next_task has migrated.
				1690	*
				1691	* We need to make sure that the task is still on the same
				1692	* run-queue and is also still the next task eligible for
				1693	* pushing.
				1694	*/
				1695	task = pick_next_pushable_task(rq);
				1696	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1697	/*
				1698	* The task hasn't migrated, and is still the next
				1699	* eligible task, but we failed to find a run-queue
				1700	* to push it to. Do not retry in this case, since
				1701	* other cpus will pull from us when ready.
				1702	*/
				1703	goto out;
				1704	}
				1705
				1706	if (!task)
				1707	/* No more tasks, just exit */
				1708	goto out;
				1709
				1710	/*
				1711	* Something has shifted, try again.
				1712	*/
				1713	put_task_struct(next_task);
				1714	next_task = task;
				1715	goto retry;
				1716	}
				1717
				1718	deactivate_task(rq, next_task, 0);
				1719	set_task_cpu(next_task, lowest_rq->cpu);
				1720	activate_task(lowest_rq, next_task, 0);
				1721	ret = 1;
				1722
				1723	resched_task(lowest_rq->curr);
				1724
				1725	double_unlock_balance(rq, lowest_rq);
				1726
				1727	out:
				1728	put_task_struct(next_task);
				1729
				1730	return ret;
				1731	}
				1732
				1733	static void push_rt_tasks(struct rq *rq)
				1734	{
				1735	/* push_rt_task will return true if it moved an RT */
				1736	while (push_rt_task(rq))
				1737	;
				1738	}
				1739
				1740	static int pull_rt_task(struct rq *this_rq)
				1741	{
				1742	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1743	struct task_struct *p;
				1744	struct rq *src_rq;
				1745
				1746	if (likely(!rt_overloaded(this_rq)))
				1747	return 0;
				1748
				1749	for_each_cpu(cpu, this_rq->rd->rto_mask) {
				1750	if (this_cpu == cpu)
				1751	continue;
				1752
				1753	src_rq = cpu_rq(cpu);
				1754
				1755	/*
				1756	* Don't bother taking the src_rq->lock if the next highest
				1757	* task is known to be lower-priority than our current task.
				1758	* This may look racy, but if this value is about to go
				1759	* logically higher, the src_rq will push this task away.
				1760	* And if its going logically lower, we do not care
				1761	*/
				1762	if (src_rq->rt.highest_prio.next >=
				1763	this_rq->rt.highest_prio.curr)
				1764	continue;
				1765
				1766	/*
				1767	* We can potentially drop this_rq's lock in
				1768	* double_lock_balance, and another CPU could
				1769	* alter this_rq
				1770	*/
				1771	double_lock_balance(this_rq, src_rq);
				1772
				1773	/*
				1774	* Are there still pullable RT tasks?
				1775	*/
				1776	if (src_rq->rt.rt_nr_running <= 1)
				1777	goto skip;
				1778
				1779	p = pick_next_highest_task_rt(src_rq, this_cpu);
				1780
				1781	/*
				1782	* Do we have an RT task that preempts
				1783	* the to-be-scheduled task?
				1784	*/
				1785	if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
				1786	WARN_ON(p == src_rq->curr);
				1787	WARN_ON(!p->on_rq);
				1788
				1789	/*
				1790	* There's a chance that p is higher in priority
				1791	* than what's currently running on its cpu.
				1792	* This is just that p is wakeing up and hasn't
				1793	* had a chance to schedule. We only pull
				1794	* p if it is lower in priority than the
				1795	* current task on the run queue
				1796	*/
				1797	if (p->prio < src_rq->curr->prio)
				1798	goto skip;
				1799
				1800	ret = 1;
				1801
				1802	deactivate_task(src_rq, p, 0);
				1803	set_task_cpu(p, this_cpu);
				1804	activate_task(this_rq, p, 0);
				1805	/*
				1806	* We continue with the search, just in
				1807	* case there's an even higher prio task
				1808	* in another runqueue. (low likelihood
				1809	* but possible)
				1810	*/
				1811	}
				1812	skip:
				1813	double_unlock_balance(this_rq, src_rq);
				1814	}
				1815
				1816	return ret;
				1817	}
				1818
				1819	static void pre_schedule_rt(struct rq rq, struct task_struct prev)
				1820	{
				1821	/* Try to pull RT tasks here if we lower this rq's prio */
				1822	if (rq->rt.highest_prio.curr > prev->prio)
				1823	pull_rt_task(rq);
				1824	}
				1825
				1826	static void post_schedule_rt(struct rq *rq)
				1827	{
				1828	push_rt_tasks(rq);
				1829	}
				1830
				1831	/*
				1832	* If we are not running and we are not going to reschedule soon, we should
				1833	* try to push tasks away now
				1834	*/
				1835	static void task_woken_rt(struct rq rq, struct task_struct p)
				1836	{
				1837	if (!task_running(rq, p) &&
				1838	!test_tsk_need_resched(rq->curr) &&
				1839	has_pushable_tasks(rq) &&
				1840	p->rt.nr_cpus_allowed > 1 &&
				1841	rt_task(rq->curr) &&
				1842	(rq->curr->rt.nr_cpus_allowed < 2 \|\|
				1843	rq->curr->prio <= p->prio))
				1844	push_rt_tasks(rq);
				1845	}
				1846
				1847	static void set_cpus_allowed_rt(struct task_struct *p,
				1848	const struct cpumask *new_mask)
				1849	{
				1850	int weight = cpumask_weight(new_mask);
				1851
				1852	BUG_ON(!rt_task(p));
				1853
				1854	/*
				1855	* Update the migration status of the RQ if we have an RT task
				1856	* which is running AND changing its weight value.
				1857	*/
				1858	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
				1859	struct rq *rq = task_rq(p);
				1860
				1861	if (!task_current(rq, p)) {
				1862	/*
				1863	* Make sure we dequeue this task from the pushable list
				1864	* before going further. It will either remain off of
				1865	* the list because we are no longer pushable, or it
				1866	* will be requeued.
				1867	*/
				1868	if (p->rt.nr_cpus_allowed > 1)
				1869	dequeue_pushable_task(rq, p);
				1870
				1871	/*
				1872	* Requeue if our weight is changing and still > 1
				1873	*/
				1874	if (weight > 1)
				1875	enqueue_pushable_task(rq, p);
				1876
				1877	}
				1878
				1879	if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
				1880	rq->rt.rt_nr_migratory++;
				1881	} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
				1882	BUG_ON(!rq->rt.rt_nr_migratory);
				1883	rq->rt.rt_nr_migratory--;
				1884	}
				1885
				1886	update_rt_migration(&rq->rt);
				1887	}
				1888	}
				1889
				1890	/* Assumes rq->lock is held */
				1891	static void rq_online_rt(struct rq *rq)
				1892	{
				1893	if (rq->rt.overloaded)
				1894	rt_set_overload(rq);
				1895
				1896	__enable_runtime(rq);
				1897
				1898	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
				1899	}
				1900
				1901	/* Assumes rq->lock is held */
				1902	static void rq_offline_rt(struct rq *rq)
				1903	{
				1904	if (rq->rt.overloaded)
				1905	rt_clear_overload(rq);
				1906
				1907	__disable_runtime(rq);
				1908
				1909	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
				1910	}
				1911
				1912	/*
				1913	* When switch from the rt queue, we bring ourselves to a position
				1914	* that we might want to pull RT tasks from other runqueues.
				1915	*/
				1916	static void switched_from_rt(struct rq rq, struct task_struct p)
				1917	{
				1918	/*
				1919	* If there are other RT tasks then we will reschedule
				1920	* and the scheduling of the other RT tasks will handle
				1921	* the balancing. But if we are the last RT task
				1922	* we may need to handle the pulling of RT tasks
				1923	* now.
				1924	*/
				1925	if (p->on_rq && !rq->rt.rt_nr_running)
				1926	pull_rt_task(rq);
				1927	}
				1928
				1929	void init_sched_rt_class(void)
				1930	{
				1931	unsigned int i;
				1932
				1933	for_each_possible_cpu(i) {
				1934	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
				1935	GFP_KERNEL, cpu_to_node(i));
				1936	}
				1937	}
				1938	#endif /* CONFIG_SMP */
				1939
				1940	/*
				1941	* When switching a task to RT, we may overload the runqueue
				1942	* with RT tasks. In this case we try to push them off to
				1943	* other runqueues.
				1944	*/
				1945	static void switched_to_rt(struct rq rq, struct task_struct p)
				1946	{
				1947	int check_resched = 1;
				1948
				1949	/*
				1950	* If we are already running, then there's nothing
				1951	* that needs to be done. But if we are not running
				1952	* we may need to preempt the current running task.
				1953	* If that current running task is also an RT task
				1954	* then see if we can move to another run queue.
				1955	*/
				1956	if (p->on_rq && rq->curr != p) {
				1957	#ifdef CONFIG_SMP
				1958	if (rq->rt.overloaded && push_rt_task(rq) &&
				1959	/* Don't resched if we changed runqueues */
				1960	rq != task_rq(p))
				1961	check_resched = 0;
				1962	#endif /* CONFIG_SMP */
				1963	if (check_resched && p->prio < rq->curr->prio)
				1964	resched_task(rq->curr);
				1965	}
				1966	}
				1967
				1968	/*
				1969	* Priority of the task has changed. This may cause
				1970	* us to initiate a push or pull.
				1971	*/
				1972	static void
				1973	prio_changed_rt(struct rq rq, struct task_struct p, int oldprio)
				1974	{
				1975	if (!p->on_rq)
				1976	return;
				1977
				1978	if (rq->curr == p) {
				1979	#ifdef CONFIG_SMP
				1980	/*
				1981	* If our priority decreases while running, we
				1982	* may need to pull tasks to this runqueue.
				1983	*/
				1984	if (oldprio < p->prio)
				1985	pull_rt_task(rq);
				1986	/*
				1987	* If there's a higher priority task waiting to run
				1988	* then reschedule. Note, the above pull_rt_task
				1989	* can release the rq lock and p could migrate.
				1990	* Only reschedule if p is still on the same runqueue.
				1991	*/
				1992	if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
				1993	resched_task(p);
				1994	#else
				1995	/* For UP simply resched on drop of prio */
				1996	if (oldprio < p->prio)
				1997	resched_task(p);
				1998	#endif /* CONFIG_SMP */
				1999	} else {
				2000	/*
				2001	* This task is not running, but if it is
				2002	* greater than the current running task
				2003	* then reschedule.
				2004	*/
				2005	if (p->prio < rq->curr->prio)
				2006	resched_task(rq->curr);
				2007	}
				2008	}
				2009
				2010	static void watchdog(struct rq rq, struct task_struct p)
				2011	{
				2012	unsigned long soft, hard;
				2013
				2014	/* max may change after cur was read, this will be fixed next tick */
				2015	soft = task_rlimit(p, RLIMIT_RTTIME);
				2016	hard = task_rlimit_max(p, RLIMIT_RTTIME);
				2017
				2018	if (soft != RLIM_INFINITY) {
				2019	unsigned long next;
				2020
				2021	if (p->rt.watchdog_stamp != jiffies) {
				2022	p->rt.timeout++;
				2023	p->rt.watchdog_stamp = jiffies;
				2024	}
				2025
				2026	next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
				2027	if (p->rt.timeout > next)
				2028	p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
				2029	}
				2030	}
				2031
				2032	static void task_tick_rt(struct rq rq, struct task_struct p, int queued)
				2033	{
				2034	struct sched_rt_entity *rt_se = &p->rt;
				2035
				2036	update_curr_rt(rq);
				2037
				2038	watchdog(rq, p);
				2039
				2040	/*
				2041	* RR tasks need a special form of timeslice management.
				2042	* FIFO tasks have no timeslices.
				2043	*/
				2044	if (p->policy != SCHED_RR)
				2045	return;
				2046
				2047	if (--p->rt.time_slice)
				2048	return;
				2049
				2050	p->rt.time_slice = RR_TIMESLICE;
				2051
				2052	/*
				2053	* Requeue to the end of queue if we (and all of our ancestors) are the
				2054	* only element on the queue
				2055	*/
				2056	for_each_sched_rt_entity(rt_se) {
				2057	if (rt_se->run_list.prev != rt_se->run_list.next) {
				2058	requeue_task_rt(rq, p, 0);
				2059	set_tsk_need_resched(p);
				2060	return;
				2061	}
				2062	}
				2063	}
				2064
				2065	static void set_curr_task_rt(struct rq *rq)
				2066	{
				2067	struct task_struct *p = rq->curr;
				2068
				2069	p->se.exec_start = rq->clock_task;
				2070
				2071	/* The running task is never eligible for pushing */
				2072	dequeue_pushable_task(rq, p);
				2073	}
				2074
				2075	static unsigned int get_rr_interval_rt(struct rq rq, struct task_struct task)
				2076	{
				2077	/*
				2078	* Time slice is 0 for SCHED_FIFO tasks
				2079	*/
				2080	if (task->policy == SCHED_RR)
				2081	return RR_TIMESLICE;
				2082	else
				2083	return 0;
				2084	}
				2085
				2086	const struct sched_class rt_sched_class = {
				2087	.next = &fair_sched_class,
				2088	.enqueue_task = enqueue_task_rt,
				2089	.dequeue_task = dequeue_task_rt,
				2090	.yield_task = yield_task_rt,
				2091
				2092	.check_preempt_curr = check_preempt_curr_rt,
				2093
				2094	.pick_next_task = pick_next_task_rt,
				2095	.put_prev_task = put_prev_task_rt,
				2096
				2097	#ifdef CONFIG_SMP
				2098	.select_task_rq = select_task_rq_rt,
				2099
				2100	.set_cpus_allowed = set_cpus_allowed_rt,
				2101	.rq_online = rq_online_rt,
				2102	.rq_offline = rq_offline_rt,
				2103	.pre_schedule = pre_schedule_rt,
				2104	.post_schedule = post_schedule_rt,
				2105	.task_woken = task_woken_rt,
				2106	.switched_from = switched_from_rt,
				2107	#endif
				2108
				2109	.set_curr_task = set_curr_task_rt,
				2110	.task_tick = task_tick_rt,
				2111
				2112	.get_rr_interval = get_rr_interval_rt,
				2113
				2114	.prio_changed = prio_changed_rt,
				2115	.switched_to = switched_to_rt,
				2116	};
				2117
				2118	#ifdef CONFIG_SCHED_DEBUG
				2119	extern void print_rt_rq(struct seq_file m, int cpu, struct rt_rq rt_rq);
				2120
				2121	void print_rt_stats(struct seq_file *m, int cpu)
				2122	{
				2123	rt_rq_iter_t iter;
				2124	struct rt_rq *rt_rq;
				2125
				2126	rcu_read_lock();
				2127	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
				2128	print_rt_rq(m, cpu, rt_rq);
				2129	rcu_read_unlock();
				2130	}
				2131	#endif /* CONFIG_SCHED_DEBUG */