/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 */
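
/*
 * A minimal usage sketch (illustrative, not part of this file): a driver
 * creates one scheduler per hardware run queue with drm_sched_init(),
 * attaches entities to it (the entity helpers live in drm_sched_entity.c),
 * and then pushes jobs through an entity roughly like this, where my_job is
 * a hypothetical driver type embedding a struct drm_sched_job:
 *
 *	ret = drm_sched_job_init(&job->base, entity, owner);
 *	if (ret)
 *		return ret;
 *
 *	drm_sched_entity_push_job(&job->base, entity);
 */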

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise.
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);
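
/*
 * A minimal usage sketch (illustrative, not part of this file): a driver can
 * consult drm_sched_dependency_optimized() in its submission path to decide
 * whether an explicit synchronization step is still required before running
 * a job; my_driver_emit_pipeline_sync() is a hypothetical helper:
 *
 *	if (!drm_sched_dependency_optimized(dep_fence, entity))
 *		my_driver_emit_pipeline_sync(ring);
 */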

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);
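
/*
 * A minimal usage sketch (illustrative, not part of this file): a driver
 * would typically call drm_sched_fault() from its fault/error interrupt
 * handler so the timeout handler runs right away instead of waiting for the
 * TDR timer to expire; my_ring and my_fault_irq() are hypothetical names:
 *
 *	static irqreturn_t my_fault_irq(int irq, void *arg)
 *	{
 *		struct my_ring *ring = arg;
 *
 *		drm_sched_fault(&ring->sched);
 *		return IRQ_HANDLED;
 *	}
 */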

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
 * called from an IRQ context.
 *
 * Returns the timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
			      unsigned long remaining)
{
	unsigned long flags;

	spin_lock_irqsave(&sched->job_list_lock, flags);

	if (list_empty(&sched->ring_mirror_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(system_wq, &sched->work_tdr, remaining);

	spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
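
/*
 * A minimal usage sketch (illustrative, not part of this file): the two calls
 * above are meant to be paired around an operation during which job timeouts
 * must not fire; my_driver_do_slow_thing() is a hypothetical helper:
 *
 *	unsigned long remaining = drm_sched_suspend_timeout(&ring->sched);
 *
 *	my_driver_do_slow_thing(ring);
 *
 *	drm_sched_resume_timeout(&ring->sched, remaining);
 */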

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;
	unsigned long flags;

	spin_lock_irqsave(&sched->job_list_lock, flags);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	drm_sched_start_timeout(sched);
	spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;
	unsigned long flags;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
	spin_lock_irqsave(&sched->job_list_lock, flags);
	job = list_first_entry_or_null(&sched->ring_mirror_list,
				       struct drm_sched_job, node);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * drm_sched_get_cleanup_job(). It will be reinserted after
		 * sched->thread is parked, at which point it's safe.
		 */
		list_del_init(&job->node);
		spin_unlock_irqrestore(&sched->job_list_lock, flags);

		job->sched->ops->timedout_job(job);

		/*
		 * Guilty job did complete and hence needs to be manually freed
		 * here. See the drm_sched_stop() documentation.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock_irqrestore(&sched->job_list_lock, flags);
	}

	spin_lock_irqsave(&sched->job_list_lock, flags);
	drm_sched_start_timeout(sched);
	spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment @bad's karma on every hang caused by it. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* Don't increase @bad's karma if it's from the KERNEL RQ, because a
	 * GPU hang can sometimes corrupt kernel jobs (like VM updating jobs),
	 * but kernel jobs are always considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (atomic_read(&bad->karma) >
					    bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler, and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is
 * the caller's responsibility to release it manually if it's not part of the
 * mirror list any more.
 *
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;
	unsigned long flags;

	kthread_park(sched->thread);

	/*
	 * Reinsert the bad job here - now it's safe as
	 * drm_sched_get_cleanup_job() cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in progress
	 * (earlier) cleanups and drm_sched_get_cleanup_job() will not be
	 * called now until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->node, &sched->ring_mirror_list);

	/*
	 * Iterate the job list from later to earlier and either deactivate
	 * their HW callbacks or remove them from the mirror list if they
	 * already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			atomic_dec(&sched->hw_rq_count);
		} else {
			/*
			 * remove job from ring_mirror_list.
			 * Locking here is for concurrent resume timeout
			 */
			spin_lock_irqsave(&sched->job_list_lock, flags);
			list_del_init(&s_job->node);
			spin_unlock_irqrestore(&sched->job_list_lock, flags);

			/*
			 * Wait for job's HW fence callback to finish using s_job
			 * before releasing it.
			 *
			 * Job is still alive so fence refcount at least 1
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep bad job alive for later use during
			 * recovery by some of the drivers but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the pending timer in flight as we rearm it in drm_sched_start().
	 * This prevents the pending timeout work in progress from firing right
	 * away after this TDR finished and before the newly restarted jobs had
	 * a chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
	unsigned long flags;
	int r;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also concurrent
	 * GPU recovers can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_inc(&sched->hw_rq_count);

		if (!full_recovery)
			continue;

		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_job->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
		} else
			drm_sched_process_job(NULL, &s_job->cb);
	}

	if (full_recovery) {
		spin_lock_irqsave(&sched->job_list_lock, flags);
		drm_sched_start_timeout(sched);
		spin_unlock_irqrestore(&sched->job_list_lock, flags);
	}

	kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the ring mirror list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		dma_fence_put(s_job->s_fence->parent);
		fence = sched->ops->run_job(s_job);

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {
			s_job->s_fence->parent = fence;
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
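
/*
 * A minimal recovery sketch (illustrative, not part of this file): drivers
 * commonly combine drm_sched_stop(), drm_sched_increase_karma(),
 * drm_sched_resubmit_jobs() and drm_sched_start() in their timedout_job /
 * reset path roughly as follows; my_driver_reset_hw() is a hypothetical
 * placeholder for the actual hardware reset:
 *
 *	drm_sched_stop(&ring->sched, bad_job);
 *	drm_sched_increase_karma(bad_job);
 *
 *	my_driver_reset_hw(ring);
 *
 *	drm_sched_resubmit_jobs(&ring->sched);
 *	drm_sched_start(&ring->sched, true);
 */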

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	struct drm_gpu_scheduler *sched;

	drm_sched_entity_select_rq(entity);
	if (!entity->rq)
		return -ENOENT;

	sched = entity->rq->sched;

	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_LIST_HEAD(&job->node);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	dma_fence_put(&job->s_fence->finished);
	job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
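
/*
 * A minimal usage sketch (illustrative, not part of this file):
 * drm_sched_job_cleanup() is the counterpart of drm_sched_job_init() and is
 * typically called from the driver's free_job callback once the scheduler is
 * done with the job; my_job, to_my_job() and my_free_job() are hypothetical:
 *
 *	static void my_free_job(struct drm_sched_job *sched_job)
 *	{
 *		struct my_job *job = to_my_job(sched_job);
 *
 *		drm_sched_job_cleanup(sched_job);
 *		kfree(job);
 *	}
 */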

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(&sched->num_jobs);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job;
	unsigned long flags;

	/* Don't destroy jobs while the timeout worker is running */
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !cancel_delayed_work(&sched->work_tdr))
		return NULL;

	spin_lock_irqsave(&sched->job_list_lock, flags);

	job = list_first_entry_or_null(&sched->ring_mirror_list,
				       struct drm_sched_job, node);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from ring_mirror_list */
		list_del_init(&job->node);
	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched);
	}

	spin_unlock_irqrestore(&sched->job_list_lock, flags);

	return job;
}

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
	struct sched_param sparam = {.sched_priority = 1};
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;
		struct drm_sched_job *cleanup_job = NULL;

		wait_event_interruptible(sched->wake_up_worker,
					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (cleanup_job) {
			sched->ops->free_job(cleanup_job);
			/* queue timeout for next job */
			drm_sched_start_timeout(sched);
		}

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);
		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		if (!IS_ERR_OR_NULL(fence)) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &sched_job->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			drm_sched_process_job(NULL, &sched_job->cb);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i, ret;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
		drm_sched_rq_init(sched, &sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->num_jobs, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		ret = PTR_ERR(sched->thread);
		sched->thread = NULL;
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return ret;
	}

	sched->ready = true;
	return 0;
}
EXPORT_SYMBOL(drm_sched_init);
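
/*
 * A minimal setup sketch (illustrative, not part of this file): a driver
 * fills in a struct drm_sched_backend_ops and creates one scheduler per
 * ring; the callback implementations on the right are hypothetical driver
 * functions:
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.dependency	= my_job_dependency,
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	ret = drm_sched_init(&ring->sched, &my_sched_ops, 16, 3,
 *			     msecs_to_jiffies(10000), ring->name);
 *
 * and at device teardown:
 *
 *	drm_sched_fini(&ring->sched);
 */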

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);