/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/slab.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>

#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job) \
		container_of((sched_job), struct drm_sched_job, queue_node)

/**
 * drm_sched_entity_init - Init a context entity used by the scheduler
 * when submitting to a HW ring.
 *
 * @entity: scheduler entity to init
 * @rq_list: list of run queues on which jobs from this
 *           entity can be submitted
 * @num_rq_list: number of run queues in rq_list
 * @guilty: atomic_t set to 1 when a job on this entity
 *          is found to be guilty of causing a timeout
 *
 * Note: rq_list should have at least one element to schedule
 *       the entity
 *
 * Returns 0 on success or a negative error code on failure.
 */
int drm_sched_entity_init(struct drm_sched_entity *entity,
			  struct drm_sched_rq **rq_list,
			  unsigned int num_rq_list,
			  atomic_t *guilty)
{
	int i;

	if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
		return -EINVAL;

	memset(entity, 0, sizeof(struct drm_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = NULL;
	entity->guilty = guilty;
	entity->num_rq_list = num_rq_list;
	entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
				  GFP_KERNEL);
	if (!entity->rq_list)
		return -ENOMEM;

	for (i = 0; i < num_rq_list; ++i)
		entity->rq_list[i] = rq_list[i];

	if (num_rq_list)
		entity->rq = rq_list[0];

	entity->last_scheduled = NULL;

	spin_lock_init(&entity->rq_lock);
	spsc_queue_init(&entity->job_queue);

	atomic_set(&entity->fence_seq, 0);
	/* Two fence contexts: one for scheduled fences, one for finished */
	entity->fence_context = dma_fence_context_alloc(2);

	return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);
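
/*
 * Example usage (an illustrative sketch only; "my_sched" and "my_entity"
 * are hypothetical driver-side names, not part of this API):
 *
 *	struct drm_sched_rq *rq_list[] = {
 *		&my_sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL],
 *	};
 *	int r;
 *
 *	r = drm_sched_entity_init(&my_entity, rq_list,
 *				  ARRAY_SIZE(rq_list), NULL);
 *	if (r)
 *		return r;
 */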

/**
 * drm_sched_entity_is_idle - Check if entity is idle
 *
 * @entity: scheduler entity
 *
 * Returns true if the entity does not have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
	rmb(); /* for list_empty to work without lock */

	if (list_empty(&entity->list) ||
	    spsc_queue_count(&entity->job_queue) == 0)
		return true;

	return false;
}

/**
 * drm_sched_entity_is_ready - Check if entity is ready
 *
 * @entity: scheduler entity
 *
 * Returns true if the entity could provide a job.
 */
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return false;

	if (READ_ONCE(entity->dependency))
		return false;

	return true;
}

/**
 * drm_sched_entity_get_free_sched - Get the rq from rq_list with the
 * least load
 *
 * @entity: scheduler entity
 *
 * Returns a pointer to the rq with the least load.
 */
static struct drm_sched_rq *
drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
{
	struct drm_sched_rq *rq = NULL;
	unsigned int min_jobs = UINT_MAX, num_jobs;
	int i;

	for (i = 0; i < entity->num_rq_list; ++i) {
		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;

		if (!entity->rq_list[i]->sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping", sched->name);
			continue;
		}

		num_jobs = atomic_read(&sched->num_jobs);
		if (num_jobs < min_jobs) {
			min_jobs = num_jobs;
			rq = entity->rq_list[i];
		}
	}

	return rq;
}

/**
 * drm_sched_entity_flush - Flush a context entity
 *
 * @entity: scheduler entity
 * @timeout: time to wait, in jiffies, for the queue to become empty
 *
 * drm_sched_entity_fini() is split into two functions; this first one does
 * the waiting, removes the entity from the runqueue and returns an error
 * when the process was killed.
 *
 * Returns the remaining time in jiffies left from the input timeout
 */
long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
{
	struct drm_gpu_scheduler *sched;
	struct task_struct *last_user;
	long ret = timeout;

	if (!entity->rq)
		return 0;

	sched = entity->rq->sched;
	/*
	 * The client will not queue more IBs during this fini; consume
	 * existing queued IBs or discard them on SIGKILL.
	 */
	if (current->flags & PF_EXITING) {
		if (timeout)
			ret = wait_event_timeout(
					sched->job_scheduled,
					drm_sched_entity_is_idle(entity),
					timeout);
	} else {
		wait_event_killable(sched->job_scheduled,
				    drm_sched_entity_is_idle(entity));
	}

	/* For a killed process, disable any further IB enqueue right now */
	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
	if ((!last_user || last_user == current->group_leader) &&
	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
		spin_lock(&entity->rq_lock);
		entity->stopped = true;
		drm_sched_rq_remove_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
	}

	return ret;
}
EXPORT_SYMBOL(drm_sched_entity_flush);
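
/*
 * Example teardown (an illustrative sketch; "entity" is a hypothetical
 * driver-side pointer): wait for queued jobs first, then free the entity
 * state. drm_sched_entity_destroy() below combines both steps:
 *
 *	drm_sched_entity_flush(entity, msecs_to_jiffies(1000));
 *	drm_sched_entity_fini(entity);
 */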

/**
 * drm_sched_entity_kill_jobs_cb - helper for drm_sched_entity_kill_jobs
 *
 * @f: signaled fence
 * @cb: our callback structure
 *
 * Signal the scheduler finished fence when the entity in question is killed.
 */
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
					  struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	drm_sched_fence_finished(job->s_fence);
	WARN_ON(job->s_fence->parent);
	job->sched->ops->free_job(job);
}

/**
 * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
 *
 * @entity: entity which is cleaned up
 *
 * Makes sure that all remaining jobs in an entity are killed before it is
 * destroyed.
 */
static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
{
	struct drm_sched_job *job;
	struct dma_fence *f;
	int r;

	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
		struct drm_sched_fence *s_fence = job->s_fence;

		/* Wait for all dependencies to avoid data corruption */
		while ((f = job->sched->ops->dependency(job, entity)))
			dma_fence_wait(f, false);

		drm_sched_fence_scheduled(s_fence);
		dma_fence_set_error(&s_fence->finished, -ESRCH);

		/*
		 * When the pipeline is hung by an older entity, a new entity
		 * might not even have had a chance to submit its first job to
		 * the HW, so entity->last_scheduled will remain NULL.
		 */
		if (!entity->last_scheduled) {
			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
			continue;
		}

		r = dma_fence_add_callback(entity->last_scheduled,
					   &job->finish_cb,
					   drm_sched_entity_kill_jobs_cb);
		if (r == -ENOENT)
			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
		else if (r)
			DRM_ERROR("fence add callback failed (%d)\n", r);
	}
}

/**
 * drm_sched_entity_fini - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * This should be called after drm_sched_entity_flush(). It goes over the
 * entity and signals all jobs with an error code if the process was killed.
 */
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = NULL;

	if (entity->rq) {
		sched = entity->rq->sched;
		drm_sched_rq_remove_entity(entity->rq, entity);
	}

	/* Consumption of existing IBs wasn't completed. Forcefully
	 * remove them here.
	 */
	if (spsc_queue_count(&entity->job_queue)) {
		if (sched) {
			/* Park the scheduler thread for a moment to make
			 * sure it isn't processing our entity.
			 */
			kthread_park(sched->thread);
			kthread_unpark(sched->thread);
		}
		if (entity->dependency) {
			dma_fence_remove_callback(entity->dependency,
						  &entity->cb);
			dma_fence_put(entity->dependency);
			entity->dependency = NULL;
		}

		drm_sched_entity_kill_jobs(entity);
	}

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = NULL;
	kfree(entity->rq_list);
}
EXPORT_SYMBOL(drm_sched_entity_fini);

/**
 * drm_sched_entity_destroy - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * Calls drm_sched_entity_flush() and drm_sched_entity_fini()
 */
void drm_sched_entity_destroy(struct drm_sched_entity *entity)
{
	drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
	drm_sched_entity_fini(entity);
}
EXPORT_SYMBOL(drm_sched_entity_destroy);
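
/*
 * Note: drivers typically call drm_sched_entity_destroy() from their context
 * or file teardown path, once userspace can no longer submit to the entity.
 */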

/**
 * drm_sched_entity_clear_dep - callback to clear the entity's dependency
 */
static void drm_sched_entity_clear_dep(struct dma_fence *f,
				       struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
}

/**
 * drm_sched_entity_wakeup - callback to clear the entity's dependency and
 * wake up the scheduler
 */
static void drm_sched_entity_wakeup(struct dma_fence *f,
				    struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	drm_sched_entity_clear_dep(f, cb);
	drm_sched_wakeup(entity->rq->sched);
}

/**
 * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
 */
static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
					     enum drm_sched_priority priority)
{
	*rq = &(*rq)->sched->sched_rq[priority];
}

/**
 * drm_sched_entity_set_priority - Sets priority of the entity
 *
 * @entity: scheduler entity
 * @priority: scheduler priority
 *
 * Update the priority of run queues used for the entity.
 */
void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
				   enum drm_sched_priority priority)
{
	unsigned int i;

	spin_lock(&entity->rq_lock);

	for (i = 0; i < entity->num_rq_list; ++i)
		drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);

	if (entity->rq) {
		drm_sched_rq_remove_entity(entity->rq, entity);
		drm_sched_entity_set_rq_priority(&entity->rq, priority);
		drm_sched_rq_add_entity(entity->rq, entity);
	}

	spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_priority);
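
/*
 * Example (an illustrative sketch; the chosen level is arbitrary and
 * "entity" is a hypothetical driver-side pointer):
 *
 *	drm_sched_entity_set_priority(entity, DRM_SCHED_PRIORITY_NORMAL);
 */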

/**
 * drm_sched_entity_add_dependency_cb - add callback for the entity's dependency
 *
 * @entity: entity with dependency
 *
 * Add a callback to the current dependency of the entity to wake up the
 * scheduler when the entity becomes available.
 */
static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct dma_fence *fence = entity->dependency;
	struct drm_sched_fence *s_fence;

	if (fence->context == entity->fence_context ||
	    fence->context == entity->fence_context + 1) {
		/*
		 * Fence is a scheduled/finished fence from a job
		 * which belongs to the same entity; we can ignore
		 * fences from ourselves.
		 */
		dma_fence_put(entity->dependency);
		return false;
	}

	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {

		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled
		 */
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
				    drm_sched_entity_wakeup))
		return true;

	dma_fence_put(entity->dependency);
	return false;
}

/**
 * drm_sched_entity_pop_job - get a ready to be scheduled job from the entity
 *
 * @entity: entity to get the job from
 *
 * Process all dependencies and try to get one job from the entity's queue.
 */
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_job *sched_job;

	sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
	if (!sched_job)
		return NULL;

	while ((entity->dependency =
			sched->ops->dependency(sched_job, entity))) {
		trace_drm_sched_job_wait_dep(sched_job, entity->dependency);

		if (drm_sched_entity_add_dependency_cb(entity))
			return NULL;
	}

	/* Skip jobs from an entity that is marked guilty */
	if (entity->guilty && atomic_read(entity->guilty))
		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

	spsc_queue_pop(&entity->job_queue);
	return sched_job;
}

/**
 * drm_sched_entity_select_rq - select a new rq for the entity
 *
 * @entity: scheduler entity
 *
 * Check all prerequisites and select a new rq for the entity for load
 * balancing.
 */
void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
{
	struct dma_fence *fence;
	struct drm_sched_rq *rq;

	if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
		return;

	fence = READ_ONCE(entity->last_scheduled);
	if (fence && !dma_fence_is_signaled(fence))
		return;

	rq = drm_sched_entity_get_free_sched(entity);
	if (rq == entity->rq)
		return;

	spin_lock(&entity->rq_lock);
	drm_sched_rq_remove_entity(entity->rq, entity);
	entity->rq = rq;
	spin_unlock(&entity->rq_lock);
}

/**
 * drm_sched_entity_push_job - Submit a job to the entity's job queue
 *
 * @sched_job: job to submit
 * @entity: scheduler entity
 *
 * Note: To guarantee that the order of insertion to the queue matches the
 * job's fence sequence number, this function should be called with
 * drm_sched_job_init() under a common lock.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	bool first;

	trace_drm_sched_job(sched_job, entity);
	atomic_inc(&entity->rq->sched->num_jobs);
	WRITE_ONCE(entity->last_user, current->group_leader);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		if (entity->stopped) {
			spin_unlock(&entity->rq_lock);

			DRM_ERROR("Trying to push to a killed entity\n");
			return;
		}
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(entity->rq->sched);
	}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
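
/*
 * Example submission flow (an illustrative sketch; "job", "entity", "owner"
 * and "my_submit_lock" are hypothetical driver-side names). The job init and
 * the push happen under one lock so that fence sequence numbers match the
 * queue insertion order, as the note above requires:
 *
 *	mutex_lock(&my_submit_lock);
 *	r = drm_sched_job_init(job, entity, owner);
 *	if (!r)
 *		drm_sched_entity_push_job(job, entity);
 *	mutex_unlock(&my_submit_lock);
 */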