// SPDX-License-Identifier: GPL-2.0 OR MIT
/* Copyright 2017-2019 Qiang Yu <yuq825@gmail.com> */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include "lima_drv.h"
#include "lima_sched.h"
#include "lima_vm.h"
#include "lima_mmu.h"
#include "lima_l2_cache.h"
#include "lima_object.h"

struct lima_fence {
	struct dma_fence base;
	struct lima_sched_pipe *pipe;
};

static struct kmem_cache *lima_fence_slab;
static int lima_fence_slab_refcnt;

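/*
 * The fence slab is shared by all lima devices, so it is created on
 * first use and refcounted: each lima_sched_slab_init() call takes a
 * reference and the cache is only destroyed when the last
 * lima_sched_slab_fini() drops it.
 */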
int lima_sched_slab_init(void)
{
	if (!lima_fence_slab) {
		lima_fence_slab = kmem_cache_create(
			"lima_fence", sizeof(struct lima_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!lima_fence_slab)
			return -ENOMEM;
	}

	lima_fence_slab_refcnt++;
	return 0;
}

void lima_sched_slab_fini(void)
{
	if (!--lima_fence_slab_refcnt) {
		kmem_cache_destroy(lima_fence_slab);
		lima_fence_slab = NULL;
	}
}

static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
{
	return container_of(fence, struct lima_fence, base);
}

static const char *lima_fence_get_driver_name(struct dma_fence *fence)
{
	return "lima";
}

static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	return f->pipe->base.name;
}

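/*
 * A dma_fence may still be accessed under rcu_read_lock() (e.g. via
 * dma_fence_get_rcu_safe()) after its refcount drops to zero, so the
 * actual kmem_cache_free() is deferred by a full RCU grace period.
 */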
static void lima_fence_release_rcu(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
	struct lima_fence *fence = to_lima_fence(f);

	kmem_cache_free(lima_fence_slab, fence);
}

static void lima_fence_release(struct dma_fence *fence)
{
	struct lima_fence *f = to_lima_fence(fence);

	call_rcu(&f->base.rcu, lima_fence_release_rcu);
}

static const struct dma_fence_ops lima_fence_ops = {
	.get_driver_name = lima_fence_get_driver_name,
	.get_timeline_name = lima_fence_get_timeline_name,
	.release = lima_fence_release,
};

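/*
 * Each pipe owns one fence context, and seqnos on it increase
 * monotonically with every fence created, so hardware fences from the
 * same pipe are ordered against one another.
 */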
static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
{
	struct lima_fence *fence;

	fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->pipe = pipe;
	dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
		       pipe->fence_context, ++pipe->fence_seqno);

	return fence;
}

static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
{
	return container_of(job, struct lima_sched_task, base);
}

static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
{
	return container_of(sched, struct lima_sched_pipe, base);
}

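/*
 * A task holds a reference on each of its BOs and on its VM for its
 * whole lifetime; both are dropped again in lima_sched_task_fini().
 */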
int lima_sched_task_init(struct lima_sched_task *task,
			 struct lima_sched_context *context,
			 struct lima_bo **bos, int num_bos,
			 struct lima_vm *vm)
{
	int err, i;

	task->bos = kmemdup(bos, sizeof(*bos) * num_bos, GFP_KERNEL);
	if (!task->bos)
		return -ENOMEM;

	for (i = 0; i < num_bos; i++)
		drm_gem_object_get(&bos[i]->gem);

	err = drm_sched_job_init(&task->base, &context->base, vm);
	if (err) {
		kfree(task->bos);
		return err;
	}

	task->num_bos = num_bos;
	task->vm = lima_vm_get(vm);

	xa_init_flags(&task->deps, XA_FLAGS_ALLOC);

	return 0;
}

void lima_sched_task_fini(struct lima_sched_task *task)
{
	struct dma_fence *fence;
	unsigned long index;
	int i;

	drm_sched_job_cleanup(&task->base);

	xa_for_each(&task->deps, index, fence) {
		dma_fence_put(fence);
	}
	xa_destroy(&task->deps);

	if (task->bos) {
		for (i = 0; i < task->num_bos; i++)
			drm_gem_object_put_unlocked(&task->bos[i]->gem);
		kfree(task->bos);
	}

	lima_vm_put(task->vm);
}

int lima_sched_context_init(struct lima_sched_pipe *pipe,
			    struct lima_sched_context *context,
			    atomic_t *guilty)
{
	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;

	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
}

void lima_sched_context_fini(struct lima_sched_pipe *pipe,
			     struct lima_sched_context *context)
{
	drm_sched_entity_fini(&context->base);
}

struct dma_fence *lima_sched_context_queue_task(struct lima_sched_context *context,
						struct lima_sched_task *task)
{
	struct dma_fence *fence = dma_fence_get(&task->base.s_fence->finished);

	drm_sched_entity_push_job(&task->base, &context->base);
	return fence;
}

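/*
 * The scheduler calls .dependency repeatedly before running a job,
 * waiting on each fence it returns; NULL means the job is ready. The
 * deps entries are presumably added with xa_alloc() elsewhere in the
 * driver, giving them consecutive indices from 0, so task->last_dep
 * walks them in order and xa_erase() hands each fence reference over
 * to the scheduler.
 */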
static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
					       struct drm_sched_entity *entity)
{
	struct lima_sched_task *task = to_lima_task(job);

	if (!xa_empty(&task->deps))
		return xa_erase(&task->deps, task->last_dep++);

	return NULL;
}

static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_fence *fence;
	struct dma_fence *ret;
	struct lima_vm *vm = NULL, *last_vm = NULL;
	int i;

	/* after a GPU reset, skip resubmitted jobs already marked
	 * with an error (the guilty ones)
	 */
	if (job->s_fence->finished.error < 0)
		return NULL;

	fence = lima_fence_create(pipe);
	if (!fence)
		return NULL;
	task->fence = &fence->base;

	/* Take an extra reference for the caller; otherwise the IRQ
	 * handler may drop the fence before the caller gets to use it.
	 */
	ret = dma_fence_get(task->fence);

	pipe->current_task = task;

	/* This is needed for the MMU to work correctly; otherwise the
	 * GP/PP will hang or page fault for an unknown reason after
	 * running for a while.
	 *
	 * Need to investigate:
	 * 1. is it related to the TLB?
	 * 2. how much performance is lost to the L2 cache flush?
	 * 3. can we reduce the number of calls to this function, given
	 *    that all GP/PP share the same L2 cache on the Mali-400?
	 *
	 * TODO:
	 * 1. move this to task fini to save some wait time?
	 * 2. when GP and PP use different L2 caches, does PP need to
	 *    wait for the GP L2 cache flush?
	 */
	for (i = 0; i < pipe->num_l2_cache; i++)
		lima_l2_cache_flush(pipe->l2_cache[i]);

	if (task->vm != pipe->current_vm) {
		vm = lima_vm_get(task->vm);
		last_vm = pipe->current_vm;
		pipe->current_vm = task->vm;
	}

	if (pipe->bcast_mmu) {
		lima_mmu_switch_vm(pipe->bcast_mmu, vm);
	} else {
		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_switch_vm(pipe->mmu[i], vm);
	}

	if (last_vm)
		lima_vm_put(last_vm);

	pipe->error = false;
	pipe->task_run(pipe, task);

	return task->fence;
}

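/*
 * Common recovery path for both job timeouts and hardware errors: stop
 * the scheduler, blame the offending task, let the pipe backend reset
 * its hardware, clear any pending MMU page fault, drop the current VM,
 * then resubmit the remaining jobs and restart the scheduler.
 */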
static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
					 struct lima_sched_task *task)
{
	drm_sched_stop(&pipe->base, &task->base);

	if (task)
		drm_sched_increase_karma(&task->base);

	pipe->task_error(pipe);

	if (pipe->bcast_mmu) {
		lima_mmu_page_fault_resume(pipe->bcast_mmu);
	} else {
		int i;

		for (i = 0; i < pipe->num_mmu; i++)
			lima_mmu_page_fault_resume(pipe->mmu[i]);
	}

	if (pipe->current_vm)
		lima_vm_put(pipe->current_vm);

	pipe->current_vm = NULL;
	pipe->current_task = NULL;

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);
}

static void lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);

	DRM_ERROR("lima job timeout\n");

	lima_sched_handle_error_task(pipe, task);
}

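/*
 * .free_job runs once the scheduler is done with a job (its finished
 * fence has signalled): drop the task's reference on its hardware
 * fence, unmap its BOs from the task's VM and free the task itself.
 */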
static void lima_sched_free_job(struct drm_sched_job *job)
{
	struct lima_sched_task *task = to_lima_task(job);
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_vm *vm = task->vm;
	struct lima_bo **bos = task->bos;
	int i;

	dma_fence_put(task->fence);

	for (i = 0; i < task->num_bos; i++)
		lima_vm_bo_del(vm, bos[i]);

	lima_sched_task_fini(task);
	kmem_cache_free(pipe->task_slab, task);
}

static const struct drm_sched_backend_ops lima_sched_ops = {
	.dependency = lima_sched_dependency,
	.run_job = lima_sched_run_job,
	.timedout_job = lima_sched_timedout_job,
	.free_job = lima_sched_free_job,
};

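/*
 * Hardware errors are reported through lima_sched_pipe_task_done(),
 * which may be called from interrupt context, so the actual recovery
 * is deferred to a work item where stopping and restarting the
 * scheduler is safe.
 */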
static void lima_sched_error_work(struct work_struct *work)
{
	struct lima_sched_pipe *pipe =
		container_of(work, struct lima_sched_pipe, error_work);
	struct lima_sched_task *task = pipe->current_task;

	lima_sched_handle_error_task(pipe, task);
}

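/*
 * lima_sched_timeout_ms is presumably a module parameter defined
 * elsewhere in the driver; anything <= 0 falls back to the 500 ms
 * default. The "1" and "0" passed to drm_sched_init() are the hardware
 * submission limit (one job on the hardware at a time) and the job
 * hang limit.
 */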
int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
{
	unsigned int timeout = lima_sched_timeout_ms > 0 ?
			       lima_sched_timeout_ms : 500;

	pipe->fence_context = dma_fence_context_alloc(1);
	spin_lock_init(&pipe->fence_lock);

	INIT_WORK(&pipe->error_work, lima_sched_error_work);

	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0,
			      msecs_to_jiffies(timeout), name);
}

void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
{
	drm_sched_fini(&pipe->base);
}

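/*
 * Called by the pipe backend once the hardware is done with (or has
 * faulted on) the current task: either kick off error recovery or
 * finish the task and signal its hardware fence so the scheduler can
 * move on.
 */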
void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
{
	if (pipe->error) {
		schedule_work(&pipe->error_work);
	} else {
		struct lima_sched_task *task = pipe->current_task;

		pipe->task_fini(pipe);
		dma_fence_signal(task->fence);
	}
}