Blame - marvell/linux/drivers/gpu/drm/msm/msm_gpu.c - T108

blob: edd45f434ccd6ae5ddc15952db42c1fb2d60fdf1 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* Copyright (C) 2013 Red Hat
				4	* Author: Rob Clark <robdclark@gmail.com>
				5	*/
				6
				7	#include "msm_gpu.h"
				8	#include "msm_gem.h"
				9	#include "msm_mmu.h"
				10	#include "msm_fence.h"
				11	#include "msm_gpu_trace.h"
				12	#include "adreno/adreno_gpu.h"
				13
				14	#include <generated/utsrelease.h>
				15	#include <linux/string_helpers.h>
				16	#include <linux/pm_opp.h>
				17	#include <linux/devfreq.h>
				18	#include <linux/devcoredump.h>
				19	#include <linux/sched/task.h>
				20
				21	/*
				22	* Power Management:
				23	*/
				24
				25	static int msm_devfreq_target(struct device dev, unsigned long freq,
				26	u32 flags)
				27	{
				28	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
				29	struct dev_pm_opp *opp;
				30
				31	opp = devfreq_recommended_opp(dev, freq, flags);
				32
				33	if (IS_ERR(opp))
				34	return PTR_ERR(opp);
				35
				36	if (gpu->funcs->gpu_set_freq)
				37	gpu->funcs->gpu_set_freq(gpu, (u64)*freq);
				38	else
				39	clk_set_rate(gpu->core_clk, *freq);
				40
				41	dev_pm_opp_put(opp);
				42
				43	return 0;
				44	}
				45
				46	static int msm_devfreq_get_dev_status(struct device *dev,
				47	struct devfreq_dev_status *status)
				48	{
				49	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
				50	ktime_t time;
				51
				52	if (gpu->funcs->gpu_get_freq)
				53	status->current_frequency = gpu->funcs->gpu_get_freq(gpu);
				54	else
				55	status->current_frequency = clk_get_rate(gpu->core_clk);
				56
				57	status->busy_time = gpu->funcs->gpu_busy(gpu);
				58
				59	time = ktime_get();
				60	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
				61	gpu->devfreq.time = time;
				62
				63	return 0;
				64	}
				65
				66	static int msm_devfreq_get_cur_freq(struct device dev, unsigned long freq)
				67	{
				68	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
				69
				70	if (gpu->funcs->gpu_get_freq)
				71	*freq = gpu->funcs->gpu_get_freq(gpu);
				72	else
				73	*freq = clk_get_rate(gpu->core_clk);
				74
				75	return 0;
				76	}
				77
				78	static struct devfreq_dev_profile msm_devfreq_profile = {
				79	.polling_ms = 10,
				80	.target = msm_devfreq_target,
				81	.get_dev_status = msm_devfreq_get_dev_status,
				82	.get_cur_freq = msm_devfreq_get_cur_freq,
				83	};
				84
				85	static void msm_devfreq_init(struct msm_gpu *gpu)
				86	{
				87	/* We need target support to do devfreq */
				88	if (!gpu->funcs->gpu_busy)
				89	return;
				90
				91	msm_devfreq_profile.initial_freq = gpu->fast_rate;
				92
				93	/*
				94	* Don't set the freq_table or max_state and let devfreq build the table
				95	* from OPP
				96	*/
				97
				98	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
				99	&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
				100	NULL);
				101
				102	if (IS_ERR(gpu->devfreq.devfreq)) {
				103	DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
				104	gpu->devfreq.devfreq = NULL;
				105	}
				106
				107	devfreq_suspend_device(gpu->devfreq.devfreq);
				108	}
				109
				110	static int enable_pwrrail(struct msm_gpu *gpu)
				111	{
				112	struct drm_device *dev = gpu->dev;
				113	int ret = 0;
				114
				115	if (gpu->gpu_reg) {
				116	ret = regulator_enable(gpu->gpu_reg);
				117	if (ret) {
				118	DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
				119	return ret;
				120	}
				121	}
				122
				123	if (gpu->gpu_cx) {
				124	ret = regulator_enable(gpu->gpu_cx);
				125	if (ret) {
				126	DRM_DEV_ERROR(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
				127	return ret;
				128	}
				129	}
				130
				131	return 0;
				132	}
				133
				134	static int disable_pwrrail(struct msm_gpu *gpu)
				135	{
				136	if (gpu->gpu_cx)
				137	regulator_disable(gpu->gpu_cx);
				138	if (gpu->gpu_reg)
				139	regulator_disable(gpu->gpu_reg);
				140	return 0;
				141	}
				142
				143	static int enable_clk(struct msm_gpu *gpu)
				144	{
				145	if (gpu->core_clk && gpu->fast_rate)
				146	clk_set_rate(gpu->core_clk, gpu->fast_rate);
				147
				148	/* Set the RBBM timer rate to 19.2Mhz */
				149	if (gpu->rbbmtimer_clk)
				150	clk_set_rate(gpu->rbbmtimer_clk, 19200000);
				151
				152	return clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
				153	}
				154
				155	static int disable_clk(struct msm_gpu *gpu)
				156	{
				157	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
				158
				159	/*
				160	* Set the clock to a deliberately low rate. On older targets the clock
				161	* speed had to be non zero to avoid problems. On newer targets this
				162	* will be rounded down to zero anyway so it all works out.
				163	*/
				164	if (gpu->core_clk)
				165	clk_set_rate(gpu->core_clk, 27000000);
				166
				167	if (gpu->rbbmtimer_clk)
				168	clk_set_rate(gpu->rbbmtimer_clk, 0);
				169
				170	return 0;
				171	}
				172
				173	static int enable_axi(struct msm_gpu *gpu)
				174	{
				175	if (gpu->ebi1_clk)
				176	clk_prepare_enable(gpu->ebi1_clk);
				177	return 0;
				178	}
				179
				180	static int disable_axi(struct msm_gpu *gpu)
				181	{
				182	if (gpu->ebi1_clk)
				183	clk_disable_unprepare(gpu->ebi1_clk);
				184	return 0;
				185	}
				186
				187	void msm_gpu_resume_devfreq(struct msm_gpu *gpu)
				188	{
				189	gpu->devfreq.busy_cycles = 0;
				190	gpu->devfreq.time = ktime_get();
				191
				192	devfreq_resume_device(gpu->devfreq.devfreq);
				193	}
				194
				195	int msm_gpu_pm_resume(struct msm_gpu *gpu)
				196	{
				197	int ret;
				198
				199	DBG("%s", gpu->name);
				200
				201	ret = enable_pwrrail(gpu);
				202	if (ret)
				203	return ret;
				204
				205	ret = enable_clk(gpu);
				206	if (ret)
				207	return ret;
				208
				209	ret = enable_axi(gpu);
				210	if (ret)
				211	return ret;
				212
				213	msm_gpu_resume_devfreq(gpu);
				214
				215	gpu->needs_hw_init = true;
				216
				217	return 0;
				218	}
				219
				220	int msm_gpu_pm_suspend(struct msm_gpu *gpu)
				221	{
				222	int ret;
				223
				224	DBG("%s", gpu->name);
				225
				226	devfreq_suspend_device(gpu->devfreq.devfreq);
				227
				228	ret = disable_axi(gpu);
				229	if (ret)
				230	return ret;
				231
				232	ret = disable_clk(gpu);
				233	if (ret)
				234	return ret;
				235
				236	ret = disable_pwrrail(gpu);
				237	if (ret)
				238	return ret;
				239
				240	return 0;
				241	}
				242
				243	int msm_gpu_hw_init(struct msm_gpu *gpu)
				244	{
				245	int ret;
				246
				247	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));
				248
				249	if (!gpu->needs_hw_init)
				250	return 0;
				251
				252	disable_irq(gpu->irq);
				253	ret = gpu->funcs->hw_init(gpu);
				254	if (!ret)
				255	gpu->needs_hw_init = false;
				256	enable_irq(gpu->irq);
				257
				258	return ret;
				259	}
				260
				261	#ifdef CONFIG_DEV_COREDUMP
				262	static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
				263	size_t count, void *data, size_t datalen)
				264	{
				265	struct msm_gpu *gpu = data;
				266	struct drm_print_iterator iter;
				267	struct drm_printer p;
				268	struct msm_gpu_state *state;
				269
				270	state = msm_gpu_crashstate_get(gpu);
				271	if (!state)
				272	return 0;
				273
				274	iter.data = buffer;
				275	iter.offset = 0;
				276	iter.start = offset;
				277	iter.remain = count;
				278
				279	p = drm_coredump_printer(&iter);
				280
				281	drm_printf(&p, "---\n");
				282	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
				283	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
				284	drm_printf(&p, "time: %lld.%09ld\n",
				285	state->time.tv_sec, state->time.tv_nsec);
				286	if (state->comm)
				287	drm_printf(&p, "comm: %s\n", state->comm);
				288	if (state->cmd)
				289	drm_printf(&p, "cmdline: %s\n", state->cmd);
				290
				291	gpu->funcs->show(gpu, state, &p);
				292
				293	msm_gpu_crashstate_put(gpu);
				294
				295	return count - iter.remain;
				296	}
				297
				/*
				 * devcoredump free callback: drop the crash state reference of the
				 * struct msm_gpu passed as @data.
				 */
				static void msm_gpu_devcoredump_free(void *data)
				{
					msm_gpu_crashstate_put((struct msm_gpu *)data);
				}
				304
				305	static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
				306	struct msm_gem_object *obj, u64 iova, u32 flags)
				307	{
				308	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];
				309
				310	/* Don't record write only objects */
				311	state_bo->size = obj->base.size;
				312	state_bo->iova = iova;
				313
				314	/* Only store data for non imported buffer objects marked for read */
				315	if ((flags & MSM_SUBMIT_BO_READ) && !obj->base.import_attach) {
				316	void *ptr;
				317
				318	state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
				319	if (!state_bo->data)
				320	goto out;
				321
				322	ptr = msm_gem_get_vaddr_active(&obj->base);
				323	if (IS_ERR(ptr)) {
				324	kvfree(state_bo->data);
				325	state_bo->data = NULL;
				326	goto out;
				327	}
				328
				329	memcpy(state_bo->data, ptr, obj->base.size);
				330	msm_gem_put_vaddr(&obj->base);
				331	}
				332	out:
				333	state->nr_bos++;
				334	}
				335
				336	static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
				337	struct msm_gem_submit submit, char comm, char *cmd)
				338	{
				339	struct msm_gpu_state *state;
				340
				341	/* Check if the target supports capturing crash state */
				342	if (!gpu->funcs->gpu_state_get)
				343	return;
				344
				345	/* Only save one crash state at a time */
				346	if (gpu->crashstate)
				347	return;
				348
				349	state = gpu->funcs->gpu_state_get(gpu);
				350	if (IS_ERR_OR_NULL(state))
				351	return;
				352
				353	/* Fill in the additional crash state information */
				354	state->comm = kstrdup(comm, GFP_KERNEL);
				355	state->cmd = kstrdup(cmd, GFP_KERNEL);
				356
				357	if (submit) {
				358	int i;
				359
				360	state->bos = kcalloc(submit->nr_cmds,
				361	sizeof(struct msm_gpu_state_bo), GFP_KERNEL);
				362
				363	for (i = 0; state->bos && i < submit->nr_cmds; i++) {
				364	int idx = submit->cmd[i].idx;
				365
				366	msm_gpu_crashstate_get_bo(state, submit->bos[idx].obj,
				367	submit->bos[idx].iova, submit->bos[idx].flags);
				368	}
				369	}
				370
				371	/* Set the active crash state to be dumped on failure */
				372	gpu->crashstate = state;
				373
				374	/* FIXME: Release the crashstate if this errors out? */
				375	dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL,
				376	msm_gpu_devcoredump_read, msm_gpu_devcoredump_free);
				377	}
				378	#else
				/* Crash state capture is compiled out without CONFIG_DEV_COREDUMP */
				static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
						struct msm_gem_submit *submit, char *comm, char *cmd)
				{
				}
				383	#endif
				384
				385	/*
				386	* Hangcheck detection for locked gpu:
				387	*/
				388
				389	static void update_fences(struct msm_gpu gpu, struct msm_ringbuffer ring,
				390	uint32_t fence)
				391	{
				392	struct msm_gem_submit *submit;
				393
				394	list_for_each_entry(submit, &ring->submits, node) {
				395	if (submit->seqno > fence)
				396	break;
				397
				398	msm_update_fence(submit->ring->fctx,
				399	submit->fence->seqno);
				400	}
				401	}
				402
				403	static struct msm_gem_submit *
				404	find_submit(struct msm_ringbuffer *ring, uint32_t fence)
				405	{
				406	struct msm_gem_submit *submit;
				407
				408	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));
				409
				410	list_for_each_entry(submit, &ring->submits, node)
				411	if (submit->seqno == fence)
				412	return submit;
				413
				414	return NULL;
				415	}
				416
				417	static void retire_submits(struct msm_gpu *gpu);
				418
				419	static void recover_worker(struct work_struct *work)
				420	{
				421	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
				422	struct drm_device *dev = gpu->dev;
				423	struct msm_drm_private *priv = dev->dev_private;
				424	struct msm_gem_submit *submit;
				425	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
				426	char comm = NULL, cmd = NULL;
				427	int i;
				428
				429	mutex_lock(&dev->struct_mutex);
				430
				431	DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
				432
				433	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
				434	if (submit) {
				435	struct task_struct *task;
				436
				437	/* Increment the fault counts */
				438	gpu->global_faults++;
				439	submit->queue->faults++;
				440
				441	task = get_pid_task(submit->pid, PIDTYPE_PID);
				442	if (task) {
				443	comm = kstrdup(task->comm, GFP_KERNEL);
				444	cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
				445	put_task_struct(task);
				446	}
				447
				448	if (comm && cmd) {
				449	DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
				450	gpu->name, comm, cmd);
				451
				452	msm_rd_dump_submit(priv->hangrd, submit,
				453	"offending task: %s (%s)", comm, cmd);
				454	} else
				455	msm_rd_dump_submit(priv->hangrd, submit, NULL);
				456	}
				457
				458	/* Record the crash state */
				459	pm_runtime_get_sync(&gpu->pdev->dev);
				460	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
				461	pm_runtime_put_sync(&gpu->pdev->dev);
				462
				463	kfree(cmd);
				464	kfree(comm);
				465
				466	/*
				467	* Update all the rings with the latest and greatest fence.. this
				468	* needs to happen after msm_rd_dump_submit() to ensure that the
				469	* bo's referenced by the offending submit are still around.
				470	*/
				471	for (i = 0; i < gpu->nr_rings; i++) {
				472	struct msm_ringbuffer *ring = gpu->rb[i];
				473
				474	uint32_t fence = ring->memptrs->fence;
				475
				476	/*
				477	* For the current (faulting?) ring/submit advance the fence by
				478	* one more to clear the faulting submit
				479	*/
				480	if (ring == cur_ring)
				481	fence++;
				482
				483	update_fences(gpu, ring, fence);
				484	}
				485
				486	if (msm_gpu_active(gpu)) {
				487	/* retire completed submits, plus the one that hung: */
				488	retire_submits(gpu);
				489
				490	pm_runtime_get_sync(&gpu->pdev->dev);
				491	gpu->funcs->recover(gpu);
				492	pm_runtime_put_sync(&gpu->pdev->dev);
				493
				494	/*
				495	* Replay all remaining submits starting with highest priority
				496	* ring
				497	*/
				498	for (i = 0; i < gpu->nr_rings; i++) {
				499	struct msm_ringbuffer *ring = gpu->rb[i];
				500
				501	list_for_each_entry(submit, &ring->submits, node)
				502	gpu->funcs->submit(gpu, submit, NULL);
				503	}
				504	}
				505
				506	mutex_unlock(&dev->struct_mutex);
				507
				508	msm_gpu_retire(gpu);
				509	}
				510
				511	static void hangcheck_timer_reset(struct msm_gpu *gpu)
				512	{
				513	DBG("%s", gpu->name);
				514	mod_timer(&gpu->hangcheck_timer,
				515	round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
				516	}
				517
				518	static void hangcheck_handler(struct timer_list *t)
				519	{
				520	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
				521	struct drm_device *dev = gpu->dev;
				522	struct msm_drm_private *priv = dev->dev_private;
				523	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
				524	uint32_t fence = ring->memptrs->fence;
				525
				526	if (fence != ring->hangcheck_fence) {
				527	/* some progress has been made.. ya! */
				528	ring->hangcheck_fence = fence;
				529	} else if (fence < ring->seqno) {
				530	/* no progress and not done.. hung! */
				531	ring->hangcheck_fence = fence;
				532	DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				533	gpu->name, ring->id);
				534	DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
				535	gpu->name, fence);
				536	DRM_DEV_ERROR(dev->dev, "%s: submitted fence: %u\n",
				537	gpu->name, ring->seqno);
				538
				539	queue_work(priv->wq, &gpu->recover_work);
				540	}
				541
				542	/* if still more pending work, reset the hangcheck timer: */
				543	if (ring->seqno > ring->hangcheck_fence)
				544	hangcheck_timer_reset(gpu);
				545
				546	/* workaround for missing irq: */
				547	queue_work(priv->wq, &gpu->retire_work);
				548	}
				549
				550	/*
				551	* Performance Counters:
				552	*/
				553
				554	/* called under perf_lock */
				555	static int update_hw_cntrs(struct msm_gpu gpu, uint32_t ncntrs, uint32_t cntrs)
				556	{
				557	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
				558	int i, n = min(ncntrs, gpu->num_perfcntrs);
				559
				560	/* read current values: */
				561	for (i = 0; i < gpu->num_perfcntrs; i++)
				562	current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);
				563
				564	/* update cntrs: */
				565	for (i = 0; i < n; i++)
				566	cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];
				567
				568	/* save current values: */
				569	for (i = 0; i < gpu->num_perfcntrs; i++)
				570	gpu->last_cntrs[i] = current_cntrs[i];
				571
				572	return n;
				573	}
				574
				575	static void update_sw_cntrs(struct msm_gpu *gpu)
				576	{
				577	ktime_t time;
				578	uint32_t elapsed;
				579	unsigned long flags;
				580
				581	spin_lock_irqsave(&gpu->perf_lock, flags);
				582	if (!gpu->perfcntr_active)
				583	goto out;
				584
				585	time = ktime_get();
				586	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));
				587
				588	gpu->totaltime += elapsed;
				589	if (gpu->last_sample.active)
				590	gpu->activetime += elapsed;
				591
				592	gpu->last_sample.active = msm_gpu_active(gpu);
				593	gpu->last_sample.time = time;
				594
				595	out:
				596	spin_unlock_irqrestore(&gpu->perf_lock, flags);
				597	}
				598
				599	void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
				600	{
				601	unsigned long flags;
				602
				603	pm_runtime_get_sync(&gpu->pdev->dev);
				604
				605	spin_lock_irqsave(&gpu->perf_lock, flags);
				606	/* we could dynamically enable/disable perfcntr registers too.. */
				607	gpu->last_sample.active = msm_gpu_active(gpu);
				608	gpu->last_sample.time = ktime_get();
				609	gpu->activetime = gpu->totaltime = 0;
				610	gpu->perfcntr_active = true;
				611	update_hw_cntrs(gpu, 0, NULL);
				612	spin_unlock_irqrestore(&gpu->perf_lock, flags);
				613	}
				614
				615	void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
				616	{
				617	gpu->perfcntr_active = false;
				618	pm_runtime_put_sync(&gpu->pdev->dev);
				619	}
				620
				621	/* returns -errno or # of cntrs sampled */
				622	int msm_gpu_perfcntr_sample(struct msm_gpu gpu, uint32_t activetime,
				623	uint32_t totaltime, uint32_t ncntrs, uint32_t cntrs)
				624	{
				625	unsigned long flags;
				626	int ret;
				627
				628	spin_lock_irqsave(&gpu->perf_lock, flags);
				629
				630	if (!gpu->perfcntr_active) {
				631	ret = -EINVAL;
				632	goto out;
				633	}
				634
				635	*activetime = gpu->activetime;
				636	*totaltime = gpu->totaltime;
				637
				638	gpu->activetime = gpu->totaltime = 0;
				639
				640	ret = update_hw_cntrs(gpu, ncntrs, cntrs);
				641
				642	out:
				643	spin_unlock_irqrestore(&gpu->perf_lock, flags);
				644
				645	return ret;
				646	}
				647
				648	/*
				649	* Cmdstream submission/retirement:
				650	*/
				651
				652	static void retire_submit(struct msm_gpu gpu, struct msm_ringbuffer ring,
				653	struct msm_gem_submit *submit)
				654	{
				655	int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
				656	volatile struct msm_gpu_submit_stats *stats;
				657	u64 elapsed, clock = 0;
				658	int i;
				659
				660	stats = &ring->memptrs->stats[index];
				661	/* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */
				662	elapsed = (stats->alwayson_end - stats->alwayson_start) * 10000;
				663	do_div(elapsed, 192);
				664
				665	/* Calculate the clock frequency from the number of CP cycles */
				666	if (elapsed) {
				667	clock = (stats->cpcycles_end - stats->cpcycles_start) * 1000;
				668	do_div(clock, elapsed);
				669	}
				670
				671	trace_msm_gpu_submit_retired(submit, elapsed, clock,
				672	stats->alwayson_start, stats->alwayson_end);
				673
				674	for (i = 0; i < submit->nr_bos; i++) {
				675	struct msm_gem_object *msm_obj = submit->bos[i].obj;
				676	/* move to inactive: */
				677	msm_gem_move_to_inactive(&msm_obj->base);
				678	msm_gem_unpin_iova(&msm_obj->base, submit->aspace);
				679	drm_gem_object_put(&msm_obj->base);
				680	}
				681
				682	pm_runtime_mark_last_busy(&gpu->pdev->dev);
				683	pm_runtime_put_autosuspend(&gpu->pdev->dev);
				684	msm_gem_submit_free(submit);
				685	}
				686
				687	static void retire_submits(struct msm_gpu *gpu)
				688	{
				689	struct drm_device *dev = gpu->dev;
				690	struct msm_gem_submit submit, tmp;
				691	int i;
				692
				693	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
				694
				695	/* Retire the commits starting with highest priority */
				696	for (i = 0; i < gpu->nr_rings; i++) {
				697	struct msm_ringbuffer *ring = gpu->rb[i];
				698
				699	list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
				700	if (dma_fence_is_signaled(submit->fence))
				701	retire_submit(gpu, ring, submit);
				702	}
				703	}
				704	}
				705
				706	static void retire_worker(struct work_struct *work)
				707	{
				708	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
				709	struct drm_device *dev = gpu->dev;
				710	int i;
				711
				712	for (i = 0; i < gpu->nr_rings; i++)
				713	update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
				714
				715	mutex_lock(&dev->struct_mutex);
				716	retire_submits(gpu);
				717	mutex_unlock(&dev->struct_mutex);
				718	}
				719
				720	/* call from irq handler to schedule work to retire bo's */
				721	void msm_gpu_retire(struct msm_gpu *gpu)
				722	{
				723	struct msm_drm_private *priv = gpu->dev->dev_private;
				724	queue_work(priv->wq, &gpu->retire_work);
				725	update_sw_cntrs(gpu);
				726	}
				727
				728	/* add bo's to gpu's ring, and kick gpu: */
				729	void msm_gpu_submit(struct msm_gpu gpu, struct msm_gem_submit submit,
				730	struct msm_file_private *ctx)
				731	{
				732	struct drm_device *dev = gpu->dev;
				733	struct msm_drm_private *priv = dev->dev_private;
				734	struct msm_ringbuffer *ring = submit->ring;
				735	int i;
				736
				737	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
				738
				739	pm_runtime_get_sync(&gpu->pdev->dev);
				740
				741	msm_gpu_hw_init(gpu);
				742
				743	submit->seqno = ++ring->seqno;
				744
				745	list_add_tail(&submit->node, &ring->submits);
				746
				747	msm_rd_dump_submit(priv->rd, submit, NULL);
				748
				749	update_sw_cntrs(gpu);
				750
				751	for (i = 0; i < submit->nr_bos; i++) {
				752	struct msm_gem_object *msm_obj = submit->bos[i].obj;
				753	uint64_t iova;
				754
				755	/* can't happen yet.. but when we add 2d support we'll have
				756	* to deal w/ cross-ring synchronization:
				757	*/
				758	WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));
				759
				760	/* submit takes a reference to the bo and iova until retired: */
				761	drm_gem_object_get(&msm_obj->base);
				762	msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova);
				763
				764	if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
				765	msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
				766	else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
				767	msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
				768	}
				769
				770	gpu->funcs->submit(gpu, submit, ctx);
				771	priv->lastctx = ctx;
				772
				773	hangcheck_timer_reset(gpu);
				774	}
				775
				776	/*
				777	* Init/Cleanup:
				778	*/
				779
				780	static irqreturn_t irq_handler(int irq, void *data)
				781	{
				782	struct msm_gpu *gpu = data;
				783	return gpu->funcs->irq(gpu);
				784	}
				785
				786	static int get_clocks(struct platform_device pdev, struct msm_gpu gpu)
				787	{
				788	int ret = devm_clk_bulk_get_all(&pdev->dev, &gpu->grp_clks);
				789
				790	if (ret < 1) {
				791	gpu->nr_clocks = 0;
				792	return ret;
				793	}
				794
				795	gpu->nr_clocks = ret;
				796
				797	gpu->core_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
				798	gpu->nr_clocks, "core");
				799
				800	gpu->rbbmtimer_clk = msm_clk_bulk_get_clock(gpu->grp_clks,
				801	gpu->nr_clocks, "rbbmtimer");
				802
				803	return 0;
				804	}
				805
				806	static struct msm_gem_address_space *
				807	msm_gpu_create_address_space(struct msm_gpu gpu, struct platform_device pdev,
				808	uint64_t va_start, uint64_t va_end)
				809	{
				810	struct msm_gem_address_space *aspace;
				811	int ret;
				812
				813	/*
				814	* Setup IOMMU.. eventually we will (I think) do this once per context
				815	* and have separate page tables per context. For now, to keep things
				816	* simple and to get something working, just use a single address space:
				817	*/
				818	if (!adreno_is_a2xx(to_adreno_gpu(gpu))) {
				819	struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
				820	if (!iommu)
				821	return NULL;
				822
				823	iommu->geometry.aperture_start = va_start;
				824	iommu->geometry.aperture_end = va_end;
				825
				826	DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
				827
				828	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
				829	if (IS_ERR(aspace))
				830	iommu_domain_free(iommu);
				831	} else {
				832	aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu",
				833	va_start, va_end);
				834	}
				835
				836	if (IS_ERR(aspace)) {
				837	DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
				838	PTR_ERR(aspace));
				839	return ERR_CAST(aspace);
				840	}
				841
				842	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
				843	if (ret) {
				844	msm_gem_address_space_put(aspace);
				845	return ERR_PTR(ret);
				846	}
				847
				848	return aspace;
				849	}
				850
				851	int msm_gpu_init(struct drm_device drm, struct platform_device pdev,
				852	struct msm_gpu gpu, const struct msm_gpu_funcs funcs,
				853	const char name, struct msm_gpu_config config)
				854	{
				855	int i, ret, nr_rings = config->nr_rings;
				856	void *memptrs;
				857	uint64_t memptrs_iova;
				858
				859	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
				860	gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
				861
				862	gpu->dev = drm;
				863	gpu->funcs = funcs;
				864	gpu->name = name;
				865
				866	INIT_LIST_HEAD(&gpu->active_list);
				867	INIT_WORK(&gpu->retire_work, retire_worker);
				868	INIT_WORK(&gpu->recover_work, recover_worker);
				869
				870
				871	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
				872
				873	spin_lock_init(&gpu->perf_lock);
				874
				875
				876	/* Map registers: */
				877	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
				878	if (IS_ERR(gpu->mmio)) {
				879	ret = PTR_ERR(gpu->mmio);
				880	goto fail;
				881	}
				882
				883	/* Get Interrupt: */
				884	gpu->irq = platform_get_irq(pdev, 0);
				885	if (gpu->irq < 0) {
				886	ret = gpu->irq;
				887	DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret);
				888	goto fail;
				889	}
				890
				891	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
				892	IRQF_TRIGGER_HIGH, gpu->name, gpu);
				893	if (ret) {
				894	DRM_DEV_ERROR(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
				895	goto fail;
				896	}
				897
				898	ret = get_clocks(pdev, gpu);
				899	if (ret)
				900	goto fail;
				901
				902	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
				903	DBG("ebi1_clk: %p", gpu->ebi1_clk);
				904	if (IS_ERR(gpu->ebi1_clk))
				905	gpu->ebi1_clk = NULL;
				906
				907	/* Acquire regulators: */
				908	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
				909	DBG("gpu_reg: %p", gpu->gpu_reg);
				910	if (IS_ERR(gpu->gpu_reg))
				911	gpu->gpu_reg = NULL;
				912
				913	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
				914	DBG("gpu_cx: %p", gpu->gpu_cx);
				915	if (IS_ERR(gpu->gpu_cx))
				916	gpu->gpu_cx = NULL;
				917
				918	gpu->pdev = pdev;
				919	platform_set_drvdata(pdev, gpu);
				920
				921	msm_devfreq_init(gpu);
				922
				923	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
				924	config->va_start, config->va_end);
				925
				926	if (gpu->aspace == NULL)
				927	DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
				928	else if (IS_ERR(gpu->aspace)) {
				929	ret = PTR_ERR(gpu->aspace);
				930	goto fail;
				931	}
				932
				933	memptrs = msm_gem_kernel_new(drm,
				934	sizeof(struct msm_rbmemptrs) * nr_rings,
				935	MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
				936	&memptrs_iova);
				937
				938	if (IS_ERR(memptrs)) {
				939	ret = PTR_ERR(memptrs);
				940	DRM_DEV_ERROR(drm->dev, "could not allocate memptrs: %d\n", ret);
				941	goto fail;
				942	}
				943
				944	msm_gem_object_set_name(gpu->memptrs_bo, "memptrs");
				945
				946	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
				947	DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
				948	ARRAY_SIZE(gpu->rb));
				949	nr_rings = ARRAY_SIZE(gpu->rb);
				950	}
				951
				952	/* Create ringbuffer(s): */
				953	for (i = 0; i < nr_rings; i++) {
				954	gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
				955
				956	if (IS_ERR(gpu->rb[i])) {
				957	ret = PTR_ERR(gpu->rb[i]);
				958	DRM_DEV_ERROR(drm->dev,
				959	"could not create ringbuffer %d: %d\n", i, ret);
				960	goto fail;
				961	}
				962
				963	memptrs += sizeof(struct msm_rbmemptrs);
				964	memptrs_iova += sizeof(struct msm_rbmemptrs);
				965	}
				966
				967	gpu->nr_rings = nr_rings;
				968
				969	return 0;
				970
				971	fail:
				972	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
				973	msm_ringbuffer_destroy(gpu->rb[i]);
				974	gpu->rb[i] = NULL;
				975	}
				976
				977	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);
				978
				979	platform_set_drvdata(pdev, NULL);
				980	return ret;
				981	}
				982
				983	void msm_gpu_cleanup(struct msm_gpu *gpu)
				984	{
				985	int i;
				986
				987	DBG("%s", gpu->name);
				988
				989	WARN_ON(!list_empty(&gpu->active_list));
				990
				991	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
				992	msm_ringbuffer_destroy(gpu->rb[i]);
				993	gpu->rb[i] = NULL;
				994	}
				995
				996	msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);
				997
				998	if (!IS_ERR_OR_NULL(gpu->aspace)) {
				999	gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
				1000	NULL, 0);
				1001	msm_gem_address_space_put(gpu->aspace);
				1002	}
				1003	}