Blame - marvell/linux/drivers/gpu/drm/msm/msm_gpu.h - T108

blob: ab8f0f9c9dc8823bdf0fb8657b6942d6f41aee67 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	/* SPDX-License-Identifier: GPL-2.0-only */
				2	/*
				3	* Copyright (C) 2013 Red Hat
				4	* Author: Rob Clark <robdclark@gmail.com>
				5	*/
				6
				7	#ifndef __MSM_GPU_H__
				8	#define __MSM_GPU_H__
				9
				10	#include <linux/clk.h>
				11	#include <linux/interconnect.h>
				12	#include <linux/regulator/consumer.h>
				13
				14	#include "msm_drv.h"
				15	#include "msm_fence.h"
				16	#include "msm_ringbuffer.h"
				17
				18	struct msm_gem_submit;
				19	struct msm_gpu_perfcntr;
				20	struct msm_gpu_state;
				21
				22	struct msm_gpu_config {
				23	const char *ioname;
				24	uint64_t va_start;
				25	uint64_t va_end;
				26	unsigned int nr_rings;
				27	};
				28
				29	/* So far, with hardware that I've seen to date, we can have:
				30	* + zero, one, or two z180 2d cores
				31	* + a3xx or a2xx 3d core, which share a common CP (the firmware
				32	* for the CP seems to implement some different PM4 packet types
				33	* but the basics of cmdstream submission are the same)
				34	*
				35	* Which means that the eventual complete "class" hierarchy, once
				36	* support for all past and present hw is in place, becomes:
				37	* + msm_gpu
				38	* + adreno_gpu
				39	* + a3xx_gpu
				40	* + a2xx_gpu
				41	* + z180_gpu
				42	*/
				43	struct msm_gpu_funcs {
				44	int (get_param)(struct msm_gpu gpu, uint32_t param, uint64_t *value);
				45	int (hw_init)(struct msm_gpu gpu);
				46	int (pm_suspend)(struct msm_gpu gpu);
				47	int (pm_resume)(struct msm_gpu gpu);
				48	void (submit)(struct msm_gpu gpu, struct msm_gem_submit *submit,
				49	struct msm_file_private *ctx);
				50	void (flush)(struct msm_gpu gpu, struct msm_ringbuffer *ring);
				51	irqreturn_t (irq)(struct msm_gpu irq);
				52	struct msm_ringbuffer (active_ring)(struct msm_gpu *gpu);
				53	void (recover)(struct msm_gpu gpu);
				54	void (destroy)(struct msm_gpu gpu);
				55	#if defined(CONFIG_DEBUG_FS) \|\| defined(CONFIG_DEV_COREDUMP)
				56	/* show GPU status in debugfs: */
				57	void (show)(struct msm_gpu gpu, struct msm_gpu_state *state,
				58	struct drm_printer *p);
				59	/* for generation specific debugfs: */
				60	int (debugfs_init)(struct msm_gpu gpu, struct drm_minor *minor);
				61	#endif
				62	unsigned long (gpu_busy)(struct msm_gpu gpu);
				63	struct msm_gpu_state (gpu_state_get)(struct msm_gpu *gpu);
				64	int (gpu_state_put)(struct msm_gpu_state state);
				65	unsigned long (gpu_get_freq)(struct msm_gpu gpu);
				66	void (gpu_set_freq)(struct msm_gpu gpu, unsigned long freq);
				67	};
				68
				69	struct msm_gpu {
				70	const char *name;
				71	struct drm_device *dev;
				72	struct platform_device *pdev;
				73	const struct msm_gpu_funcs *funcs;
				74
				75	/* performance counters (hw & sw): */
				76	spinlock_t perf_lock;
				77	bool perfcntr_active;
				78	struct {
				79	bool active;
				80	ktime_t time;
				81	} last_sample;
				82	uint32_t totaltime, activetime; /* sw counters */
				83	uint32_t last_cntrs[5]; /* hw counters */
				84	const struct msm_gpu_perfcntr *perfcntrs;
				85	uint32_t num_perfcntrs;
				86
				87	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
				88	int nr_rings;
				89
				90	/* list of GEM active objects: */
				91	struct list_head active_list;
				92
				93	/* does gpu need hw_init? */
				94	bool needs_hw_init;
				95
				96	/* number of GPU hangs (for all contexts) */
				97	int global_faults;
				98
				99	/* worker for handling active-list retiring: */
				100	struct work_struct retire_work;
				101
				102	void __iomem *mmio;
				103	int irq;
				104
				105	struct msm_gem_address_space *aspace;
				106
				107	/* Power Control: */
				108	struct regulator gpu_reg, gpu_cx;
				109	struct clk_bulk_data *grp_clks;
				110	int nr_clocks;
				111	struct clk ebi1_clk, core_clk, *rbbmtimer_clk;
				112	uint32_t fast_rate;
				113
				114	struct icc_path *icc_path;
				115
				116	/* Hang and Inactivity Detection:
				117	*/
				118	#define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */
				119
				120	#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
				121	#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
				122	struct timer_list hangcheck_timer;
				123	struct work_struct recover_work;
				124
				125	struct drm_gem_object *memptrs_bo;
				126
				127	struct {
				128	struct devfreq *devfreq;
				129	u64 busy_cycles;
				130	ktime_t time;
				131	} devfreq;
				132
				133	struct msm_gpu_state *crashstate;
				134	};
				135
				136	/* It turns out that all targets use the same ringbuffer size */
				137	#define MSM_GPU_RINGBUFFER_SZ SZ_32K
				138	#define MSM_GPU_RINGBUFFER_BLKSIZE 32
				139
				140	#define MSM_GPU_RB_CNTL_DEFAULT \
				141	(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) \| \
				142	AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
				143
				144	static inline bool msm_gpu_active(struct msm_gpu *gpu)
				145	{
				146	int i;
				147
				148	for (i = 0; i < gpu->nr_rings; i++) {
				149	struct msm_ringbuffer *ring = gpu->rb[i];
				150
				151	if (ring->seqno > ring->memptrs->fence)
				152	return true;
				153	}
				154
				155	return false;
				156	}
				157
				158	/* Perf-Counters:
				159	* The select_reg and select_val are just there for the benefit of the child
				160	* class that actually enables the perf counter.. but msm_gpu base class
				161	* will handle sampling/displaying the counters.
				162	*/
				163
				164	struct msm_gpu_perfcntr {
				165	uint32_t select_reg;
				166	uint32_t sample_reg;
				167	uint32_t select_val;
				168	const char *name;
				169	};
				170
				171	struct msm_gpu_submitqueue {
				172	int id;
				173	u32 flags;
				174	u32 prio;
				175	int faults;
				176	struct list_head node;
				177	struct kref ref;
				178	};
				179
				180	struct msm_gpu_state_bo {
				181	u64 iova;
				182	size_t size;
				183	void *data;
				184	bool encoded;
				185	};
				186
				187	struct msm_gpu_state {
				188	struct kref ref;
				189	struct timespec64 time;
				190
				191	struct {
				192	u64 iova;
				193	u32 fence;
				194	u32 seqno;
				195	u32 rptr;
				196	u32 wptr;
				197	void *data;
				198	int data_size;
				199	bool encoded;
				200	} ring[MSM_GPU_MAX_RINGS];
				201
				202	int nr_registers;
				203	u32 *registers;
				204
				205	u32 rbbm_status;
				206
				207	char *comm;
				208	char *cmd;
				209
				210	int nr_bos;
				211	struct msm_gpu_state_bo *bos;
				212	};
				213
				214	static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
				215	{
				216	msm_writel(data, gpu->mmio + (reg << 2));
				217	}
				218
				219	static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
				220	{
				221	return msm_readl(gpu->mmio + (reg << 2));
				222	}
				223
				224	static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
				225	{
				226	uint32_t val = gpu_read(gpu, reg);
				227
				228	val &= ~mask;
				229	gpu_write(gpu, reg, val \| or);
				230	}
				231
				232	static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
				233	{
				234	u64 val;
				235
				236	/*
				237	* Why not a readq here? Two reasons: 1) many of the LO registers are
				238	* not quad word aligned and 2) the GPU hardware designers have a bit
				239	* of a history of putting registers where they fit, especially in
				240	* spins. The longer a GPU family goes the higher the chance that
				241	* we'll get burned. We could do a series of validity checks if we
				242	* wanted to, but really is a readq() that much better? Nah.
				243	*/
				244
				245	/*
				246	* For some lo/hi registers (like perfcounters), the hi value is latched
				247	* when the lo is read, so make sure to read the lo first to trigger
				248	* that
				249	*/
				250	val = (u64) msm_readl(gpu->mmio + (lo << 2));
				251	val \|= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
				252
				253	return val;
				254	}
				255
				256	static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
				257	{
				258	/* Why not a writeq here? Read the screed above */
				259	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
				260	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
				261	}
				262
				263	int msm_gpu_pm_suspend(struct msm_gpu *gpu);
				264	int msm_gpu_pm_resume(struct msm_gpu *gpu);
				265	void msm_gpu_resume_devfreq(struct msm_gpu *gpu);
				266
				267	int msm_gpu_hw_init(struct msm_gpu *gpu);
				268
				269	void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
				270	void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
				271	int msm_gpu_perfcntr_sample(struct msm_gpu gpu, uint32_t activetime,
				272	uint32_t totaltime, uint32_t ncntrs, uint32_t cntrs);
				273
				274	void msm_gpu_retire(struct msm_gpu *gpu);
				275	void msm_gpu_submit(struct msm_gpu gpu, struct msm_gem_submit submit,
				276	struct msm_file_private *ctx);
				277
				278	int msm_gpu_init(struct drm_device drm, struct platform_device pdev,
				279	struct msm_gpu gpu, const struct msm_gpu_funcs funcs,
				280	const char name, struct msm_gpu_config config);
				281
				282	void msm_gpu_cleanup(struct msm_gpu *gpu);
				283
				284	struct msm_gpu adreno_load_gpu(struct drm_device dev);
				285	void __init adreno_register(void);
				286	void __exit adreno_unregister(void);
				287
				288	static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
				289	{
				290	if (queue)
				291	kref_put(&queue->ref, msm_submitqueue_destroy);
				292	}
				293
				294	static inline struct msm_gpu_state msm_gpu_crashstate_get(struct msm_gpu gpu)
				295	{
				296	struct msm_gpu_state *state = NULL;
				297
				298	mutex_lock(&gpu->dev->struct_mutex);
				299
				300	if (gpu->crashstate) {
				301	kref_get(&gpu->crashstate->ref);
				302	state = gpu->crashstate;
				303	}
				304
				305	mutex_unlock(&gpu->dev->struct_mutex);
				306
				307	return state;
				308	}
				309
				310	static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
				311	{
				312	mutex_lock(&gpu->dev->struct_mutex);
				313
				314	if (gpu->crashstate) {
				315	if (gpu->funcs->gpu_state_put(gpu->crashstate))
				316	gpu->crashstate = NULL;
				317	}
				318
				319	mutex_unlock(&gpu->dev->struct_mutex);
				320	}
				321
				322	#endif /* __MSM_GPU_H__ */