Blame - marvell/linux/drivers/gpu/drm/i915/i915_request.h - T108

blob: 3a3e7bbf19ff6e38fe9a01ddc0c2b17466bdafa1 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	/*
				2	* Copyright © 2008-2018 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	*/
				24
				25	#ifndef I915_REQUEST_H
				26	#define I915_REQUEST_H
				27
				28	#include <linux/dma-fence.h>
				29	#include <linux/irq_work.h>
				30	#include <linux/lockdep.h>
				31
				32	#include "gt/intel_context_types.h"
				33	#include "gt/intel_engine_types.h"
				34
				35	#include "i915_gem.h"
				36	#include "i915_scheduler.h"
				37	#include "i915_selftest.h"
				38	#include "i915_sw_fence.h"
				39
				40	#include <uapi/drm/i915_drm.h>
				41
				42	struct drm_file;
				43	struct drm_i915_gem_object;
				44	struct i915_request;
				45	struct intel_timeline;
				46	struct intel_timeline_cacheline;
				47
				48	struct i915_capture_list {
				49	struct i915_capture_list *next;
				50	struct i915_vma *vma;
				51	};
				52
				53	enum {
				54	/*
				55	* I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
				56	*
				57	* Set by __i915_request_submit() on handing over to HW, and cleared
				58	* by __i915_request_unsubmit() if we preempt this request.
				59	*
				60	* Finally cleared for consistency on retiring the request, when
				61	* we know the HW is no longer running this request.
				62	*
				63	* See i915_request_is_active()
				64	*/
				65	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
				66
				67	/*
				68	* I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
				69	*
				70	* Internal bookkeeping used by the breadcrumb code to track when
				71	* a request is on the various signal_list.
				72	*/
				73	I915_FENCE_FLAG_SIGNAL,
				74	};
				75
				76	/**
				77	* Request queue structure.
				78	*
				79	* The request queue allows us to note sequence numbers that have been emitted
				80	* and may be associated with active buffers to be retired.
				81	*
				82	* By keeping this list, we can avoid having to do questionable sequence
				83	* number comparisons on buffer last_read\|write_seqno. It also allows an
				84	* emission time to be associated with the request for tracking how far ahead
				85	* of the GPU the submission is.
				86	*
				87	* When modifying this structure be very aware that we perform a lockless
				88	* RCU lookup of it that may race against reallocation of the struct
				89	* from the slab freelist. We intentionally do not zero the structure on
				90	* allocation so that the lookup can use the dangling pointers (and is
				91	* cogniscent that those pointers may be wrong). Instead, everything that
				92	* needs to be initialised must be done so explicitly.
				93	*
				94	* The requests are reference counted.
				95	*/
				96	struct i915_request {
				97	struct dma_fence fence;
				98	spinlock_t lock;
				99
				100	/** On Which ring this request was generated */
				101	struct drm_i915_private *i915;
				102
				103	/**
				104	* Context and ring buffer related to this request
				105	* Contexts are refcounted, so when this request is associated with a
				106	* context, we must increment the context's refcount, to guarantee that
				107	* it persists while any request is linked to it. Requests themselves
				108	* are also refcounted, so the request will only be freed when the last
				109	* reference to it is dismissed, and the code in
				110	* i915_request_free() will then decrement the refcount on the
				111	* context.
				112	*/
				113	struct i915_gem_context *gem_context;
				114	struct intel_engine_cs *engine;
				115	struct intel_context *hw_context;
				116	struct intel_ring *ring;
				117	struct intel_timeline *timeline;
				118	struct list_head signal_link;
				119
				120	/*
				121	* The rcu epoch of when this request was allocated. Used to judiciously
				122	* apply backpressure on future allocations to ensure that under
				123	* mempressure there is sufficient RCU ticks for us to reclaim our
				124	* RCU protected slabs.
				125	*/
				126	unsigned long rcustate;
				127
				128	/*
				129	* We pin the timeline->mutex while constructing the request to
				130	* ensure that no caller accidentally drops it during construction.
				131	* The timeline->mutex must be held to ensure that only this caller
				132	* can use the ring and manipulate the associated timeline during
				133	* construction.
				134	*/
				135	struct pin_cookie cookie;
				136
				137	/*
				138	* Fences for the various phases in the request's lifetime.
				139	*
				140	* The submit fence is used to await upon all of the request's
				141	* dependencies. When it is signaled, the request is ready to run.
				142	* It is used by the driver to then queue the request for execution.
				143	*/
				144	struct i915_sw_fence submit;
				145	union {
				146	wait_queue_entry_t submitq;
				147	struct i915_sw_dma_fence_cb dmaq;
				148	};
				149	struct list_head execute_cb;
				150	struct i915_sw_fence semaphore;
				151	struct irq_work semaphore_work;
				152
				153	/*
				154	* A list of everyone we wait upon, and everyone who waits upon us.
				155	* Even though we will not be submitted to the hardware before the
				156	* submit fence is signaled (it waits for all external events as well
				157	* as our own requests), the scheduler still needs to know the
				158	* dependency tree for the lifetime of the request (from execbuf
				159	* to retirement), i.e. bidirectional dependency information for the
				160	* request not tied to individual fences.
				161	*/
				162	struct i915_sched_node sched;
				163	struct i915_dependency dep;
				164	intel_engine_mask_t execution_mask;
				165
				166	/*
				167	* A convenience pointer to the current breadcrumb value stored in
				168	* the HW status page (or our timeline's local equivalent). The full
				169	* path would be rq->hw_context->ring->timeline->hwsp_seqno.
				170	*/
				171	const u32 *hwsp_seqno;
				172
				173	/*
				174	* If we need to access the timeline's seqno for this request in
				175	* another request, we need to keep a read reference to this associated
				176	* cacheline, so that we do not free and recycle it before the foreign
				177	* observers have completed. Hence, we keep a pointer to the cacheline
				178	* inside the timeline's HWSP vma, but it is only valid while this
				179	* request has not completed and guarded by the timeline mutex.
				180	*/
				181	struct intel_timeline_cacheline *hwsp_cacheline;
				182
				183	/** Position in the ring of the start of the request */
				184	u32 head;
				185
				186	/** Position in the ring of the start of the user packets */
				187	u32 infix;
				188
				189	/**
				190	* Position in the ring of the start of the postfix.
				191	* This is required to calculate the maximum available ring space
				192	* without overwriting the postfix.
				193	*/
				194	u32 postfix;
				195
				196	/** Position in the ring of the end of the whole request */
				197	u32 tail;
				198
				199	/** Position in the ring of the end of any workarounds after the tail */
				200	u32 wa_tail;
				201
				202	/** Preallocate space in the ring for the emitting the request */
				203	u32 reserved_space;
				204
				205	/** Batch buffer related to this request if any (used for
				206	* error state dump only).
				207	*/
				208	struct i915_vma *batch;
				209	/**
				210	* Additional buffers requested by userspace to be captured upon
				211	* a GPU hang. The vma/obj on this list are protected by their
				212	* active reference - all objects on this list must also be
				213	* on the active_list (of their final request).
				214	*/
				215	struct i915_capture_list *capture_list;
				216	struct list_head active_list;
				217
				218	/** Time at which this request was emitted, in jiffies. */
				219	unsigned long emitted_jiffies;
				220
				221	unsigned long flags;
				222	#define I915_REQUEST_WAITBOOST BIT(0)
				223	#define I915_REQUEST_NOPREEMPT BIT(1)
				224
				225	/** timeline->request entry for this request */
				226	struct list_head link;
				227
				228	struct drm_i915_file_private *file_priv;
				229	/** file_priv list entry for this request */
				230	struct list_head client_link;
				231
				232	I915_SELFTEST_DECLARE(struct {
				233	struct list_head link;
				234	unsigned long delay;
				235	} mock;)
				236	};
				237
				238	#define I915_FENCE_GFP (GFP_KERNEL \| __GFP_RETRY_MAYFAIL \| __GFP_NOWARN)
				239
				240	extern const struct dma_fence_ops i915_fence_ops;
				241
				242	static inline bool dma_fence_is_i915(const struct dma_fence *fence)
				243	{
				244	return fence->ops == &i915_fence_ops;
				245	}
				246
				247	struct i915_request * __must_check
				248	__i915_request_create(struct intel_context *ce, gfp_t gfp);
				249	struct i915_request * __must_check
				250	i915_request_create(struct intel_context *ce);
				251
				252	struct i915_request __i915_request_commit(struct i915_request request);
				253	void __i915_request_queue(struct i915_request *rq,
				254	const struct i915_sched_attr *attr);
				255
				256	void i915_request_retire_upto(struct i915_request *rq);
				257
				258	static inline struct i915_request *
				259	to_request(struct dma_fence *fence)
				260	{
				261	/* We assume that NULL fence/request are interoperable */
				262	BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0);
				263	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
				264	return container_of(fence, struct i915_request, fence);
				265	}
				266
				267	static inline struct i915_request *
				268	i915_request_get(struct i915_request *rq)
				269	{
				270	return to_request(dma_fence_get(&rq->fence));
				271	}
				272
				273	static inline struct i915_request *
				274	i915_request_get_rcu(struct i915_request *rq)
				275	{
				276	return to_request(dma_fence_get_rcu(&rq->fence));
				277	}
				278
				279	static inline void
				280	i915_request_put(struct i915_request *rq)
				281	{
				282	dma_fence_put(&rq->fence);
				283	}
				284
				285	int i915_request_await_object(struct i915_request *to,
				286	struct drm_i915_gem_object *obj,
				287	bool write);
				288	int i915_request_await_dma_fence(struct i915_request *rq,
				289	struct dma_fence *fence);
				290	int i915_request_await_execution(struct i915_request *rq,
				291	struct dma_fence *fence,
				292	void (hook)(struct i915_request rq,
				293	struct dma_fence *signal));
				294
				295	void i915_request_add(struct i915_request *rq);
				296
				297	bool __i915_request_submit(struct i915_request *request);
				298	void i915_request_submit(struct i915_request *request);
				299
				300	void i915_request_skip(struct i915_request *request, int error);
				301
				302	void __i915_request_unsubmit(struct i915_request *request);
				303	void i915_request_unsubmit(struct i915_request *request);
				304
				305	/* Note: part of the intel_breadcrumbs family */
				306	bool i915_request_enable_breadcrumb(struct i915_request *request);
				307	void i915_request_cancel_breadcrumb(struct i915_request *request);
				308
				309	long i915_request_wait(struct i915_request *rq,
				310	unsigned int flags,
				311	long timeout)
				312	__attribute__((nonnull(1)));
				313	#define I915_WAIT_INTERRUPTIBLE BIT(0)
				314	#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
				315	#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */
				316	#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */
				317	#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
				318
				319	static inline bool i915_request_signaled(const struct i915_request *rq)
				320	{
				321	/* The request may live longer than its HWSP, so check flags first! */
				322	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
				323	}
				324
				325	static inline bool i915_request_is_active(const struct i915_request *rq)
				326	{
				327	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
				328	}
				329
				330	/**
				331	* Returns true if seq1 is later than seq2.
				332	*/
				333	static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
				334	{
				335	return (s32)(seq1 - seq2) >= 0;
				336	}
				337
				338	static inline u32 __hwsp_seqno(const struct i915_request *rq)
				339	{
				340	return READ_ONCE(*rq->hwsp_seqno);
				341	}
				342
				343	/**
				344	* hwsp_seqno - the current breadcrumb value in the HW status page
				345	* @rq: the request, to chase the relevant HW status page
				346	*
				347	* The emphasis in naming here is that hwsp_seqno() is not a property of the
				348	* request, but an indication of the current HW state (associated with this
				349	* request). Its value will change as the GPU executes more requests.
				350	*
				351	* Returns the current breadcrumb value in the associated HW status page (or
				352	* the local timeline's equivalent) for this request. The request itself
				353	* has the associated breadcrumb value of rq->fence.seqno, when the HW
				354	* status page has that breadcrumb or later, this request is complete.
				355	*/
				356	static inline u32 hwsp_seqno(const struct i915_request *rq)
				357	{
				358	u32 seqno;
				359
				360	rcu_read_lock(); /* the HWSP may be freed at runtime */
				361	seqno = __hwsp_seqno(rq);
				362	rcu_read_unlock();
				363
				364	return seqno;
				365	}
				366
				367	static inline bool __i915_request_has_started(const struct i915_request *rq)
				368	{
				369	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
				370	}
				371
				372	/**
				373	* i915_request_started - check if the request has begun being executed
				374	* @rq: the request
				375	*
				376	* If the timeline is not using initial breadcrumbs, a request is
				377	* considered started if the previous request on its timeline (i.e.
				378	* context) has been signaled.
				379	*
				380	* If the timeline is using semaphores, it will also be emitting an
				381	* "initial breadcrumb" after the semaphores are complete and just before
				382	* it began executing the user payload. A request can therefore be active
				383	* on the HW and not yet started as it is still busywaiting on its
				384	* dependencies (via HW semaphores).
				385	*
				386	* If the request has started, its dependencies will have been signaled
				387	* (either by fences or by semaphores) and it will have begun processing
				388	* the user payload.
				389	*
				390	* However, even if a request has started, it may have been preempted and
				391	* so no longer active, or it may have already completed.
				392	*
				393	* See also i915_request_is_active().
				394	*
				395	* Returns true if the request has begun executing the user payload, or
				396	* has completed:
				397	*/
				398	static inline bool i915_request_started(const struct i915_request *rq)
				399	{
				400	if (i915_request_signaled(rq))
				401	return true;
				402
				403	/* Remember: started but may have since been preempted! */
				404	return __i915_request_has_started(rq);
				405	}
				406
				407	/**
				408	* i915_request_is_running - check if the request may actually be executing
				409	* @rq: the request
				410	*
				411	* Returns true if the request is currently submitted to hardware, has passed
				412	* its start point (i.e. the context is setup and not busywaiting). Note that
				413	* it may no longer be running by the time the function returns!
				414	*/
				415	static inline bool i915_request_is_running(const struct i915_request *rq)
				416	{
				417	if (!i915_request_is_active(rq))
				418	return false;
				419
				420	return __i915_request_has_started(rq);
				421	}
				422
				423	static inline bool i915_request_completed(const struct i915_request *rq)
				424	{
				425	if (i915_request_signaled(rq))
				426	return true;
				427
				428	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
				429	}
				430
				431	static inline void i915_request_mark_complete(struct i915_request *rq)
				432	{
				433	rq->hwsp_seqno = (u32 )&rq->fence.seqno; / decouple from HWSP */
				434	}
				435
				436	static inline bool i915_request_has_waitboost(const struct i915_request *rq)
				437	{
				438	return rq->flags & I915_REQUEST_WAITBOOST;
				439	}
				440
				441	static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
				442	{
				443	/* Preemption should only be disabled very rarely */
				444	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
				445	}
				446
				447	bool i915_retire_requests(struct drm_i915_private *i915);
				448
				449	#endif /* I915_REQUEST_H */