/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers", but is a pipeline synchronisation object a la
 * GL_ARB_sync. We use the fences to synchronize access from the CPU with
 * activity on the GPU, for example, we should not rewrite an object's
 * PTEs whilst the GPU is reading them. We also track fences at a higher
 * level to provide implicit synchronisation around GEM objects, e.g.
 * set-domain will wait for outstanding GPU rendering before marking the
 * object ready for CPU access, or a pageflip will wait until the GPU has
 * finished rendering before showing the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */
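
/*
 * Illustrative sketch (not part of this header): a hypothetical resource
 * embedding a tracker. The struct, its lock and the retire callback are
 * made up for the example; only the i915_active_request calls are real.
 *
 *	struct hypothetical_node {
 *		struct mutex lock;
 *		struct i915_active_request last_write;
 *	};
 *
 *	static void node_retire(struct i915_active_request *active,
 *				struct i915_request *rq)
 *	{
 *		// tracker is now idle; safe to recycle the node
 *	}
 *
 *	i915_active_request_init(&node->last_write, &node->lock,
 *				 NULL, node_retire);
 *
 *	// on each new use of the resource, under node->lock:
 *	__i915_active_request_set(&node->last_write, rq);
 */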

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request);

/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active: the active tracker
 * @lock: the mutex guarding updates to the tracker (checked only in
 *	  CONFIG_DRM_I915_DEBUG_GEM builds)
 * @rq: initial request to track, can be NULL
 * @retire: a callback invoked when the tracker is retired (becomes idle),
 *	    can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle (it is retired
 * after completion), the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
			 struct mutex *lock,
			 struct i915_request *rq,
			 i915_active_retire_fn retire)
{
	RCU_INIT_POINTER(active->request, rq);
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_active_retire_noop;
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	active->lock = lock;
#endif
}

#define INIT_ACTIVE_REQUEST(name, lock) \
	i915_active_request_init((name), (lock), NULL, NULL)

/**
 * __i915_active_request_set - updates the tracker to watch the current request
 * @active: the active tracker
 * @request: the request to watch
 *
 * __i915_active_request_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
__i915_active_request_set(struct i915_active_request *active,
			  struct i915_request *request)
{
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif
	list_move(&active->link, &request->active_list);
	rcu_assign_pointer(active->request, request);
}

int __must_check
i915_active_request_set(struct i915_active_request *active,
			struct i915_request *rq);

/**
 * i915_active_request_raw - return the active request
 * @active: the active tracker
 * @mutex: the mutex guarding the tracker
 *
 * i915_active_request_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold @mutex.
 */
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

/**
 * i915_active_request_peek - report the active request being monitored
 * @active: the active tracker
 * @mutex: the mutex guarding the tracker
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold @mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
			 struct mutex *mutex)
{
	struct i915_request *request;

	request = i915_active_request_raw(active, mutex);
	if (!request || i915_request_completed(request))
		return NULL;

	return request;
}

/**
 * i915_active_request_get - return a reference to the active request
 * @active: the active tracker
 * @mutex: the mutex guarding the tracker
 *
 * i915_active_request_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold @mutex.
 */
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
			struct mutex *mutex)
{
	return i915_request_get(i915_active_request_peek(active, mutex));
}
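
/*
 * Illustrative sketch: take a reference under the tracker's guarding mutex,
 * then wait on the request outside the lock. "node" and its members are the
 * hypothetical examples from the earlier sketch.
 *
 *	mutex_lock(&node->lock);
 *	rq = i915_active_request_get(&node->last_write, &node->lock);
 *	mutex_unlock(&node->lock);
 *	if (rq) {
 *		i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
 *				  MAX_SCHEDULE_TIMEOUT);
 *		i915_request_put(rq);
 *	}
 */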

/**
 * __i915_active_request_get_rcu - return a reference to the active request
 * @active: the active tracker
 *
 * __i915_active_request_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
	/*
	 * Performing a lockless retrieval of the active request is super
	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 *	Thread A			Thread B
	 *
	 *	rq = active.request
	 *					retire(rq) -> free(rq);
	 *					(rq is now first on the slab freelist)
	 *					active.request = NULL
	 *
	 *					rq = new submission on a new object
	 *	ref(rq)
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but it does not ensure that either
	 * the seqno or the HWS is the right one! However, if the request
	 * was reallocated, that means the active tracker's request was
	 * complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
	 * See i915_request_alloc().
	 */
	do {
		struct i915_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_request_completed(request))
			return NULL;

		/*
		 * An especially silly compiler could decide to recompute the
		 * result of i915_request_completed, more specifically
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
		 * past the call to i915_request_get_rcu, but not executing
		 * that while still executing i915_request_put() creates
		 * havoc enough. Prevent this with a compiler barrier.
		 */
		barrier();

		request = i915_request_get_rcu(request);

		/*
		 * What stops the following rcu_access_pointer() from occurring
		 * before the above i915_request_get_rcu()? If we were
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
		 * atomic_inc_not_zero() which is only a full memory barrier
		 * when successful. That is, if i915_request_get_rcu()
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

		i915_request_put(request);
	} while (1);
}

/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active: the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
	struct i915_request *request;

	rcu_read_lock();
	request = __i915_active_request_get_rcu(active);
	rcu_read_unlock();

	return request;
}
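
/*
 * Illustrative sketch: peek at the last activity without holding any lock.
 * "node" is the hypothetical resource from the earlier sketches.
 *
 *	rq = i915_active_request_get_unlocked(&node->last_write);
 *	if (rq) {
 *		// e.g. report the request, or test i915_request_completed(rq)
 *		i915_request_put(rq);
 *	}
 */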

/**
 * i915_active_request_isset - report whether the active tracker is assigned
 * @active: the active tracker
 *
 * i915_active_request_isset() returns true if the active tracker is currently
 * assigned to a request. Due to lazy retiring, that request may already have
 * completed and so this may report stale information.
 */
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
	return rcu_access_pointer(active->request);
}

/**
 * i915_active_request_retire - waits until the request is retired
 * @active: the active tracker on which to wait
 * @mutex: the mutex guarding the tracker
 * @retire: the retirement callback to invoke once the request has completed
 *
 * i915_active_request_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 *
 * Returns 0 on success, or a negative error code if the wait on the
 * request was interrupted.
 */
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
			   struct mutex *mutex, i915_active_retire_fn retire)
{
	struct i915_request *request;
	long ret;

	request = i915_active_request_raw(active, mutex);
	if (!request)
		return 0;

	ret = i915_request_wait(request,
				I915_WAIT_INTERRUPTIBLE,
				MAX_SCHEDULE_TIMEOUT);
	if (ret < 0)
		return ret;

	list_del_init(&active->link);
	RCU_INIT_POINTER(active->request, NULL);

	retire(active, request);

	return 0;
}
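
/*
 * Illustrative sketch: flush the tracker before tearing down the resource
 * it guards. The caller must hold the guarding mutex; "node" and
 * node_retire() are the hypothetical examples from the earlier sketches.
 *
 *	mutex_lock(&node->lock);
 *	err = i915_active_request_retire(&node->last_write,
 *					 &node->lock, node_retire);
 *	mutex_unlock(&node->lock);
 *	if (err)
 *		return err; // interrupted; the tracker is still active
 */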

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU forms a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */
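
/*
 * Illustrative sketch of the i915_active lifecycle, again using the
 * hypothetical "node" and callbacks from the earlier sketches; the calls
 * themselves are declared below. "tl" is the timeline of the new request.
 *
 *	i915_active_init(i915, &node->active, node_active, node_retired);
 *
 *	// for each new request using the resource:
 *	err = i915_active_acquire(&node->active);
 *	if (!err) {
 *		err = i915_active_ref(&node->active, tl, rq);
 *		i915_active_release(&node->active);
 *	}
 *
 *	// to wait for all tracked activity to complete:
 *	err = i915_active_wait(&node->active);
 */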

void __i915_active_init(struct drm_i915_private *i915,
			struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key);
#define i915_active_init(i915, ref, active, retire) do {		\
	static struct lock_class_key __key;				\
									\
	__i915_active_init(i915, ref, active, retire, &__key);		\
} while (0)

int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct i915_request *rq);

int i915_active_wait(struct i915_active *ref);

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active);

int i915_active_acquire(struct i915_active *ref);
void i915_active_release(struct i915_active *ref);
void __i915_active_release_nested(struct i915_active *ref, int subclass);

bool i915_active_trygrab(struct i915_active *ref);
void i915_active_ungrab(struct i915_active *ref);

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
	return !atomic_read(&ref->count);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_active_barriers(struct i915_request *rq);

#endif /* _I915_ACTIVE_H_ */