Blame - marvell/linux/drivers/gpu/drm/i915/i915_request.c - T108

blob: 49d498882cf628ca620bb01b3d2d40c26f790181 [file] [log] [blame]

b.liu	e958203	2025-04-17 19:18:16 +0800	[diff] [blame^]	1	/*
				2	* Copyright © 2008-2015 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	*/
				24
				25	#include <linux/dma-fence-array.h>
				26	#include <linux/irq_work.h>
				27	#include <linux/prefetch.h>
				28	#include <linux/sched.h>
				29	#include <linux/sched/clock.h>
				30	#include <linux/sched/signal.h>
				31
				32	#include "gem/i915_gem_context.h"
				33	#include "gt/intel_context.h"
				34
				35	#include "i915_active.h"
				36	#include "i915_drv.h"
				37	#include "i915_globals.h"
				38	#include "i915_trace.h"
				39	#include "intel_pm.h"
				40
				41	struct execute_cb {
				42	struct list_head link;
				43	struct irq_work work;
				44	struct i915_sw_fence *fence;
				45	void (hook)(struct i915_request rq, struct dma_fence *signal);
				46	struct i915_request *signal;
				47	};
				48
				49	static struct i915_global_request {
				50	struct i915_global base;
				51	struct kmem_cache *slab_requests;
				52	struct kmem_cache *slab_dependencies;
				53	struct kmem_cache *slab_execute_cbs;
				54	} global;
				55
				56	static const char i915_fence_get_driver_name(struct dma_fence fence)
				57	{
				58	return "i915";
				59	}
				60
				61	static const char i915_fence_get_timeline_name(struct dma_fence fence)
				62	{
				63	/*
				64	* The timeline struct (as part of the ppgtt underneath a context)
				65	* may be freed when the request is no longer in use by the GPU.
				66	* We could extend the life of a context to beyond that of all
				67	* fences, possibly keeping the hw resource around indefinitely,
				68	* or we just give them a false name. Since
				69	* dma_fence_ops.get_timeline_name is a debug feature, the occasional
				70	* lie seems justifiable.
				71	*/
				72	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
				73	return "signaled";
				74
				75	return to_request(fence)->gem_context->name ?: "[i915]";
				76	}
				77
				78	static bool i915_fence_signaled(struct dma_fence *fence)
				79	{
				80	return i915_request_completed(to_request(fence));
				81	}
				82
				83	static bool i915_fence_enable_signaling(struct dma_fence *fence)
				84	{
				85	return i915_request_enable_breadcrumb(to_request(fence));
				86	}
				87
				88	static signed long i915_fence_wait(struct dma_fence *fence,
				89	bool interruptible,
				90	signed long timeout)
				91	{
				92	return i915_request_wait(to_request(fence),
				93	interruptible \| I915_WAIT_PRIORITY,
				94	timeout);
				95	}
				96
				97	static void i915_fence_release(struct dma_fence *fence)
				98	{
				99	struct i915_request *rq = to_request(fence);
				100
				101	/*
				102	* The request is put onto a RCU freelist (i.e. the address
				103	* is immediately reused), mark the fences as being freed now.
				104	* Otherwise the debugobjects for the fences are only marked as
				105	* freed when the slab cache itself is freed, and so we would get
				106	* caught trying to reuse dead objects.
				107	*/
				108	i915_sw_fence_fini(&rq->submit);
				109	i915_sw_fence_fini(&rq->semaphore);
				110
				111	kmem_cache_free(global.slab_requests, rq);
				112	}
				113
				114	const struct dma_fence_ops i915_fence_ops = {
				115	.get_driver_name = i915_fence_get_driver_name,
				116	.get_timeline_name = i915_fence_get_timeline_name,
				117	.enable_signaling = i915_fence_enable_signaling,
				118	.signaled = i915_fence_signaled,
				119	.wait = i915_fence_wait,
				120	.release = i915_fence_release,
				121	};
				122
				123	static void irq_execute_cb(struct irq_work *wrk)
				124	{
				125	struct execute_cb cb = container_of(wrk, typeof(cb), work);
				126
				127	i915_sw_fence_complete(cb->fence);
				128	kmem_cache_free(global.slab_execute_cbs, cb);
				129	}
				130
				131	static void irq_execute_cb_hook(struct irq_work *wrk)
				132	{
				133	struct execute_cb cb = container_of(wrk, typeof(cb), work);
				134
				135	cb->hook(container_of(cb->fence, struct i915_request, submit),
				136	&cb->signal->fence);
				137	i915_request_put(cb->signal);
				138
				139	irq_execute_cb(wrk);
				140	}
				141
				142	static void __notify_execute_cb(struct i915_request *rq)
				143	{
				144	struct execute_cb *cb;
				145
				146	lockdep_assert_held(&rq->lock);
				147
				148	if (list_empty(&rq->execute_cb))
				149	return;
				150
				151	list_for_each_entry(cb, &rq->execute_cb, link)
				152	irq_work_queue(&cb->work);
				153
				154	/*
				155	* XXX Rollback on __i915_request_unsubmit()
				156	*
				157	* In the future, perhaps when we have an active time-slicing scheduler,
				158	* it will be interesting to unsubmit parallel execution and remove
				159	* busywaits from the GPU until their master is restarted. This is
				160	* quite hairy, we have to carefully rollback the fence and do a
				161	* preempt-to-idle cycle on the target engine, all the while the
				162	* master execute_cb may refire.
				163	*/
				164	INIT_LIST_HEAD(&rq->execute_cb);
				165	}
				166
				167	static inline void
				168	remove_from_client(struct i915_request *request)
				169	{
				170	struct drm_i915_file_private *file_priv;
				171
				172	file_priv = READ_ONCE(request->file_priv);
				173	if (!file_priv)
				174	return;
				175
				176	spin_lock(&file_priv->mm.lock);
				177	if (request->file_priv) {
				178	list_del(&request->client_link);
				179	request->file_priv = NULL;
				180	}
				181	spin_unlock(&file_priv->mm.lock);
				182	}
				183
				184	static void free_capture_list(struct i915_request *request)
				185	{
				186	struct i915_capture_list *capture;
				187
				188	capture = request->capture_list;
				189	while (capture) {
				190	struct i915_capture_list *next = capture->next;
				191
				192	kfree(capture);
				193	capture = next;
				194	}
				195	}
				196
				197	static void remove_from_engine(struct i915_request *rq)
				198	{
				199	struct intel_engine_cs engine, locked;
				200
				201	/*
				202	* Virtual engines complicate acquiring the engine timeline lock,
				203	* as their rq->engine pointer is not stable until under that
				204	* engine lock. The simple ploy we use is to take the lock then
				205	* check that the rq still belongs to the newly locked engine.
				206	*/
				207	locked = READ_ONCE(rq->engine);
				208	spin_lock(&locked->active.lock);
				209	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
				210	spin_unlock(&locked->active.lock);
				211	spin_lock(&engine->active.lock);
				212	locked = engine;
				213	}
				214	list_del(&rq->sched.link);
				215	spin_unlock(&locked->active.lock);
				216	}
				217
				218	static bool i915_request_retire(struct i915_request *rq)
				219	{
				220	struct i915_active_request active, next;
				221
				222	lockdep_assert_held(&rq->timeline->mutex);
				223	if (!i915_request_completed(rq))
				224	return false;
				225
				226	GEM_TRACE("%s fence %llx:%lld, current %d\n",
				227	rq->engine->name,
				228	rq->fence.context, rq->fence.seqno,
				229	hwsp_seqno(rq));
				230
				231	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
				232	trace_i915_request_retire(rq);
				233
				234	/*
				235	* We know the GPU must have read the request to have
				236	* sent us the seqno + interrupt, so use the position
				237	* of tail of the request to update the last known position
				238	* of the GPU head.
				239	*
				240	* Note this requires that we are always called in request
				241	* completion order.
				242	*/
				243	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
				244	rq->ring->head = rq->postfix;
				245
				246	/*
				247	* Walk through the active list, calling retire on each. This allows
				248	* objects to track their GPU activity and mark themselves as idle
				249	* when their last active request is completed (updating state
				250	* tracking lists for eviction, active references for GEM, etc).
				251	*
				252	* As the ->retire() may free the node, we decouple it first and
				253	* pass along the auxiliary information (to avoid dereferencing
				254	* the node after the callback).
				255	*/
				256	list_for_each_entry_safe(active, next, &rq->active_list, link) {
				257	/*
				258	* In microbenchmarks or focusing upon time inside the kernel,
				259	* we may spend an inordinate amount of time simply handling
				260	* the retirement of requests and processing their callbacks.
				261	* Of which, this loop itself is particularly hot due to the
				262	* cache misses when jumping around the list of
				263	* i915_active_request. So we try to keep this loop as
				264	* streamlined as possible and also prefetch the next
				265	* i915_active_request to try and hide the likely cache miss.
				266	*/
				267	prefetchw(next);
				268
				269	INIT_LIST_HEAD(&active->link);
				270	RCU_INIT_POINTER(active->request, NULL);
				271
				272	active->retire(active, rq);
				273	}
				274
				275	local_irq_disable();
				276
				277	/*
				278	* We only loosely track inflight requests across preemption,
				279	* and so we may find ourselves attempting to retire a _completed_
				280	* request that we have removed from the HW and put back on a run
				281	* queue.
				282	*/
				283	remove_from_engine(rq);
				284
				285	spin_lock(&rq->lock);
				286	i915_request_mark_complete(rq);
				287	if (!i915_request_signaled(rq))
				288	dma_fence_signal_locked(&rq->fence);
				289	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
				290	i915_request_cancel_breadcrumb(rq);
				291	if (i915_request_has_waitboost(rq)) {
				292	GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
				293	atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
				294	}
				295	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
				296	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
				297	__notify_execute_cb(rq);
				298	}
				299	GEM_BUG_ON(!list_empty(&rq->execute_cb));
				300	spin_unlock(&rq->lock);
				301
				302	local_irq_enable();
				303
				304	remove_from_client(rq);
				305	list_del(&rq->link);
				306
				307	intel_context_exit(rq->hw_context);
				308	intel_context_unpin(rq->hw_context);
				309
				310	free_capture_list(rq);
				311	i915_sched_node_fini(&rq->sched);
				312	i915_request_put(rq);
				313
				314	return true;
				315	}
				316
				317	void i915_request_retire_upto(struct i915_request *rq)
				318	{
				319	struct intel_timeline * const tl = rq->timeline;
				320	struct i915_request *tmp;
				321
				322	GEM_TRACE("%s fence %llx:%lld, current %d\n",
				323	rq->engine->name,
				324	rq->fence.context, rq->fence.seqno,
				325	hwsp_seqno(rq));
				326
				327	lockdep_assert_held(&tl->mutex);
				328	GEM_BUG_ON(!i915_request_completed(rq));
				329
				330	do {
				331	tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
				332	} while (i915_request_retire(tmp) && tmp != rq);
				333	}
				334
				335	static int
				336	__i915_request_await_execution(struct i915_request *rq,
				337	struct i915_request *signal,
				338	void (hook)(struct i915_request rq,
				339	struct dma_fence *signal),
				340	gfp_t gfp)
				341	{
				342	struct execute_cb *cb;
				343
				344	if (i915_request_is_active(signal)) {
				345	if (hook)
				346	hook(rq, &signal->fence);
				347	return 0;
				348	}
				349
				350	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
				351	if (!cb)
				352	return -ENOMEM;
				353
				354	cb->fence = &rq->submit;
				355	i915_sw_fence_await(cb->fence);
				356	init_irq_work(&cb->work, irq_execute_cb);
				357
				358	if (hook) {
				359	cb->hook = hook;
				360	cb->signal = i915_request_get(signal);
				361	cb->work.func = irq_execute_cb_hook;
				362	}
				363
				364	spin_lock_irq(&signal->lock);
				365	if (i915_request_is_active(signal)) {
				366	if (hook) {
				367	hook(rq, &signal->fence);
				368	i915_request_put(signal);
				369	}
				370	i915_sw_fence_complete(cb->fence);
				371	kmem_cache_free(global.slab_execute_cbs, cb);
				372	} else {
				373	list_add_tail(&cb->link, &signal->execute_cb);
				374	}
				375	spin_unlock_irq(&signal->lock);
				376
				377	return 0;
				378	}
				379
				380	bool __i915_request_submit(struct i915_request *request)
				381	{
				382	struct intel_engine_cs *engine = request->engine;
				383	bool result = false;
				384
				385	GEM_TRACE("%s fence %llx:%lld, current %d\n",
				386	engine->name,
				387	request->fence.context, request->fence.seqno,
				388	hwsp_seqno(request));
				389
				390	GEM_BUG_ON(!irqs_disabled());
				391	lockdep_assert_held(&engine->active.lock);
				392
				393	/*
				394	* With the advent of preempt-to-busy, we frequently encounter
				395	* requests that we have unsubmitted from HW, but left running
				396	* until the next ack and so have completed in the meantime. On
				397	* resubmission of that completed request, we can skip
				398	* updating the payload, and execlists can even skip submitting
				399	* the request.
				400	*
				401	* We must remove the request from the caller's priority queue,
				402	* and the caller must only call us when the request is in their
				403	* priority queue, under the active.lock. This ensures that the
				404	* request has not yet been retired and we can safely move
				405	* the request into the engine->active.list where it will be
				406	* dropped upon retiring. (Otherwise if resubmit a retired
				407	* request, this would be a horrible use-after-free.)
				408	*/
				409	if (i915_request_completed(request))
				410	goto xfer;
				411
				412	if (i915_gem_context_is_banned(request->gem_context))
				413	i915_request_skip(request, -EIO);
				414
				415	/*
				416	* Are we using semaphores when the gpu is already saturated?
				417	*
				418	* Using semaphores incurs a cost in having the GPU poll a
				419	* memory location, busywaiting for it to change. The continual
				420	* memory reads can have a noticeable impact on the rest of the
				421	* system with the extra bus traffic, stalling the cpu as it too
				422	* tries to access memory across the bus (perf stat -e bus-cycles).
				423	*
				424	* If we installed a semaphore on this request and we only submit
				425	* the request after the signaler completed, that indicates the
				426	* system is overloaded and using semaphores at this time only
				427	* increases the amount of work we are doing. If so, we disable
				428	* further use of semaphores until we are idle again, whence we
				429	* optimistically try again.
				430	*/
				431	if (request->sched.semaphores &&
				432	i915_sw_fence_signaled(&request->semaphore))
				433	engine->saturated \|= request->sched.semaphores;
				434
				435	engine->emit_fini_breadcrumb(request,
				436	request->ring->vaddr + request->postfix);
				437
				438	trace_i915_request_execute(request);
				439	engine->serial++;
				440	result = true;
				441
				442	xfer: /* We may be recursing from the signal callback of another i915 fence */
				443	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
				444
				445	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
				446	list_move_tail(&request->sched.link, &engine->active.requests);
				447
				448	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
				449	!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
				450	!i915_request_enable_breadcrumb(request))
				451	intel_engine_queue_breadcrumbs(engine);
				452
				453	__notify_execute_cb(request);
				454
				455	spin_unlock(&request->lock);
				456
				457	return result;
				458	}
				459
				460	void i915_request_submit(struct i915_request *request)
				461	{
				462	struct intel_engine_cs *engine = request->engine;
				463	unsigned long flags;
				464
				465	/* Will be called from irq-context when using foreign fences. */
				466	spin_lock_irqsave(&engine->active.lock, flags);
				467
				468	__i915_request_submit(request);
				469
				470	spin_unlock_irqrestore(&engine->active.lock, flags);
				471	}
				472
				473	void __i915_request_unsubmit(struct i915_request *request)
				474	{
				475	struct intel_engine_cs *engine = request->engine;
				476
				477	GEM_TRACE("%s fence %llx:%lld, current %d\n",
				478	engine->name,
				479	request->fence.context, request->fence.seqno,
				480	hwsp_seqno(request));
				481
				482	GEM_BUG_ON(!irqs_disabled());
				483	lockdep_assert_held(&engine->active.lock);
				484
				485	/*
				486	* Only unwind in reverse order, required so that the per-context list
				487	* is kept in seqno/ring order.
				488	*/
				489
				490	/* We may be recursing from the signal callback of another i915 fence */
				491	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
				492
				493	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
				494	i915_request_cancel_breadcrumb(request);
				495
				496	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
				497	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
				498
				499	spin_unlock(&request->lock);
				500
				501	/* We've already spun, don't charge on resubmitting. */
				502	if (request->sched.semaphores && i915_request_started(request)) {
				503	request->sched.attr.priority \|= I915_PRIORITY_NOSEMAPHORE;
				504	request->sched.semaphores = 0;
				505	}
				506
				507	/*
				508	* We don't need to wake_up any waiters on request->execute, they
				509	* will get woken by any other event or us re-adding this request
				510	* to the engine timeline (__i915_request_submit()). The waiters
				511	* should be quite adapt at finding that the request now has a new
				512	* global_seqno to the one they went to sleep on.
				513	*/
				514	}
				515
				516	void i915_request_unsubmit(struct i915_request *request)
				517	{
				518	struct intel_engine_cs *engine = request->engine;
				519	unsigned long flags;
				520
				521	/* Will be called from irq-context when using foreign fences. */
				522	spin_lock_irqsave(&engine->active.lock, flags);
				523
				524	__i915_request_unsubmit(request);
				525
				526	spin_unlock_irqrestore(&engine->active.lock, flags);
				527	}
				528
				529	static int __i915_sw_fence_call
				530	submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
				531	{
				532	struct i915_request *request =
				533	container_of(fence, typeof(*request), submit);
				534
				535	switch (state) {
				536	case FENCE_COMPLETE:
				537	trace_i915_request_submit(request);
				538
				539	if (unlikely(fence->error))
				540	i915_request_skip(request, fence->error);
				541
				542	/*
				543	* We need to serialize use of the submit_request() callback
				544	* with its hotplugging performed during an emergency
				545	* i915_gem_set_wedged(). We use the RCU mechanism to mark the
				546	* critical section in order to force i915_gem_set_wedged() to
				547	* wait until the submit_request() is completed before
				548	* proceeding.
				549	*/
				550	rcu_read_lock();
				551	request->engine->submit_request(request);
				552	rcu_read_unlock();
				553	break;
				554
				555	case FENCE_FREE:
				556	i915_request_put(request);
				557	break;
				558	}
				559
				560	return NOTIFY_DONE;
				561	}
				562
				563	static void irq_semaphore_cb(struct irq_work *wrk)
				564	{
				565	struct i915_request *rq =
				566	container_of(wrk, typeof(*rq), semaphore_work);
				567
				568	i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
				569	i915_request_put(rq);
				570	}
				571
				572	static int __i915_sw_fence_call
				573	semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
				574	{
				575	struct i915_request rq = container_of(fence, typeof(rq), semaphore);
				576
				577	switch (state) {
				578	case FENCE_COMPLETE:
				579	if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
				580	i915_request_get(rq);
				581	init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
				582	irq_work_queue(&rq->semaphore_work);
				583	}
				584	break;
				585
				586	case FENCE_FREE:
				587	i915_request_put(rq);
				588	break;
				589	}
				590
				591	return NOTIFY_DONE;
				592	}
				593
				594	static void retire_requests(struct intel_timeline *tl)
				595	{
				596	struct i915_request rq, rn;
				597
				598	list_for_each_entry_safe(rq, rn, &tl->requests, link)
				599	if (!i915_request_retire(rq))
				600	break;
				601	}
				602
				603	static noinline struct i915_request *
				604	request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
				605	{
				606	struct i915_request *rq;
				607
				608	if (list_empty(&tl->requests))
				609	goto out;
				610
				611	if (!gfpflags_allow_blocking(gfp))
				612	goto out;
				613
				614	/* Move our oldest request to the slab-cache (if not in use!) */
				615	rq = list_first_entry(&tl->requests, typeof(*rq), link);
				616	i915_request_retire(rq);
				617
				618	rq = kmem_cache_alloc(global.slab_requests,
				619	gfp \| __GFP_RETRY_MAYFAIL \| __GFP_NOWARN);
				620	if (rq)
				621	return rq;
				622
				623	/* Ratelimit ourselves to prevent oom from malicious clients */
				624	rq = list_last_entry(&tl->requests, typeof(*rq), link);
				625	cond_synchronize_rcu(rq->rcustate);
				626
				627	/* Retire our old requests in the hope that we free some */
				628	retire_requests(tl);
				629
				630	out:
				631	return kmem_cache_alloc(global.slab_requests, gfp);
				632	}
				633
				634	struct i915_request *
				635	__i915_request_create(struct intel_context *ce, gfp_t gfp)
				636	{
				637	struct intel_timeline *tl = ce->timeline;
				638	struct i915_request *rq;
				639	u32 seqno;
				640	int ret;
				641
				642	might_sleep_if(gfpflags_allow_blocking(gfp));
				643
				644	/* Check that the caller provided an already pinned context */
				645	__intel_context_pin(ce);
				646
				647	/*
				648	* Beware: Dragons be flying overhead.
				649	*
				650	* We use RCU to look up requests in flight. The lookups may
				651	* race with the request being allocated from the slab freelist.
				652	* That is the request we are writing to here, may be in the process
				653	* of being read by __i915_active_request_get_rcu(). As such,
				654	* we have to be very careful when overwriting the contents. During
				655	* the RCU lookup, we change chase the request->engine pointer,
				656	* read the request->global_seqno and increment the reference count.
				657	*
				658	* The reference count is incremented atomically. If it is zero,
				659	* the lookup knows the request is unallocated and complete. Otherwise,
				660	* it is either still in use, or has been reallocated and reset
				661	* with dma_fence_init(). This increment is safe for release as we
				662	* check that the request we have a reference to and matches the active
				663	* request.
				664	*
				665	* Before we increment the refcount, we chase the request->engine
				666	* pointer. We must not call kmem_cache_zalloc() or else we set
				667	* that pointer to NULL and cause a crash during the lookup. If
				668	* we see the request is completed (based on the value of the
				669	* old engine and seqno), the lookup is complete and reports NULL.
				670	* If we decide the request is not completed (new engine or seqno),
				671	* then we grab a reference and double check that it is still the
				672	* active request - which it won't be and restart the lookup.
				673	*
				674	* Do not use kmem_cache_zalloc() here!
				675	*/
				676	rq = kmem_cache_alloc(global.slab_requests,
				677	gfp \| __GFP_RETRY_MAYFAIL \| __GFP_NOWARN);
				678	if (unlikely(!rq)) {
				679	rq = request_alloc_slow(tl, gfp);
				680	if (!rq) {
				681	ret = -ENOMEM;
				682	goto err_unreserve;
				683	}
				684	}
				685
				686	ret = intel_timeline_get_seqno(tl, rq, &seqno);
				687	if (ret)
				688	goto err_free;
				689
				690	rq->i915 = ce->engine->i915;
				691	rq->hw_context = ce;
				692	rq->gem_context = ce->gem_context;
				693	rq->engine = ce->engine;
				694	rq->ring = ce->ring;
				695	rq->timeline = tl;
				696	rq->hwsp_seqno = tl->hwsp_seqno;
				697	rq->hwsp_cacheline = tl->hwsp_cacheline;
				698	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
				699
				700	spin_lock_init(&rq->lock);
				701	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
				702	tl->fence_context, seqno);
				703
				704	/* We bump the ref for the fence chain */
				705	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
				706	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
				707
				708	i915_sched_node_init(&rq->sched);
				709
				710	/* No zalloc, must clear what we need by hand */
				711	rq->file_priv = NULL;
				712	rq->batch = NULL;
				713	rq->capture_list = NULL;
				714	rq->flags = 0;
				715	rq->execution_mask = ALL_ENGINES;
				716
				717	INIT_LIST_HEAD(&rq->active_list);
				718	INIT_LIST_HEAD(&rq->execute_cb);
				719
				720	/*
				721	* Reserve space in the ring buffer for all the commands required to
				722	* eventually emit this request. This is to guarantee that the
				723	* i915_request_add() call can't fail. Note that the reserve may need
				724	* to be redone if the request is not actually submitted straight
				725	* away, e.g. because a GPU scheduler has deferred it.
				726	*
				727	* Note that due to how we add reserved_space to intel_ring_begin()
				728	* we need to double our request to ensure that if we need to wrap
				729	* around inside i915_request_add() there is sufficient space at
				730	* the beginning of the ring as well.
				731	*/
				732	rq->reserved_space =
				733	2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
				734
				735	/*
				736	* Record the position of the start of the request so that
				737	* should we detect the updated seqno part-way through the
				738	* GPU processing the request, we never over-estimate the
				739	* position of the head.
				740	*/
				741	rq->head = rq->ring->emit;
				742
				743	ret = rq->engine->request_alloc(rq);
				744	if (ret)
				745	goto err_unwind;
				746
				747	rq->infix = rq->ring->emit; /* end of header; start of user payload */
				748
				749	intel_context_mark_active(ce);
				750	return rq;
				751
				752	err_unwind:
				753	ce->ring->emit = rq->head;
				754
				755	/* Make sure we didn't add ourselves to external state before freeing */
				756	GEM_BUG_ON(!list_empty(&rq->active_list));
				757	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
				758	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
				759
				760	err_free:
				761	kmem_cache_free(global.slab_requests, rq);
				762	err_unreserve:
				763	intel_context_unpin(ce);
				764	return ERR_PTR(ret);
				765	}
				766
				767	struct i915_request *
				768	i915_request_create(struct intel_context *ce)
				769	{
				770	struct i915_request *rq;
				771	struct intel_timeline *tl;
				772
				773	tl = intel_context_timeline_lock(ce);
				774	if (IS_ERR(tl))
				775	return ERR_CAST(tl);
				776
				777	/* Move our oldest request to the slab-cache (if not in use!) */
				778	rq = list_first_entry(&tl->requests, typeof(*rq), link);
				779	if (!list_is_last(&rq->link, &tl->requests))
				780	i915_request_retire(rq);
				781
				782	intel_context_enter(ce);
				783	rq = __i915_request_create(ce, GFP_KERNEL);
				784	intel_context_exit(ce); /* active reference transferred to request */
				785	if (IS_ERR(rq))
				786	goto err_unlock;
				787
				788	/* Check that we do not interrupt ourselves with a new request */
				789	rq->cookie = lockdep_pin_lock(&tl->mutex);
				790
				791	return rq;
				792
				793	err_unlock:
				794	intel_context_timeline_unlock(tl);
				795	return rq;
				796	}
				797
				798	static int
				799	i915_request_await_start(struct i915_request rq, struct i915_request signal)
				800	{
				801	if (list_is_first(&signal->link, &signal->timeline->requests))
				802	return 0;
				803
				804	signal = list_prev_entry(signal, link);
				805	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
				806	return 0;
				807
				808	return i915_sw_fence_await_dma_fence(&rq->submit,
				809	&signal->fence, 0,
				810	I915_FENCE_GFP);
				811	}
				812
				813	static intel_engine_mask_t
				814	already_busywaiting(struct i915_request *rq)
				815	{
				816	/*
				817	* Polling a semaphore causes bus traffic, delaying other users of
				818	* both the GPU and CPU. We want to limit the impact on others,
				819	* while taking advantage of early submission to reduce GPU
				820	* latency. Therefore we restrict ourselves to not using more
				821	* than one semaphore from each source, and not using a semaphore
				822	* if we have detected the engine is saturated (i.e. would not be
				823	* submitted early and cause bus traffic reading an already passed
				824	* semaphore).
				825	*
				826	* See the are-we-too-late? check in __i915_request_submit().
				827	*/
				828	return rq->sched.semaphores \| rq->engine->saturated;
				829	}
				830
				831	static int
				832	emit_semaphore_wait(struct i915_request *to,
				833	struct i915_request *from,
				834	gfp_t gfp)
				835	{
				836	u32 hwsp_offset;
				837	u32 *cs;
				838	int err;
				839
				840	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
				841	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
				842
				843	/* Just emit the first semaphore we see as request space is limited. */
				844	if (already_busywaiting(to) & from->engine->mask)
				845	return i915_sw_fence_await_dma_fence(&to->submit,
				846	&from->fence, 0,
				847	I915_FENCE_GFP);
				848
				849	err = i915_request_await_start(to, from);
				850	if (err < 0)
				851	return err;
				852
				853	/* Only submit our spinner after the signaler is running! */
				854	err = __i915_request_await_execution(to, from, NULL, gfp);
				855	if (err)
				856	return err;
				857
				858	/* We need to pin the signaler's HWSP until we are finished reading. */
				859	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
				860	if (err)
				861	return err;
				862
				863	cs = intel_ring_begin(to, 4);
				864	if (IS_ERR(cs))
				865	return PTR_ERR(cs);
				866
				867	/*
				868	* Using greater-than-or-equal here means we have to worry
				869	* about seqno wraparound. To side step that issue, we swap
				870	* the timeline HWSP upon wrapping, so that everyone listening
				871	* for the old (pre-wrap) values do not see the much smaller
				872	* (post-wrap) values than they were expecting (and so wait
				873	* forever).
				874	*/
				875	*cs++ = MI_SEMAPHORE_WAIT \|
				876	MI_SEMAPHORE_GLOBAL_GTT \|
				877	MI_SEMAPHORE_POLL \|
				878	MI_SEMAPHORE_SAD_GTE_SDD;
				879	*cs++ = from->fence.seqno;
				880	*cs++ = hwsp_offset;
				881	*cs++ = 0;
				882
				883	intel_ring_advance(to, cs);
				884	to->sched.semaphores \|= from->engine->mask;
				885	to->sched.flags \|= I915_SCHED_HAS_SEMAPHORE_CHAIN;
				886	return 0;
				887	}
				888
				889	static int
				890	i915_request_await_request(struct i915_request to, struct i915_request from)
				891	{
				892	int ret;
				893
				894	GEM_BUG_ON(to == from);
				895	GEM_BUG_ON(to->timeline == from->timeline);
				896
				897	if (i915_request_completed(from)) {
				898	i915_sw_fence_set_error_once(&to->submit, from->fence.error);
				899	return 0;
				900	}
				901
				902	if (to->engine->schedule) {
				903	ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
				904	if (ret < 0)
				905	return ret;
				906	}
				907
				908	if (to->engine == from->engine) {
				909	ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
				910	&from->submit,
				911	I915_FENCE_GFP);
				912	} else if (intel_engine_has_semaphores(to->engine) &&
				913	to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
				914	ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
				915	} else {
				916	ret = i915_sw_fence_await_dma_fence(&to->submit,
				917	&from->fence, 0,
				918	I915_FENCE_GFP);
				919	}
				920	if (ret < 0)
				921	return ret;
				922
				923	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
				924	ret = i915_sw_fence_await_dma_fence(&to->semaphore,
				925	&from->fence, 0,
				926	I915_FENCE_GFP);
				927	if (ret < 0)
				928	return ret;
				929	}
				930
				931	return 0;
				932	}
				933
				934	int
				935	i915_request_await_dma_fence(struct i915_request rq, struct dma_fence fence)
				936	{
				937	struct dma_fence **child = &fence;
				938	unsigned int nchild = 1;
				939	int ret;
				940
				941	/*
				942	* Note that if the fence-array was created in signal-on-any mode,
				943	* we should not decompose it into its individual fences. However,
				944	* we don't currently store which mode the fence-array is operating
				945	* in. Fortunately, the only user of signal-on-any is private to
				946	* amdgpu and we should not see any incoming fence-array from
				947	* sync-file being in signal-on-any mode.
				948	*/
				949	if (dma_fence_is_array(fence)) {
				950	struct dma_fence_array *array = to_dma_fence_array(fence);
				951
				952	child = array->fences;
				953	nchild = array->num_fences;
				954	GEM_BUG_ON(!nchild);
				955	}
				956
				957	do {
				958	fence = *child++;
				959	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
				960	continue;
				961
				962	/*
				963	* Requests on the same timeline are explicitly ordered, along
				964	* with their dependencies, by i915_request_add() which ensures
				965	* that requests are submitted in-order through each ring.
				966	*/
				967	if (fence->context == rq->fence.context)
				968	continue;
				969
				970	/* Squash repeated waits to the same timelines */
				971	if (fence->context &&
				972	intel_timeline_sync_is_later(rq->timeline, fence))
				973	continue;
				974
				975	if (dma_fence_is_i915(fence))
				976	ret = i915_request_await_request(rq, to_request(fence));
				977	else
				978	ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
				979	I915_FENCE_TIMEOUT,
				980	I915_FENCE_GFP);
				981	if (ret < 0)
				982	return ret;
				983
				984	/* Record the latest fence used against each timeline */
				985	if (fence->context)
				986	intel_timeline_sync_set(rq->timeline, fence);
				987	} while (--nchild);
				988
				989	return 0;
				990	}
				991
				992	int
				993	i915_request_await_execution(struct i915_request *rq,
				994	struct dma_fence *fence,
				995	void (hook)(struct i915_request rq,
				996	struct dma_fence *signal))
				997	{
				998	struct dma_fence **child = &fence;
				999	unsigned int nchild = 1;
				1000	int ret;
				1001
				1002	if (dma_fence_is_array(fence)) {
				1003	struct dma_fence_array *array = to_dma_fence_array(fence);
				1004
				1005	/* XXX Error for signal-on-any fence arrays */
				1006
				1007	child = array->fences;
				1008	nchild = array->num_fences;
				1009	GEM_BUG_ON(!nchild);
				1010	}
				1011
				1012	do {
				1013	fence = *child++;
				1014	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
				1015	continue;
				1016
				1017	/*
				1018	* We don't squash repeated fence dependencies here as we
				1019	* want to run our callback in all cases.
				1020	*/
				1021
				1022	if (dma_fence_is_i915(fence))
				1023	ret = __i915_request_await_execution(rq,
				1024	to_request(fence),
				1025	hook,
				1026	I915_FENCE_GFP);
				1027	else
				1028	ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
				1029	I915_FENCE_TIMEOUT,
				1030	GFP_KERNEL);
				1031	if (ret < 0)
				1032	return ret;
				1033	} while (--nchild);
				1034
				1035	return 0;
				1036	}
				1037
				1038	/**
				1039	* i915_request_await_object - set this request to (async) wait upon a bo
				1040	* @to: request we are wishing to use
				1041	* @obj: object which may be in use on another ring.
				1042	* @write: whether the wait is on behalf of a writer
				1043	*
				1044	* This code is meant to abstract object synchronization with the GPU.
				1045	* Conceptually we serialise writes between engines inside the GPU.
				1046	* We only allow one engine to write into a buffer at any time, but
				1047	* multiple readers. To ensure each has a coherent view of memory, we must:
				1048	*
				1049	* - If there is an outstanding write request to the object, the new
				1050	* request must wait for it to complete (either CPU or in hw, requests
				1051	* on the same ring will be naturally ordered).
				1052	*
				1053	* - If we are a write request (pending_write_domain is set), the new
				1054	* request must wait for outstanding read requests to complete.
				1055	*
				1056	* Returns 0 if successful, else propagates up the lower layer error.
				1057	*/
				1058	int
				1059	i915_request_await_object(struct i915_request *to,
				1060	struct drm_i915_gem_object *obj,
				1061	bool write)
				1062	{
				1063	struct dma_fence *excl;
				1064	int ret = 0;
				1065
				1066	if (write) {
				1067	struct dma_fence **shared;
				1068	unsigned int count, i;
				1069
				1070	ret = dma_resv_get_fences_rcu(obj->base.resv,
				1071	&excl, &count, &shared);
				1072	if (ret)
				1073	return ret;
				1074
				1075	for (i = 0; i < count; i++) {
				1076	ret = i915_request_await_dma_fence(to, shared[i]);
				1077	if (ret)
				1078	break;
				1079
				1080	dma_fence_put(shared[i]);
				1081	}
				1082
				1083	for (; i < count; i++)
				1084	dma_fence_put(shared[i]);
				1085	kfree(shared);
				1086	} else {
				1087	excl = dma_resv_get_excl_rcu(obj->base.resv);
				1088	}
				1089
				1090	if (excl) {
				1091	if (ret == 0)
				1092	ret = i915_request_await_dma_fence(to, excl);
				1093
				1094	dma_fence_put(excl);
				1095	}
				1096
				1097	return ret;
				1098	}
				1099
				1100	void i915_request_skip(struct i915_request *rq, int error)
				1101	{
				1102	void *vaddr = rq->ring->vaddr;
				1103	u32 head;
				1104
				1105	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
				1106	dma_fence_set_error(&rq->fence, error);
				1107
				1108	if (rq->infix == rq->postfix)
				1109	return;
				1110
				1111	/*
				1112	* As this request likely depends on state from the lost
				1113	* context, clear out all the user operations leaving the
				1114	* breadcrumb at the end (so we get the fence notifications).
				1115	*/
				1116	head = rq->infix;
				1117	if (rq->postfix < head) {
				1118	memset(vaddr + head, 0, rq->ring->size - head);
				1119	head = 0;
				1120	}
				1121	memset(vaddr + head, 0, rq->postfix - head);
				1122	rq->infix = rq->postfix;
				1123	}
				1124
				1125	static struct i915_request *
				1126	__i915_request_add_to_timeline(struct i915_request *rq)
				1127	{
				1128	struct intel_timeline *timeline = rq->timeline;
				1129	struct i915_request *prev;
				1130
				1131	/*
				1132	* Dependency tracking and request ordering along the timeline
				1133	* is special cased so that we can eliminate redundant ordering
				1134	* operations while building the request (we know that the timeline
				1135	* itself is ordered, and here we guarantee it).
				1136	*
				1137	* As we know we will need to emit tracking along the timeline,
				1138	* we embed the hooks into our request struct -- at the cost of
				1139	* having to have specialised no-allocation interfaces (which will
				1140	* be beneficial elsewhere).
				1141	*
				1142	* A second benefit to open-coding i915_request_await_request is
				1143	* that we can apply a slight variant of the rules specialised
				1144	* for timelines that jump between engines (such as virtual engines).
				1145	* If we consider the case of virtual engine, we must emit a dma-fence
				1146	* to prevent scheduling of the second request until the first is
				1147	* complete (to maximise our greedy late load balancing) and this
				1148	* precludes optimising to use semaphores serialisation of a single
				1149	* timeline across engines.
				1150	*/
				1151	prev = rcu_dereference_protected(timeline->last_request.request,
				1152	lockdep_is_held(&timeline->mutex));
				1153	if (prev && !i915_request_completed(prev)) {
				1154	if (is_power_of_2(prev->engine->mask \| rq->engine->mask))
				1155	i915_sw_fence_await_sw_fence(&rq->submit,
				1156	&prev->submit,
				1157	&rq->submitq);
				1158	else
				1159	__i915_sw_fence_await_dma_fence(&rq->submit,
				1160	&prev->fence,
				1161	&rq->dmaq);
				1162	if (rq->engine->schedule)
				1163	__i915_sched_node_add_dependency(&rq->sched,
				1164	&prev->sched,
				1165	&rq->dep,
				1166	0);
				1167	}
				1168
				1169	list_add_tail(&rq->link, &timeline->requests);
				1170
				1171	/*
				1172	* Make sure that no request gazumped us - if it was allocated after
				1173	* our i915_request_alloc() and called __i915_request_add() before
				1174	* us, the timeline will hold its seqno which is later than ours.
				1175	*/
				1176	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
				1177	__i915_active_request_set(&timeline->last_request, rq);
				1178
				1179	return prev;
				1180	}
				1181
				1182	/*
				1183	* NB: This function is not allowed to fail. Doing so would mean the the
				1184	* request is not being tracked for completion but the work itself is
				1185	* going to happen on the hardware. This would be a Bad Thing(tm).
				1186	*/
				1187	struct i915_request __i915_request_commit(struct i915_request rq)
				1188	{
				1189	struct intel_engine_cs *engine = rq->engine;
				1190	struct intel_ring *ring = rq->ring;
				1191	u32 *cs;
				1192
				1193	GEM_TRACE("%s fence %llx:%lld\n",
				1194	engine->name, rq->fence.context, rq->fence.seqno);
				1195
				1196	/*
				1197	* To ensure that this call will not fail, space for its emissions
				1198	* should already have been reserved in the ring buffer. Let the ring
				1199	* know that it is time to use that space up.
				1200	*/
				1201	GEM_BUG_ON(rq->reserved_space > ring->space);
				1202	rq->reserved_space = 0;
				1203	rq->emitted_jiffies = jiffies;
				1204
				1205	/*
				1206	* Record the position of the start of the breadcrumb so that
				1207	* should we detect the updated seqno part-way through the
				1208	* GPU processing the request, we never over-estimate the
				1209	* position of the ring's HEAD.
				1210	*/
				1211	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
				1212	GEM_BUG_ON(IS_ERR(cs));
				1213	rq->postfix = intel_ring_offset(rq, cs);
				1214
				1215	return __i915_request_add_to_timeline(rq);
				1216	}
				1217
				1218	void __i915_request_queue(struct i915_request *rq,
				1219	const struct i915_sched_attr *attr)
				1220	{
				1221	/*
				1222	* Let the backend know a new request has arrived that may need
				1223	* to adjust the existing execution schedule due to a high priority
				1224	* request - i.e. we may want to preempt the current request in order
				1225	* to run a high priority dependency chain before we can execute this
				1226	* request.
				1227	*
				1228	* This is called before the request is ready to run so that we can
				1229	* decide whether to preempt the entire chain so that it is ready to
				1230	* run at the earliest possible convenience.
				1231	*/
				1232	if (attr && rq->engine->schedule)
				1233	rq->engine->schedule(rq, attr);
				1234	i915_sw_fence_commit(&rq->semaphore);
				1235	i915_sw_fence_commit(&rq->submit);
				1236	}
				1237
				1238	void i915_request_add(struct i915_request *rq)
				1239	{
				1240	struct i915_sched_attr attr = rq->gem_context->sched;
				1241	struct intel_timeline * const tl = rq->timeline;
				1242	struct i915_request *prev;
				1243
				1244	lockdep_assert_held(&tl->mutex);
				1245	lockdep_unpin_lock(&tl->mutex, rq->cookie);
				1246
				1247	trace_i915_request_add(rq);
				1248
				1249	prev = __i915_request_commit(rq);
				1250
				1251	/*
				1252	* Boost actual workloads past semaphores!
				1253	*
				1254	* With semaphores we spin on one engine waiting for another,
				1255	* simply to reduce the latency of starting our work when
				1256	* the signaler completes. However, if there is any other
				1257	* work that we could be doing on this engine instead, that
				1258	* is better utilisation and will reduce the overall duration
				1259	* of the current work. To avoid PI boosting a semaphore
				1260	* far in the distance past over useful work, we keep a history
				1261	* of any semaphore use along our dependency chain.
				1262	*/
				1263	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
				1264	attr.priority \|= I915_PRIORITY_NOSEMAPHORE;
				1265
				1266	/*
				1267	* Boost priorities to new clients (new request flows).
				1268	*
				1269	* Allow interactive/synchronous clients to jump ahead of
				1270	* the bulk clients. (FQ_CODEL)
				1271	*/
				1272	if (list_empty(&rq->sched.signalers_list))
				1273	attr.priority \|= I915_PRIORITY_WAIT;
				1274
				1275	local_bh_disable();
				1276	__i915_request_queue(rq, &attr);
				1277	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
				1278
				1279	/*
				1280	* In typical scenarios, we do not expect the previous request on
				1281	* the timeline to be still tracked by timeline->last_request if it
				1282	* has been completed. If the completed request is still here, that
				1283	* implies that request retirement is a long way behind submission,
				1284	* suggesting that we haven't been retiring frequently enough from
				1285	* the combination of retire-before-alloc, waiters and the background
				1286	* retirement worker. So if the last request on this timeline was
				1287	* already completed, do a catch up pass, flushing the retirement queue
				1288	* up to this client. Since we have now moved the heaviest operations
				1289	* during retirement onto secondary workers, such as freeing objects
				1290	* or contexts, retiring a bunch of requests is mostly list management
				1291	* (and cache misses), and so we should not be overly penalizing this
				1292	* client by performing excess work, though we may still performing
				1293	* work on behalf of others -- but instead we should benefit from
				1294	* improved resource management. (Well, that's the theory at least.)
				1295	*/
				1296	if (prev && i915_request_completed(prev) && prev->timeline == tl)
				1297	i915_request_retire_upto(prev);
				1298
				1299	mutex_unlock(&tl->mutex);
				1300	}
				1301
				1302	static unsigned long local_clock_us(unsigned int *cpu)
				1303	{
				1304	unsigned long t;
				1305
				1306	/*
				1307	* Cheaply and approximately convert from nanoseconds to microseconds.
				1308	* The result and subsequent calculations are also defined in the same
				1309	* approximate microseconds units. The principal source of timing
				1310	* error here is from the simple truncation.
				1311	*
				1312	* Note that local_clock() is only defined wrt to the current CPU;
				1313	* the comparisons are no longer valid if we switch CPUs. Instead of
				1314	* blocking preemption for the entire busywait, we can detect the CPU
				1315	* switch and use that as indicator of system load and a reason to
				1316	* stop busywaiting, see busywait_stop().
				1317	*/
				1318	*cpu = get_cpu();
				1319	t = local_clock() >> 10;
				1320	put_cpu();
				1321
				1322	return t;
				1323	}
				1324
				1325	static bool busywait_stop(unsigned long timeout, unsigned int cpu)
				1326	{
				1327	unsigned int this_cpu;
				1328
				1329	if (time_after(local_clock_us(&this_cpu), timeout))
				1330	return true;
				1331
				1332	return this_cpu != cpu;
				1333	}
				1334
				1335	static bool __i915_spin_request(const struct i915_request * const rq,
				1336	int state, unsigned long timeout_us)
				1337	{
				1338	unsigned int cpu;
				1339
				1340	/*
				1341	* Only wait for the request if we know it is likely to complete.
				1342	*
				1343	* We don't track the timestamps around requests, nor the average
				1344	* request length, so we do not have a good indicator that this
				1345	* request will complete within the timeout. What we do know is the
				1346	* order in which requests are executed by the context and so we can
				1347	* tell if the request has been started. If the request is not even
				1348	* running yet, it is a fair assumption that it will not complete
				1349	* within our relatively short timeout.
				1350	*/
				1351	if (!i915_request_is_running(rq))
				1352	return false;
				1353
				1354	/*
				1355	* When waiting for high frequency requests, e.g. during synchronous
				1356	* rendering split between the CPU and GPU, the finite amount of time
				1357	* required to set up the irq and wait upon it limits the response
				1358	* rate. By busywaiting on the request completion for a short while we
				1359	* can service the high frequency waits as quick as possible. However,
				1360	* if it is a slow request, we want to sleep as quickly as possible.
				1361	* The tradeoff between waiting and sleeping is roughly the time it
				1362	* takes to sleep on a request, on the order of a microsecond.
				1363	*/
				1364
				1365	timeout_us += local_clock_us(&cpu);
				1366	do {
				1367	if (i915_request_completed(rq))
				1368	return true;
				1369
				1370	if (signal_pending_state(state, current))
				1371	break;
				1372
				1373	if (busywait_stop(timeout_us, cpu))
				1374	break;
				1375
				1376	cpu_relax();
				1377	} while (!need_resched());
				1378
				1379	return false;
				1380	}
				1381
				1382	struct request_wait {
				1383	struct dma_fence_cb cb;
				1384	struct task_struct *tsk;
				1385	};
				1386
				1387	static void request_wait_wake(struct dma_fence fence, struct dma_fence_cb cb)
				1388	{
				1389	struct request_wait wait = container_of(cb, typeof(wait), cb);
				1390
				1391	wake_up_process(wait->tsk);
				1392	}
				1393
				1394	/**
				1395	* i915_request_wait - wait until execution of request has finished
				1396	* @rq: the request to wait upon
				1397	* @flags: how to wait
				1398	* @timeout: how long to wait in jiffies
				1399	*
				1400	* i915_request_wait() waits for the request to be completed, for a
				1401	* maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
				1402	* unbounded wait).
				1403	*
				1404	* Returns the remaining time (in jiffies) if the request completed, which may
				1405	* be zero or -ETIME if the request is unfinished after the timeout expires.
				1406	* May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
				1407	* pending before the request completes.
				1408	*/
				1409	long i915_request_wait(struct i915_request *rq,
				1410	unsigned int flags,
				1411	long timeout)
				1412	{
				1413	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
				1414	TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
				1415	struct request_wait wait;
				1416
				1417	might_sleep();
				1418	GEM_BUG_ON(timeout < 0);
				1419
				1420	if (dma_fence_is_signaled(&rq->fence))
				1421	return timeout;
				1422
				1423	if (!timeout)
				1424	return -ETIME;
				1425
				1426	trace_i915_request_wait_begin(rq, flags);
				1427
				1428	/*
				1429	* We must never wait on the GPU while holding a lock as we
				1430	* may need to perform a GPU reset. So while we don't need to
				1431	* serialise wait/reset with an explicit lock, we do want
				1432	* lockdep to detect potential dependency cycles.
				1433	*/
				1434	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
				1435
				1436	/*
				1437	* Optimistic spin before touching IRQs.
				1438	*
				1439	* We may use a rather large value here to offset the penalty of
				1440	* switching away from the active task. Frequently, the client will
				1441	* wait upon an old swapbuffer to throttle itself to remain within a
				1442	* frame of the gpu. If the client is running in lockstep with the gpu,
				1443	* then it should not be waiting long at all, and a sleep now will incur
				1444	* extra scheduler latency in producing the next frame. To try to
				1445	* avoid adding the cost of enabling/disabling the interrupt to the
				1446	* short wait, we first spin to see if the request would have completed
				1447	* in the time taken to setup the interrupt.
				1448	*
				1449	* We need upto 5us to enable the irq, and upto 20us to hide the
				1450	* scheduler latency of a context switch, ignoring the secondary
				1451	* impacts from a context switch such as cache eviction.
				1452	*
				1453	* The scheme used for low-latency IO is called "hybrid interrupt
				1454	* polling". The suggestion there is to sleep until just before you
				1455	* expect to be woken by the device interrupt and then poll for its
				1456	* completion. That requires having a good predictor for the request
				1457	* duration, which we currently lack.
				1458	*/
				1459	if (CONFIG_DRM_I915_SPIN_REQUEST &&
				1460	__i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
				1461	dma_fence_signal(&rq->fence);
				1462	goto out;
				1463	}
				1464
				1465	/*
				1466	* This client is about to stall waiting for the GPU. In many cases
				1467	* this is undesirable and limits the throughput of the system, as
				1468	* many clients cannot continue processing user input/output whilst
				1469	* blocked. RPS autotuning may take tens of milliseconds to respond
				1470	* to the GPU load and thus incurs additional latency for the client.
				1471	* We can circumvent that by promoting the GPU frequency to maximum
				1472	* before we sleep. This makes the GPU throttle up much more quickly
				1473	* (good for benchmarks and user experience, e.g. window animations),
				1474	* but at a cost of spending more power processing the workload
				1475	* (bad for battery).
				1476	*/
				1477	if (flags & I915_WAIT_PRIORITY) {
				1478	if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
				1479	gen6_rps_boost(rq);
				1480	i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
				1481	}
				1482
				1483	wait.tsk = current;
				1484	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
				1485	goto out;
				1486
				1487	for (;;) {
				1488	set_current_state(state);
				1489
				1490	if (i915_request_completed(rq)) {
				1491	dma_fence_signal(&rq->fence);
				1492	break;
				1493	}
				1494
				1495	if (signal_pending_state(state, current)) {
				1496	timeout = -ERESTARTSYS;
				1497	break;
				1498	}
				1499
				1500	if (!timeout) {
				1501	timeout = -ETIME;
				1502	break;
				1503	}
				1504
				1505	timeout = io_schedule_timeout(timeout);
				1506	}
				1507	__set_current_state(TASK_RUNNING);
				1508
				1509	dma_fence_remove_callback(&rq->fence, &wait.cb);
				1510
				1511	out:
				1512	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
				1513	trace_i915_request_wait_end(rq);
				1514	return timeout;
				1515	}
				1516
				1517	bool i915_retire_requests(struct drm_i915_private *i915)
				1518	{
				1519	struct intel_gt_timelines *timelines = &i915->gt.timelines;
				1520	struct intel_timeline tl, tn;
				1521	unsigned long flags;
				1522	LIST_HEAD(free);
				1523
				1524	spin_lock_irqsave(&timelines->lock, flags);
				1525	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
				1526	if (!mutex_trylock(&tl->mutex))
				1527	continue;
				1528
				1529	intel_timeline_get(tl);
				1530	GEM_BUG_ON(!tl->active_count);
				1531	tl->active_count++; /* pin the list element */
				1532	spin_unlock_irqrestore(&timelines->lock, flags);
				1533
				1534	retire_requests(tl);
				1535
				1536	spin_lock_irqsave(&timelines->lock, flags);
				1537
				1538	/* Resume iteration after dropping lock */
				1539	list_safe_reset_next(tl, tn, link);
				1540	if (!--tl->active_count)
				1541	list_del(&tl->link);
				1542
				1543	mutex_unlock(&tl->mutex);
				1544
				1545	/* Defer the final release to after the spinlock */
				1546	if (refcount_dec_and_test(&tl->kref.refcount)) {
				1547	GEM_BUG_ON(tl->active_count);
				1548	list_add(&tl->link, &free);
				1549	}
				1550	}
				1551	spin_unlock_irqrestore(&timelines->lock, flags);
				1552
				1553	list_for_each_entry_safe(tl, tn, &free, link)
				1554	__intel_timeline_free(&tl->kref);
				1555
				1556	return !list_empty(&timelines->active_list);
				1557	}
				1558
				1559	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
				1560	#include "selftests/mock_request.c"
				1561	#include "selftests/i915_request.c"
				1562	#endif
				1563
				1564	static void i915_global_request_shrink(void)
				1565	{
				1566	kmem_cache_shrink(global.slab_dependencies);
				1567	kmem_cache_shrink(global.slab_execute_cbs);
				1568	kmem_cache_shrink(global.slab_requests);
				1569	}
				1570
				1571	static void i915_global_request_exit(void)
				1572	{
				1573	kmem_cache_destroy(global.slab_dependencies);
				1574	kmem_cache_destroy(global.slab_execute_cbs);
				1575	kmem_cache_destroy(global.slab_requests);
				1576	}
				1577
				1578	static struct i915_global_request global = { {
				1579	.shrink = i915_global_request_shrink,
				1580	.exit = i915_global_request_exit,
				1581	} };
				1582
				1583	int __init i915_global_request_init(void)
				1584	{
				1585	global.slab_requests = KMEM_CACHE(i915_request,
				1586	SLAB_HWCACHE_ALIGN \|
				1587	SLAB_RECLAIM_ACCOUNT \|
				1588	SLAB_TYPESAFE_BY_RCU);
				1589	if (!global.slab_requests)
				1590	return -ENOMEM;
				1591
				1592	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
				1593	SLAB_HWCACHE_ALIGN \|
				1594	SLAB_RECLAIM_ACCOUNT \|
				1595	SLAB_TYPESAFE_BY_RCU);
				1596	if (!global.slab_execute_cbs)
				1597	goto err_requests;
				1598
				1599	global.slab_dependencies = KMEM_CACHE(i915_dependency,
				1600	SLAB_HWCACHE_ALIGN \|
				1601	SLAB_RECLAIM_ACCOUNT);
				1602	if (!global.slab_dependencies)
				1603	goto err_execute_cbs;
				1604
				1605	i915_global_register(&global.base);
				1606	return 0;
				1607
				1608	err_execute_cbs:
				1609	kmem_cache_destroy(global.slab_execute_cbs);
				1610	err_requests:
				1611	kmem_cache_destroy(global.slab_requests);
				1612	return -ENOMEM;
				1613	}