/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prefetch.h>
#include <linux/dma-fence-array.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "i915_drv.h"

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	return to_request(fence)->timeline->name;
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return intel_engine_enable_signaling(to_request(fence), true);
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence), interruptible, timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);

	kmem_cache_free(rq->i915->requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};
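
/*
 * A minimal usage sketch (illustrative, not part of the driver): these ops
 * are never called directly. They are bound to a request's fence by
 * dma_fence_init() in i915_request_alloc() below, and any waiter, i915 or
 * foreign, goes through the dma-fence core, roughly:
 *
 *	dma_fence_get(&rq->fence);
 *	ret = dma_fence_wait_timeout(&rq->fence, true, HZ);
 *	dma_fence_put(&rq->fence);
 *
 * where ret is the remaining jiffies, 0 on timeout, or -ERESTARTSYS if the
 * interruptible wait was broken by a signal. The core dispatches to
 * i915_fence_enable_signaling() and i915_fence_wait() above.
 */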

static inline void
i915_request_remove_from_client(struct i915_request *request)
{
	struct drm_i915_file_private *file_priv;

	file_priv = request->file_priv;
	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_link);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static struct i915_dependency *
i915_dependency_alloc(struct drm_i915_private *i915)
{
	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct drm_i915_private *i915,
		     struct i915_dependency *dep)
{
	kmem_cache_free(i915->dependencies, dep);
}

static void
__i915_sched_node_add_dependency(struct i915_sched_node *node,
				 struct i915_sched_node *signal,
				 struct i915_dependency *dep,
				 unsigned long flags)
{
	INIT_LIST_HEAD(&dep->dfs_link);
	list_add(&dep->wait_link, &signal->waiters_list);
	list_add(&dep->signal_link, &node->signalers_list);
	dep->signaler = signal;
	dep->flags = flags;
}

static int
i915_sched_node_add_dependency(struct drm_i915_private *i915,
			       struct i915_sched_node *node,
			       struct i915_sched_node *signal)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc(i915);
	if (!dep)
		return -ENOMEM;

	__i915_sched_node_add_dependency(node, signal, dep,
					 I915_DEPENDENCY_ALLOC);
	return 0;
}

static void
i915_sched_node_fini(struct drm_i915_private *i915,
		     struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	GEM_BUG_ON(!list_empty(&node->link));

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}
}

static void
i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);
	node->attr.priority = I915_PRIORITY_INVALID;
}

static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
	struct intel_engine_cs *engine;
	struct i915_timeline *timeline;
	enum intel_engine_id id;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	ret = i915_gem_wait_for_idle(i915,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED);
	if (ret)
		return ret;

	GEM_BUG_ON(i915->gt.active_requests);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	for_each_engine(engine, i915, id) {
		GEM_TRACE("%s seqno %d (current %d) -> %d\n",
			  engine->name,
			  engine->timeline.seqno,
			  intel_engine_get_seqno(engine),
			  seqno);

		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
			/* Flush any waiters before we reuse the seqno */
			intel_engine_disarm_breadcrumbs(engine);
			intel_engine_init_hangcheck(engine);
			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
		}

		/* Check we are idle before we fiddle with hw state! */
		GEM_BUG_ON(!intel_engine_is_idle(engine));
		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));

		/* Finally reset hw state */
		intel_engine_init_global_seqno(engine, seqno);
		engine->timeline.seqno = seqno;
	}

	list_for_each_entry(timeline, &i915->gt.timelines, link)
		memset(timeline->global_sync, 0, sizeof(timeline->global_sync));

	i915->gt.request_serial = seqno;

	return 0;
}

int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *i915 = to_i915(dev);

	lockdep_assert_held(&i915->drm.struct_mutex);

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we will inject to ring */
	return reset_all_global_seqno(i915, seqno - 1);
}
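
/*
 * Worked example (illustrative): writing seqno == 0x1000 stores 0xfff as
 * the current seqno, so the first breadcrumb emitted afterwards, 0x1000,
 * still reads as outstanding to i915_seqno_passed().
 */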

static int reserve_gt(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Reservation is fine until we may need to wrap around
	 *
	 * By incrementing the serial for every request, we know that no
	 * individual engine may exceed that serial (as each is reset to 0
	 * on any wrap). This protects even the most pessimistic of migrations
	 * of every request from all engines onto just one.
	 */
	while (unlikely(++i915->gt.request_serial == 0)) {
		ret = reset_all_global_seqno(i915, 0);
		if (ret) {
			i915->gt.request_serial--;
			return ret;
		}
	}

	if (!i915->gt.active_requests++)
		i915_gem_unpark(i915);

	return 0;
}
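
/*
 * Illustrative example: with three engines and ten requests submitted in
 * total, gt.request_serial is 10 and no engine's timeline.seqno can exceed
 * 10, even if all ten requests ran on a single engine. Wrapping of this
 * one serial is therefore a safe trigger for resetting every engine
 * timeline via reset_all_global_seqno().
 */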

static void unreserve_gt(struct drm_i915_private *i915)
{
	GEM_BUG_ON(!i915->gt.active_requests);
	if (!--i915->gt.active_requests)
		i915_gem_park(i915);
}

void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct i915_request *request)
{
	/* Space left intentionally blank */
}

static void advance_ring(struct i915_request *request)
{
	struct intel_ring *ring = request->ring;
	unsigned int tail;

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
	if (list_is_last(&request->ring_link, &ring->request_list)) {
		/*
		 * We may race here with execlists resubmitting this request
		 * as we retire it. The resubmission will move the ring->tail
		 * forwards (to request->wa_tail). We either read the
		 * current value that was written to hw, or the value that
		 * is just about to be. Either works, if we miss the last two
		 * noops - they are safe to be replayed on a reset.
		 */
		GEM_TRACE("marking %s as inactive\n", ring->timeline->name);
		tail = READ_ONCE(request->tail);
		list_del(&ring->active_link);
	} else {
		tail = request->postfix;
	}
	list_del_init(&request->ring_link);

	ring->head = tail;
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = request->capture_list;
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}

static void __retire_engine_request(struct intel_engine_cs *engine,
				    struct i915_request *rq)
{
	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
		  __func__, engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
		  intel_engine_get_seqno(engine));

	GEM_BUG_ON(!i915_request_completed(rq));

	local_irq_disable();

	spin_lock(&engine->timeline.lock);
	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
	list_del_init(&rq->link);
	spin_unlock(&engine->timeline.lock);

	spin_lock(&rq->lock);
	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		intel_engine_cancel_signaling(rq);
	if (rq->waitboost) {
		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
	}
	spin_unlock(&rq->lock);

	local_irq_enable();

	/*
	 * The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. However, since we
	 * cannot take the required locks at i915_request_submit() we
	 * defer the unpinning of the active context to now, retirement of
	 * the subsequent request.
	 */
	if (engine->last_retired_context)
		intel_context_unpin(engine->last_retired_context);
	engine->last_retired_context = rq->hw_context;
}

static void __retire_engine_upto(struct intel_engine_cs *engine,
				 struct i915_request *rq)
{
	struct i915_request *tmp;

	if (list_empty(&rq->link))
		return;

	do {
		tmp = list_first_entry(&engine->timeline.requests,
				       typeof(*tmp), link);

		GEM_BUG_ON(tmp->engine != engine);
		__retire_engine_request(engine, tmp);
	} while (tmp != rq);
}

static void i915_request_retire(struct i915_request *request)
{
	struct i915_gem_active *active, *next;

	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
		  request->engine->name,
		  request->fence.context, request->fence.seqno,
		  request->global_seqno,
		  intel_engine_get_seqno(request->engine));

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
	GEM_BUG_ON(!i915_request_completed(request));

	trace_i915_request_retire(request);

	advance_ring(request);
	free_capture_list(request);

	/*
	 * Walk through the active list, calling retire on each. This allows
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &request->active_list, link) {
		/*
		 * In microbenchmarks or focusing upon time inside the kernel,
		 * we may spend an inordinate amount of time simply handling
		 * the retirement of requests and processing their callbacks.
		 * Of which, this loop itself is particularly hot due to the
		 * cache misses when jumping around the list of i915_gem_active.
		 * So we try to keep this loop as streamlined as possible and
		 * also prefetch the next i915_gem_active to try and hide
		 * the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
		RCU_INIT_POINTER(active->request, NULL);

		active->retire(active, request);
	}

	i915_request_remove_from_client(request);

	/* Retirement decays the ban score as it is a sign of ctx progress */
	atomic_dec_if_positive(&request->gem_context->ban_score);
	intel_context_unpin(request->hw_context);

	__retire_engine_upto(request->engine, request);

	unreserve_gt(request->i915);

	i915_sched_node_fini(request->i915, &request->sched);
	i915_request_put(request);
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_ring *ring = rq->ring;
	struct i915_request *tmp;

	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
		  intel_engine_get_seqno(rq->engine));

	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_request_completed(rq));

	if (list_empty(&rq->ring_link))
		return;

	do {
		tmp = list_first_entry(&ring->request_list,
				       typeof(*tmp), ring_link);

		i915_request_retire(tmp);
	} while (tmp != rq);
}

static u32 timeline_get_seqno(struct i915_timeline *tl)
{
	return ++tl->seqno;
}

static void move_to_timeline(struct i915_request *request,
			     struct i915_timeline *timeline)
{
	GEM_BUG_ON(request->timeline == &request->engine->timeline);
	lockdep_assert_held(&request->engine->timeline.lock);

	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
	list_move_tail(&request->link, &timeline->requests);
	spin_unlock(&request->timeline->lock);
}

void __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	u32 seqno;

	GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  engine->timeline.seqno + 1,
		  intel_engine_get_seqno(engine));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->timeline.lock);

	GEM_BUG_ON(request->global_seqno);

	seqno = timeline_get_seqno(&engine->timeline);
	GEM_BUG_ON(!seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = seqno;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_enable_signaling(request, false);
	spin_unlock(&request->lock);

	engine->emit_breadcrumb(request,
				request->ring->vaddr + request->postfix);

	/* Transfer from per-context onto the global per-engine timeline */
	move_to_timeline(request, &engine->timeline);

	trace_i915_request_execute(request);

	wake_up_all(&request->execute);
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline.lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->timeline.lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  request->global_seqno,
		  intel_engine_get_seqno(engine));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->timeline.lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */
	GEM_BUG_ON(!request->global_seqno);
	GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
				     request->global_seqno));
	engine->timeline.seqno--;

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = 0;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_cancel_signaling(request);
	spin_unlock(&request->lock);

	/* Transfer back from the global per-engine timeline to per-context */
	move_to_timeline(request, request->timeline);

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a new
	 * global_seqno compared to the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline.lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->timeline.lock, flags);
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);
		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged().  We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct i915_request *
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_request *rq;
	struct intel_context *ce;
	int ret;

	lockdep_assert_held(&i915->drm.struct_mutex);

	/*
	 * Preempt contexts are reserved for exclusive use to inject a
	 * preemption context switch. They are never to be used for any trivial
	 * request!
	 */
	GEM_BUG_ON(ctx == i915->preempt_context);

	/*
	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged.
	 */
	if (i915_terminally_wedged(&i915->gpu_error))
		return ERR_PTR(-EIO);

	/*
	 * Pinning the contexts may generate requests in order to acquire
	 * GGTT space, so do this first before we reserve a seqno for
	 * ourselves.
	 */
	ce = intel_context_pin(ctx, engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	ret = reserve_gt(i915);
	if (ret)
		goto err_unpin;

	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
	if (ret)
		goto err_unreserve;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
	    i915_request_completed(rq))
		i915_request_retire(rq);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the process
	 * of being read by __i915_gem_active_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(i915->requests,
			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		/* Ratelimit ourselves to prevent oom from malicious clients */
		ret = i915_gem_wait_for_idle(i915,
					     I915_WAIT_LOCKED |
					     I915_WAIT_INTERRUPTIBLE);
		if (ret)
			goto err_unreserve;

		/*
		 * We've forced the client to stall and catch up with whatever
		 * backlog there might have been. As we are assuming that we
		 * caused the mempressure, now is an opportune time to
		 * recover as much memory from the request pool as is possible.
		 * Having already penalized the client to stall, we spend
		 * a little extra time to re-optimise page allocation.
		 */
		kmem_cache_shrink(i915->requests);
		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */

		rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	INIT_LIST_HEAD(&rq->active_list);
	rq->i915 = i915;
	rq->engine = engine;
	rq->gem_context = ctx;
	rq->hw_context = ce;
	rq->ring = ce->ring;
	rq->timeline = ce->ring->timeline;
	GEM_BUG_ON(rq->timeline == &engine->timeline);

	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence,
		       &i915_fence_ops,
		       &rq->lock,
		       rq->timeline->fence_context,
		       timeline_get_seqno(rq->timeline));

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	init_waitqueue_head(&rq->execute);

	i915_sched_node_init(&rq->sched);

	/* No zalloc, must clear what we need by hand */
	rq->global_seqno = 0;
	rq->signaling.wait.seqno = 0;
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->waitboost = false;

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
	if (ret)
		goto err_unwind;

	ret = engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	/* Keep a second pin for the dual retirement along engine and ring */
	__intel_context_pin(ce);

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	/* Check that we didn't interrupt ourselves with a new request */
	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->active_list));
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

	kmem_cache_free(i915->requests, rq);
err_unreserve:
	unreserve_gt(i915);
err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}
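
/*
 * A minimal sketch of a caller's lifecycle, assuming the usual
 * execbuf-like flow (illustrative; error handling and command emission
 * elided):
 *
 *	rq = i915_request_alloc(engine, ctx);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	err = i915_request_await_object(rq, obj, true);
 *	... emit commands via intel_ring_begin() ...
 *	__i915_request_add(rq, true);
 *
 * Between alloc and add the request holds its ring-space reservation,
 * which is why __i915_request_add() is not allowed to fail.
 */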

static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from))
		return 0;

	if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(to->i915,
						     &to->sched,
						     &from->sched);
		if (ret < 0)
			return ret;
	}

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       I915_FENCE_GFP);
		return ret < 0 ? ret : 0;
	}

	if (to->engine->semaphore.sync_to) {
		u32 seqno;

		GEM_BUG_ON(!from->engine->semaphore.signal);

		seqno = i915_request_global_seqno(from);
		if (!seqno)
			goto await_dma_fence;

		if (seqno <= to->timeline->global_sync[from->engine->id])
			return 0;

		trace_i915_gem_ring_sync_to(to, from);
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret)
			return ret;

		to->timeline->global_sync[from->engine->id] = seqno;
		return 0;
	}

await_dma_fence:
	ret = i915_sw_fence_await_dma_fence(&to->submit,
					    &from->fence, 0,
					    I915_FENCE_GFP);
	return ret < 0 ? ret : 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context != rq->i915->mm.unordered_timeline &&
		    i915_timeline_sync_is_later(rq->timeline, fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    I915_FENCE_GFP);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context != rq->i915->mm.unordered_timeline)
			i915_timeline_sync_set(rq->timeline, fence);
	} while (--nchild);

	return 0;
}
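
/*
 * A minimal sketch, assuming an incoming sync_file fd from userspace or
 * another driver (illustrative, not part of this file):
 *
 *	struct dma_fence *in_fence = sync_file_get_fence(fd);
 *
 *	if (!in_fence)
 *		return -EINVAL;
 *	err = i915_request_await_dma_fence(rq, in_fence);
 *	dma_fence_put(in_fence);
 *
 * A fence-array arriving this way is decomposed into its children above.
 */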

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}
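
/*
 * A minimal sketch, assuming execbuffer-style flags (illustrative):
 *
 *	err = i915_request_await_object(rq, obj, flags & EXEC_OBJECT_WRITE);
 *
 * so a writer waits for all readers, while a reader waits only on the
 * most recent writer.
 */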

/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_request_add(struct i915_request *request, bool flush_caches)
{
	struct intel_engine_cs *engine = request->engine;
	struct i915_timeline *timeline = request->timeline;
	struct intel_ring *ring = request->ring;
	struct i915_request *prev;
	u32 *cs;
	int err;

	GEM_TRACE("%s fence %llx:%d\n",
		  engine->name, request->fence.context, request->fence.seqno);

	lockdep_assert_held(&request->i915->drm.struct_mutex);
	trace_i915_request_add(request);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != request->fence.seqno);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		err = engine->emit_flush(request, EMIT_FLUSH);

		/* Not allowed to fail! */
		WARN(err, "engine->emit_flush() failed: %d!\n", err);
	}

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
	GEM_BUG_ON(IS_ERR(cs));
	request->postfix = intel_ring_offset(request, cs);

	/*
	 * Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */

	prev = i915_gem_active_raw(&timeline->last_request,
				   &request->i915->drm.struct_mutex);
	if (prev && !i915_request_completed(prev)) {
		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
					     &request->submitq);
		if (engine->schedule)
			__i915_sched_node_add_dependency(&request->sched,
							 &prev->sched,
							 &request->dep,
							 0);
	}

	spin_lock_irq(&timeline->lock);
	list_add_tail(&request->link, &timeline->requests);
	spin_unlock_irq(&timeline->lock);

	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
	i915_gem_active_set(&timeline->last_request, request);

	list_add_tail(&request->ring_link, &ring->request_list);
	if (list_is_first(&request->ring_link, &ring->request_list)) {
		GEM_TRACE("marking %s as active\n", ring->timeline->name);
		list_add(&ring->active_link, &request->i915->gt.active_rings);
	}
	request->emitted_jiffies = jiffies;

	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(request, &request->gem_context->sched);
	rcu_read_unlock();
	i915_sw_fence_commit(&request->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is a long way behind submission,
	 * suggesting that we haven't been retiring frequently enough from
	 * the combination of retire-before-alloc, waiters and the background
	 * retirement worker. So if the last request on this timeline was
	 * already completed, do a catch up pass, flushing the retirement queue
	 * up to this client. Since we have now moved the heaviest operations
	 * during retirement onto secondary workers, such as freeing objects
	 * or contexts, retiring a bunch of requests is mostly list management
	 * (and cache misses), and so we should not be overly penalizing this
	 * client by performing excess work, though we may still be performing
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
	if (prev && i915_request_completed(prev))
		i915_request_retire_upto(prev);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}
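
/*
 * Illustrative arithmetic: the shift by 10 divides by 1024 rather than
 * 1000, so 2,000,000ns becomes 1953 rather than 2000 "microseconds"; the
 * ~2% truncation error is irrelevant for a busywait heuristic.
 */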

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static bool __i915_spin_request(const struct i915_request *rq,
				u32 seqno, int state, unsigned long timeout_us)
{
	struct intel_engine_cs *engine = rq->engine;
	unsigned int irq, cpu;

	GEM_BUG_ON(!seqno);

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the engine and so we can
	 * tell if the request has started. If the request hasn't started yet,
	 * it is a fair assumption that it will not complete within our
	 * relatively short timeout.
	 */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	irq = atomic_read(&engine->irq_count);
	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
			return seqno == i915_request_global_seqno(rq);

		/*
		 * Seqnos are meant to be ordered *before* the interrupt. If
		 * we see an interrupt without a corresponding seqno advance,
		 * assume we won't see one in the near future but require
		 * the engine->seqno_barrier() to fixup coherency.
		 */
		if (atomic_read(&engine->irq_count) != irq)
			break;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

static bool __i915_wait_request_check_and_reset(struct i915_request *request)
{
	struct i915_gpu_error *error = &request->i915->gpu_error;

	if (likely(!i915_reset_handoff(error)))
		return false;

	__set_current_state(TASK_RUNNING);
	i915_reset(request->i915, error->stalled_mask, error->reason);
	return true;
}

/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
 * in via the flags, and vice versa if the struct_mutex is not held, the caller
 * must not specify that the wait is locked.
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
	DEFINE_WAIT_FUNC(reset, default_wake_function);
	DEFINE_WAIT_FUNC(exec, default_wake_function);
	struct intel_wait wait;

	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	if (i915_request_completed(rq))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	add_wait_queue(&rq->execute, &exec);
	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(errq, &reset);

	intel_wait_init(&wait, rq);

restart:
	do {
		set_current_state(state);
		if (intel_wait_update_request(&wait, rq))
			break;

		if (flags & I915_WAIT_LOCKED &&
		    __i915_wait_request_check_and_reset(rq))
			continue;

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			goto complete;
		}

		if (!timeout) {
			timeout = -ETIME;
			goto complete;
		}

		timeout = io_schedule_timeout(timeout);
	} while (1);

	GEM_BUG_ON(!intel_wait_has_seqno(&wait));
	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));

	/* Optimistic short spin before touching IRQs */
	if (__i915_spin_request(rq, wait.seqno, state, 5))
		goto complete;

	set_current_state(state);
	if (intel_engine_add_wait(rq->engine, &wait))
		/*
		 * In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	if (flags & I915_WAIT_LOCKED)
		__i915_wait_request_check_and_reset(rq);

	for (;;) {
		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);

		if (intel_wait_complete(&wait) &&
		    intel_wait_check_request(&wait, rq))
			break;

		set_current_state(state);

wakeup:
		/*
		 * Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(rq))
			break;

		/*
		 * If the GPU is hung, and we hold the lock, reset the GPU
		 * and then check for completion. On a full reset, the engine's
		 * HW seqno will be advanced past us and we are complete.
		 * If we do a partial reset, we have to wait for the GPU to
		 * resume and update the breadcrumb.
		 *
		 * If we don't hold the mutex, we can just wait for the worker
		 * to come along and update the breadcrumb (either directly
		 * itself, or indirectly by recovering the GPU).
		 */
		if (flags & I915_WAIT_LOCKED &&
		    __i915_wait_request_check_and_reset(rq))
			continue;

		/* Only spin if we know the GPU is processing this request */
		if (__i915_spin_request(rq, wait.seqno, state, 2))
			break;

		if (!intel_wait_check_request(&wait, rq)) {
			intel_engine_remove_wait(rq->engine, &wait);
			goto restart;
		}
	}

	intel_engine_remove_wait(rq->engine, &wait);
complete:
	__set_current_state(TASK_RUNNING);
	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(errq, &reset);
	remove_wait_queue(&rq->execute, &exec);
	trace_i915_request_wait_end(rq);

	return timeout;
}

static void ring_retire_requests(struct intel_ring *ring)
{
	struct i915_request *request, *next;

	list_for_each_entry_safe(request, next,
				 &ring->request_list, ring_link) {
		if (!i915_request_completed(request))
			break;

		i915_request_retire(request);
	}
}

void i915_retire_requests(struct drm_i915_private *i915)
{
	struct intel_ring *ring, *tmp;

	lockdep_assert_held(&i915->drm.struct_mutex);

	if (!i915->gt.active_requests)
		return;

	list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
		ring_retire_requests(ring);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif