Commit 5e5655c3 authored by Chris Wilson

drm/i915: Micro-optimise hotpath through intel_ring_begin()

Typically, there is space available within the ring and if not we have
to wait (by definition a slow path). Rearrange the code to reduce the
number of branches and stack size for the hotpath, accommodating a slight
growth for the wait.

v2: Fix the new assert that packets are not larger than the actual ring.
v3: Make the parameters unsigned as well to match their usage.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-3-chris@chris-wilson.co.uk
parent 95aebcb2
...@@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) ...@@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
return 0; return 0;
} }
static int wait_for_space(struct drm_i915_gem_request *req, int bytes) static noinline int wait_for_space(struct drm_i915_gem_request *req,
unsigned int bytes)
{ {
struct intel_ring *ring = req->ring; struct intel_ring *ring = req->ring;
struct drm_i915_gem_request *target; struct drm_i915_gem_request *target;
...@@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) ...@@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
return 0; return 0;
} }
u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) u32 *intel_ring_begin(struct drm_i915_gem_request *req,
unsigned int num_dwords)
{ {
struct intel_ring *ring = req->ring; struct intel_ring *ring = req->ring;
int remain_actual = ring->size - ring->emit; const unsigned int remain_usable = ring->effective_size - ring->emit;
int remain_usable = ring->effective_size - ring->emit; const unsigned int bytes = num_dwords * sizeof(u32);
int bytes = num_dwords * sizeof(u32); unsigned int need_wrap = 0;
int total_bytes, wait_bytes; unsigned int total_bytes;
bool need_wrap = false;
u32 *cs; u32 *cs;
total_bytes = bytes + req->reserved_space; total_bytes = bytes + req->reserved_space;
GEM_BUG_ON(total_bytes > ring->effective_size);
if (unlikely(bytes > remain_usable)) { if (unlikely(total_bytes > remain_usable)) {
const int remain_actual = ring->size - ring->emit;
if (bytes > remain_usable) {
/* /*
* Not enough space for the basic request. So need to flush * Not enough space for the basic request. So need to
* out the remainder and then wait for base + reserved. * flush out the remainder and then wait for
* base + reserved.
*/ */
wait_bytes = remain_actual + total_bytes; total_bytes += remain_actual;
need_wrap = true; need_wrap = remain_actual | 1;
} else if (unlikely(total_bytes > remain_usable)) { } else {
/* /*
* The base request will fit but the reserved space * The base request will fit but the reserved space
* falls off the end. So we don't need an immediate wrap * falls off the end. So we don't need an immediate
* and only need to effectively wait for the reserved * wrap and only need to effectively wait for the
* size space from the start of ringbuffer. * reserved size from the start of ringbuffer.
*/ */
wait_bytes = remain_actual + req->reserved_space; total_bytes = req->reserved_space + remain_actual;
} else { }
/* No wrapping required, just waiting. */
wait_bytes = total_bytes;
} }
if (wait_bytes > ring->space) { if (unlikely(total_bytes > ring->space)) {
int ret = wait_for_space(req, wait_bytes); int ret = wait_for_space(req, total_bytes);
if (unlikely(ret)) if (unlikely(ret))
return ERR_PTR(ret); return ERR_PTR(ret);
} }
if (unlikely(need_wrap)) { if (unlikely(need_wrap)) {
GEM_BUG_ON(remain_actual > ring->space); need_wrap &= ~1;
GEM_BUG_ON(ring->emit + remain_actual > ring->size); GEM_BUG_ON(need_wrap > ring->space);
GEM_BUG_ON(ring->emit + need_wrap > ring->size);
/* Fill the tail with MI_NOOP */ /* Fill the tail with MI_NOOP */
memset(ring->vaddr + ring->emit, 0, remain_actual); memset(ring->vaddr + ring->emit, 0, need_wrap);
ring->emit = 0; ring->emit = 0;
ring->space -= remain_actual; ring->space -= need_wrap;
} }
GEM_BUG_ON(ring->emit > ring->size - bytes); GEM_BUG_ON(ring->emit > ring->size - bytes);
......
...@@ -497,7 +497,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); ...@@ -497,7 +497,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
unsigned int n);
static inline void static inline void
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment