Commit 3f6a6f34 authored by Chris Wilson, committed by Joonas Lahtinen

drm/i915: Reduce i915_request.lock contention for i915_request_wait

Currently, we use i915_request_completed() directly in
i915_request_wait() and follow up with a manual invocation of
dma_fence_signal(). This appears to cause a large number of contentions
on i915_request.lock as when the process is woken up after the fence is
signaled by an interrupt, we will then try and call dma_fence_signal()
ourselves while the signaler is still holding the lock.
dma_fence_is_signaled() has the benefit of checking the
DMA_FENCE_FLAG_SIGNALED_BIT prior to calling dma_fence_signal() and so
avoids most of that contention.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200716100754.5670-1-chris@chris-wilson.co.uk
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent 33936499
...@@ -1640,7 +1640,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) ...@@ -1640,7 +1640,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu; return this_cpu != cpu;
} }
static bool __i915_spin_request(const struct i915_request * const rq, int state) static bool __i915_spin_request(struct i915_request * const rq, int state)
{ {
unsigned long timeout_ns; unsigned long timeout_ns;
unsigned int cpu; unsigned int cpu;
...@@ -1673,7 +1673,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state) ...@@ -1673,7 +1673,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
timeout_ns += local_clock_ns(&cpu); timeout_ns += local_clock_ns(&cpu);
do { do {
if (i915_request_completed(rq)) if (dma_fence_is_signaled(&rq->fence))
return true; return true;
if (signal_pending_state(state, current)) if (signal_pending_state(state, current))
...@@ -1697,7 +1697,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) ...@@ -1697,7 +1697,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{ {
struct request_wait *wait = container_of(cb, typeof(*wait), cb); struct request_wait *wait = container_of(cb, typeof(*wait), cb);
wake_up_process(wait->tsk); wake_up_process(fetch_and_zero(&wait->tsk));
} }
/** /**
...@@ -1766,10 +1766,8 @@ long i915_request_wait(struct i915_request *rq, ...@@ -1766,10 +1766,8 @@ long i915_request_wait(struct i915_request *rq,
* duration, which we currently lack. * duration, which we currently lack.
*/ */
if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
__i915_spin_request(rq, state)) { __i915_spin_request(rq, state))
dma_fence_signal(&rq->fence);
goto out; goto out;
}
/* /*
* This client is about to stall waiting for the GPU. In many cases * This client is about to stall waiting for the GPU. In many cases
...@@ -1793,10 +1791,8 @@ long i915_request_wait(struct i915_request *rq, ...@@ -1793,10 +1791,8 @@ long i915_request_wait(struct i915_request *rq,
for (;;) { for (;;) {
set_current_state(state); set_current_state(state);
if (i915_request_completed(rq)) { if (dma_fence_is_signaled(&rq->fence))
dma_fence_signal(&rq->fence);
break; break;
}
intel_engine_flush_submission(rq->engine); intel_engine_flush_submission(rq->engine);
...@@ -1814,7 +1810,9 @@ long i915_request_wait(struct i915_request *rq, ...@@ -1814,7 +1810,9 @@ long i915_request_wait(struct i915_request *rq,
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
dma_fence_remove_callback(&rq->fence, &wait.cb); if (READ_ONCE(wait.tsk))
dma_fence_remove_callback(&rq->fence, &wait.cb);
GEM_BUG_ON(!list_empty(&wait.cb.node));
out: out:
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment