Commit c04e0f3b authored by Chris Wilson

drm/i915: Separate out the seqno-barrier from engine->get_seqno

In order to simplify future patches, extract the lazy_coherency
optimisation out of the engine->get_seqno() vfunc into its own
callback.

v2: Rename the barrier to engine->irq_seqno_barrier to better reflect
that the barrier is only required after the user interrupt, before
reading the seqno (to ensure that the seqno update lands in time, as we
do not have strict seqno-irq ordering on all platforms).

Reviewed-by: Dave Gordon <david.s.gordon@intel.com> [#v2]

v3: Comments for hangcheck paranoia. Mika wanted to keep the extra
barrier inside the hangcheck, just in case. I can argue that it doesn't
provide a barrier against anything, but the side-effects of applying the
barrier may prevent a false declaration of a hung GPU.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1460195877-20520-2-git-send-email-chris@chris-wilson.co.uk
parent 9b9ed309
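
A minimal sketch of the new calling convention (the helper name is
hypothetical; the pattern mirrors the i915_gem_request_completed()
hunk below): callers that need a coherent value invoke the optional
irq_seqno_barrier() first and then perform a plain seqno read.

/*
 * Hypothetical helper, for illustration only: flush any in-flight
 * seqno write if the platform requires it, then read the value.
 */
static u32 sketch_read_seqno(struct intel_engine_cs *engine)
{
	if (engine->irq_seqno_barrier)
		engine->irq_seqno_barrier(engine);

	return engine->get_seqno(engine);
}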
@@ -598,7 +598,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				   engine->name,
 				   i915_gem_request_get_seqno(work->flip_queued_req),
 				   dev_priv->next_seqno,
-				   engine->get_seqno(engine, true),
+				   engine->get_seqno(engine),
 				   i915_gem_request_completed(work->flip_queued_req, true));
 		} else
 			seq_printf(m, "Flip not associated with any ring\n");
@@ -730,7 +730,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
 	if (engine->get_seqno) {
 		seq_printf(m, "Current sequence (%s): %x\n",
-			   engine->name, engine->get_seqno(engine, false));
+			   engine->name, engine->get_seqno(engine));
 	}
 }
@@ -1346,8 +1346,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	intel_runtime_pm_get(dev_priv);

 	for_each_engine_id(engine, dev_priv, id) {
-		seqno[id] = engine->get_seqno(engine, false);
 		acthd[id] = intel_ring_get_active_head(engine);
+		seqno[id] = engine->get_seqno(engine);
 	}

 	i915_get_extra_instdone(dev, instdone);
@@ -3017,15 +3017,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
 					    bool lazy_coherency)
 {
-	u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency);
-	return i915_seqno_passed(seqno, req->previous_seqno);
+	if (!lazy_coherency && req->engine->irq_seqno_barrier)
+		req->engine->irq_seqno_barrier(req->engine);
+	return i915_seqno_passed(req->engine->get_seqno(req->engine),
+				 req->previous_seqno);
 }

 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
 					      bool lazy_coherency)
 {
-	u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency);
-	return i915_seqno_passed(seqno, req->seqno);
+	if (!lazy_coherency && req->engine->irq_seqno_barrier)
+		req->engine->irq_seqno_barrier(req->engine);
+	return i915_seqno_passed(req->engine->get_seqno(req->engine),
+				 req->seqno);
 }

 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
@@ -931,8 +931,8 @@ static void i915_record_ring_state(struct drm_device *dev,
 	ering->waiting = waitqueue_active(&engine->irq_queue);
 	ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
-	ering->seqno = engine->get_seqno(engine, false);
 	ering->acthd = intel_ring_get_active_head(engine);
+	ering->seqno = engine->get_seqno(engine);
 	ering->last_seqno = engine->last_submitted_seqno;
 	ering->start = I915_READ_START(engine);
 	ering->head = I915_READ_HEAD(engine);
@@ -2941,7 +2941,7 @@ static int semaphore_passed(struct intel_engine_cs *engine)
 	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
 		return -1;

-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
 		return 1;

 	/* cursory check for an unkickable deadlock */
@@ -3100,8 +3100,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		semaphore_clear_deadlocks(dev_priv);

-		seqno = engine->get_seqno(engine, false);
+		/* We don't strictly need an irq-barrier here, as we are not
+		 * serving an interrupt request, be paranoid in case the
+		 * barrier has side-effects (such as preventing a broken
+		 * cacheline snoop) and so be sure that we can see the seqno
+		 * advance. If the seqno should stick, due to a stale
+		 * cacheline, we would erroneously declare the GPU hung.
+		 */
+		if (engine->irq_seqno_barrier)
+			engine->irq_seqno_barrier(engine);
+
 		acthd = intel_ring_get_active_head(engine);
+		seqno = engine->get_seqno(engine);

 		if (engine->hangcheck.seqno == seqno) {
 			if (ring_idle(engine, seqno)) {
@@ -562,7 +562,7 @@ TRACE_EVENT(i915_gem_request_notify,
 	    TP_fast_assign(
 			   __entry->dev = engine->dev->primary->index;
 			   __entry->ring = engine->id;
-			   __entry->seqno = engine->get_seqno(engine, false);
+			   __entry->seqno = engine->get_seqno(engine);
 			   ),

 	    TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -1852,7 +1852,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	return 0;
 }

-static u32 gen8_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+static u32 gen8_get_seqno(struct intel_engine_cs *engine)
 {
 	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }
@@ -1862,10 +1862,8 @@ static void gen8_set_seqno(struct intel_engine_cs *engine, u32 seqno)
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 }

-static u32 bxt_a_get_seqno(struct intel_engine_cs *engine,
-			   bool lazy_coherency)
+static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
 {
 	/*
 	 * On BXT A steppings there is a HW coherency issue whereby the
 	 * MI_STORE_DATA_IMM storing the completed request's seqno
@@ -1876,11 +1874,7 @@ static u32 bxt_a_get_seqno(struct intel_engine_cs *engine,
 	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
 	 * this clflush in practice becomes an invalidate operation.
 	 */
-
-	if (!lazy_coherency)
-		intel_flush_status_page(engine, I915_GEM_HWS_INDEX);
-
-	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
+	intel_flush_status_page(engine, I915_GEM_HWS_INDEX);
 }

 static void bxt_a_set_seqno(struct intel_engine_cs *engine, u32 seqno)
@@ -2058,12 +2052,11 @@ logical_ring_default_vfuncs(struct drm_device *dev,
 	engine->irq_get = gen8_logical_ring_get_irq;
 	engine->irq_put = gen8_logical_ring_put_irq;
 	engine->emit_bb_start = gen8_emit_bb_start;
+	engine->get_seqno = gen8_get_seqno;
+	engine->set_seqno = gen8_set_seqno;
 	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		engine->get_seqno = bxt_a_get_seqno;
+		engine->irq_seqno_barrier = bxt_a_seqno_barrier;
 		engine->set_seqno = bxt_a_set_seqno;
-	} else {
-		engine->get_seqno = gen8_get_seqno;
-		engine->set_seqno = gen8_set_seqno;
 	}
 }
@@ -1568,8 +1568,8 @@ pc_render_add_request(struct drm_i915_gem_request *req)
 	return 0;
 }

-static u32
-gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+static void
+gen6_seqno_barrier(struct intel_engine_cs *engine)
 {
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
@@ -1583,16 +1583,12 @@ gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
 	 * batch i.e. much more frequent than a delay when waiting for the
 	 * interrupt (with the same net latency).
 	 */
-	if (!lazy_coherency) {
-		struct drm_i915_private *dev_priv = engine->dev->dev_private;
-		POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
-	}
-
-	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
+	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
 }

 static u32
-ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *engine)
 {
 	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }
@@ -1604,7 +1600,7 @@ ring_set_seqno(struct intel_engine_cs *engine, u32 seqno)
 }

 static u32
-pc_render_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+pc_render_get_seqno(struct intel_engine_cs *engine)
 {
 	return engine->scratch.cpu_page[0];
 }
@@ -2828,7 +2824,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		engine->irq_get = gen8_ring_get_irq;
 		engine->irq_put = gen8_ring_put_irq;
 		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		engine->get_seqno = gen6_ring_get_seqno;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
+		engine->get_seqno = ring_get_seqno;
 		engine->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			WARN_ON(!dev_priv->semaphore_obj);
@@ -2845,7 +2842,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		engine->irq_get = gen6_ring_get_irq;
 		engine->irq_put = gen6_ring_put_irq;
 		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		engine->get_seqno = gen6_ring_get_seqno;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
+		engine->get_seqno = ring_get_seqno;
 		engine->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			engine->semaphore.sync_to = gen6_ring_sync;
@@ -2960,7 +2958,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 		engine->write_tail = gen6_bsd_ring_write_tail;
 		engine->flush = gen6_bsd_ring_flush;
 		engine->add_request = gen6_add_request;
-		engine->get_seqno = gen6_ring_get_seqno;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
+		engine->get_seqno = ring_get_seqno;
 		engine->set_seqno = ring_set_seqno;
 		if (INTEL_INFO(dev)->gen >= 8) {
 			engine->irq_enable_mask =
@@ -3033,7 +3032,8 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 	engine->mmio_base = GEN8_BSD2_RING_BASE;
 	engine->flush = gen6_bsd_ring_flush;
 	engine->add_request = gen6_add_request;
-	engine->get_seqno = gen6_ring_get_seqno;
+	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->get_seqno = ring_get_seqno;
 	engine->set_seqno = ring_set_seqno;
 	engine->irq_enable_mask =
 		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
@@ -3064,7 +3064,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 	engine->write_tail = ring_write_tail;
 	engine->flush = gen6_ring_flush;
 	engine->add_request = gen6_add_request;
-	engine->get_seqno = gen6_ring_get_seqno;
+	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->get_seqno = ring_get_seqno;
 	engine->set_seqno = ring_set_seqno;
 	if (INTEL_INFO(dev)->gen >= 8) {
 		engine->irq_enable_mask =
@@ -3122,7 +3123,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 	engine->write_tail = ring_write_tail;
 	engine->flush = gen6_ring_flush;
 	engine->add_request = gen6_add_request;
-	engine->get_seqno = gen6_ring_get_seqno;
+	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->get_seqno = ring_get_seqno;
 	engine->set_seqno = ring_set_seqno;
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -193,8 +193,8 @@ struct intel_engine_cs {
 	 * seen value is good enough. Note that the seqno will always be
 	 * monotonic, even if not coherent.
 	 */
-	u32		(*get_seqno)(struct intel_engine_cs *ring,
-				     bool lazy_coherency);
+	void		(*irq_seqno_barrier)(struct intel_engine_cs *ring);
+	u32		(*get_seqno)(struct intel_engine_cs *ring);
 	void		(*set_seqno)(struct intel_engine_cs *ring,
 				     u32 seqno);
 	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,