Commit bc4237ec authored by Chris Wilson

drm/i915/execlists: Unify CSB access pointers

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access, just at a different location.
We can reduce the CSB access to a set of read/write/buffer pointers and
treat the various paths identically, without worrying about forcewake.
(Forcewake is a nightmare for worst-case latency, and we want to process
this all with irqs off -- no latency allowed!)
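
As a minimal sketch of the resulting scheme (illustrative only: struct
csb_pointers and handle_event() are hypothetical stand-ins for
intel_engine_execlists and the request tracking done in process_csb()):

	/* Sketch only: the consumer no longer cares where the pointers
	 * live. Whether csb_status/csb_write are backed by the HWSP or
	 * by mmio (old vGPU), the read path is the same plain loads.
	 */
	struct csb_pointers {
		u32 __iomem *csb_read;	/* always mmio: where we ack our head */
		u32 *csb_write;		/* HWSP shadow or mmio: producer tail */
		u32 *csb_status;	/* HWSP shadow or mmio: the event pairs */
	};

	static void consume_csb(struct csb_pointers *csb, u8 *headp)
	{
		u8 head = *headp;
		u8 tail = READ_ONCE(*csb->csb_write); /* low u8 is the tail */

		if (head == tail)
			return;

		rmb(); /* order the tail read before reading the entries */
		do {
			if (++head == GEN8_CSB_ENTRIES)
				head = 0;
			handle_event(csb->csb_status[2 * head + 0],
				     csb->csb_status[2 * head + 1]);
		} while (head != tail);

		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
		       csb->csb_read);
		*headp = head;
	}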

v2: Comments, comments, comments. Well, 2 bonus comments.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-5-chris@chris-wilson.co.uk
parent 8ea397fa
drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -954,44 +955,40 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-	unsigned int head, tail;
-	bool fw = false;
+	const u32 * const buf = execlists->csb_status;
+	u8 head, tail;
 
 	/* Clear before reading to catch new interrupts */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
-
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	/*
+	 * Note that csb_write, csb_status may be either in HWSP or mmio.
+	 * When reading from the csb_write mmio register, we have to be
+	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
+	 * the low 4 bits. As it happens we know the next 4 bits are always
+	 * zero and so we can simply take the low u8 of the register
+	 * and treat it identically to reading from the HWSP (without having
+	 * to use explicit shifting and masking, and probably bifurcating
+	 * the code to handle the legacy mmio read).
+	 */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
+
+	/*
+	 * Hopefully paired with a wmb() in HW!
+	 *
+	 * We must complete the read of the write pointer before any reads
+	 * from the CSB, so that we do not see stale values. Without an rmb
+	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
+	 * we perform the READ_ONCE(*csb_write).
+	 */
+	rmb();
 
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
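
The rmb() comment is the heart of the loop above: the read of the write
pointer must complete before any reads of the CSB entries it covers. A
conceptual sketch of the pairing this relies on (the producer side is
really the GPU; it is written out in C purely to motivate the barrier):

	/* Conceptual producer (in reality the GPU fills the CSB). */
	static void produce_csb(u32 *buf, u32 *csb_write,
				u32 status, u32 ctx_id)
	{
		u8 next = (READ_ONCE(*csb_write) + 1) % GEN8_CSB_ENTRIES;

		buf[2 * next + 0] = status;	/* event data first ... */
		buf[2 * next + 1] = ctx_id;
		wmb();		/* ... made visible before the tail moves */
		WRITE_ONCE(*csb_write, next);
	}

	/* Consumer, as in process_csb(): tail first, entries strictly after. */
	tail = READ_ONCE(*csb_write);
	rmb();		/* forbid the CPU reading buf[] before the tail */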
@@ -1017,12 +1014,12 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
 		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
 			  engine->name, head,
-			  status, buf[2*head + 1],
+			  buf[2 * head + 0], buf[2 * head + 1],
 			  execlists->active);
 
+		status = buf[2 * head];
 		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
 			      GEN8_CTX_STATUS_PREEMPTED))
 			execlists_set_active(execlists,
@@ -1104,16 +1101,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
-
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
+	} while (head != tail);
 
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
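
To make the u8 trick concrete: per the comment in process_csb(), the
pointer register keeps the write pointer in its low 4 bits with the next
4 bits reading as zero, while the read pointer lives at bit 8 and above
(hence the head << 8 in the ack above). Truncating the register to a u8
therefore is the masking. A worked example under that layout:

	u32 reg = (5 << 8) | 3;	/* read ptr = 5 (bits 8+), write ptr = 3 */
	u8 tail = reg;		/* u8 truncation keeps 0x03 */
	/* tail == GEN8_CSB_WRITE_PTR(reg) without any explicit shift or
	 * mask, and the identical u8 load works unchanged when csb_write
	 * points into the HWSP instead of mmio. */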
@@ -2431,28 +2423,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2460,34 +2435,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status = (u32 __force *)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
+		execlists->csb_write = (u32 __force *)execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -300,24 +300,35 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int fw_domains;
+	u32 __iomem *csb_read;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
 	 */
-	unsigned int csb_head;
+	u32 *csb_write;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these registers may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
 	u32 preempt_complete_status;
+
+	/**
+	 * @csb_head: context status buffer head
+	 */
+	u8 csb_head;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8