Commit 77f0d0e9 authored by Chris Wilson

drm/i915/execlists: Pack the count into the low bits of the port.request

add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improvement to
intel_lrc_irq_handler and execlists_submit_ports (a net improvement,
since execlists_update_context is now inlined).

v2: Use the port_*() accessors for the GuC as well (even though we do
not pack any counters in there yet) and hide all port->request_count
accesses inside the helpers.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk
parent 0ce81788
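
The saving comes from a classic pointer-packing trick: struct drm_i915_gem_request is allocated with at least 8-byte alignment, so the low bits of its pointer are always zero and can carry the small ELSP submission counter in the same word as the pointer. Below is a minimal sketch of the encoding, assuming suitably aligned pointers; pack()/unpack() are illustrative stand-ins for the kernel's ptr_pack_bits()/ptr_unpack_bits() helpers, not their actual definitions:

#include <assert.h>
#include <stdint.h>

#define COUNT_BITS 2	/* matches EXECLIST_COUNT_BITS in the patch */
#define COUNT_MASK (((uintptr_t)1 << COUNT_BITS) - 1)

/* Fold a small counter into the guaranteed-zero low bits of a pointer. */
static inline void *pack(void *ptr, unsigned int count)
{
	assert(((uintptr_t)ptr & COUNT_MASK) == 0);	/* alignment guarantee */
	assert(count <= COUNT_MASK);			/* counter must fit in 2 bits */
	return (void *)((uintptr_t)ptr | count);
}

/* Split the word back into pointer (high bits) and counter (low bits). */
static inline void *unpack(void *packed, unsigned int *count)
{
	*count = (uintptr_t)packed & COUNT_MASK;
	return (void *)((uintptr_t)packed & ~COUNT_MASK);
}

Two bits give a maximum count of 3, which is enough headroom here: port[0] holds at most a request submitted once and then resubmitted for the WaIdleLiteRestore workaround (count 2), and port[1] never exceeds count 1, as the GEM_BUG_ON(count > !n) in execlists_submit_ports() below asserts.
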
@@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	if (i915.enable_execlists) {
 		u32 ptr, read, write;
 		struct rb_node *rb;
+		unsigned int idx;
 
 		seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
 			   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		if (read > write)
 			write += GEN8_CSB_ENTRIES;
 		while (read < write) {
-			unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+			idx = ++read % GEN8_CSB_ENTRIES;
 			seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
 				   idx,
 				   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		}
 
 		rcu_read_lock();
-		rq = READ_ONCE(engine->execlist_port[0].request);
-		if (rq) {
-			seq_printf(m, "\t\tELSP[0] count=%d, ",
-				   engine->execlist_port[0].count);
-			print_request(m, rq, "rq: ");
-		} else {
-			seq_printf(m, "\t\tELSP[0] idle\n");
-		}
-		rq = READ_ONCE(engine->execlist_port[1].request);
-		if (rq) {
-			seq_printf(m, "\t\tELSP[1] count=%d, ",
-				   engine->execlist_port[1].count);
-			print_request(m, rq, "rq: ");
-		} else {
-			seq_printf(m, "\t\tELSP[1] idle\n");
+		for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+			unsigned int count;
+
+			rq = port_unpack(&engine->execlist_port[idx],
+					 &count);
+			if (rq) {
+				seq_printf(m, "\t\tELSP[%d] count=%d, ",
+					   idx, count);
+				print_request(m, rq, "rq: ");
+			} else {
+				seq_printf(m, "\t\tELSP[%d] idle\n",
+					   idx);
+			}
 		}
 		rcu_read_unlock();
...
@@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	 */
 	if (i915.enable_execlists) {
+		struct execlist_port *port = engine->execlist_port;
 		unsigned long flags;
+		unsigned int n;
 
 		spin_lock_irqsave(&engine->timeline->lock, flags);
 
-		i915_gem_request_put(engine->execlist_port[0].request);
-		i915_gem_request_put(engine->execlist_port[1].request);
+		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+			i915_gem_request_put(port_request(&port[n]));
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 
 		engine->execlist_queue = RB_ROOT;
 		engine->execlist_first = NULL;
...
@@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
+	const struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-		if (engine->execlist_port[n].request)
-			record_request(engine->execlist_port[n].request,
-				       &ee->execlist[n]);
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+		struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+		if (!rq)
+			break;
+
+		record_request(rq, &ee->execlist[n]);
+	}
 }
 
 static void record_context(struct drm_i915_error_context *e,
...
@@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	spin_unlock(&rq->lock);
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, i915_gem_request_get(rq));
+	nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlist_port;
-	struct drm_i915_gem_request *last = port[0].request;
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
@@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 			if (port != engine->execlist_port)
 				break;
 
-			i915_gem_request_assign(&port->request, last);
-			nested_enable_signaling(last);
+			port_assign(port, last);
 			port++;
 		}
@@ -681,13 +692,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		rq->priotree.priority = INT_MAX;
 
 		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
+		trace_i915_gem_request_in(rq, port_index(port, engine));
 		last = rq;
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
-		nested_enable_signaling(last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	bool submit;
 
 	do {
-		rq = port[0].request;
+		rq = port_request(&port[0]);
 		while (rq && i915_gem_request_completed(rq)) {
 			trace_i915_gem_request_out(rq);
 			i915_gem_request_put(rq);
-			port[0].request = port[1].request;
-			port[1].request = NULL;
-			rq = port[0].request;
+
+			port[0] = port[1];
+			memset(&port[1], 0, sizeof(port[1]));
+
+			rq = port_request(&port[0]);
 		}
 
 		submit = false;
-		if (!port[1].request)
+		if (!port_count(&port[1]))
 			submit = i915_guc_dequeue(engine);
 	} while (submit);
 }
...
@@ -1233,7 +1233,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Both ports drained, no more ELSP submission? */
-	if (engine->execlist_port[0].request)
+	if (port_request(&engine->execlist_port[0]))
 		return false;
 
 	/* Ring stopped? */
...
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	GEM_BUG_ON(port[0].count > 1);
-	if (!port[0].count)
-		execlists_context_status_change(port[0].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-	desc[0] = execlists_update_context(port[0].request);
-	GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-	port[0].count++;
-
-	if (port[1].request) {
-		GEM_BUG_ON(port[1].count);
-		execlists_context_status_change(port[1].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-		desc[1] = execlists_update_context(port[1].request);
-		GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-		port[1].count = 1;
-	} else {
-		desc[1] = 0;
-	}
-	GEM_BUG_ON(desc[0] == desc[1]);
-
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
+
+	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+		struct drm_i915_gem_request *rq;
+		unsigned int count;
+		u64 desc;
+
+		rq = port_unpack(&port[n], &count);
+		if (rq) {
+			GEM_BUG_ON(count > !n);
+			if (!count++)
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+			port_set(&port[n], port_pack(rq, count));
+			desc = execlists_update_context(rq);
+			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+		} else {
+			GEM_BUG_ON(!n);
+			desc = 0;
+		}
+
+		writel(upper_32_bits(desc), elsp);
+		writel(lower_32_bits(desc), elsp);
+	}
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
 	return true;
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port->request;
+	last = port_request(port);
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 */
 		last->tail = last->wa_tail;
 
-	GEM_BUG_ON(port[1].request);
+	GEM_BUG_ON(port_isset(&port[1]));
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			GEM_BUG_ON(last->ctx == cursor->ctx);
 
-			i915_gem_request_assign(&port->request, last);
+			if (submit)
+				port_assign(port, last);
 			port++;
 		}
@@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		cursor->priotree.priority = INT_MAX;
 
 		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
+		trace_i915_gem_request_in(cursor, port_index(port, engine));
 		last = cursor;
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-	return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
 
-	return port[0].count + port[1].count < 2;
+	return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 		tail = GEN8_CSB_WRITE_PTR(head);
 		head = GEN8_CSB_READ_PTR(head);
 		while (head != tail) {
+			struct drm_i915_gem_request *rq;
 			unsigned int status;
+			unsigned int count;
 
 			if (++head == GEN8_CSB_ENTRIES)
 				head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-					 port[0].context_id);
+					 port->context_id);
 
-			GEM_BUG_ON(port[0].count == 0);
-			if (--port[0].count == 0) {
+			rq = port_unpack(port, &count);
+			GEM_BUG_ON(count == 0);
+			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-				execlists_context_status_change(port[0].request,
-								INTEL_CONTEXT_SCHEDULE_OUT);
-
-				trace_i915_gem_request_out(port[0].request);
-				i915_gem_request_put(port[0].request);
+				GEM_BUG_ON(!i915_gem_request_completed(rq));
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+				trace_i915_gem_request_out(rq);
+				i915_gem_request_put(rq);
 
 				port[0] = port[1];
 				memset(&port[1], 0, sizeof(port[1]));
+			} else {
+				port_set(port, port_pack(rq, count));
 			}
 
-			GEM_BUG_ON(port[0].count == 0 &&
+			/* After the final element, the hw should be idle */
+			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
+	bool submit;
 	int ret;
 
 	ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-		if (!port[n].request)
+		if (!port_isset(&port[n]))
 			break;
 
 		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
 				 engine->name, n,
-				 port[n].request->global_seqno);
+				 port_request(&port[n])->global_seqno);
 
 		/* Discard the current inflight count */
-		port[n].count = 0;
+		port_set(&port[n], port_request(&port[n]));
+		submit = true;
 	}
 
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+	if (submit && !i915.enable_guc_submission)
 		execlists_submit_ports(engine);
 
 	return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	intel_ring_update_space(request->ring);
 
 	/* Catch up with any missed context-switch interrupts */
-	if (request->ctx != port[0].request->ctx) {
-		i915_gem_request_put(port[0].request);
+	if (request->ctx != port_request(port)->ctx) {
+		i915_gem_request_put(port_request(port));
 		port[0] = port[1];
 		memset(&port[1], 0, sizeof(port[1]));
 	}
 
-	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
 	request->tail =
...
@@ -368,8 +368,15 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	struct execlist_port {
-		struct drm_i915_gem_request *request;
-		unsigned int count;
+		struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
 		GEM_DEBUG_DECL(u32 context_id);
 	} execlist_port[2];
 	struct rb_root execlist_queue;
...
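
For a feel of how the submit and completion paths cooperate through this single field, here is a self-contained mock of the same encoding (the port_* helpers are reimplemented locally with plain uintptr_t arithmetic, since the kernel versions above depend on the i915 ptr_*_bits() utilities); it replays the count lifecycle that execlists_submit_ports() and intel_lrc_irq_handler() drive on a port:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXECLIST_COUNT_BITS 2
#define COUNT_MASK (((uintptr_t)1 << EXECLIST_COUNT_BITS) - 1)

struct mock_request { int seqno; };		/* stand-in for drm_i915_gem_request */
struct mock_port { uintptr_t request_count; };	/* pointer and count share one word */

static void port_set(struct mock_port *p, struct mock_request *rq, unsigned int count)
{
	assert(((uintptr_t)rq & COUNT_MASK) == 0 && count <= COUNT_MASK);
	p->request_count = (uintptr_t)rq | count;
}

static struct mock_request *port_unpack(const struct mock_port *p, unsigned int *count)
{
	*count = p->request_count & COUNT_MASK;
	return (struct mock_request *)(p->request_count & ~COUNT_MASK);
}

int main(void)
{
	static struct mock_request rq = { .seqno = 1 };
	struct mock_port port = { 0 };
	unsigned int count;

	port_set(&port, &rq, 1);	/* ELSP submission: count becomes 1 */
	port_set(&port, &rq, 2);	/* lite-restore resubmission: count 2 */

	assert(port_unpack(&port, &count) == &rq);	/* pointer is intact */
	while (count)			/* one CSB completion event per submit */
		count--;
	port.request_count = 0;		/* count reached zero: port goes idle */

	printf("rq %d retired; port idle=%d\n", rq.seqno, !port.request_count);
	return 0;
}
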