Commit 73dec95e authored by Tvrtko Ursulin

drm/i915: Emit to ringbuffer directly

This removes the usage of intel_ring_emit in favour of
directly writing to the ring buffer.

intel_ring_emit was preventing the compiler from optimising
the fetch and increment of the current ring buffer pointer,
and was therefore generating very verbose code for every write.

It had no useful purpose, since all ringbuffer operations
are started and ended with intel_ring_begin and
intel_ring_advance respectively, with no possibility of
bailing out in the middle, so it is fine to increment the
tail in intel_ring_begin and let the code manage the
pointer itself.

Useless instruction removal amounts to approximately
two and a half kilobytes of saved text on my build.

Not sure if this has any measurable performance
implications, but executing a ton of useless instructions
on fast paths cannot be good.
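
To illustrate, here is a minimal before/after sketch of the new
calling convention (emit_example() is a hypothetical helper made
up for this description, modelled on the gen2_render_ring_flush()
conversion in the diff below):

    /* Before: intel_ring_begin() returned an error code and every
     * dword went through intel_ring_emit(), which re-read and
     * re-incremented ring->tail on each call. */
    static int emit_example_old(struct drm_i915_gem_request *req)
    {
            struct intel_ring *ring = req->ring;
            int ret;

            ret = intel_ring_begin(req, 2);
            if (ret)
                    return ret;

            intel_ring_emit(ring, MI_FLUSH);
            intel_ring_emit(ring, MI_NOOP);
            intel_ring_advance(ring);

            return 0;
    }

    /* After: intel_ring_begin() returns a pointer into the ring
     * (or an ERR_PTR), the caller advances a local u32 *cs itself,
     * and intel_ring_advance() takes the request plus the final
     * pointer so the emitted length can be sanity checked. */
    static int emit_example_new(struct drm_i915_gem_request *req)
    {
            u32 *cs;

            cs = intel_ring_begin(req, 2);
            if (IS_ERR(cs))
                    return PTR_ERR(cs);

            *cs++ = MI_FLUSH;
            *cs++ = MI_NOOP;
            intel_ring_advance(req, cs);

            return 0;
    }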

v2:
 * Change return from intel_ring_begin to error pointer by
   popular demand.
 * Move tail increment to intel_ring_advance to enable some
   error checking.

v3:
 * Move tail advance back into intel_ring_begin.
 * Rebase and tidy.

v4:
 * Complete rebase after a few months since v3.

v5:
 * Remove unnecessary cast and fix !debug compile. (Chris Wilson)

v6:
 * Make intel_ring_offset take request as well.
 * Fix recording of request postfix plus a sprinkle of asserts.
   (Chris Wilson)

v7:
 * Use intel_ring_offset to get the postfix. (Chris Wilson)
 * Convert GVT code as well.

v8:
 * Rename *out++ to *cs++.

v9:
 * Fix out to cs conversion in GVT.

v10:
 * Rebase for new intel_ring_begin in selftests.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com
parent d2d15016
@@ -1513,7 +1513,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
 		len += copy_len;
 		gma += copy_len;
 	}
-	return 0;
+	return len;
 }
@@ -1630,7 +1630,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
 			      gma, gma + bb_size,
 			      dst);
-	if (ret) {
+	if (ret < 0) {
 		gvt_err("fail to copy guest ring buffer\n");
 		goto unmap_src;
 	}
@@ -2594,11 +2594,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
-	struct intel_ring *ring = shadow_ctx->engine[ring_id].ring;
 	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
-	unsigned int copy_len = 0;
+	u32 *cs;
 	int ret;
 
 	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2612,36 +2609,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 	gma_top = workload->rb_start + guest_rb_size;
 
 	/* allocate shadow ring buffer */
-	ret = intel_ring_begin(workload->req, workload->rb_len / 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* get shadow ring buffer va */
-	workload->shadow_ring_buffer_va = ring->vaddr + ring->tail;
+	workload->shadow_ring_buffer_va = cs;
 
 	/* head > tail --> copy head <-> top */
 	if (gma_head > gma_tail) {
 		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-				      gma_head, gma_top,
-				      workload->shadow_ring_buffer_va);
-		if (ret) {
+				      gma_head, gma_top, cs);
+		if (ret < 0) {
 			gvt_err("fail to copy guest ring buffer\n");
 			return ret;
 		}
-		copy_len = gma_top - gma_head;
+		cs += ret / sizeof(u32);
 		gma_head = workload->rb_start;
 	}
 
 	/* copy head or start <-> tail */
-	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-			      gma_head, gma_tail,
-			      workload->shadow_ring_buffer_va + copy_len);
-	if (ret) {
+	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+	if (ret < 0) {
 		gvt_err("fail to copy guest ring buffer\n");
 		return ret;
 	}
-	ring->tail += workload->rb_len;
-	intel_ring_advance(ring);
+	cs += ret / sizeof(u32);
+	intel_ring_advance(workload->req, cs);
 	return 0;
 }
@@ -2695,7 +2689,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 				wa_ctx->workload->vgpu->gtt.ggtt_mm,
 				guest_gma, guest_gma + ctx_size,
 				map);
-	if (ret) {
+	if (ret < 0) {
 		gvt_err("fail to copy guest indirect ctx\n");
 		goto unmap_src;
 	}
......
@@ -596,10 +596,9 @@ static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
 	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_ring *ring = req->ring;
 	struct intel_engine_cs *engine = req->engine;
 	enum intel_engine_id id;
-	u32 flags = hw_flags | MI_MM_SPACE_GTT;
+	u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
 	const int num_rings =
 		/* Use an extended w/a on ivb+ if signalling from other rings */
 		i915.semaphores ?
@@ -629,99 +628,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	if (INTEL_GEN(dev_priv) >= 7)
 		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
 
-	ret = intel_ring_begin(req, len);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 	if (INTEL_GEN(dev_priv) >= 7) {
-		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 		if (num_rings) {
 			struct intel_engine_cs *signaller;
 
-			intel_ring_emit(ring,
-					MI_LOAD_REGISTER_IMM(num_rings));
+			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
 			for_each_engine(signaller, dev_priv, id) {
 				if (signaller == engine)
 					continue;
 
-				intel_ring_emit_reg(ring,
-						    RING_PSMI_CTL(signaller->mmio_base));
-				intel_ring_emit(ring,
-						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+				*cs++ = i915_mmio_reg_offset(
+					   RING_PSMI_CTL(signaller->mmio_base));
+				*cs++ = _MASKED_BIT_ENABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
 			}
 		}
 	}
 
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring,
-			i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
+	*cs++ = MI_NOOP;
+	*cs++ = MI_SET_CONTEXT;
+	*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
 	 */
-	intel_ring_emit(ring, MI_NOOP);
+	*cs++ = MI_NOOP;
 
 	if (INTEL_GEN(dev_priv) >= 7) {
 		if (num_rings) {
 			struct intel_engine_cs *signaller;
 			i915_reg_t last_reg = {}; /* keep gcc quiet */
 
-			intel_ring_emit(ring,
-					MI_LOAD_REGISTER_IMM(num_rings));
+			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
 			for_each_engine(signaller, dev_priv, id) {
 				if (signaller == engine)
 					continue;
 
 				last_reg = RING_PSMI_CTL(signaller->mmio_base);
-				intel_ring_emit_reg(ring, last_reg);
-				intel_ring_emit(ring,
-						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+				*cs++ = i915_mmio_reg_offset(last_reg);
+				*cs++ = _MASKED_BIT_DISABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
 			}
 
 			/* Insert a delay before the next switch! */
-			intel_ring_emit(ring,
-					MI_STORE_REGISTER_MEM |
-					MI_SRM_LRM_GLOBAL_GTT);
-			intel_ring_emit_reg(ring, last_reg);
-			intel_ring_emit(ring,
-					i915_ggtt_offset(engine->scratch));
-			intel_ring_emit(ring, MI_NOOP);
+			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+			*cs++ = i915_mmio_reg_offset(last_reg);
+			*cs++ = i915_ggtt_offset(engine->scratch);
+			*cs++ = MI_NOOP;
 		}
-		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	}
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 
 	return ret;
 }
 
 static int remap_l3(struct drm_i915_gem_request *req, int slice)
 {
-	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
-	struct intel_ring *ring = req->ring;
-	int i, ret;
+	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
+	int i;
 
 	if (!remap_info)
 		return 0;
 
-	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/*
 	 * Note: We do not worry about the concurrent register cacheline hang
 	 * here because no other code should access these registers other than
 	 * at initialization time.
	 */
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
 	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
-		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
-		intel_ring_emit(ring, remap_info[i]);
+		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+		*cs++ = remap_info[i];
 	}
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
......
@@ -1336,25 +1336,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
-	int ret, i;
+	u32 *cs;
+	int i;
 
 	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
 		DRM_DEBUG("sol reset is gen7/rcs only\n");
 		return -EINVAL;
 	}
 
-	ret = intel_ring_begin(req, 4 * 3);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4 * 3);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	for (i = 0; i < 4; i++) {
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
-		intel_ring_emit(ring, 0);
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+		*cs++ = 0;
 	}
 
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1415,7 +1415,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	struct drm_i915_private *dev_priv = params->request->i915;
 	u64 exec_start, exec_len;
 	int instp_mode;
-	u32 instp_mask;
+	u32 instp_mask, *cs;
 	int ret;
 
 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
@@ -1461,17 +1461,15 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	if (params->engine->id == RCS &&
 	    instp_mode != dev_priv->relative_constants_mode) {
-		struct intel_ring *ring = params->request->ring;
-
-		ret = intel_ring_begin(params->request, 4);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(ring, INSTPM);
-		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
-		intel_ring_advance(ring);
+		cs = intel_ring_begin(params->request, 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_NOOP;
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(INSTPM);
+		*cs++ = instp_mask << 16 | instp_mode;
+		intel_ring_advance(params->request, cs);
 
 		dev_priv->relative_constants_mode = instp_mode;
 	}
......
@@ -686,23 +686,22 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
 			  unsigned entry,
 			  dma_addr_t addr)
 {
-	struct intel_ring *ring = req->ring;
 	struct intel_engine_cs *engine = req->engine;
-	int ret;
+	u32 *cs;
 
 	BUG_ON(entry >= 4);
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry));
-	intel_ring_emit(ring, upper_32_bits(addr));
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry));
-	intel_ring_emit(ring, lower_32_bits(addr));
-	intel_ring_advance(ring);
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
+	*cs++ = upper_32_bits(addr);
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
+	*cs++ = lower_32_bits(addr);
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1730,8 +1729,8 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 			 struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
 	struct intel_engine_cs *engine = req->engine;
+	u32 *cs;
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
@@ -1739,17 +1738,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
-	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
-	intel_ring_emit(ring, get_pd_offset(ppgtt));
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
+	*cs++ = PP_DIR_DCLV_2G;
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
+	*cs++ = get_pd_offset(ppgtt);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1757,8 +1756,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 			  struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
 	struct intel_engine_cs *engine = req->engine;
+	u32 *cs;
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
@@ -1766,17 +1765,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
-	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
-	intel_ring_emit(ring, get_pd_offset(ppgtt));
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
+	*cs++ = PP_DIR_DCLV_2G;
+	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
+	*cs++ = get_pd_offset(ppgtt);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (engine->id != RCS) {
......
@@ -824,6 +824,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	struct intel_ring *ring = request->ring;
 	struct intel_timeline *timeline = request->timeline;
 	struct drm_i915_gem_request *prev;
+	u32 *cs;
 	int err;
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -862,10 +863,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
-	err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
-	GEM_BUG_ON(err);
-	request->postfix = ring->tail;
-	ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
+	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+	GEM_BUG_ON(IS_ERR(cs));
+	request->postfix = intel_ring_offset(request, cs);
 
 	/* Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
......
@@ -10158,14 +10158,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
-	struct intel_ring *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	u32 flip_mask;
-	int ret;
+	u32 flip_mask, *cs;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* Can't queue multiple flips, so wait for the previous
 	 * one to finish before executing the next.
@@ -10174,13 +10172,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 	else
 		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
-	intel_ring_emit(ring, 0); /* aux display base address, unused */
+	*cs++ = MI_WAIT_FOR_EVENT | flip_mask;
+	*cs++ = MI_NOOP;
+	*cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane);
+	*cs++ = fb->pitches[0];
+	*cs++ = intel_crtc->flip_work->gtt_offset;
+	*cs++ = 0; /* aux display base address, unused */
 
 	return 0;
 }
@@ -10192,26 +10189,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
-	struct intel_ring *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	u32 flip_mask;
-	int ret;
+	u32 flip_mask, *cs;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	if (intel_crtc->plane)
 		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 	else
 		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
-	intel_ring_emit(ring, MI_NOOP);
+	*cs++ = MI_WAIT_FOR_EVENT | flip_mask;
+	*cs++ = MI_NOOP;
+	*cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane);
+	*cs++ = fb->pitches[0];
+	*cs++ = intel_crtc->flip_work->gtt_offset;
+	*cs++ = MI_NOOP;
 
 	return 0;
 }
@@ -10223,25 +10217,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t pf, pipesrc;
-	int ret;
+	u32 pf, pipesrc, *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* i965+ uses the linear or tiled offsets from the
 	 * Display Registers (which do not change across a page-flip)
 	 * so we need only reprogram the base address.
 	 */
-	intel_ring_emit(ring, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
-			intel_fb_modifier_to_tiling(fb->modifier));
+	*cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane);
+	*cs++ = fb->pitches[0];
+	*cs++ = intel_crtc->flip_work->gtt_offset |
+		intel_fb_modifier_to_tiling(fb->modifier);
 
 	/* XXX Enabling the panel-fitter across page-flip is so far
 	 * untested on non-native modes, so ignore it for now.
@@ -10249,7 +10240,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	 */
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
-	intel_ring_emit(ring, pf | pipesrc);
+	*cs++ = pf | pipesrc;
 
 	return 0;
 }
@@ -10261,21 +10252,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 				 struct drm_i915_gem_request *req,
 				 uint32_t flags)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t pf, pipesrc;
-	int ret;
+	u32 pf, pipesrc, *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(ring, fb->pitches[0] |
-			intel_fb_modifier_to_tiling(fb->modifier));
-	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
+	*cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane);
+	*cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier);
+	*cs++ = intel_crtc->flip_work->gtt_offset;
 
 	/* Contrary to the suggestions in the documentation,
 	 * "Enable Panel Fitter" does not seem to be required when page
@@ -10285,7 +10272,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	 */
 	pf = 0;
 	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
-	intel_ring_emit(ring, pf | pipesrc);
+	*cs++ = pf | pipesrc;
 
 	return 0;
 }
@@ -10298,9 +10285,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_ring *ring = req->ring;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t plane_bit = 0;
+	u32 *cs, plane_bit = 0;
 	int len, ret;
 
 	switch (intel_crtc->plane) {
@@ -10344,9 +10330,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(req, len);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* Unmask the flip-done completion message. Note that the bspec says that
 	 * we should do this for both the BCS and RCS, and that we must not unmask
@@ -10358,31 +10344,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
 	 */
 	if (req->engine->id == RCS) {
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(ring, DERRMR);
-		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
-					DERRMR_PIPEB_PRI_FLIP_DONE |
-					DERRMR_PIPEC_PRI_FLIP_DONE));
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(DERRMR);
+		*cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+			  DERRMR_PIPEB_PRI_FLIP_DONE |
+			  DERRMR_PIPEC_PRI_FLIP_DONE);
 		if (IS_GEN8(dev_priv))
-			intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 |
-					      MI_SRM_LRM_GLOBAL_GTT);
+			*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+				MI_SRM_LRM_GLOBAL_GTT;
 		else
-			intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
-					      MI_SRM_LRM_GLOBAL_GTT);
-		intel_ring_emit_reg(ring, DERRMR);
-		intel_ring_emit(ring,
-				i915_ggtt_offset(req->engine->scratch) + 256);
+			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+		*cs++ = i915_mmio_reg_offset(DERRMR);
+		*cs++ = i915_ggtt_offset(req->engine->scratch) + 256;
 		if (IS_GEN8(dev_priv)) {
-			intel_ring_emit(ring, 0);
-			intel_ring_emit(ring, MI_NOOP);
+			*cs++ = 0;
+			*cs++ = MI_NOOP;
 		}
 	}
 
-	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
-	intel_ring_emit(ring, fb->pitches[0] |
-			intel_fb_modifier_to_tiling(fb->modifier));
-	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
-	intel_ring_emit(ring, (MI_NOOP));
+	*cs++ = MI_DISPLAY_FLIP_I915 | plane_bit;
+	*cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier);
+	*cs++ = intel_crtc->flip_work->gtt_offset;
+	*cs++ = MI_NOOP;
 
 	return 0;
 }
......
@@ -834,6 +834,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_context *ce = &request->ctx->engine[engine->id];
+	u32 *cs;
 	int ret;
 
 	GEM_BUG_ON(!ce->pin_count);
@@ -858,9 +859,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 		goto err;
 	}
 
-	ret = intel_ring_begin(request, 0);
-	if (ret)
+	cs = intel_ring_begin(request, 0);
+	if (IS_ERR(cs)) {
+		ret = PTR_ERR(cs);
 		goto err_unreserve;
+	}
 
 	if (!ce->initialised) {
 		ret = engine->init_context(request);
@@ -889,9 +892,9 @@
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
-	int ret, i;
-	struct intel_ring *ring = req->ring;
 	struct i915_workarounds *w = &req->i915->workarounds;
+	u32 *cs;
+	int ret, i;
 
 	if (w->count == 0)
 		return 0;
@@ -900,18 +903,18 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(req, w->count * 2 + 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, w->count * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
+	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
 	for (i = 0; i < w->count; i++) {
-		intel_ring_emit_reg(ring, w->reg[i].addr);
-		intel_ring_emit(ring, w->reg[i].value);
+		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].value;
 	}
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
@@ -1404,27 +1407,27 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 {
 	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
-	struct intel_ring *ring = req->ring;
 	struct intel_engine_cs *engine = req->engine;
 	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
-	int i, ret;
+	u32 *cs;
+	int i;
 
-	ret = intel_ring_begin(req, num_lri_cmds * 2 + 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, num_lri_cmds * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds));
+	*cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 
-		intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i));
-		intel_ring_emit(ring, upper_32_bits(pd_daddr));
-		intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i));
-		intel_ring_emit(ring, lower_32_bits(pd_daddr));
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+		*cs++ = upper_32_bits(pd_daddr);
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+		*cs++ = lower_32_bits(pd_daddr);
 	}
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1433,8 +1436,8 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 			      u64 offset, u32 len,
 			      unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
 	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
+	u32 *cs;
 	int ret;
 
 	/* Don't rely in hw updating PDPs, specially in lite-restore.
@@ -1455,19 +1458,17 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 		req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
 	}
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* FIXME(BDW): Address space and security selectors. */
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 |
-			(ppgtt<<8) |
-			(dispatch_flags & I915_DISPATCH_RS ?
-			 MI_BATCH_RESOURCE_STREAMER : 0));
-	intel_ring_emit(ring, lower_32_bits(offset));
-	intel_ring_emit(ring, upper_32_bits(offset));
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
+		I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1488,13 +1489,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
 static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
 {
-	struct intel_ring *ring = request->ring;
-	u32 cmd;
-	int ret;
+	u32 cmd, *cs;
 
-	ret = intel_ring_begin(request, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(request, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW + 1;
 
@@ -1511,13 +1510,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
 			cmd |= MI_INVALIDATE_BSD;
 	}
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring,
-			I915_GEM_HWS_SCRATCH_ADDR |
-			MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0); /* upper addr */
-	intel_ring_emit(ring, 0); /* value */
-	intel_ring_advance(ring);
+	*cs++ = cmd;
+	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0; /* upper addr */
+	*cs++ = 0; /* value */
+	intel_ring_advance(request, cs);
 
 	return 0;
 }
@@ -1525,13 +1522,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
 static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 				  u32 mode)
 {
-	struct intel_ring *ring = request->ring;
 	struct intel_engine_cs *engine = request->engine;
 	u32 scratch_addr =
 		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false, dc_flush_wa = false;
-	u32 flags = 0;
-	int ret;
+	u32 *cs, flags = 0;
 	int len;
 
 	flags |= PIPE_CONTROL_CS_STALL;
@@ -1573,45 +1568,45 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	if (dc_flush_wa)
 		len += 12;
 
-	ret = intel_ring_begin(request, len);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(request, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	if (vf_flush_wa) {
-		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
+		*cs++ = GFX_OP_PIPE_CONTROL(6);
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
 	}
 
 	if (dc_flush_wa) {
-		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
+		*cs++ = GFX_OP_PIPE_CONTROL(6);
+		*cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
 	}
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = flags;
+	*cs++ = scratch_addr;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
 
 	if (dc_flush_wa) {
-		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(ring, PIPE_CONTROL_CS_STALL);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, 0);
+		*cs++ = GFX_OP_PIPE_CONTROL(6);
+		*cs++ = PIPE_CONTROL_CS_STALL;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
+		*cs++ = 0;
 	}
 
-	intel_ring_advance(ring);
+	intel_ring_advance(request, cs);
 
 	return 0;
 }
@@ -1621,34 +1616,33 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
  * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
-static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
 {
-	*out++ = MI_NOOP;
-	*out++ = MI_NOOP;
-	request->wa_tail = intel_ring_offset(request->ring, out);
+	*cs++ = MI_NOOP;
+	*cs++ = MI_NOOP;
+	request->wa_tail = intel_ring_offset(request, cs);
 }
 
-static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request,
-				 u32 *out)
+static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 {
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	*out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
-	*out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
-	*out++ = 0;
-	*out++ = request->global_seqno;
-	*out++ = MI_USER_INTERRUPT;
-	*out++ = MI_NOOP;
-	request->tail = intel_ring_offset(request->ring, out);
+	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0;
+	*cs++ = request->global_seqno;
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+	request->tail = intel_ring_offset(request, cs);
 
-	gen8_emit_wa_tail(request, out);
+	gen8_emit_wa_tail(request, cs);
 }
 
 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
 static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
-					u32 *out)
+					u32 *cs)
 {
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@@ -1657,20 +1651,19 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 	 * need a prior CS_STALL, which is emitted by the flush
 	 * following the batch.
	 */
-	*out++ = GFX_OP_PIPE_CONTROL(6);
-	*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
-		  PIPE_CONTROL_CS_STALL |
-		  PIPE_CONTROL_QW_WRITE);
-	*out++ = intel_hws_seqno_address(request->engine);
-	*out++ = 0;
-	*out++ = request->global_seqno;
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_QW_WRITE;
+	*cs++ = intel_hws_seqno_address(request->engine);
+	*cs++ = 0;
+	*cs++ = request->global_seqno;
 	/* We're thrashing one dword of HWS. */
-	*out++ = 0;
-	*out++ = MI_USER_INTERRUPT;
-	*out++ = MI_NOOP;
-	request->tail = intel_ring_offset(request->ring, out);
+	*cs++ = 0;
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+	request->tail = intel_ring_offset(request, cs);
 
-	gen8_emit_wa_tail(request, out);
+	gen8_emit_wa_tail(request, cs);
 }
 
 static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
......
@@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine)
 static int emit_mocs_control_table(struct drm_i915_gem_request *req,
 				   const struct drm_i915_mocs_table *table)
 {
-	struct intel_ring *ring = req->ring;
 	enum intel_engine_id engine = req->engine->id;
 	unsigned int index;
-	int ret;
+	u32 *cs;
 
 	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
 		return -ENODEV;
 
-	ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
 
 	for (index = 0; index < table->size; index++) {
-		intel_ring_emit_reg(ring, mocs_register(engine, index));
-		intel_ring_emit(ring, table->table[index].control_value);
+		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+		*cs++ = table->table[index].control_value;
 	}
 
 	/*
@@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req,
 	 * that value to all the used entries.
	 */
 	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
-		intel_ring_emit_reg(ring, mocs_register(engine, index));
-		intel_ring_emit(ring, table->table[0].control_value);
+		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+		*cs++ = table->table[0].control_value;
 	}
 
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
 static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
 				const struct drm_i915_mocs_table *table)
 {
-	struct intel_ring *ring = req->ring;
 	unsigned int i;
-	int ret;
+	u32 *cs;
 
 	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
 		return -ENODEV;
 
-	ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES);
	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring,
-			MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2));
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
 
 	for (i = 0; i < table->size/2; i++) {
-		intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
-		intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1));
+		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+		*cs++ = l3cc_combine(table, 2 * i, 2 * i + 1);
 	}
 
 	if (table->size & 0x01) {
 		/* Odd table size - 1 left over */
-		intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
-		intel_ring_emit(ring, l3cc_combine(table, 2*i, 0));
+		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+		*cs++ = l3cc_combine(table, 2 * i, 0);
 		i++;
 	}
 
@@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req,
 	 * they are reserved by the hardware.
	 */
 	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
-		intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i));
-		intel_ring_emit(ring, l3cc_combine(table, 0, 0));
+		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+		*cs++ = l3cc_combine(table, 0, 0);
 	}
 
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
......
@@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 {
 	struct drm_i915_private *dev_priv = overlay->i915;
 	struct drm_i915_gem_request *req;
-	struct intel_ring *ring;
-	int ret;
+	u32 *cs;
 
 	WARN_ON(overlay->active);
 	WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
@@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	ret = intel_ring_begin(req, 4);
-	if (ret) {
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs)) {
 		i915_add_request_no_flush(req);
-		return ret;
+		return PTR_ERR(cs);
 	}
 
 	overlay->active = true;
@@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	if (IS_I830(dev_priv))
 		i830_overlay_clock_gating(dev_priv, false);
 
-	ring = req->ring;
-	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
-	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON;
+	*cs++ = overlay->flip_addr | OFC_UPDATE;
+	*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return intel_overlay_do_wait_request(overlay, req, NULL);
 }
@@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 {
 	struct drm_i915_private *dev_priv = overlay->i915;
 	struct drm_i915_gem_request *req;
-	struct intel_ring *ring;
 	u32 flip_addr = overlay->flip_addr;
-	u32 tmp;
-	int ret;
+	u32 tmp, *cs;
 
 	WARN_ON(!overlay->active);
 
@@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	ret = intel_ring_begin(req, 2);
-	if (ret) {
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs)) {
 		i915_add_request_no_flush(req);
-		return ret;
+		return PTR_ERR(cs);
 	}
 
-	ring = req->ring;
-	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
-	intel_ring_emit(ring, flip_addr);
-	intel_ring_advance(ring);
+	*cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE;
+	*cs++ = flip_addr;
+	intel_ring_advance(req, cs);
 
 	intel_overlay_flip_prepare(overlay, vma);
 
@@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active,
 static int intel_overlay_off(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_request *req;
-	struct intel_ring *ring;
-	u32 flip_addr = overlay->flip_addr;
-	int ret;
+	u32 *cs, flip_addr = overlay->flip_addr;
 
 	WARN_ON(!overlay->active);
 
@@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	ret = intel_ring_begin(req, 6);
-	if (ret) {
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs)) {
 		i915_add_request_no_flush(req);
-		return ret;
+		return PTR_ERR(cs);
 	}
 
-	ring = req->ring;
-
 	/* wait for overlay to go idle */
-	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
-	intel_ring_emit(ring, flip_addr);
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+	*cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE;
+	*cs++ = flip_addr;
+	*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+
 	/* turn overlay off */
-	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
-	intel_ring_emit(ring, flip_addr);
-	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-	intel_ring_advance(ring);
+	*cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF;
+	*cs++ = flip_addr;
+	*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+	intel_ring_advance(req, cs);
 
 	intel_overlay_flip_prepare(overlay, NULL);
 
@@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
 	struct drm_i915_private *dev_priv = overlay->i915;
+	u32 *cs;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
 		/* synchronous slowpath */
 		struct drm_i915_gem_request *req;
-		struct intel_ring *ring;
 
 		req = alloc_request(overlay);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
 
-		ret = intel_ring_begin(req, 2);
-		if (ret) {
+		cs = intel_ring_begin(req, 2);
+		if (IS_ERR(cs)) {
 			i915_add_request_no_flush(req);
-			return ret;
+			return PTR_ERR(cs);
 		}
 
-		ring = req->ring;
-		intel_ring_emit(ring,
-				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(req, cs);
 
 		ret = intel_overlay_do_wait_request(overlay, req,
 						    intel_overlay_release_old_vid_tail);
......
...@@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring) ...@@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring)
static int static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{ {
struct intel_ring *ring = req->ring; u32 cmd, *cs;
u32 cmd;
int ret;
cmd = MI_FLUSH; cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) if (mode & EMIT_INVALIDATE)
cmd |= MI_READ_FLUSH; cmd |= MI_READ_FLUSH;
ret = intel_ring_begin(req, 2); cs = intel_ring_begin(req, 2);
if (ret) if (IS_ERR(cs))
return ret; return PTR_ERR(cs);
intel_ring_emit(ring, cmd); *cs++ = cmd;
intel_ring_emit(ring, MI_NOOP); *cs++ = MI_NOOP;
intel_ring_advance(ring); intel_ring_advance(req, cs);
return 0; return 0;
} }
...@@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) ...@@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
static int static int
gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{ {
struct intel_ring *ring = req->ring; u32 cmd, *cs;
u32 cmd;
int ret;
/* /*
* read/write caches: * read/write caches:
...@@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) ...@@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
cmd |= MI_INVALIDATE_ISP; cmd |= MI_INVALIDATE_ISP;
} }
ret = intel_ring_begin(req, 2); cs = intel_ring_begin(req, 2);
if (ret) if (IS_ERR(cs))
return ret; return PTR_ERR(cs);
intel_ring_emit(ring, cmd); *cs++ = cmd;
intel_ring_emit(ring, MI_NOOP); *cs++ = MI_NOOP;
intel_ring_advance(ring); intel_ring_advance(req, cs);
return 0; return 0;
} }
@@ -174,35 +170,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
 		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
-	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
-			PIPE_CONTROL_STALL_AT_SCOREBOARD);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0); /* low dword */
-	intel_ring_emit(ring, 0); /* high dword */
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(5);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0; /* low dword */
+	*cs++ = 0; /* high dword */
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
-	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(5);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -210,10 +204,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 static int
 gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
-	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
 		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
-	u32 flags = 0;
+	u32 *cs, flags = 0;
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
@@ -247,15 +240,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -263,20 +256,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 static int
 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring,
-			PIPE_CONTROL_CS_STALL |
-			PIPE_CONTROL_STALL_AT_SCOREBOARD);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = 0;
+	*cs++ = 0;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -284,11 +274,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 static int
 gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
-	struct intel_ring *ring = req->ring;
 	u32 scratch_addr =
 		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
-	u32 flags = 0;
-	int ret;
+	u32 *cs, flags = 0;
 
 	/*
 	 * Ensure that any following seqno writes only happen when the render
@@ -332,15 +320,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		gen7_render_ring_cs_stall_wa(req);
 	}
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = scratch_addr;
+	*cs++ = 0;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -349,20 +337,19 @@ static int
 gen8_emit_pipe_control(struct drm_i915_gem_request *req,
 		       u32 flags, u32 scratch_addr)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = flags;
+	*cs++ = scratch_addr;
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -659,8 +646,8 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
 	struct i915_workarounds *w = &req->i915->workarounds;
+	u32 *cs;
 	int ret, i;
 
 	if (w->count == 0)
@@ -670,18 +657,18 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(req, (w->count * 2 + 2));
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, (w->count * 2 + 2));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
+	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
 	for (i = 0; i < w->count; i++) {
-		intel_ring_emit_reg(ring, w->reg[i].addr);
-		intel_ring_emit(ring, w->reg[i].value);
+		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].value;
 	}
-	intel_ring_emit(ring, MI_NOOP);
+	*cs++ = MI_NOOP;
 
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 
 	ret = req->engine->emit_flush(req, EMIT_BARRIER);
 	if (ret)
@@ -1276,7 +1263,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
 	i915_vma_unpin_and_release(&dev_priv->semaphore);
 }
 
-static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out)
+static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *waiter;
@@ -1287,23 +1274,22 @@ static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out)
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		*out++ = GFX_OP_PIPE_CONTROL(6);
-		*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
-			  PIPE_CONTROL_QW_WRITE |
-			  PIPE_CONTROL_CS_STALL);
-		*out++ = lower_32_bits(gtt_offset);
-		*out++ = upper_32_bits(gtt_offset);
-		*out++ = req->global_seqno;
-		*out++ = 0;
-		*out++ = (MI_SEMAPHORE_SIGNAL |
-			  MI_SEMAPHORE_TARGET(waiter->hw_id));
-		*out++ = 0;
+		*cs++ = GFX_OP_PIPE_CONTROL(6);
+		*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
+			PIPE_CONTROL_CS_STALL;
+		*cs++ = lower_32_bits(gtt_offset);
+		*cs++ = upper_32_bits(gtt_offset);
+		*cs++ = req->global_seqno;
+		*cs++ = 0;
+		*cs++ = MI_SEMAPHORE_SIGNAL |
+			MI_SEMAPHORE_TARGET(waiter->hw_id);
+		*cs++ = 0;
 	}
 
-	return out;
+	return cs;
 }
 
-static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out)
+static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *waiter;
@@ -1314,19 +1300,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out)
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		*out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
-		*out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
-		*out++ = upper_32_bits(gtt_offset);
-		*out++ = req->global_seqno;
-		*out++ = (MI_SEMAPHORE_SIGNAL |
-			  MI_SEMAPHORE_TARGET(waiter->hw_id));
-		*out++ = 0;
+		*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+		*cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
+		*cs++ = upper_32_bits(gtt_offset);
+		*cs++ = req->global_seqno;
+		*cs++ = MI_SEMAPHORE_SIGNAL |
+			MI_SEMAPHORE_TARGET(waiter->hw_id);
+		*cs++ = 0;
 	}
 
-	return out;
+	return cs;
 }
 
-static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out)
+static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *engine;
@@ -1341,16 +1327,16 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out)
 		mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
 		if (i915_mmio_reg_valid(mbox_reg)) {
-			*out++ = MI_LOAD_REGISTER_IMM(1);
-			*out++ = i915_mmio_reg_offset(mbox_reg);
-			*out++ = req->global_seqno;
+			*cs++ = MI_LOAD_REGISTER_IMM(1);
+			*cs++ = i915_mmio_reg_offset(mbox_reg);
+			*cs++ = req->global_seqno;
 			num_rings++;
 		}
 	}
 	if (num_rings & 1)
-		*out++ = MI_NOOP;
+		*cs++ = MI_NOOP;
 
-	return out;
+	return cs;
 }
 
 static void i9xx_submit_request(struct drm_i915_gem_request *request)
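The semaphore signal hooks show what the pointer-passing style buys in composability: an emitter receives the current write pointer and returns the advanced one, so stages chain by threading `cs` through. A hedged sketch of a helper in the same style (emit_noops() is hypothetical, not part of this patch):

```c
/* Sketch only: an emitter that composes like gen6_signal() above. */
static u32 *emit_noops(u32 *cs, int count)
{
	while (count--)
		*cs++ = MI_NOOP;

	return cs;	/* the caller keeps writing from here */
}
```

gen6_sema_emit_breadcrumb() below chains exactly this way, feeding the pointer returned by semaphore.signal() straight into i9xx_emit_breadcrumb().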
@@ -1362,15 +1348,14 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 	I915_WRITE_TAIL(request->engine, request->tail);
 }
 
-static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req,
-				 u32 *out)
+static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 {
-	*out++ = MI_STORE_DWORD_INDEX;
-	*out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
-	*out++ = req->global_seqno;
-	*out++ = MI_USER_INTERRUPT;
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+	*cs++ = req->global_seqno;
+	*cs++ = MI_USER_INTERRUPT;
 
-	req->tail = intel_ring_offset(req->ring, out);
+	req->tail = intel_ring_offset(req, cs);
 }
 
 static const int i9xx_emit_breadcrumb_sz = 4;
@@ -1383,34 +1368,32 @@ static const int i9xx_emit_breadcrumb_sz = 4;
  * Update the mailbox registers in the *other* rings with the current seqno.
  * This acts like a signal in the canonical semaphore.
  */
-static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req,
-				      u32 *out)
+static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 {
 	return i9xx_emit_breadcrumb(req,
-				    req->engine->semaphore.signal(req, out));
+				    req->engine->semaphore.signal(req, cs));
 }
 
 static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
-					u32 *out)
+					u32 *cs)
 {
 	struct intel_engine_cs *engine = req->engine;
 
 	if (engine->semaphore.signal)
-		out = engine->semaphore.signal(req, out);
+		cs = engine->semaphore.signal(req, cs);
 
-	*out++ = GFX_OP_PIPE_CONTROL(6);
-	*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
-		  PIPE_CONTROL_CS_STALL |
-		  PIPE_CONTROL_QW_WRITE);
-	*out++ = intel_hws_seqno_address(engine);
-	*out++ = 0;
-	*out++ = req->global_seqno;
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_QW_WRITE;
+	*cs++ = intel_hws_seqno_address(engine);
+	*cs++ = 0;
+	*cs++ = req->global_seqno;
 	/* We're thrashing one dword of HWS. */
-	*out++ = 0;
-	*out++ = MI_USER_INTERRUPT;
-	*out++ = MI_NOOP;
+	*cs++ = 0;
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
-	req->tail = intel_ring_offset(req->ring, out);
+	req->tail = intel_ring_offset(req, cs);
 }
 
 static const int gen8_render_emit_breadcrumb_sz = 8;
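Breadcrumb emitters no longer call intel_ring_begin() themselves (their space is reserved up front via the matching emit_breadcrumb_sz constant), so they must record where the request ends; intel_ring_offset() converts the advanced pointer back into a ring-relative tail. A minimal sketch (the emitter is hypothetical; its size constant would be 2):

```c
/* Sketch only: a two-dword breadcrumb in the new style. */
static void emit_breadcrumb_sketch(struct drm_i915_gem_request *req, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;	/* signal request completion */
	*cs++ = MI_NOOP;		/* pad to the declared size */

	/* record the postfix so the tail can later be written to HW */
	req->tail = intel_ring_offset(req, cs);
}

static const int emit_breadcrumb_sketch_sz = 2;	/* dwords reserved above */
```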
@@ -1427,24 +1410,21 @@ static int
 gen8_ring_sync_to(struct drm_i915_gem_request *req,
 		  struct drm_i915_gem_request *signal)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = req->i915;
 	u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
 	struct i915_hw_ppgtt *ppgtt;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_WAIT |
-			MI_SEMAPHORE_GLOBAL_GTT |
-			MI_SEMAPHORE_SAD_GTE_SDD);
-	intel_ring_emit(ring, signal->global_seqno);
-	intel_ring_emit(ring, lower_32_bits(offset));
-	intel_ring_emit(ring, upper_32_bits(offset));
-	intel_ring_advance(ring);
+	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_SAD_GTE_SDD;
+	*cs++ = signal->global_seqno;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	intel_ring_advance(req, cs);
 
 	/* When the !RCS engines idle waiting upon a semaphore, they lose their
 	 * pagetables and we must reload them before executing the batch.
@@ -1461,28 +1441,27 @@ static int
 gen6_ring_sync_to(struct drm_i915_gem_request *req,
 		  struct drm_i915_gem_request *signal)
 {
-	struct intel_ring *ring = req->ring;
 	u32 dw1 = MI_SEMAPHORE_MBOX |
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
 	u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
-	int ret;
+	u32 *cs;
 
 	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, dw1 | wait_mbox);
+	*cs++ = dw1 | wait_mbox;
 	/* Throughout all of the GEM code, seqno passed implies our current
 	 * seqno is >= the last seqno executed. However for hardware the
 	 * comparison is strictly greater than.
 	 */
-	intel_ring_emit(ring, signal->global_seqno - 1);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = signal->global_seqno - 1;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1583,16 +1562,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine)
 static int
 bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_FLUSH);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_FLUSH;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 	return 0;
 }
@@ -1658,20 +1636,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 length,
 		   unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring,
-			MI_BATCH_BUFFER_START |
-			MI_BATCH_GTT |
-			(dispatch_flags & I915_DISPATCH_SECURE ?
-			 0 : MI_BATCH_NON_SECURE_I965));
-	intel_ring_emit(ring, offset);
-	intel_ring_advance(ring);
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
+		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
+	*cs++ = offset;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1685,59 +1659,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
-	u32 cs_offset = i915_ggtt_offset(req->engine->scratch);
-	int ret;
+	u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch);
 
-	ret = intel_ring_begin(req, 6);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* Evict the invalid PTE TLBs */
-	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
-	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
-	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
-	intel_ring_emit(ring, cs_offset);
-	intel_ring_emit(ring, 0xdeadbeef);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
+	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
+	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
+	*cs++ = cs_offset;
+	*cs++ = 0xdeadbeef;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
 
-		ret = intel_ring_begin(req, 6 + 2);
-		if (ret)
-			return ret;
+		cs = intel_ring_begin(req, 6 + 2);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
 
 		/* Blit the batch (which has now all relocs applied) to the
 		 * stable batch scratch bo area (so that the CS never
 		 * stumbles over its tlb invalidation bug) ...
 		 */
-		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
-		intel_ring_emit(ring,
-				BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
-		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
-		intel_ring_emit(ring, cs_offset);
-		intel_ring_emit(ring, 4096);
-		intel_ring_emit(ring, offset);
+		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
+		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
+		*cs++ = cs_offset;
+		*cs++ = 4096;
+		*cs++ = offset;
 
-		intel_ring_emit(ring, MI_FLUSH);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		*cs++ = MI_FLUSH;
+		*cs++ = MI_NOOP;
+		intel_ring_advance(req, cs);
 
 		/* ... and execute it. */
 		offset = cs_offset;
 	}
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
-					0 : MI_BATCH_NON_SECURE));
-	intel_ring_advance(ring);
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
+		MI_BATCH_NON_SECURE);
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -1747,17 +1718,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
-					0 : MI_BATCH_NON_SECURE));
-	intel_ring_advance(ring);
+	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
+		MI_BATCH_NON_SECURE);
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -2165,7 +2135,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 static int ring_request_alloc(struct drm_i915_gem_request *request)
 {
-	int ret;
+	u32 *cs;
 
 	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
@@ -2178,9 +2148,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!request->engine->buffer);
 	request->ring = request->engine->buffer;
 
-	ret = intel_ring_begin(request, 0);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(request, 0);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	request->reserved_space -= LEGACY_REQUEST_SIZE;
 	return 0;
@@ -2235,7 +2205,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	return 0;
 }
 
-int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
+u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
 	int remain_actual = ring->size - ring->tail;
@@ -2243,6 +2213,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
+	u32 *cs;
 
 	total_bytes = bytes + req->reserved_space;
 
@@ -2269,7 +2240,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 	if (wait_bytes > ring->space) {
 		int ret = wait_for_space(req, wait_bytes);
 		if (unlikely(ret))
-			return ret;
+			return ERR_PTR(ret);
 	}
 
 	if (unlikely(need_wrap)) {
@@ -2282,32 +2253,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 		ring->space -= remain_actual;
 	}
 
+	GEM_BUG_ON(ring->tail > ring->size - bytes);
+	cs = ring->vaddr + ring->tail;
+	ring->tail += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
-	GEM_DEBUG_EXEC(ring->advance = ring->tail + bytes);
 
-	return 0;
+	return cs;
 }
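Since intel_ring_begin() now returns a pointer, errors travel through the standard ERR_PTR encoding from <linux/err.h>: a negative errno is packed into the top of the pointer range, IS_ERR() detects it, and PTR_ERR() recovers it, as the wait_for_space() path above shows. A self-contained sketch of the convention (struct example_ring and reserve_dwords() are hypothetical stand-ins, not i915 code):

```c
#include <linux/err.h>
#include <linux/types.h>

struct example_ring {			/* hypothetical, for illustration only */
	void *vaddr;
	int tail;
	int space;
};

static u32 *reserve_dwords(struct example_ring *ring, int n)
{
	int bytes = n * sizeof(u32);

	if (bytes > ring->space)
		return ERR_PTR(-ENOSPC);	/* errno encoded in the pointer */

	ring->space -= bytes;
	ring->tail += bytes;
	return (u32 *)(ring->vaddr + ring->tail - bytes);
}
```

A caller then writes `cs = reserve_dwords(...); if (IS_ERR(cs)) return PTR_ERR(cs);`, which is the shape every converted function in this patch takes.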
 /* Align the ring tail to a cacheline boundary */
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
-	struct intel_ring *ring = req->ring;
 	int num_dwords =
-		(ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
-	int ret;
+		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+	u32 *cs;
 
 	if (num_dwords == 0)
 		return 0;
 
 	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
-	ret = intel_ring_begin(req, num_dwords);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, num_dwords);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	while (num_dwords--)
-		intel_ring_emit(ring, MI_NOOP);
+		*cs++ = MI_NOOP;
 
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -2351,13 +2324,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
-	struct intel_ring *ring = req->ring;
-	uint32_t cmd;
-	int ret;
+	u32 cmd, *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
 	if (INTEL_GEN(req->i915) >= 8)
@@ -2379,16 +2350,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
 
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+	*cs++ = cmd;
+	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
 	if (INTEL_GEN(req->i915) >= 8) {
-		intel_ring_emit(ring, 0); /* upper addr */
-		intel_ring_emit(ring, 0); /* value */
+		*cs++ = 0; /* upper addr */
+		*cs++ = 0; /* value */
 	} else  {
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, MI_NOOP);
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
 	}
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 	return 0;
 }
@@ -2397,23 +2368,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
 	bool ppgtt = USES_PPGTT(req->i915) &&
 			!(dispatch_flags & I915_DISPATCH_SECURE);
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	/* FIXME(BDW): Address space and security selectors. */
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
-			(dispatch_flags & I915_DISPATCH_RS ?
-			 MI_BATCH_RESOURCE_STREAMER : 0));
-	intel_ring_emit(ring, lower_32_bits(offset));
-	intel_ring_emit(ring, upper_32_bits(offset));
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
+		I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -2423,22 +2392,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req,
 		  u64 offset, u32 len,
 		  unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring,
-			MI_BATCH_BUFFER_START |
-			(dispatch_flags & I915_DISPATCH_SECURE ?
-			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
-			(dispatch_flags & I915_DISPATCH_RS ?
-			 MI_BATCH_RESOURCE_STREAMER : 0));
+	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
+		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+		(dispatch_flags & I915_DISPATCH_RS ?
+		MI_BATCH_RESOURCE_STREAMER : 0);
 	/* bit0-7 is the length on GEN6+ */
-	intel_ring_emit(ring, offset);
-	intel_ring_advance(ring);
+	*cs++ = offset;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -2448,20 +2414,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
+	u32 *cs;
 
-	ret = intel_ring_begin(req, 2);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
-	intel_ring_emit(ring,
-			MI_BATCH_BUFFER_START |
-			(dispatch_flags & I915_DISPATCH_SECURE ?
-			 0 : MI_BATCH_NON_SECURE_I965));
+	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
+		0 : MI_BATCH_NON_SECURE_I965);
 	/* bit0-7 is the length on GEN6+ */
-	intel_ring_emit(ring, offset);
-	intel_ring_advance(ring);
+	*cs++ = offset;
+	intel_ring_advance(req, cs);
 
 	return 0;
 }
@@ -2470,13 +2433,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req,
 static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
-	struct intel_ring *ring = req->ring;
-	uint32_t cmd;
-	int ret;
+	u32 cmd, *cs;
 
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
 	if (INTEL_GEN(req->i915) >= 8)
@@ -2497,17 +2458,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_INVALIDATE_TLB;
-	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring,
-			I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+	*cs++ = cmd;
+	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
 	if (INTEL_GEN(req->i915) >= 8) {
-		intel_ring_emit(ring, 0); /* upper addr */
-		intel_ring_emit(ring, 0); /* value */
+		*cs++ = 0; /* upper addr */
+		*cs++ = 0; /* value */
 	} else  {
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, MI_NOOP);
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
 	}
-	intel_ring_advance(ring);
+	intel_ring_advance(req, cs);
 	return 0;
 }
...
@@ -145,7 +145,6 @@ struct intel_ring {
 	u32 head;
 	u32 tail;
-	GEM_DEBUG_DECL(u32 advance);
 
 	int space;
 	int size;
@@ -292,7 +291,7 @@ struct intel_engine_cs {
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
 	void		(*emit_breadcrumb)(struct drm_i915_gem_request *req,
-					   u32 *out);
+					   u32 *cs);
 	int		emit_breadcrumb_sz;
 
 	/* Pass the request to the hardware queue (e.g. directly into
@@ -375,7 +374,7 @@ struct intel_engine_cs {
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
 				   struct drm_i915_gem_request *signal);
-		u32	*(*signal)(struct drm_i915_gem_request *req, u32 *out);
+		u32	*(*signal)(struct drm_i915_gem_request *req, u32 *cs);
 	} semaphore;
 
 	/* Execlists */
@@ -497,21 +496,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
 void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
-int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n);
 
-static inline void intel_ring_emit(struct intel_ring *ring, u32 data)
-{
-	*(uint32_t *)(ring->vaddr + ring->tail) = data;
-	ring->tail += 4;
-}
-
-static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg)
-{
-	intel_ring_emit(ring, i915_mmio_reg_offset(reg));
-}
-
-static inline void intel_ring_advance(struct intel_ring *ring)
+static inline void
+intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 {
 	/* Dummy function.
 	 *
@@ -521,14 +511,16 @@ static inline void intel_ring_advance(struct intel_ring *ring)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_DEBUG_BUG_ON(ring->tail != ring->advance);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
 }
 
-static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
+static inline u32
+intel_ring_offset(struct drm_i915_gem_request *req, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	u32 offset = addr - ring->vaddr;
-	return offset & (ring->size - 1);
+	u32 offset = addr - req->ring->vaddr;
+	GEM_BUG_ON(offset > req->ring->size);
+	return offset & (req->ring->size - 1);
 }
 
 int __intel_ring_space(int head, int tail, int size);
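One detail worth spelling out: the mask in intel_ring_offset() relies on the ring size being a power of two, so a pointer that has advanced exactly to the end of the buffer records as offset 0 rather than the size itself (which, per the comment above, hangs some GPUs). A worked example under that assumption:

```c
#include <linux/types.h>

/* Sketch only: the wrap behaviour of the offset mask. */
static u32 ring_wrap_example(void)
{
	const u32 size = 4096;		/* assumed ring size, power of two */
	u32 offset = 4096;		/* write pointer hit the very end */

	return offset & (size - 1);	/* 4096 & 4095 == 0, never 4096 */
}
```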
...
@@ -186,6 +186,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct drm_i915_gem_request *rq;
 	struct i915_vma *vma;
+	u32 *cs;
 	int err;
 
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -202,30 +203,30 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 		return PTR_ERR(rq);
 	}
 
-	err = intel_ring_begin(rq, 4);
-	if (err) {
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
 		__i915_add_request(rq, false);
 		i915_vma_unpin(vma);
-		return err;
+		return PTR_ERR(cs);
 	}
 
 	if (INTEL_GEN(i915) >= 8) {
-		intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22);
-		intel_ring_emit(rq->ring, lower_32_bits(i915_ggtt_offset(vma) + offset));
-		intel_ring_emit(rq->ring, upper_32_bits(i915_ggtt_offset(vma) + offset));
-		intel_ring_emit(rq->ring, v);
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
+		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
+		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
+		*cs++ = v;
 	} else if (INTEL_GEN(i915) >= 4) {
-		intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22);
-		intel_ring_emit(rq->ring, 0);
-		intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset);
-		intel_ring_emit(rq->ring, v);
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
+		*cs++ = 0;
+		*cs++ = i915_ggtt_offset(vma) + offset;
+		*cs++ = v;
 	} else {
-		intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM | 1 << 22);
-		intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset);
-		intel_ring_emit(rq->ring, v);
-		intel_ring_emit(rq->ring, MI_NOOP);
+		*cs++ = MI_STORE_DWORD_IMM | 1 << 22;
+		*cs++ = i915_ggtt_offset(vma) + offset;
+		*cs++ = v;
+		*cs++ = MI_NOOP;
 	}
-	intel_ring_advance(rq->ring);
+	intel_ring_advance(rq, cs);
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
...