Commit 8fde4107 authored by Chris Wilson, committed by Zhenyu Wang

drm/i915/gvt: Wean gvt off dev_priv->engine[]

Stop trying to escape out of the gvt layer to find the engine that we
initially setup for use with gvt. Record the engines during initialisation
and use them henceforth.

add/remove: 1/4 grow/shrink: 22/28 up/down: 341/-1410 (-1069)

[Zhenyu: rebase, fix nonpriv register check fault, fix gvt engine
thread run failure.]

Cc: Ding Zhuocheng <zhuocheng.ding@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200304032307.2983-2-zhenyuw@linux.intel.com
parent aa444fc7
This diff is collapsed.
......@@ -39,8 +39,7 @@
#define _EL_OFFSET_STATUS_BUF 0x370
#define _EL_OFFSET_STATUS_PTR 0x3A0
#define execlist_ring_mmio(gvt, ring_id, offset) \
(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))
#define execlist_ring_mmio(e, offset) ((e)->mmio_base + (offset))
#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
......@@ -54,12 +53,12 @@ static int context_switch_events[] = {
[VECS0] = VECS_AS_CONTEXT_SWITCH,
};
static int ring_id_to_context_switch_event(unsigned int ring_id)
static int to_context_switch_event(const struct intel_engine_cs *engine)
{
if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
if (WARN_ON(engine->id >= ARRAY_SIZE(context_switch_events)))
return -EINVAL;
return context_switch_events[ring_id];
return context_switch_events[engine->id];
}
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
......@@ -93,9 +92,8 @@ static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
struct execlist_ctx_descriptor_format *desc = execlist->running_context;
struct intel_vgpu *vgpu = execlist->vgpu;
struct execlist_status_format status;
int ring_id = execlist->ring_id;
u32 status_reg = execlist_ring_mmio(vgpu->gvt,
ring_id, _EL_OFFSET_STATUS);
u32 status_reg =
execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
status.ldw = vgpu_vreg(vgpu, status_reg);
status.udw = vgpu_vreg(vgpu, status_reg + 4);
......@@ -124,21 +122,19 @@ static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
}
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
struct execlist_context_status_format *status,
bool trigger_interrupt_later)
struct execlist_context_status_format *status,
bool trigger_interrupt_later)
{
struct intel_vgpu *vgpu = execlist->vgpu;
int ring_id = execlist->ring_id;
struct execlist_context_status_pointer_format ctx_status_ptr;
u32 write_pointer;
u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
unsigned long hwsp_gpa;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
_EL_OFFSET_STATUS_PTR);
ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
_EL_OFFSET_STATUS_BUF);
ctx_status_ptr_reg =
execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_PTR);
ctx_status_buf_reg =
execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_BUF);
ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
......@@ -161,26 +157,24 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
/* Update the CSB and CSB write pointer in HWSP */
hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
vgpu->hws_pga[ring_id]);
vgpu->hws_pga[execlist->engine->id]);
if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
intel_gvt_hypervisor_write_gpa(vgpu,
hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
write_pointer * 8,
status, 8);
hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8,
status, 8);
intel_gvt_hypervisor_write_gpa(vgpu,
hwsp_gpa +
intel_hws_csb_write_index(dev_priv) * 4,
&write_pointer, 4);
hwsp_gpa + intel_hws_csb_write_index(execlist->engine->i915) * 4,
&write_pointer, 4);
}
gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
vgpu->id, write_pointer, offset, status->ldw, status->udw);
vgpu->id, write_pointer, offset, status->ldw, status->udw);
if (trigger_interrupt_later)
return;
intel_vgpu_trigger_virtual_event(vgpu,
ring_id_to_context_switch_event(execlist->ring_id));
to_context_switch_event(execlist->engine));
}
static int emulate_execlist_ctx_schedule_out(
......@@ -261,9 +255,8 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
struct intel_vgpu_execlist *execlist)
{
struct intel_vgpu *vgpu = execlist->vgpu;
int ring_id = execlist->ring_id;
u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
_EL_OFFSET_STATUS);
u32 status_reg =
execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
struct execlist_status_format status;
status.ldw = vgpu_vreg(vgpu, status_reg);
......@@ -379,7 +372,6 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
struct execlist_ctx_descriptor_format ctx[2];
int ring_id = workload->ring_id;
int ret;
if (!workload->emulate_schedule_in)
......@@ -388,7 +380,8 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
ret = emulate_execlist_schedule_in(&s->execlist[workload->engine->id],
ctx);
if (ret) {
gvt_vgpu_err("fail to emulate execlist schedule in\n");
return ret;
......@@ -399,21 +392,21 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
int ring_id = workload->ring_id;
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
struct intel_vgpu_execlist *execlist =
&s->execlist[workload->engine->id];
struct intel_vgpu_workload *next_workload;
struct list_head *next = workload_q_head(vgpu, ring_id)->next;
struct list_head *next = workload_q_head(vgpu, workload->engine)->next;
bool lite_restore = false;
int ret = 0;
gvt_dbg_el("complete workload %p status %d\n", workload,
workload->status);
gvt_dbg_el("complete workload %p status %d\n",
workload, workload->status);
if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
if (workload->status || vgpu->resetting_eng & workload->engine->mask)
goto out;
if (!list_empty(workload_q_head(vgpu, ring_id))) {
if (!list_empty(workload_q_head(vgpu, workload->engine))) {
struct execlist_ctx_descriptor_format *this_desc, *next_desc;
next_workload = container_of(next,
......@@ -436,14 +429,15 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
return ret;
}
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
struct execlist_ctx_descriptor_format *desc,
bool emulate_schedule_in)
static int submit_context(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine,
struct execlist_ctx_descriptor_format *desc,
bool emulate_schedule_in)
{
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_vgpu_workload *workload = NULL;
workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
workload = intel_vgpu_create_workload(vgpu, engine, desc);
if (IS_ERR(workload))
return PTR_ERR(workload);
......@@ -452,19 +446,20 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
workload->emulate_schedule_in = emulate_schedule_in;
if (emulate_schedule_in)
workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;
workload->elsp_dwords = s->execlist[engine->id].elsp_dwords;
gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
emulate_schedule_in);
emulate_schedule_in);
intel_vgpu_queue_workload(workload);
return 0;
}
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine)
{
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
struct execlist_ctx_descriptor_format *desc[2];
int i, ret;
......@@ -489,7 +484,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
for (i = 0; i < ARRAY_SIZE(desc); i++) {
if (!desc[i]->valid)
continue;
ret = submit_context(vgpu, ring_id, desc[i], i == 0);
ret = submit_context(vgpu, engine, desc[i], i == 0);
if (ret) {
gvt_vgpu_err("failed to submit desc %d\n", i);
return ret;
......@@ -504,22 +499,22 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
return -EINVAL;
}
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
static void init_vgpu_execlist(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine)
{
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
struct execlist_context_status_pointer_format ctx_status_ptr;
u32 ctx_status_ptr_reg;
memset(execlist, 0, sizeof(*execlist));
execlist->vgpu = vgpu;
execlist->ring_id = ring_id;
execlist->engine = engine;
execlist->slot[0].index = 0;
execlist->slot[1].index = 1;
ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
_EL_OFFSET_STATUS_PTR);
ctx_status_ptr_reg = execlist_ring_mmio(engine, _EL_OFFSET_STATUS_PTR);
ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
ctx_status_ptr.read_ptr = 0;
ctx_status_ptr.write_ptr = 0x7;
......@@ -549,7 +544,7 @@ static void reset_execlist(struct intel_vgpu *vgpu,
intel_engine_mask_t tmp;
for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
init_vgpu_execlist(vgpu, engine->id);
init_vgpu_execlist(vgpu, engine);
}
static int init_execlist(struct intel_vgpu *vgpu,
......
......@@ -170,16 +170,17 @@ struct intel_vgpu_execlist {
struct intel_vgpu_execlist_slot *running_slot;
struct intel_vgpu_execlist_slot *pending_slot;
struct execlist_ctx_descriptor_format *running_context;
int ring_id;
struct intel_vgpu *vgpu;
struct intel_vgpu_elsp_dwords elsp_dwords;
const struct intel_engine_cs *engine;
};
void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu);
int intel_vgpu_init_execlist(struct intel_vgpu *vgpu);
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id);
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine);
void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
intel_engine_mask_t engine_mask);
......
......@@ -318,6 +318,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
mutex_init(&gvt->lock);
mutex_init(&gvt->sched_lock);
gvt->dev_priv = dev_priv;
dev_priv->gvt = gvt;
init_device_info(gvt);
......@@ -376,7 +377,6 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
intel_gvt_debugfs_init(gvt);
gvt_dbg_core("gvt device initialization is done\n");
dev_priv->gvt = gvt;
intel_gvt_host.dev = &dev_priv->drm.pdev->dev;
intel_gvt_host.initialized = true;
return 0;
......@@ -402,6 +402,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
out_clean_idr:
idr_destroy(&gvt->vgpu_idr);
kfree(gvt);
dev_priv->gvt = NULL;
return ret;
}
......
......@@ -142,25 +142,25 @@ static int new_mmio_info(struct intel_gvt *gvt,
}
/**
* intel_gvt_render_mmio_to_ring_id - convert a mmio offset into ring id
* intel_gvt_render_mmio_to_engine - convert a mmio offset into the engine
* @gvt: a GVT device
* @offset: register offset
*
* Returns:
* Ring ID on success, negative error code if failed.
* The engine containing the offset within its mmio page.
*/
int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt,
unsigned int offset)
const struct intel_engine_cs *
intel_gvt_render_mmio_to_engine(struct intel_gvt *gvt, unsigned int offset)
{
enum intel_engine_id id;
struct intel_engine_cs *engine;
offset &= ~GENMASK(11, 0);
for_each_engine(engine, gvt->dev_priv, id) {
for_each_engine(engine, gvt->dev_priv, id)
if (engine->mmio_base == offset)
return id;
}
return -ENODEV;
return engine;
return NULL;
}
#define offset_to_fence_num(offset) \
......@@ -492,7 +492,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
};
/* a simple bsearch */
static inline bool in_whitelist(unsigned int reg)
static inline bool in_whitelist(u32 reg)
{
int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list);
i915_reg_t *array = force_nonpriv_white_list;
......@@ -514,26 +514,21 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2);
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
u32 ring_base;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
int ret = -EINVAL;
const struct intel_engine_cs *engine =
intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) {
gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n",
vgpu->id, ring_id, offset, bytes);
return ret;
if (bytes != 4 || !IS_ALIGNED(offset, bytes) || !engine) {
gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n",
vgpu->id, offset, bytes);
return -EINVAL;
}
ring_base = dev_priv->engine[ring_id]->mmio_base;
if (in_whitelist(reg_nonpriv) ||
reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) {
ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data,
bytes);
} else
if (!in_whitelist(reg_nonpriv) &&
reg_nonpriv != i915_mmio_reg_offset(RING_NOPID(engine->mmio_base))) {
gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n",
vgpu->id, *(u32 *)p_data, offset);
vgpu->id, reg_nonpriv, offset);
} else
intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes);
return 0;
}
......@@ -1484,7 +1479,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
u32 value = *(u32 *)p_data;
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
const struct intel_engine_cs *engine =
intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
......@@ -1496,12 +1492,12 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
* update the VM CSB status correctly. Here listed registers can
* support BDW, SKL or other platforms with same HWSP registers.
*/
if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) {
if (unlikely(!engine)) {
gvt_vgpu_err("access unknown hardware status page register:0x%x\n",
offset);
return -EINVAL;
}
vgpu->hws_pga[ring_id] = value;
vgpu->hws_pga[engine->id] = value;
gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n",
vgpu->id, value, offset);
......@@ -1661,22 +1657,20 @@ static int mmio_read_from_hw(struct intel_vgpu *vgpu,
{
struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_private *dev_priv = gvt->dev_priv;
int ring_id;
u32 ring_base;
const struct intel_engine_cs *engine =
intel_gvt_render_mmio_to_engine(gvt, offset);
ring_id = intel_gvt_render_mmio_to_ring_id(gvt, offset);
/**
* Read HW reg in following case
* a. the offset isn't a ring mmio
* b. the offset's ring is running on hw.
* c. the offset is ring time stamp mmio
*/
if (ring_id >= 0)
ring_base = dev_priv->engine[ring_id]->mmio_base;
if (ring_id < 0 || vgpu == gvt->scheduler.engine_owner[ring_id] ||
offset == i915_mmio_reg_offset(RING_TIMESTAMP(ring_base)) ||
offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(ring_base))) {
if (!engine ||
vgpu == gvt->scheduler.engine_owner[engine->id] ||
offset == i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base)) ||
offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(engine->mmio_base))) {
mmio_hw_access_pre(dev_priv);
vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset));
mmio_hw_access_post(dev_priv);
......@@ -1689,22 +1683,22 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
struct drm_i915_private *i915 = vgpu->gvt->dev_priv;
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
const struct intel_engine_cs *engine = intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
struct intel_vgpu_execlist *execlist;
u32 data = *(u32 *)p_data;
int ret = 0;
if (drm_WARN_ON(&i915->drm, ring_id < 0 || ring_id >= I915_NUM_ENGINES))
if (drm_WARN_ON(&i915->drm, !engine))
return -EINVAL;
execlist = &vgpu->submission.execlist[ring_id];
execlist = &vgpu->submission.execlist[engine->id];
execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
if (execlist->elsp_dwords.index == 3) {
ret = intel_vgpu_submit_execlist(vgpu, ring_id);
ret = intel_vgpu_submit_execlist(vgpu, engine);
if(ret)
gvt_vgpu_err("fail submit workload on ring %d\n",
ring_id);
gvt_vgpu_err("fail submit workload on ring %s\n",
engine->name);
}
++execlist->elsp_dwords.index;
......@@ -1716,7 +1710,8 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes)
{
u32 data = *(u32 *)p_data;
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
const struct intel_engine_cs *engine =
intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
bool enable_execlist;
int ret;
......@@ -1750,16 +1745,16 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
|| (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
gvt_dbg_core("EXECLIST %s on ring %d\n",
(enable_execlist ? "enabling" : "disabling"),
ring_id);
gvt_dbg_core("EXECLIST %s on ring %s\n",
(enable_execlist ? "enabling" : "disabling"),
engine->name);
if (!enable_execlist)
return 0;
ret = intel_vgpu_select_submission_ops(vgpu,
BIT(ring_id),
INTEL_VGPU_EXECLIST_SUBMISSION);
engine->mask,
INTEL_VGPU_EXECLIST_SUBMISSION);
if (ret)
return ret;
......
......@@ -69,8 +69,8 @@ struct intel_gvt_mmio_info {
struct hlist_node node;
};
int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt,
unsigned int reg);
const struct intel_engine_cs *
intel_gvt_render_mmio_to_engine(struct intel_gvt *gvt, unsigned int reg);
unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt);
bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device);
......
......@@ -214,13 +214,11 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
*cs++ = MI_LOAD_REGISTER_IMM(count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
if (mmio->id != ring_id || !mmio->in_context)
continue;
*cs++ = i915_mmio_reg_offset(mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) | (mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
*(cs-2), *(cs-1), vgpu->id, ring_id);
}
......@@ -344,10 +342,10 @@ static u32 gen8_tlb_mmio_offset_list[] = {
[VECS0] = 0x4270,
};
static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
static void handle_tlb_pending_event(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_uncore *uncore = &dev_priv->uncore;
struct intel_uncore *uncore = engine->uncore;
struct intel_vgpu_submission *s = &vgpu->submission;
u32 *regs = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list;
u32 cnt = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list_cnt;
......@@ -357,13 +355,13 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
if (!regs)
return;
if (drm_WARN_ON(&dev_priv->drm, ring_id >= cnt))
if (drm_WARN_ON(&engine->i915->drm, engine->id >= cnt))
return;
if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending))
if (!test_and_clear_bit(engine->id, (void *)s->tlb_handle_pending))
return;
reg = _MMIO(regs[ring_id]);
reg = _MMIO(regs[engine->id]);
/* WaForceWakeRenderDuringMmioTLBInvalidate:skl
* we need to put a forcewake when invalidating RCS TLB caches,
......@@ -372,30 +370,27 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
*/
fw = intel_uncore_forcewake_for_reg(uncore, reg,
FW_REG_READ | FW_REG_WRITE);
if (ring_id == RCS0 && INTEL_GEN(dev_priv) >= 9)
if (engine->id == RCS0 && INTEL_GEN(engine->i915) >= 9)
fw |= FORCEWAKE_RENDER;
intel_uncore_forcewake_get(uncore, fw);
intel_uncore_write_fw(uncore, reg, 0x1);
if (wait_for_atomic((intel_uncore_read_fw(uncore, reg) == 0), 50))
gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
if (wait_for_atomic(intel_uncore_read_fw(uncore, reg) == 0, 50))
gvt_vgpu_err("timeout in invalidate ring %s tlb\n",
engine->name);
else
vgpu_vreg_t(vgpu, reg) = 0;
intel_uncore_forcewake_put(uncore, fw);
gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
gvt_dbg_core("invalidate TLB for ring %s\n", engine->name);
}
static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
int ring_id)
const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv;
i915_reg_t offset, l3_offset;
u32 old_v, new_v;
u32 regs[] = {
[RCS0] = 0xc800,
[VCS0] = 0xc900,
......@@ -403,36 +398,38 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
[BCS0] = 0xcc00,
[VECS0] = 0xcb00,
};
struct intel_uncore *uncore = engine->uncore;
i915_reg_t offset, l3_offset;
u32 old_v, new_v;
int i;
dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (drm_WARN_ON(&dev_priv->drm, ring_id >= ARRAY_SIZE(regs)))
if (drm_WARN_ON(&engine->i915->drm, engine->id >= ARRAY_SIZE(regs)))
return;
if (ring_id == RCS0 && IS_GEN(dev_priv, 9))
if (engine->id == RCS0 && IS_GEN(engine->i915, 9))
return;
if (!pre && !gen9_render_mocs.initialized)
load_render_mocs(dev_priv);
load_render_mocs(engine->i915);
offset.reg = regs[ring_id];
offset.reg = regs[engine->id];
for (i = 0; i < GEN9_MOCS_SIZE; i++) {
if (pre)
old_v = vgpu_vreg_t(pre, offset);
else
old_v = gen9_render_mocs.control_table[ring_id][i];
old_v = gen9_render_mocs.control_table[engine->id][i];
if (next)
new_v = vgpu_vreg_t(next, offset);
else
new_v = gen9_render_mocs.control_table[ring_id][i];
new_v = gen9_render_mocs.control_table[engine->id][i];
if (old_v != new_v)
I915_WRITE_FW(offset, new_v);
intel_uncore_write_fw(uncore, offset, new_v);
offset.reg += 4;
}
if (ring_id == RCS0) {
if (engine->id == RCS0) {
l3_offset.reg = 0xb020;
for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
if (pre)
......@@ -445,7 +442,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
new_v = gen9_render_mocs.l3cc_table[i];
if (old_v != new_v)
I915_WRITE_FW(l3_offset, new_v);
intel_uncore_write_fw(uncore, l3_offset, new_v);
l3_offset.reg += 4;
}
......@@ -467,38 +464,40 @@ bool is_inhibit_context(struct intel_context *ce)
/* Switch ring mmio values (context). */
static void switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next,
int ring_id)
const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv;
struct intel_uncore *uncore = engine->uncore;
struct intel_vgpu_submission *s;
struct engine_mmio *mmio;
u32 old_v, new_v;
dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (INTEL_GEN(dev_priv) >= 9)
switch_mocs(pre, next, ring_id);
if (INTEL_GEN(engine->i915) >= 9)
switch_mocs(pre, next, engine);
for (mmio = dev_priv->gvt->engine_mmio_list.mmio;
for (mmio = engine->i915->gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id)
if (mmio->id != engine->id)
continue;
/*
* No need to do save or restore of the mmio which is in context
* state image on gen9, it's initialized by lri command and
* save or restore with context together.
*/
if (IS_GEN(dev_priv, 9) && mmio->in_context)
if (IS_GEN(engine->i915, 9) && mmio->in_context)
continue;
// save
if (pre) {
vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg);
vgpu_vreg_t(pre, mmio->reg) =
intel_uncore_read_fw(uncore, mmio->reg);
if (mmio->mask)
vgpu_vreg_t(pre, mmio->reg) &=
~(mmio->mask << 16);
~(mmio->mask << 16);
old_v = vgpu_vreg_t(pre, mmio->reg);
} else
old_v = mmio->value = I915_READ_FW(mmio->reg);
} else {
old_v = mmio->value =
intel_uncore_read_fw(uncore, mmio->reg);
}
// restore
if (next) {
......@@ -509,12 +508,12 @@ static void switch_mmio(struct intel_vgpu *pre,
* itself.
*/
if (mmio->in_context &&
!is_inhibit_context(s->shadow[ring_id]))
!is_inhibit_context(s->shadow[engine->id]))
continue;
if (mmio->mask)
new_v = vgpu_vreg_t(next, mmio->reg) |
(mmio->mask << 16);
(mmio->mask << 16);
else
new_v = vgpu_vreg_t(next, mmio->reg);
} else {
......@@ -526,7 +525,7 @@ static void switch_mmio(struct intel_vgpu *pre,
new_v = mmio->value;
}
I915_WRITE_FW(mmio->reg, new_v);
intel_uncore_write_fw(uncore, mmio->reg, new_v);
trace_render_mmio(pre ? pre->id : 0,
next ? next->id : 0,
......@@ -536,39 +535,37 @@ static void switch_mmio(struct intel_vgpu *pre,
}
if (next)
handle_tlb_pending_event(next, ring_id);
handle_tlb_pending_event(next, engine);
}
/**
* intel_gvt_switch_mmio - switch mmio context of specific engine
* @pre: the last vGPU that owned the engine
* @next: the vGPU to switch to
* @ring_id: specify the engine
* @engine: the engine
*
* If pre is null, it indicates that the host owns the engine. If next is null,
* it indicates that we are switching to host workload.
*/
void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id)
struct intel_vgpu *next,
const struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv;
if (WARN(!pre && !next, "switch ring %d from host to HOST\n", ring_id))
if (WARN(!pre && !next, "switch ring %s from host to HOST\n",
engine->name))
return;
gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
gvt_dbg_render("switch ring %s from %s to %s\n", engine->name,
pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
/**
* We are using raw mmio access wrapper to improve the
* performance for batch mmio read/write, so we need to
* handle forcewake manually.
*/
intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
switch_mmio(pre, next, engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
}
/**
......@@ -595,7 +592,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->in_context) {
gvt->engine_mmio_list.ctx_mmio_count[mmio->ring_id]++;
gvt->engine_mmio_list.ctx_mmio_count[mmio->id]++;
intel_gvt_mmio_set_in_ctx(gvt, mmio->reg.reg);
}
}
......
......@@ -37,7 +37,7 @@
#define __GVT_RENDER_H__
struct engine_mmio {
int ring_id;
enum intel_engine_id id;
i915_reg_t reg;
u32 mask;
bool in_context;
......@@ -45,7 +45,8 @@ struct engine_mmio {
};
void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id);
struct intel_vgpu *next,
const struct intel_engine_cs *engine);
void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
......
......@@ -40,7 +40,7 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
struct intel_engine_cs *engine;
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
if (!list_empty(workload_q_head(vgpu, i)))
if (!list_empty(workload_q_head(vgpu, engine)))
return true;
}
......@@ -444,9 +444,10 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
struct intel_gvt_workload_scheduler *scheduler =
&vgpu->gvt->scheduler;
int ring_id;
struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_engine_cs *engine;
enum intel_engine_id id;
if (!vgpu_data->active)
return;
......@@ -467,10 +468,10 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
intel_runtime_pm_get(&dev_priv->runtime_pm);
spin_lock_bh(&scheduler->mmio_context_lock);
for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
if (scheduler->engine_owner[ring_id] == vgpu) {
intel_gvt_switch_mmio(vgpu, NULL, ring_id);
scheduler->engine_owner[ring_id] = NULL;
for_each_engine(engine, &vgpu->gvt->dev_priv->gt, id) {
if (scheduler->engine_owner[engine->id] == vgpu) {
intel_gvt_switch_mmio(vgpu, NULL, engine);
scheduler->engine_owner[engine->id] = NULL;
}
}
spin_unlock_bh(&scheduler->mmio_context_lock);
......
This diff is collapsed.
......@@ -79,7 +79,7 @@ struct intel_shadow_wa_ctx {
struct intel_vgpu_workload {
struct intel_vgpu *vgpu;
int ring_id;
const struct intel_engine_cs *engine;
struct i915_request *req;
/* if this workload has been dispatched to i915? */
bool dispatched;
......@@ -129,8 +129,8 @@ struct intel_vgpu_shadow_bb {
bool ppgtt;
};
#define workload_q_head(vgpu, ring_id) \
(&(vgpu->submission.workload_q_head[ring_id]))
#define workload_q_head(vgpu, e) \
(&(vgpu)->submission.workload_q_head[(e)->id])
void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload);
......@@ -155,7 +155,8 @@ extern const struct intel_vgpu_submission_ops
intel_vgpu_execlist_submission_ops;
struct intel_vgpu_workload *
intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
intel_vgpu_create_workload(struct intel_vgpu *vgpu,
const struct intel_engine_cs *engine,
struct execlist_ctx_descriptor_format *desc);
void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment