Commit 7dfb9ba3 authored by Jani Nikula's avatar Jani Nikula

Merge tag 'gvt-next-2017-06-08' of https://github.com/01org/gvt-linux into drm-intel-next-queued

gvt-next-2017-06-08

First gvt-next pull for 4.13:
- optimization for per-VM mmio save/restore (Changbin)
- optimization for mmio hash table (Changbin)
- scheduler optimization with event (Ping)
- vGPU reset refinement (Fred)
- other misc refactor and cleanups, etc.
Signed-off-by: default avatarJani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170608093547.bjgs436e3iokrzdm@zhen-hp.sh.intel.com
parents 9a30a261 615c16a9
...@@ -3,6 +3,6 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ ...@@ -3,6 +3,6 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \
execlist.o scheduler.o sched_policy.o render.o cmd_parser.o execlist.o scheduler.o sched_policy.o render.o cmd_parser.o
ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall ccflags-y += -I$(src) -I$(src)/$(GVT_DIR)
i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o
...@@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e) ...@@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
hash_add(gvt->cmd_table, &e->hlist, e->info->opcode); hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
} }
#define GVT_MAX_CMD_LENGTH 20 /* In Dword */
static void trace_cs_command(struct parser_exec_state *s,
cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
{
/* This buffer is used by ftrace to store all commands copied from
* guest gma space. Sometimes commands can cross pages, this should
* not be handled in ftrace logic. So this is just used as a
* 'bounce buffer'
*/
u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
int i;
u32 cmd_len = cmd_length(s);
/* The chosen value of GVT_MAX_CMD_LENGTH are just based on
* following two considerations:
* 1) From observation, most common ring commands is not that long.
* But there are execeptions. So it indeed makes sence to observe
* longer commands.
* 2) From the performance and debugging point of view, dumping all
* contents of very commands is not necessary.
* We mgith shrink GVT_MAX_CMD_LENGTH or remove this trace event in
* future for performance considerations.
*/
if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
cmd_len = GVT_MAX_CMD_LENGTH;
}
for (i = 0; i < cmd_len; i++)
cmd_trace_buf[i] = cmd_val(s, i);
trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
cost_pre_cmd_handler, cost_cmd_handler);
}
/* call the cmd handler, and advance ip */ /* call the cmd handler, and advance ip */
static int cmd_parser_exec(struct parser_exec_state *s) static int cmd_parser_exec(struct parser_exec_state *s)
{ {
struct intel_vgpu *vgpu = s->vgpu;
struct cmd_info *info; struct cmd_info *info;
u32 cmd; u32 cmd;
int ret = 0; int ret = 0;
cycles_t t0, t1, t2;
struct parser_exec_state s_before_advance_custom;
struct intel_vgpu *vgpu = s->vgpu;
t0 = get_cycles();
cmd = cmd_val(s, 0); cmd = cmd_val(s, 0);
...@@ -2471,13 +2431,10 @@ static int cmd_parser_exec(struct parser_exec_state *s) ...@@ -2471,13 +2431,10 @@ static int cmd_parser_exec(struct parser_exec_state *s)
return -EINVAL; return -EINVAL;
} }
gvt_dbg_cmd("%s\n", info->name);
s->info = info; s->info = info;
t1 = get_cycles(); trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
cmd_length(s), s->buf_type);
s_before_advance_custom = *s;
if (info->handler) { if (info->handler) {
ret = info->handler(s); ret = info->handler(s);
...@@ -2486,9 +2443,6 @@ static int cmd_parser_exec(struct parser_exec_state *s) ...@@ -2486,9 +2443,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
return ret; return ret;
} }
} }
t2 = get_cycles();
trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);
if (!(info->flag & F_IP_ADVANCE_CUSTOM)) { if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
ret = cmd_advance_default(s); ret = cmd_advance_default(s);
...@@ -2522,8 +2476,6 @@ static int command_scan(struct parser_exec_state *s, ...@@ -2522,8 +2476,6 @@ static int command_scan(struct parser_exec_state *s,
gma_tail = rb_start + rb_tail; gma_tail = rb_start + rb_tail;
gma_bottom = rb_start + rb_len; gma_bottom = rb_start + rb_len;
gvt_dbg_cmd("scan_start: start=%lx end=%lx\n", gma_head, gma_tail);
while (s->ip_gma != gma_tail) { while (s->ip_gma != gma_tail) {
if (s->buf_type == RING_BUFFER_INSTRUCTION) { if (s->buf_type == RING_BUFFER_INSTRUCTION) {
if (!(s->ip_gma >= rb_start) || if (!(s->ip_gma >= rb_start) ||
...@@ -2552,8 +2504,6 @@ static int command_scan(struct parser_exec_state *s, ...@@ -2552,8 +2504,6 @@ static int command_scan(struct parser_exec_state *s,
} }
} }
gvt_dbg_cmd("scan_end\n");
return ret; return ret;
} }
......
...@@ -708,53 +708,43 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, ...@@ -708,53 +708,43 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{ {
struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
struct execlist_ctx_descriptor_format *desc[2], valid_desc[2]; struct execlist_ctx_descriptor_format desc[2];
unsigned long valid_desc_bitmap = 0; int i, ret;
bool emulate_schedule_in = true;
int ret;
int i;
memset(valid_desc, 0, sizeof(valid_desc)); desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); if (!desc[0].valid) {
desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
goto inv_desc;
}
for (i = 0; i < 2; i++) { for (i = 0; i < ARRAY_SIZE(desc); i++) {
if (!desc[i]->valid) if (!desc[i].valid)
continue; continue;
if (!desc[i].privilege_access) {
if (!desc[i]->privilege_access) {
gvt_vgpu_err("unexpected GGTT elsp submission\n"); gvt_vgpu_err("unexpected GGTT elsp submission\n");
return -EINVAL; goto inv_desc;
} }
/* TODO: add another guest context checks here. */
set_bit(i, &valid_desc_bitmap);
valid_desc[i] = *desc[i];
}
if (!valid_desc_bitmap) {
gvt_vgpu_err("no valid desc in a elsp submission\n");
return -EINVAL;
}
if (!test_bit(0, (void *)&valid_desc_bitmap) &&
test_bit(1, (void *)&valid_desc_bitmap)) {
gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n");
return -EINVAL;
} }
/* submit workload */ /* submit workload */
for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) { for (i = 0; i < ARRAY_SIZE(desc); i++) {
ret = submit_context(vgpu, ring_id, &valid_desc[i], if (!desc[i].valid)
emulate_schedule_in); continue;
ret = submit_context(vgpu, ring_id, &desc[i], i == 0);
if (ret) { if (ret) {
gvt_vgpu_err("fail to schedule workload\n"); gvt_vgpu_err("failed to submit desc %d\n", i);
return ret; return ret;
} }
emulate_schedule_in = false;
} }
return 0; return 0;
inv_desc:
gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw);
return -EINVAL;
} }
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
......
...@@ -102,13 +102,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) ...@@ -102,13 +102,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
p = firmware + h->mmio_offset; p = firmware + h->mmio_offset;
hash_for_each(gvt->mmio.mmio_info_table, i, e, node) { hash_for_each(gvt->mmio.mmio_info_table, i, e, node)
int j; *(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset));
for (j = 0; j < e->length; j += 4)
*(u32 *)(p + e->offset + j) =
I915_READ_NOTRACE(_MMIO(e->offset + j));
}
memcpy(gvt->firmware.mmio, p, info->mmio_size); memcpy(gvt->firmware.mmio, p, info->mmio_size);
......
...@@ -244,15 +244,19 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) ...@@ -244,15 +244,19 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
return readq(addr); return readq(addr);
} }
static void gtt_invalidate(struct drm_i915_private *dev_priv)
{
mmio_hw_access_pre(dev_priv);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
mmio_hw_access_post(dev_priv);
}
static void write_pte64(struct drm_i915_private *dev_priv, static void write_pte64(struct drm_i915_private *dev_priv,
unsigned long index, u64 pte) unsigned long index, u64 pte)
{ {
void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
writeq(pte, addr); writeq(pte, addr);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
} }
static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt, static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
...@@ -1849,6 +1853,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ...@@ -1849,6 +1853,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
} }
ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
gtt_invalidate(gvt->dev_priv);
ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
return 0; return 0;
} }
...@@ -2301,8 +2306,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) ...@@ -2301,8 +2306,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
u32 num_entries; u32 num_entries;
struct intel_gvt_gtt_entry e; struct intel_gvt_gtt_entry e;
intel_runtime_pm_get(dev_priv);
memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
e.type = GTT_TYPE_GGTT_PTE; e.type = GTT_TYPE_GGTT_PTE;
ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
...@@ -2318,7 +2321,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) ...@@ -2318,7 +2321,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
for (offset = 0; offset < num_entries; offset++) for (offset = 0; offset < num_entries; offset++)
ops->set_entry(NULL, &e, index + offset, false, 0, vgpu); ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
intel_runtime_pm_put(dev_priv); gtt_invalidate(dev_priv);
} }
/** /**
......
...@@ -147,7 +147,9 @@ static int gvt_service_thread(void *data) ...@@ -147,7 +147,9 @@ static int gvt_service_thread(void *data)
mutex_unlock(&gvt->lock); mutex_unlock(&gvt->lock);
} }
if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, if (test_bit(INTEL_GVT_REQUEST_SCHED,
(void *)&gvt->service_request) ||
test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
(void *)&gvt->service_request)) { (void *)&gvt->service_request)) {
intel_gvt_schedule(gvt); intel_gvt_schedule(gvt);
} }
...@@ -244,7 +246,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) ...@@ -244,7 +246,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
gvt_dbg_core("init gvt device\n"); gvt_dbg_core("init gvt device\n");
idr_init(&gvt->vgpu_idr); idr_init(&gvt->vgpu_idr);
spin_lock_init(&gvt->scheduler.mmio_context_lock);
mutex_init(&gvt->lock); mutex_init(&gvt->lock);
gvt->dev_priv = dev_priv; gvt->dev_priv = dev_priv;
......
...@@ -165,7 +165,6 @@ struct intel_vgpu { ...@@ -165,7 +165,6 @@ struct intel_vgpu {
struct list_head workload_q_head[I915_NUM_ENGINES]; struct list_head workload_q_head[I915_NUM_ENGINES];
struct kmem_cache *workloads; struct kmem_cache *workloads;
atomic_t running_workload_num; atomic_t running_workload_num;
ktime_t last_ctx_submit_time;
DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
struct i915_gem_context *shadow_ctx; struct i915_gem_context *shadow_ctx;
...@@ -196,11 +195,27 @@ struct intel_gvt_fence { ...@@ -196,11 +195,27 @@ struct intel_gvt_fence {
unsigned long vgpu_allocated_fence_num; unsigned long vgpu_allocated_fence_num;
}; };
#define INTEL_GVT_MMIO_HASH_BITS 9 #define INTEL_GVT_MMIO_HASH_BITS 11
struct intel_gvt_mmio { struct intel_gvt_mmio {
u32 *mmio_attribute; u8 *mmio_attribute;
/* Register contains RO bits */
#define F_RO (1 << 0)
/* Register contains graphics address */
#define F_GMADR (1 << 1)
/* Mode mask registers with high 16 bits as the mask bits */
#define F_MODE_MASK (1 << 2)
/* This reg can be accessed by GPU commands */
#define F_CMD_ACCESS (1 << 3)
/* This reg has been accessed by a VM */
#define F_ACCESSED (1 << 4)
/* This reg has been accessed through GPU commands */
#define F_CMD_ACCESSED (1 << 5)
/* This reg could be accessed by unaligned address */
#define F_UNALIGN (1 << 6)
DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS);
unsigned int num_tracked_mmio;
}; };
struct intel_gvt_firmware { struct intel_gvt_firmware {
...@@ -257,7 +272,12 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) ...@@ -257,7 +272,12 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)
enum { enum {
INTEL_GVT_REQUEST_EMULATE_VBLANK = 0, INTEL_GVT_REQUEST_EMULATE_VBLANK = 0,
/* Scheduling trigger by timer */
INTEL_GVT_REQUEST_SCHED = 1, INTEL_GVT_REQUEST_SCHED = 1,
/* Scheduling trigger by event */
INTEL_GVT_REQUEST_EVENT_SCHED = 2,
}; };
static inline void intel_gvt_request_service(struct intel_gvt *gvt, static inline void intel_gvt_request_service(struct intel_gvt *gvt,
...@@ -473,6 +493,80 @@ enum { ...@@ -473,6 +493,80 @@ enum {
GVT_FAILSAFE_INSUFFICIENT_RESOURCE, GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
}; };
static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
{
intel_runtime_pm_get(dev_priv);
}
static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
{
intel_runtime_pm_put(dev_priv);
}
/**
* intel_gvt_mmio_set_accessed - mark a MMIO has been accessed
* @gvt: a GVT device
* @offset: register offset
*
*/
static inline void intel_gvt_mmio_set_accessed(
struct intel_gvt *gvt, unsigned int offset)
{
gvt->mmio.mmio_attribute[offset >> 2] |= F_ACCESSED;
}
/**
* intel_gvt_mmio_is_cmd_accessed - mark a MMIO could be accessed by command
* @gvt: a GVT device
* @offset: register offset
*
*/
static inline bool intel_gvt_mmio_is_cmd_access(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS;
}
/**
* intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned
* @gvt: a GVT device
* @offset: register offset
*
*/
static inline bool intel_gvt_mmio_is_unalign(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN;
}
/**
* intel_gvt_mmio_set_cmd_accessed - mark a MMIO has been accessed by command
* @gvt: a GVT device
* @offset: register offset
*
*/
static inline void intel_gvt_mmio_set_cmd_accessed(
struct intel_gvt *gvt, unsigned int offset)
{
gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESSED;
}
/**
* intel_gvt_mmio_has_mode_mask - if a MMIO has a mode mask
* @gvt: a GVT device
* @offset: register offset
*
* Returns:
* True if a MMIO has a mode mask in its higher 16 bits, false if it isn't.
*
*/
static inline bool intel_gvt_mmio_has_mode_mask(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK;
}
#include "trace.h"
#include "mpt.h" #include "mpt.h"
#endif #endif
This diff is collapsed.
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "gvt.h" #include "gvt.h"
#include "trace.h"
/* common offset among interrupt control registers */ /* common offset among interrupt control registers */
#define regbase_to_isr(base) (base) #define regbase_to_isr(base) (base)
...@@ -178,8 +179,8 @@ int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu, ...@@ -178,8 +179,8 @@ int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu,
struct intel_gvt_irq_ops *ops = gvt->irq.ops; struct intel_gvt_irq_ops *ops = gvt->irq.ops;
u32 imr = *(u32 *)p_data; u32 imr = *(u32 *)p_data;
gvt_dbg_irq("write IMR %x, new %08x, old %08x, changed %08x\n", trace_write_ir(vgpu->id, "IMR", reg, imr, vgpu_vreg(vgpu, reg),
reg, imr, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ imr); (vgpu_vreg(vgpu, reg) ^ imr));
vgpu_vreg(vgpu, reg) = imr; vgpu_vreg(vgpu, reg) = imr;
...@@ -209,8 +210,8 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu, ...@@ -209,8 +210,8 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
u32 ier = *(u32 *)p_data; u32 ier = *(u32 *)p_data;
u32 virtual_ier = vgpu_vreg(vgpu, reg); u32 virtual_ier = vgpu_vreg(vgpu, reg);
gvt_dbg_irq("write MASTER_IRQ %x, new %08x, old %08x, changed %08x\n", trace_write_ir(vgpu->id, "MASTER_IRQ", reg, ier, virtual_ier,
reg, ier, virtual_ier, virtual_ier ^ ier); (virtual_ier ^ ier));
/* /*
* GEN8_MASTER_IRQ is a special irq register, * GEN8_MASTER_IRQ is a special irq register,
...@@ -248,8 +249,8 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu, ...@@ -248,8 +249,8 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
struct intel_gvt_irq_info *info; struct intel_gvt_irq_info *info;
u32 ier = *(u32 *)p_data; u32 ier = *(u32 *)p_data;
gvt_dbg_irq("write IER %x, new %08x, old %08x, changed %08x\n", trace_write_ir(vgpu->id, "IER", reg, ier, vgpu_vreg(vgpu, reg),
reg, ier, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ ier); (vgpu_vreg(vgpu, reg) ^ ier));
vgpu_vreg(vgpu, reg) = ier; vgpu_vreg(vgpu, reg) = ier;
...@@ -285,8 +286,8 @@ int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg, ...@@ -285,8 +286,8 @@ int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg,
iir_to_regbase(reg)); iir_to_regbase(reg));
u32 iir = *(u32 *)p_data; u32 iir = *(u32 *)p_data;
gvt_dbg_irq("write IIR %x, new %08x, old %08x, changed %08x\n", trace_write_ir(vgpu->id, "IIR", reg, iir, vgpu_vreg(vgpu, reg),
reg, iir, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ iir); (vgpu_vreg(vgpu, reg) ^ iir));
if (WARN_ON(!info)) if (WARN_ON(!info))
return -EINVAL; return -EINVAL;
...@@ -411,8 +412,7 @@ static void propagate_event(struct intel_gvt_irq *irq, ...@@ -411,8 +412,7 @@ static void propagate_event(struct intel_gvt_irq *irq,
if (!test_bit(bit, (void *)&vgpu_vreg(vgpu, if (!test_bit(bit, (void *)&vgpu_vreg(vgpu,
regbase_to_imr(reg_base)))) { regbase_to_imr(reg_base)))) {
gvt_dbg_irq("set bit (%d) for (%s) for vgpu (%d)\n", trace_propagate_event(vgpu->id, irq_name[event], bit);
bit, irq_name[event], vgpu->id);
set_bit(bit, (void *)&vgpu_vreg(vgpu, set_bit(bit, (void *)&vgpu_vreg(vgpu,
regbase_to_iir(reg_base))); regbase_to_iir(reg_base)));
} }
......
...@@ -123,7 +123,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ...@@ -123,7 +123,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
void *p_data, unsigned int bytes) void *p_data, unsigned int bytes)
{ {
struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt *gvt = vgpu->gvt;
struct intel_gvt_mmio_info *mmio;
unsigned int offset = 0; unsigned int offset = 0;
int ret = -EINVAL; int ret = -EINVAL;
...@@ -187,32 +186,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ...@@ -187,32 +186,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
goto err; goto err;
} }
mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, true);
if (mmio) { if (ret < 0)
if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) {
if (WARN_ON(offset + bytes > mmio->offset + mmio->size))
goto err;
if (WARN_ON(mmio->offset != offset))
goto err;
}
ret = mmio->read(vgpu, offset, p_data, bytes);
} else {
ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
if (!vgpu->mmio.disable_warn_untrack) {
gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n",
offset, bytes, *(u32 *)p_data);
if (offset == 0x206c) {
gvt_vgpu_err("------------------------------------------\n");
gvt_vgpu_err("likely triggers a gfx reset\n");
gvt_vgpu_err("------------------------------------------\n");
vgpu->mmio.disable_warn_untrack = true;
}
}
}
if (ret)
goto err; goto err;
intel_gvt_mmio_set_accessed(gvt, offset); intel_gvt_mmio_set_accessed(gvt, offset);
...@@ -239,9 +214,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, ...@@ -239,9 +214,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
void *p_data, unsigned int bytes) void *p_data, unsigned int bytes)
{ {
struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt *gvt = vgpu->gvt;
struct intel_gvt_mmio_info *mmio;
unsigned int offset = 0; unsigned int offset = 0;
u32 old_vreg = 0, old_sreg = 0;
int ret = -EINVAL; int ret = -EINVAL;
if (vgpu->failsafe) { if (vgpu->failsafe) {
...@@ -296,66 +269,10 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, ...@@ -296,66 +269,10 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
return ret; return ret;
} }
mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false);
if (!mmio && !vgpu->mmio.disable_warn_untrack) if (ret < 0)
gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n",
vgpu->id, offset, bytes, *(u32 *)p_data);
if (!intel_gvt_mmio_is_unalign(gvt, offset)) {
if (WARN_ON(!IS_ALIGNED(offset, bytes)))
goto err;
}
if (mmio) {
u64 ro_mask = mmio->ro_mask;
if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) {
if (WARN_ON(offset + bytes > mmio->offset + mmio->size))
goto err;
if (WARN_ON(mmio->offset != offset))
goto err;
}
if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) {
old_vreg = vgpu_vreg(vgpu, offset);
old_sreg = vgpu_sreg(vgpu, offset);
}
if (!ro_mask) {
ret = mmio->write(vgpu, offset, p_data, bytes);
} else {
/* Protect RO bits like HW */
u64 data = 0;
/* all register bits are RO. */
if (ro_mask == ~(u64)0) {
gvt_vgpu_err("try to write RO reg %x\n",
offset);
ret = 0;
goto out;
}
/* keep the RO bits in the virtual register */
memcpy(&data, p_data, bytes);
data &= ~mmio->ro_mask;
data |= vgpu_vreg(vgpu, offset) & mmio->ro_mask;
ret = mmio->write(vgpu, offset, &data, bytes);
}
/* higher 16bits of mode ctl regs are mask bits for change */
if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) {
u32 mask = vgpu_vreg(vgpu, offset) >> 16;
vgpu_vreg(vgpu, offset) = (old_vreg & ~mask)
| (vgpu_vreg(vgpu, offset) & mask);
vgpu_sreg(vgpu, offset) = (old_sreg & ~mask)
| (vgpu_sreg(vgpu, offset) & mask);
}
} else
ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data,
bytes);
if (ret)
goto err; goto err;
out:
intel_gvt_mmio_set_accessed(gvt, offset); intel_gvt_mmio_set_accessed(gvt, offset);
mutex_unlock(&gvt->lock); mutex_unlock(&gvt->lock);
return 0; return 0;
...@@ -372,20 +289,32 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, ...@@ -372,20 +289,32 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
* @vgpu: a vGPU * @vgpu: a vGPU
* *
*/ */
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
{ {
struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt *gvt = vgpu->gvt;
const struct intel_gvt_device_info *info = &gvt->device_info; const struct intel_gvt_device_info *info = &gvt->device_info;
void *mmio = gvt->firmware.mmio;
if (dmlr) {
memcpy(vgpu->mmio.vreg, mmio, info->mmio_size);
memcpy(vgpu->mmio.sreg, mmio, info->mmio_size);
memcpy(vgpu->mmio.vreg, gvt->firmware.mmio, info->mmio_size); vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
memcpy(vgpu->mmio.sreg, gvt->firmware.mmio, info->mmio_size);
vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; /* set the bit 0:2(Core C-State ) to C0 */
vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
/* set the bit 0:2(Core C-State ) to C0 */ vgpu->mmio.disable_warn_untrack = false;
vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; } else {
#define GVT_GEN8_MMIO_RESET_OFFSET (0x44200)
/* only reset the engine related, so starting with 0x44200
* interrupt include DE,display mmio related will not be
* touched
*/
memcpy(vgpu->mmio.vreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET);
memcpy(vgpu->mmio.sreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET);
}
vgpu->mmio.disable_warn_untrack = false;
} }
/** /**
...@@ -405,7 +334,7 @@ int intel_vgpu_init_mmio(struct intel_vgpu *vgpu) ...@@ -405,7 +334,7 @@ int intel_vgpu_init_mmio(struct intel_vgpu *vgpu)
vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size; vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size;
intel_vgpu_reset_mmio(vgpu); intel_vgpu_reset_mmio(vgpu, true);
return 0; return 0;
} }
......
...@@ -39,36 +39,28 @@ ...@@ -39,36 +39,28 @@
struct intel_gvt; struct intel_gvt;
struct intel_vgpu; struct intel_vgpu;
#define D_SNB (1 << 0) #define D_BDW (1 << 0)
#define D_IVB (1 << 1) #define D_SKL (1 << 1)
#define D_HSW (1 << 2) #define D_KBL (1 << 2)
#define D_BDW (1 << 3)
#define D_SKL (1 << 4)
#define D_KBL (1 << 5)
#define D_GEN9PLUS (D_SKL | D_KBL) #define D_GEN9PLUS (D_SKL | D_KBL)
#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) #define D_GEN8PLUS (D_BDW | D_SKL | D_KBL)
#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_SKL_PLUS (D_SKL | D_KBL) #define D_SKL_PLUS (D_SKL | D_KBL)
#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) #define D_BDW_PLUS (D_BDW | D_SKL | D_KBL)
#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_PRE_BDW (D_SNB | D_IVB | D_HSW) #define D_PRE_SKL (D_BDW)
#define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW) #define D_ALL (D_BDW | D_SKL | D_KBL)
#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *,
unsigned int);
struct intel_gvt_mmio_info { struct intel_gvt_mmio_info {
u32 offset; u32 offset;
u32 size;
u32 length;
u32 addr_mask;
u64 ro_mask; u64 ro_mask;
u32 device; u32 device;
int (*read)(struct intel_vgpu *, unsigned int, void *, unsigned int); gvt_mmio_func read;
int (*write)(struct intel_vgpu *, unsigned int, void *, unsigned int); gvt_mmio_func write;
u32 addr_range; u32 addr_range;
struct hlist_node node; struct hlist_node node;
}; };
...@@ -79,8 +71,6 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); ...@@ -79,8 +71,6 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device);
int intel_gvt_setup_mmio_info(struct intel_gvt *gvt); int intel_gvt_setup_mmio_info(struct intel_gvt *gvt);
void intel_gvt_clean_mmio_info(struct intel_gvt *gvt); void intel_gvt_clean_mmio_info(struct intel_gvt *gvt);
struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
unsigned int offset);
#define INTEL_GVT_MMIO_OFFSET(reg) ({ \ #define INTEL_GVT_MMIO_OFFSET(reg) ({ \
typeof(reg) __reg = reg; \ typeof(reg) __reg = reg; \
u32 *offset = (u32 *)&__reg; \ u32 *offset = (u32 *)&__reg; \
...@@ -88,7 +78,7 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, ...@@ -88,7 +78,7 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
}) })
int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu); void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa);
...@@ -97,13 +87,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, ...@@ -97,13 +87,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa,
void *p_data, unsigned int bytes); void *p_data, unsigned int bytes);
int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa,
void *p_data, unsigned int bytes); void *p_data, unsigned int bytes);
bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt,
unsigned int offset);
bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset);
void intel_gvt_mmio_set_accessed(struct intel_gvt *gvt, unsigned int offset);
void intel_gvt_mmio_set_cmd_accessed(struct intel_gvt *gvt,
unsigned int offset);
bool intel_gvt_mmio_has_mode_mask(struct intel_gvt *gvt, unsigned int offset);
int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes); void *p_data, unsigned int bytes);
int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
...@@ -111,4 +95,8 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, ...@@ -111,4 +95,8 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
unsigned int offset); unsigned int offset);
int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
void *pdata, unsigned int bytes, bool is_read);
#endif #endif
...@@ -133,8 +133,7 @@ static inline int intel_gvt_hypervisor_inject_msi(struct intel_vgpu *vgpu) ...@@ -133,8 +133,7 @@ static inline int intel_gvt_hypervisor_inject_msi(struct intel_vgpu *vgpu)
if (WARN(control & GENMASK(15, 1), "only support one MSI format\n")) if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
return -EINVAL; return -EINVAL;
gvt_dbg_irq("vgpu%d: inject msi address %x data%x\n", vgpu->id, addr, trace_inject_msi(vgpu->id, addr, data);
data);
ret = intel_gvt_host.mpt->inject_msi(vgpu->handle, addr, data); ret = intel_gvt_host.mpt->inject_msi(vgpu->handle, addr, data);
if (ret) if (ret)
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "gvt.h" #include "gvt.h"
#include "trace.h"
struct render_mmio { struct render_mmio {
int ring_id; int ring_id;
...@@ -260,7 +261,8 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) ...@@ -260,7 +261,8 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
#define CTX_CONTEXT_CONTROL_VAL 0x03 #define CTX_CONTEXT_CONTROL_VAL 0x03
void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) /* Switch ring mmio values (context) from host to a vgpu. */
static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
{ {
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct render_mmio *mmio; struct render_mmio *mmio;
...@@ -305,14 +307,15 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) ...@@ -305,14 +307,15 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
I915_WRITE(mmio->reg, v); I915_WRITE(mmio->reg, v);
POSTING_READ(mmio->reg); POSTING_READ(mmio->reg);
gvt_dbg_render("load reg %x old %x new %x\n", trace_render_mmio(vgpu->id, "load",
i915_mmio_reg_offset(mmio->reg), i915_mmio_reg_offset(mmio->reg),
mmio->value, v); mmio->value, v);
} }
handle_tlb_pending_event(vgpu, ring_id); handle_tlb_pending_event(vgpu, ring_id);
} }
void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) /* Switch ring mmio values (context) from vgpu to host. */
static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
{ {
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct render_mmio *mmio; struct render_mmio *mmio;
...@@ -346,8 +349,37 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) ...@@ -346,8 +349,37 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
I915_WRITE(mmio->reg, v); I915_WRITE(mmio->reg, v);
POSTING_READ(mmio->reg); POSTING_READ(mmio->reg);
gvt_dbg_render("restore reg %x old %x new %x\n", trace_render_mmio(vgpu->id, "restore",
i915_mmio_reg_offset(mmio->reg), i915_mmio_reg_offset(mmio->reg),
mmio->value, v); mmio->value, v);
} }
} }
/**
* intel_gvt_switch_render_mmio - switch mmio context of specific engine
* @pre: the last vGPU that own the engine
* @next: the vGPU to switch to
* @ring_id: specify the engine
*
* If pre is null indicates that host own the engine. If next is null
* indicates that we are switching to host workload.
*/
void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id)
{
if (WARN_ON(!pre && !next))
return;
gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
/**
* TODO: Optimize for vGPU to vGPU switch by merging
* switch_mmio_to_host() and switch_mmio_to_vgpu().
*/
if (pre)
switch_mmio_to_host(pre, ring_id);
if (next)
switch_mmio_to_vgpu(next, ring_id);
}
...@@ -36,8 +36,8 @@ ...@@ -36,8 +36,8 @@
#ifndef __GVT_RENDER_H__ #ifndef __GVT_RENDER_H__
#define __GVT_RENDER_H__ #define __GVT_RENDER_H__
void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id); void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id);
void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id);
#endif #endif
...@@ -202,11 +202,6 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) ...@@ -202,11 +202,6 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct vgpu_sched_data *vgpu_data; struct vgpu_sched_data *vgpu_data;
struct intel_vgpu *vgpu = NULL; struct intel_vgpu *vgpu = NULL;
static uint64_t timer_check;
if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
gvt_balance_timeslice(sched_data);
/* no active vgpu or has already had a target */ /* no active vgpu or has already had a target */
if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
goto out; goto out;
...@@ -231,9 +226,19 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) ...@@ -231,9 +226,19 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
void intel_gvt_schedule(struct intel_gvt *gvt) void intel_gvt_schedule(struct intel_gvt *gvt)
{ {
struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
static uint64_t timer_check;
mutex_lock(&gvt->lock); mutex_lock(&gvt->lock);
if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
(void *)&gvt->service_request)) {
if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
gvt_balance_timeslice(sched_data);
}
clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);
tbs_sched_func(sched_data); tbs_sched_func(sched_data);
mutex_unlock(&gvt->lock); mutex_unlock(&gvt->lock);
} }
...@@ -303,8 +308,20 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) ...@@ -303,8 +308,20 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{ {
struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler;
int ring_id;
kfree(vgpu->sched_data); kfree(vgpu->sched_data);
vgpu->sched_data = NULL; vgpu->sched_data = NULL;
spin_lock_bh(&scheduler->mmio_context_lock);
for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
if (scheduler->engine_owner[ring_id] == vgpu) {
intel_gvt_switch_mmio(vgpu, NULL, ring_id);
scheduler->engine_owner[ring_id] = NULL;
}
}
spin_unlock_bh(&scheduler->mmio_context_lock);
} }
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
......
...@@ -138,21 +138,42 @@ static int shadow_context_status_change(struct notifier_block *nb, ...@@ -138,21 +138,42 @@ static int shadow_context_status_change(struct notifier_block *nb,
struct intel_gvt *gvt = container_of(nb, struct intel_gvt, struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
shadow_ctx_notifier_block[req->engine->id]); shadow_ctx_notifier_block[req->engine->id]);
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct intel_vgpu_workload *workload = enum intel_engine_id ring_id = req->engine->id;
scheduler->current_workload[req->engine->id]; struct intel_vgpu_workload *workload;
if (!is_gvt_request(req)) {
spin_lock_bh(&scheduler->mmio_context_lock);
if (action == INTEL_CONTEXT_SCHEDULE_IN &&
scheduler->engine_owner[ring_id]) {
/* Switch ring from vGPU to host. */
intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
NULL, ring_id);
scheduler->engine_owner[ring_id] = NULL;
}
spin_unlock_bh(&scheduler->mmio_context_lock);
if (!is_gvt_request(req) || unlikely(!workload)) return NOTIFY_OK;
}
workload = scheduler->current_workload[ring_id];
if (unlikely(!workload))
return NOTIFY_OK; return NOTIFY_OK;
switch (action) { switch (action) {
case INTEL_CONTEXT_SCHEDULE_IN: case INTEL_CONTEXT_SCHEDULE_IN:
intel_gvt_load_render_mmio(workload->vgpu, spin_lock_bh(&scheduler->mmio_context_lock);
workload->ring_id); if (workload->vgpu != scheduler->engine_owner[ring_id]) {
/* Switch ring from host to vGPU or vGPU to vGPU. */
intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
workload->vgpu, ring_id);
scheduler->engine_owner[ring_id] = workload->vgpu;
} else
gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
ring_id, workload->vgpu->id);
spin_unlock_bh(&scheduler->mmio_context_lock);
atomic_set(&workload->shadow_ctx_active, 1); atomic_set(&workload->shadow_ctx_active, 1);
break; break;
case INTEL_CONTEXT_SCHEDULE_OUT: case INTEL_CONTEXT_SCHEDULE_OUT:
intel_gvt_restore_render_mmio(workload->vgpu,
workload->ring_id);
/* If the status is -EINPROGRESS means this workload /* If the status is -EINPROGRESS means this workload
* doesn't meet any issue during dispatching so when * doesn't meet any issue during dispatching so when
* get the SCHEDULE_OUT set the status to be zero for * get the SCHEDULE_OUT set the status to be zero for
...@@ -431,6 +452,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) ...@@ -431,6 +452,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
atomic_dec(&vgpu->running_workload_num); atomic_dec(&vgpu->running_workload_num);
wake_up(&scheduler->workload_complete_wq); wake_up(&scheduler->workload_complete_wq);
if (gvt->scheduler.need_reschedule)
intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
mutex_unlock(&gvt->lock); mutex_unlock(&gvt->lock);
} }
......
...@@ -42,6 +42,10 @@ struct intel_gvt_workload_scheduler { ...@@ -42,6 +42,10 @@ struct intel_gvt_workload_scheduler {
struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES]; struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES];
bool need_reschedule; bool need_reschedule;
spinlock_t mmio_context_lock;
/* can be null when owner is host */
struct intel_vgpu *engine_owner[I915_NUM_ENGINES];
wait_queue_head_t workload_complete_wq; wait_queue_head_t workload_complete_wq;
struct task_struct *thread[I915_NUM_ENGINES]; struct task_struct *thread[I915_NUM_ENGINES];
wait_queue_head_t waitq[I915_NUM_ENGINES]; wait_queue_head_t waitq[I915_NUM_ENGINES];
......
...@@ -224,58 +224,138 @@ TRACE_EVENT(oos_sync, ...@@ -224,58 +224,138 @@ TRACE_EVENT(oos_sync,
TP_printk("%s", __entry->buf) TP_printk("%s", __entry->buf)
); );
#define MAX_CMD_STR_LEN 256
TRACE_EVENT(gvt_command, TRACE_EVENT(gvt_command,
TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler), TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
u32 buf_type),
TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler),
TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
TP_STRUCT__entry(
__field(u8, vm_id) TP_STRUCT__entry(
__field(u8, ring_id) __field(u8, vgpu_id)
__field(int, i) __field(u8, ring_id)
__array(char, tmp_buf, MAX_CMD_STR_LEN) __field(u32, ip_gma)
__array(char, cmd_str, MAX_CMD_STR_LEN) __field(u32, buf_type)
), __field(u32, cmd_len)
__dynamic_array(u32, raw_cmd, cmd_len)
TP_fast_assign( ),
__entry->vm_id = vm_id;
__entry->ring_id = ring_id; TP_fast_assign(
__entry->cmd_str[0] = '\0'; __entry->vgpu_id = vgpu_id;
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler); __entry->ring_id = ring_id;
strcat(__entry->cmd_str, __entry->tmp_buf); __entry->ip_gma = ip_gma;
entry->i = 0; __entry->buf_type = buf_type;
while (cmd_len > 0) { __entry->cmd_len = cmd_len;
if (cmd_len >= 8) { memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ", ),
cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3],
cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]);
__entry->i += 8; TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
cmd_len -= 8; __entry->vgpu_id,
strcat(__entry->cmd_str, __entry->tmp_buf); __entry->ring_id,
} else if (cmd_len >= 4) { __entry->buf_type,
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ", __entry->ip_gma,
cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]); __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
__entry->i += 4; );
cmd_len -= 4;
strcat(__entry->cmd_str, __entry->tmp_buf); #define GVT_TEMP_STR_LEN 10
} else if (cmd_len >= 2) { TRACE_EVENT(write_ir,
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]); TP_PROTO(int id, char *reg_name, unsigned int reg, unsigned int new_val,
__entry->i += 2; unsigned int old_val, bool changed),
cmd_len -= 2;
strcat(__entry->cmd_str, __entry->tmp_buf); TP_ARGS(id, reg_name, reg, new_val, old_val, changed),
} else if (cmd_len == 1) {
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]); TP_STRUCT__entry(
__entry->i += 1; __field(int, id)
cmd_len -= 1; __array(char, buf, GVT_TEMP_STR_LEN)
strcat(__entry->cmd_str, __entry->tmp_buf); __field(unsigned int, reg)
} __field(unsigned int, new_val)
} __field(unsigned int, old_val)
strcat(__entry->cmd_str, "\n"); __field(bool, changed)
), ),
TP_fast_assign(
__entry->id = id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", reg_name);
__entry->reg = reg;
__entry->new_val = new_val;
__entry->old_val = old_val;
__entry->changed = changed;
),
TP_printk("VM%u write [%s] %x, new %08x, old %08x, changed %08x\n",
__entry->id, __entry->buf, __entry->reg, __entry->new_val,
__entry->old_val, __entry->changed)
);
TRACE_EVENT(propagate_event,
TP_PROTO(int id, const char *irq_name, int bit),
TP_ARGS(id, irq_name, bit),
TP_STRUCT__entry(
__field(int, id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(int, bit)
),
TP_printk("%s", __entry->cmd_str) TP_fast_assign(
__entry->id = id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", irq_name);
__entry->bit = bit;
),
TP_printk("Set bit (%d) for (%s) for vgpu (%d)\n",
__entry->bit, __entry->buf, __entry->id)
); );
TRACE_EVENT(inject_msi,
TP_PROTO(int id, unsigned int address, unsigned int data),
TP_ARGS(id, address, data),
TP_STRUCT__entry(
__field(int, id)
__field(unsigned int, address)
__field(unsigned int, data)
),
TP_fast_assign(
__entry->id = id;
__entry->address = address;
__entry->data = data;
),
TP_printk("vgpu%d:inject msi address %x data %x\n",
__entry->id, __entry->address, __entry->data)
);
TRACE_EVENT(render_mmio,
TP_PROTO(int id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
TP_ARGS(id, action, reg, new_val, old_val),
TP_STRUCT__entry(
__field(int, id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(unsigned int, reg)
__field(unsigned int, old_val)
__field(unsigned int, new_val)
),
TP_fast_assign(
__entry->id = id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
__entry->reg = reg;
__entry->old_val = old_val;
__entry->new_val = new_val;
),
TP_printk("VM%u %s reg %x, old %08x new %08x\n",
__entry->id, __entry->buf, __entry->reg,
__entry->old_val, __entry->new_val)
);
#endif /* _GVT_TRACE_H_ */ #endif /* _GVT_TRACE_H_ */
/* This part must be out of protection */ /* This part must be out of protection */
......
...@@ -501,9 +501,14 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, ...@@ -501,9 +501,14 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
/* full GPU reset or device model level reset */ /* full GPU reset or device model level reset */
if (engine_mask == ALL_ENGINES || dmlr) { if (engine_mask == ALL_ENGINES || dmlr) {
intel_vgpu_reset_gtt(vgpu, dmlr); intel_vgpu_reset_gtt(vgpu, dmlr);
intel_vgpu_reset_resource(vgpu);
intel_vgpu_reset_mmio(vgpu); /*fence will not be reset during virtual reset */
if (dmlr)
intel_vgpu_reset_resource(vgpu);
intel_vgpu_reset_mmio(vgpu, dmlr);
populate_pvinfo_page(vgpu); populate_pvinfo_page(vgpu);
intel_vgpu_reset_display(vgpu); intel_vgpu_reset_display(vgpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment