Commit 305b9edd authored by Dave Airlie

Merge tag 'drm-intel-next-2017-06-19' of git://anongit.freedesktop.org/git/drm-intel into drm-next

Final pile of features for 4.13

New uabi:
- batch bo in first slot, for faster execbuf assembly in userspace
  (Chris Wilson)
- (sub)slice getparam, needed for mesa perf support (Robert Bragg)

First pile of patches for cnl/cfl support, maintained by Rodrigo but
with lots of contributions from others. Still incomplete since public
review is still ongoing.

Features/refactoring:
- Make execbuf faster (Chris Wilson), a pile of series to make execbuf
  buffer handling have fewer passes, use less list walking, postpone
  more work to async workers and shuffle buffers less, all to make the
  common case much faster (in some cases at least).
- cold boot support for glk dsi (Madhav Chauhan)
- Clean up pipe A quirk and related old platform hacks (Ville)
- perf sampling support for kbl/glk (Lionel)
- perf cleanups (Robert Bragg)
- wire atomic state to backlight code, to avoid pipe lookup hacks
  (Maarten)
- reduce request waiting latency/overhead to remove the spinning and
  associated cpu cycle wasting (Chris)
- fix 90/270 rotation wm computation (Ville)
- new ddb allocation algo for skl (Kumar Mahesh)
- fix regression due to system suspend optimization (Imre)
- the usual pile of small cleanups and refactors all over

GVT updates contained in this tag:
- optimization for per-VM mmio save/restore (Changbin)
- optimization for mmio hash table (Changbin)
- scheduler optimization with event (Ping)
- vGPU reset refinement (Fred)
- other misc refactor and cleanups, etc.

* tag 'drm-intel-next-2017-06-19' of git://anongit.freedesktop.org/git/drm-intel: (170 commits)
  drm/i915: Update DRIVER_DATE to 20170619
  drm/i915/cfl: Introduce Coffee Lake workarounds.
  drm/i915: Store 9 bits of PCI Device ID for platforms with a LP PCH
  drm/i915: Stash a pointer to the obj's resv in the vma
  drm/i915: Async GPU relocation processing
  drm/i915: Allow execbuffer to use the first object as the batch
  drm/i915: Wait upon userptr get-user-pages within execbuffer
  drm/i915: First try the previous execbuffer location
  drm/i915: Store a persistent reference for an object in the execbuffer cache
  drm/i915: Eliminate lots of iterations over the execobjects array
  drm/i915: Disable EXEC_OBJECT_ASYNC when doing relocations
  drm/i915: Pass vma to relocate entry
  drm/i915: Store a direct lookup from object handle to vma
  drm/i915: Fix retrieval of hangcheck stats
  drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty
  drm/i915: Mark CPU cache as dirty on every transition for CPU writes
  drm/i915: Make i915_vma_destroy() static
  drm/i915: Actually attach the tv_format property to the SDVO connector
  Revert "drm/i915/skl: New ddb allocation algorithm"
  drm/i915/glk: Add cold boot sequence for GLK DSI
  ...
parents eafae133 9ddb8e17
......@@ -129,7 +129,16 @@ i915-y += i915_vgpu.o
# perf code
i915-y += i915_perf.o \
i915_oa_hsw.o
i915_oa_hsw.o \
i915_oa_bdw.o \
i915_oa_chv.o \
i915_oa_sklgt2.o \
i915_oa_sklgt3.o \
i915_oa_sklgt4.o \
i915_oa_bxt.o \
i915_oa_kblgt2.o \
i915_oa_kblgt3.o \
i915_oa_glk.o
ifeq ($(CONFIG_DRM_I915_GVT),y)
i915-y += intel_gvt.o
......
......@@ -217,9 +217,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo,
name = ch7xxx_get_id(vendor);
if (!name) {
DRM_DEBUG_KMS("ch7xxx not detected; got 0x%02x from %s "
"slave %d.\n",
vendor, adapter->name, dvo->slave_addr);
DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s slave %d.\n",
vendor, adapter->name, dvo->slave_addr);
goto out;
}
......@@ -229,9 +228,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo,
devid = ch7xxx_get_did(device);
if (!devid) {
DRM_DEBUG_KMS("ch7xxx not detected; got 0x%02x from %s "
"slave %d.\n",
vendor, adapter->name, dvo->slave_addr);
DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s slave %d.\n",
device, adapter->name, dvo->slave_addr);
goto out;
}
......
......@@ -3,6 +3,6 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \
execlist.o scheduler.o sched_policy.o render.o cmd_parser.o
ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall
ccflags-y += -I$(src) -I$(src)/$(GVT_DIR)
i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o
......@@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
}
#define GVT_MAX_CMD_LENGTH 20 /* In Dword */
/*
 * Copy the dwords of the current command (at most GVT_MAX_CMD_LENGTH of
 * them) into a local buffer and emit the gvt_command trace event, together
 * with the two cycle costs supplied by the caller.
 */
static void trace_cs_command(struct parser_exec_state *s,
cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
{
/* This buffer is used by ftrace to store all commands copied from
 * guest gma space. Commands can sometimes cross pages, which should
 * not be handled in the ftrace logic, so this is just used as a
 * 'bounce buffer'.
 */
u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
int i;
u32 cmd_len = cmd_length(s);
/* The chosen value of GVT_MAX_CMD_LENGTH is based on the
 * following two considerations:
 * 1) From observation, most common ring commands are not that long,
 * but there are exceptions, so it does make sense to observe
 * longer commands.
 * 2) From the performance and debugging point of view, dumping the
 * full contents of very long commands is not necessary.
 * We might shrink GVT_MAX_CMD_LENGTH or remove this trace event in
 * the future for performance considerations.
 */
if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
/* Truncate: only the first GVT_MAX_CMD_LENGTH dwords are traced. */
cmd_len = GVT_MAX_CMD_LENGTH;
}
for (i = 0; i < cmd_len; i++)
cmd_trace_buf[i] = cmd_val(s, i);
trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
cost_pre_cmd_handler, cost_cmd_handler);
}
/* call the cmd handler, and advance ip */
static int cmd_parser_exec(struct parser_exec_state *s)
{
struct intel_vgpu *vgpu = s->vgpu;
struct cmd_info *info;
u32 cmd;
int ret = 0;
cycles_t t0, t1, t2;
struct parser_exec_state s_before_advance_custom;
struct intel_vgpu *vgpu = s->vgpu;
t0 = get_cycles();
cmd = cmd_val(s, 0);
......@@ -2471,13 +2431,10 @@ static int cmd_parser_exec(struct parser_exec_state *s)
return -EINVAL;
}
gvt_dbg_cmd("%s\n", info->name);
s->info = info;
t1 = get_cycles();
s_before_advance_custom = *s;
trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
cmd_length(s), s->buf_type);
if (info->handler) {
ret = info->handler(s);
......@@ -2486,9 +2443,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
return ret;
}
}
t2 = get_cycles();
trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);
if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
ret = cmd_advance_default(s);
......@@ -2522,8 +2476,6 @@ static int command_scan(struct parser_exec_state *s,
gma_tail = rb_start + rb_tail;
gma_bottom = rb_start + rb_len;
gvt_dbg_cmd("scan_start: start=%lx end=%lx\n", gma_head, gma_tail);
while (s->ip_gma != gma_tail) {
if (s->buf_type == RING_BUFFER_INSTRUCTION) {
if (!(s->ip_gma >= rb_start) ||
......@@ -2552,8 +2504,6 @@ static int command_scan(struct parser_exec_state *s,
}
}
gvt_dbg_cmd("scan_end\n");
return ret;
}
......
......@@ -708,53 +708,43 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
unsigned long valid_desc_bitmap = 0;
bool emulate_schedule_in = true;
int ret;
int i;
struct execlist_ctx_descriptor_format desc[2];
int i, ret;
memset(valid_desc, 0, sizeof(valid_desc));
desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
if (!desc[0].valid) {
gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
goto inv_desc;
}
for (i = 0; i < 2; i++) {
if (!desc[i]->valid)
for (i = 0; i < ARRAY_SIZE(desc); i++) {
if (!desc[i].valid)
continue;
if (!desc[i]->privilege_access) {
if (!desc[i].privilege_access) {
gvt_vgpu_err("unexpected GGTT elsp submission\n");
return -EINVAL;
goto inv_desc;
}
/* TODO: add another guest context checks here. */
set_bit(i, &valid_desc_bitmap);
valid_desc[i] = *desc[i];
}
if (!valid_desc_bitmap) {
gvt_vgpu_err("no valid desc in a elsp submission\n");
return -EINVAL;
}
if (!test_bit(0, (void *)&valid_desc_bitmap) &&
test_bit(1, (void *)&valid_desc_bitmap)) {
gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n");
return -EINVAL;
}
/* submit workload */
for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
ret = submit_context(vgpu, ring_id, &valid_desc[i],
emulate_schedule_in);
for (i = 0; i < ARRAY_SIZE(desc); i++) {
if (!desc[i].valid)
continue;
ret = submit_context(vgpu, ring_id, &desc[i], i == 0);
if (ret) {
gvt_vgpu_err("fail to schedule workload\n");
gvt_vgpu_err("failed to submit desc %d\n", i);
return ret;
}
emulate_schedule_in = false;
}
return 0;
inv_desc:
gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw);
return -EINVAL;
}
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
......
......@@ -102,13 +102,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
p = firmware + h->mmio_offset;
hash_for_each(gvt->mmio.mmio_info_table, i, e, node) {
int j;
for (j = 0; j < e->length; j += 4)
*(u32 *)(p + e->offset + j) =
I915_READ_NOTRACE(_MMIO(e->offset + j));
}
hash_for_each(gvt->mmio.mmio_info_table, i, e, node)
*(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset));
memcpy(gvt->firmware.mmio, p, info->mmio_size);
......
......@@ -244,15 +244,19 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
return readq(addr);
}
/*
 * Write GFX_FLSH_CNTL_EN to GFX_FLSH_CNTL_GEN6, with the register write
 * bracketed by mmio_hw_access_pre()/mmio_hw_access_post().
 * NOTE(review): presumably this makes the hardware drop stale GGTT
 * translations after entries change -- confirm against the register spec.
 */
static void gtt_invalidate(struct drm_i915_private *dev_priv)
{
mmio_hw_access_pre(dev_priv);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
mmio_hw_access_post(dev_priv);
}
static void write_pte64(struct drm_i915_private *dev_priv,
unsigned long index, u64 pte)
{
void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
writeq(pte, addr);
I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
......@@ -1849,6 +1853,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
}
ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
gtt_invalidate(gvt->dev_priv);
ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
return 0;
}
......@@ -2301,8 +2306,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
u32 num_entries;
struct intel_gvt_gtt_entry e;
intel_runtime_pm_get(dev_priv);
memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
e.type = GTT_TYPE_GGTT_PTE;
ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
......@@ -2318,7 +2321,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
for (offset = 0; offset < num_entries; offset++)
ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
intel_runtime_pm_put(dev_priv);
gtt_invalidate(dev_priv);
}
/**
......
......@@ -147,7 +147,9 @@ static int gvt_service_thread(void *data)
mutex_unlock(&gvt->lock);
}
if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
if (test_bit(INTEL_GVT_REQUEST_SCHED,
(void *)&gvt->service_request) ||
test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
(void *)&gvt->service_request)) {
intel_gvt_schedule(gvt);
}
......@@ -244,7 +246,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
gvt_dbg_core("init gvt device\n");
idr_init(&gvt->vgpu_idr);
spin_lock_init(&gvt->scheduler.mmio_context_lock);
mutex_init(&gvt->lock);
gvt->dev_priv = dev_priv;
......
......@@ -165,7 +165,6 @@ struct intel_vgpu {
struct list_head workload_q_head[I915_NUM_ENGINES];
struct kmem_cache *workloads;
atomic_t running_workload_num;
ktime_t last_ctx_submit_time;
DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
struct i915_gem_context *shadow_ctx;
......@@ -196,11 +195,27 @@ struct intel_gvt_fence {
unsigned long vgpu_allocated_fence_num;
};
#define INTEL_GVT_MMIO_HASH_BITS 9
#define INTEL_GVT_MMIO_HASH_BITS 11
struct intel_gvt_mmio {
u32 *mmio_attribute;
u8 *mmio_attribute;
/* Register contains RO bits */
#define F_RO (1 << 0)
/* Register contains graphics address */
#define F_GMADR (1 << 1)
/* Mode mask registers with high 16 bits as the mask bits */
#define F_MODE_MASK (1 << 2)
/* This reg can be accessed by GPU commands */
#define F_CMD_ACCESS (1 << 3)
/* This reg has been accessed by a VM */
#define F_ACCESSED (1 << 4)
/* This reg has been accessed through GPU commands */
#define F_CMD_ACCESSED (1 << 5)
/* This reg could be accessed by unaligned address */
#define F_UNALIGN (1 << 6)
DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS);
unsigned int num_tracked_mmio;
};
struct intel_gvt_firmware {
......@@ -257,7 +272,12 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)
/* Bit numbers of the service requests tracked in gvt->service_request. */
enum {
INTEL_GVT_REQUEST_EMULATE_VBLANK = 0,
/* Scheduling triggered by timer */
INTEL_GVT_REQUEST_SCHED = 1,
/* Scheduling triggered by event */
INTEL_GVT_REQUEST_EVENT_SCHED = 2,
};
static inline void intel_gvt_request_service(struct intel_gvt *gvt,
......@@ -473,6 +493,80 @@ enum {
GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
};
/*
 * Take a runtime PM reference via intel_runtime_pm_get(). Intended to be
 * called before a hardware MMIO access and paired with
 * mmio_hw_access_post().
 */
static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
{
intel_runtime_pm_get(dev_priv);
}
/*
 * Drop a runtime PM reference via intel_runtime_pm_put(). Intended to be
 * called after a hardware MMIO access, as the counterpart of
 * mmio_hw_access_pre().
 */
static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
{
intel_runtime_pm_put(dev_priv);
}
/**
 * intel_gvt_mmio_set_accessed - mark an MMIO register as accessed
 * @gvt: a GVT device
 * @offset: register offset
 *
 * Sets F_ACCESSED in the attribute entry for @offset. The attribute table
 * is indexed by @offset >> 2.
 */
static inline void intel_gvt_mmio_set_accessed(
struct intel_gvt *gvt, unsigned int offset)
{
gvt->mmio.mmio_attribute[offset >> 2] |= F_ACCESSED;
}
/**
 * intel_gvt_mmio_is_cmd_access - check whether an MMIO register can be
 * accessed by GPU commands
 * @gvt: a GVT device
 * @offset: register offset
 *
 * Returns:
 * True if F_CMD_ACCESS is set in the attribute entry for @offset
 * (indexed by @offset >> 2), false otherwise.
 */
static inline bool intel_gvt_mmio_is_cmd_access(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS;
}
/**
 * intel_gvt_mmio_is_unalign - check whether an MMIO register can be
 * accessed at an unaligned address
 * @gvt: a GVT device
 * @offset: register offset
 *
 * Returns:
 * True if F_UNALIGN is set in the attribute entry for @offset
 * (indexed by @offset >> 2), false otherwise.
 */
static inline bool intel_gvt_mmio_is_unalign(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN;
}
/**
 * intel_gvt_mmio_set_cmd_accessed - mark an MMIO register as accessed
 * through GPU commands
 * @gvt: a GVT device
 * @offset: register offset
 *
 * Sets F_CMD_ACCESSED in the attribute entry for @offset. The attribute
 * table is indexed by @offset >> 2.
 */
static inline void intel_gvt_mmio_set_cmd_accessed(
struct intel_gvt *gvt, unsigned int offset)
{
gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESSED;
}
/**
 * intel_gvt_mmio_has_mode_mask - check whether an MMIO register has a mode mask
 * @gvt: a GVT device
 * @offset: register offset
 *
 * Returns:
 * True if the MMIO register has a mode mask in its higher 16 bits
 * (F_MODE_MASK is set in its attribute entry), false otherwise.
 *
 */
static inline bool intel_gvt_mmio_has_mode_mask(
struct intel_gvt *gvt, unsigned int offset)
{
return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK;
}
#include "trace.h"
#include "mpt.h"
#endif
This diff is collapsed.
......@@ -31,6 +31,7 @@
#include "i915_drv.h"
#include "gvt.h"
#include "trace.h"
/* common offset among interrupt control registers */
#define regbase_to_isr(base) (base)
......@@ -178,8 +179,8 @@ int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu,
struct intel_gvt_irq_ops *ops = gvt->irq.ops;
u32 imr = *(u32 *)p_data;
gvt_dbg_irq("write IMR %x, new %08x, old %08x, changed %08x\n",
reg, imr, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ imr);
trace_write_ir(vgpu->id, "IMR", reg, imr, vgpu_vreg(vgpu, reg),
(vgpu_vreg(vgpu, reg) ^ imr));
vgpu_vreg(vgpu, reg) = imr;
......@@ -209,8 +210,8 @@ int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
u32 ier = *(u32 *)p_data;
u32 virtual_ier = vgpu_vreg(vgpu, reg);
gvt_dbg_irq("write MASTER_IRQ %x, new %08x, old %08x, changed %08x\n",
reg, ier, virtual_ier, virtual_ier ^ ier);
trace_write_ir(vgpu->id, "MASTER_IRQ", reg, ier, virtual_ier,
(virtual_ier ^ ier));
/*
* GEN8_MASTER_IRQ is a special irq register,
......@@ -248,8 +249,8 @@ int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
struct intel_gvt_irq_info *info;
u32 ier = *(u32 *)p_data;
gvt_dbg_irq("write IER %x, new %08x, old %08x, changed %08x\n",
reg, ier, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ ier);
trace_write_ir(vgpu->id, "IER", reg, ier, vgpu_vreg(vgpu, reg),
(vgpu_vreg(vgpu, reg) ^ ier));
vgpu_vreg(vgpu, reg) = ier;
......@@ -285,8 +286,8 @@ int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg,
iir_to_regbase(reg));
u32 iir = *(u32 *)p_data;
gvt_dbg_irq("write IIR %x, new %08x, old %08x, changed %08x\n",
reg, iir, vgpu_vreg(vgpu, reg), vgpu_vreg(vgpu, reg) ^ iir);
trace_write_ir(vgpu->id, "IIR", reg, iir, vgpu_vreg(vgpu, reg),
(vgpu_vreg(vgpu, reg) ^ iir));
if (WARN_ON(!info))
return -EINVAL;
......@@ -411,8 +412,7 @@ static void propagate_event(struct intel_gvt_irq *irq,
if (!test_bit(bit, (void *)&vgpu_vreg(vgpu,
regbase_to_imr(reg_base)))) {
gvt_dbg_irq("set bit (%d) for (%s) for vgpu (%d)\n",
bit, irq_name[event], vgpu->id);
trace_propagate_event(vgpu->id, irq_name[event], bit);
set_bit(bit, (void *)&vgpu_vreg(vgpu,
regbase_to_iir(reg_base)));
}
......
......@@ -123,7 +123,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
void *p_data, unsigned int bytes)
{
struct intel_gvt *gvt = vgpu->gvt;
struct intel_gvt_mmio_info *mmio;
unsigned int offset = 0;
int ret = -EINVAL;
......@@ -187,32 +186,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa,
goto err;
}
mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4));
if (mmio) {
if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) {
if (WARN_ON(offset + bytes > mmio->offset + mmio->size))
goto err;
if (WARN_ON(mmio->offset != offset))
goto err;
}
ret = mmio->read(vgpu, offset, p_data, bytes);
} else {
ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
if (!vgpu->mmio.disable_warn_untrack) {
gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n",
offset, bytes, *(u32 *)p_data);
if (offset == 0x206c) {
gvt_vgpu_err("------------------------------------------\n");
gvt_vgpu_err("likely triggers a gfx reset\n");
gvt_vgpu_err("------------------------------------------\n");
vgpu->mmio.disable_warn_untrack = true;
}
}
}
if (ret)
ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, true);
if (ret < 0)
goto err;
intel_gvt_mmio_set_accessed(gvt, offset);
......@@ -239,9 +214,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
void *p_data, unsigned int bytes)
{
struct intel_gvt *gvt = vgpu->gvt;
struct intel_gvt_mmio_info *mmio;
unsigned int offset = 0;
u32 old_vreg = 0, old_sreg = 0;
int ret = -EINVAL;
if (vgpu->failsafe) {
......@@ -296,66 +269,10 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
return ret;
}
mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4));
if (!mmio && !vgpu->mmio.disable_warn_untrack)
gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n",
vgpu->id, offset, bytes, *(u32 *)p_data);
if (!intel_gvt_mmio_is_unalign(gvt, offset)) {
if (WARN_ON(!IS_ALIGNED(offset, bytes)))
goto err;
}
if (mmio) {
u64 ro_mask = mmio->ro_mask;
if (!intel_gvt_mmio_is_unalign(gvt, mmio->offset)) {
if (WARN_ON(offset + bytes > mmio->offset + mmio->size))
goto err;
if (WARN_ON(mmio->offset != offset))
goto err;
}
if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) {
old_vreg = vgpu_vreg(vgpu, offset);
old_sreg = vgpu_sreg(vgpu, offset);
}
if (!ro_mask) {
ret = mmio->write(vgpu, offset, p_data, bytes);
} else {
/* Protect RO bits like HW */
u64 data = 0;
/* all register bits are RO. */
if (ro_mask == ~(u64)0) {
gvt_vgpu_err("try to write RO reg %x\n",
offset);
ret = 0;
goto out;
}
/* keep the RO bits in the virtual register */
memcpy(&data, p_data, bytes);
data &= ~mmio->ro_mask;
data |= vgpu_vreg(vgpu, offset) & mmio->ro_mask;
ret = mmio->write(vgpu, offset, &data, bytes);
}
/* higher 16bits of mode ctl regs are mask bits for change */
if (intel_gvt_mmio_has_mode_mask(gvt, mmio->offset)) {
u32 mask = vgpu_vreg(vgpu, offset) >> 16;
vgpu_vreg(vgpu, offset) = (old_vreg & ~mask)
| (vgpu_vreg(vgpu, offset) & mask);
vgpu_sreg(vgpu, offset) = (old_sreg & ~mask)
| (vgpu_sreg(vgpu, offset) & mask);
}
} else
ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data,
bytes);
if (ret)
ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false);
if (ret < 0)
goto err;
out:
intel_gvt_mmio_set_accessed(gvt, offset);
mutex_unlock(&gvt->lock);
return 0;
......@@ -372,20 +289,32 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa,
* @vgpu: a vGPU
*
*/
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu)
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
{
struct intel_gvt *gvt = vgpu->gvt;
const struct intel_gvt_device_info *info = &gvt->device_info;
void *mmio = gvt->firmware.mmio;
if (dmlr) {
memcpy(vgpu->mmio.vreg, mmio, info->mmio_size);
memcpy(vgpu->mmio.sreg, mmio, info->mmio_size);
memcpy(vgpu->mmio.vreg, gvt->firmware.mmio, info->mmio_size);
memcpy(vgpu->mmio.sreg, gvt->firmware.mmio, info->mmio_size);
vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
/* set the bit 0:2(Core C-State ) to C0 */
vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
/* set the bit 0:2(Core C-State ) to C0 */
vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0;
vgpu->mmio.disable_warn_untrack = false;
} else {
#define GVT_GEN8_MMIO_RESET_OFFSET (0x44200)
/* only reset the engine related, so starting with 0x44200
* interrupt include DE,display mmio related will not be
* touched
*/
memcpy(vgpu->mmio.vreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET);
memcpy(vgpu->mmio.sreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET);
}
vgpu->mmio.disable_warn_untrack = false;
}
/**
......@@ -405,7 +334,7 @@ int intel_vgpu_init_mmio(struct intel_vgpu *vgpu)
vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size;
intel_vgpu_reset_mmio(vgpu);
intel_vgpu_reset_mmio(vgpu, true);
return 0;
}
......
......@@ -39,36 +39,28 @@
struct intel_gvt;
struct intel_vgpu;
#define D_SNB (1 << 0)
#define D_IVB (1 << 1)
#define D_HSW (1 << 2)
#define D_BDW (1 << 3)
#define D_SKL (1 << 4)
#define D_KBL (1 << 5)
#define D_BDW (1 << 0)
#define D_SKL (1 << 1)
#define D_KBL (1 << 2)
#define D_GEN9PLUS (D_SKL | D_KBL)
#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL)
#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_SKL_PLUS (D_SKL | D_KBL)
#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL)
#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_PRE_BDW (D_SNB | D_IVB | D_HSW)
#define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW)
#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_PRE_SKL (D_BDW)
#define D_ALL (D_BDW | D_SKL | D_KBL)
typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *,
unsigned int);
struct intel_gvt_mmio_info {
u32 offset;
u32 size;
u32 length;
u32 addr_mask;
u64 ro_mask;
u32 device;
int (*read)(struct intel_vgpu *, unsigned int, void *, unsigned int);
int (*write)(struct intel_vgpu *, unsigned int, void *, unsigned int);
gvt_mmio_func read;
gvt_mmio_func write;
u32 addr_range;
struct hlist_node node;
};
......@@ -79,8 +71,6 @@ bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device);
int intel_gvt_setup_mmio_info(struct intel_gvt *gvt);
void intel_gvt_clean_mmio_info(struct intel_gvt *gvt);
struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
unsigned int offset);
#define INTEL_GVT_MMIO_OFFSET(reg) ({ \
typeof(reg) __reg = reg; \
u32 *offset = (u32 *)&__reg; \
......@@ -88,7 +78,7 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
})
int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu);
void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa);
......@@ -97,13 +87,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa,
void *p_data, unsigned int bytes);
int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa,
void *p_data, unsigned int bytes);
bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt,
unsigned int offset);
bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset);
void intel_gvt_mmio_set_accessed(struct intel_gvt *gvt, unsigned int offset);
void intel_gvt_mmio_set_cmd_accessed(struct intel_gvt *gvt,
unsigned int offset);
bool intel_gvt_mmio_has_mode_mask(struct intel_gvt *gvt, unsigned int offset);
int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
void *p_data, unsigned int bytes);
int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
......@@ -111,4 +95,8 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
unsigned int offset);
int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
void *pdata, unsigned int bytes, bool is_read);
#endif
......@@ -133,8 +133,7 @@ static inline int intel_gvt_hypervisor_inject_msi(struct intel_vgpu *vgpu)
if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
return -EINVAL;
gvt_dbg_irq("vgpu%d: inject msi address %x data%x\n", vgpu->id, addr,
data);
trace_inject_msi(vgpu->id, addr, data);
ret = intel_gvt_host.mpt->inject_msi(vgpu->handle, addr, data);
if (ret)
......
......@@ -35,6 +35,7 @@
#include "i915_drv.h"
#include "gvt.h"
#include "trace.h"
struct render_mmio {
int ring_id;
......@@ -260,7 +261,8 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
#define CTX_CONTEXT_CONTROL_VAL 0x03
void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
/* Switch ring mmio values (context) from host to a vgpu. */
static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct render_mmio *mmio;
......@@ -305,14 +307,15 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
I915_WRITE(mmio->reg, v);
POSTING_READ(mmio->reg);
gvt_dbg_render("load reg %x old %x new %x\n",
i915_mmio_reg_offset(mmio->reg),
mmio->value, v);
trace_render_mmio(vgpu->id, "load",
i915_mmio_reg_offset(mmio->reg),
mmio->value, v);
}
handle_tlb_pending_event(vgpu, ring_id);
}
void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
/* Switch ring mmio values (context) from vgpu to host. */
static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct render_mmio *mmio;
......@@ -346,8 +349,37 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
I915_WRITE(mmio->reg, v);
POSTING_READ(mmio->reg);
gvt_dbg_render("restore reg %x old %x new %x\n",
i915_mmio_reg_offset(mmio->reg),
mmio->value, v);
trace_render_mmio(vgpu->id, "restore",
i915_mmio_reg_offset(mmio->reg),
mmio->value, v);
}
}
/**
 * intel_gvt_switch_mmio - switch mmio context of specific engine
 * @pre: the last vGPU that owned the engine
 * @next: the vGPU to switch to
 * @ring_id: specify the engine
 *
 * If @pre is NULL, the host owns the engine. If @next is NULL, we are
 * switching to a host workload. Passing NULL for both trips a WARN_ON
 * and returns without switching anything.
 */
void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id)
{
if (WARN_ON(!pre && !next))
return;
gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
/*
 * TODO: Optimize for vGPU to vGPU switch by merging
 * switch_mmio_to_host() and switch_mmio_to_vgpu().
 */
/* A vGPU-to-vGPU switch runs both steps: back to host, then to @next. */
if (pre)
switch_mmio_to_host(pre, ring_id);
if (next)
switch_mmio_to_vgpu(next, ring_id);
}
......@@ -36,8 +36,8 @@
#ifndef __GVT_RENDER_H__
#define __GVT_RENDER_H__
void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id);
void intel_gvt_switch_mmio(struct intel_vgpu *pre,
struct intel_vgpu *next, int ring_id);
void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id);
#endif
......@@ -202,11 +202,6 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct vgpu_sched_data *vgpu_data;
struct intel_vgpu *vgpu = NULL;
static uint64_t timer_check;
if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
gvt_balance_timeslice(sched_data);
/* no active vgpu or has already had a target */
if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
goto out;
......@@ -231,9 +226,19 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
/*
 * Run one scheduling pass under gvt->lock: consume the pending
 * INTEL_GVT_REQUEST_SCHED / INTEL_GVT_REQUEST_EVENT_SCHED request bits and
 * then call tbs_sched_func().
 */
void intel_gvt_schedule(struct intel_gvt *gvt)
{
struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
/* Counts passes that consumed INTEL_GVT_REQUEST_SCHED; persists across calls. */
static uint64_t timer_check;
mutex_lock(&gvt->lock);
/*
 * Only a pending INTEL_GVT_REQUEST_SCHED request advances timer_check;
 * timeslices are rebalanced once every GVT_TS_BALANCE_PERIOD_MS such
 * passes.
 */
if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
(void *)&gvt->service_request)) {
if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
gvt_balance_timeslice(sched_data);
}
/* The event-triggered request is cleared unconditionally. */
clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);
tbs_sched_func(sched_data);
mutex_unlock(&gvt->lock);
}
......@@ -303,8 +308,20 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
/*
 * Free the vGPU's scheduler data and, for every engine this vGPU is still
 * recorded as owning, switch the engine's mmio context back to the host
 * and clear the ownership record.
 */
static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler;
int ring_id;
kfree(vgpu->sched_data);
vgpu->sched_data = NULL;
/* engine_owner[] is read and updated under mmio_context_lock. */
spin_lock_bh(&scheduler->mmio_context_lock);
for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
if (scheduler->engine_owner[ring_id] == vgpu) {
/* next == NULL: hand the engine back to the host. */
intel_gvt_switch_mmio(vgpu, NULL, ring_id);
scheduler->engine_owner[ring_id] = NULL;
}
}
spin_unlock_bh(&scheduler->mmio_context_lock);
}
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
......
......@@ -138,21 +138,42 @@ static int shadow_context_status_change(struct notifier_block *nb,
struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
shadow_ctx_notifier_block[req->engine->id]);
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct intel_vgpu_workload *workload =
scheduler->current_workload[req->engine->id];
enum intel_engine_id ring_id = req->engine->id;
struct intel_vgpu_workload *workload;
if (!is_gvt_request(req)) {
spin_lock_bh(&scheduler->mmio_context_lock);
if (action == INTEL_CONTEXT_SCHEDULE_IN &&
scheduler->engine_owner[ring_id]) {
/* Switch ring from vGPU to host. */
intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
NULL, ring_id);
scheduler->engine_owner[ring_id] = NULL;
}
spin_unlock_bh(&scheduler->mmio_context_lock);
if (!is_gvt_request(req) || unlikely(!workload))
return NOTIFY_OK;
}
workload = scheduler->current_workload[ring_id];
if (unlikely(!workload))
return NOTIFY_OK;
switch (action) {
case INTEL_CONTEXT_SCHEDULE_IN:
intel_gvt_load_render_mmio(workload->vgpu,
workload->ring_id);
spin_lock_bh(&scheduler->mmio_context_lock);
if (workload->vgpu != scheduler->engine_owner[ring_id]) {
/* Switch ring from host to vGPU or vGPU to vGPU. */
intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
workload->vgpu, ring_id);
scheduler->engine_owner[ring_id] = workload->vgpu;
} else
gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
ring_id, workload->vgpu->id);
spin_unlock_bh(&scheduler->mmio_context_lock);
atomic_set(&workload->shadow_ctx_active, 1);
break;
case INTEL_CONTEXT_SCHEDULE_OUT:
intel_gvt_restore_render_mmio(workload->vgpu,
workload->ring_id);
/* If the status is -EINPROGRESS means this workload
* doesn't meet any issue during dispatching so when
* get the SCHEDULE_OUT set the status to be zero for
......@@ -431,6 +452,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
atomic_dec(&vgpu->running_workload_num);
wake_up(&scheduler->workload_complete_wq);
if (gvt->scheduler.need_reschedule)
intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
mutex_unlock(&gvt->lock);
}
......
......@@ -42,6 +42,10 @@ struct intel_gvt_workload_scheduler {
struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES];
bool need_reschedule;
spinlock_t mmio_context_lock;
/* can be null when owner is host */
struct intel_vgpu *engine_owner[I915_NUM_ENGINES];
wait_queue_head_t workload_complete_wq;
struct task_struct *thread[I915_NUM_ENGINES];
wait_queue_head_t waitq[I915_NUM_ENGINES];
......
......@@ -224,58 +224,138 @@ TRACE_EVENT(oos_sync,
TP_printk("%s", __entry->buf)
);
#define MAX_CMD_STR_LEN 256
TRACE_EVENT(gvt_command,
TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler),
TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler),
TP_STRUCT__entry(
__field(u8, vm_id)
__field(u8, ring_id)
__field(int, i)
__array(char, tmp_buf, MAX_CMD_STR_LEN)
__array(char, cmd_str, MAX_CMD_STR_LEN)
),
TP_fast_assign(
__entry->vm_id = vm_id;
__entry->ring_id = ring_id;
__entry->cmd_str[0] = '\0';
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler);
strcat(__entry->cmd_str, __entry->tmp_buf);
entry->i = 0;
while (cmd_len > 0) {
if (cmd_len >= 8) {
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ",
cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3],
cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]);
__entry->i += 8;
cmd_len -= 8;
strcat(__entry->cmd_str, __entry->tmp_buf);
} else if (cmd_len >= 4) {
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ",
cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]);
__entry->i += 4;
cmd_len -= 4;
strcat(__entry->cmd_str, __entry->tmp_buf);
} else if (cmd_len >= 2) {
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]);
__entry->i += 2;
cmd_len -= 2;
strcat(__entry->cmd_str, __entry->tmp_buf);
} else if (cmd_len == 1) {
snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]);
__entry->i += 1;
cmd_len -= 1;
strcat(__entry->cmd_str, __entry->tmp_buf);
}
}
strcat(__entry->cmd_str, "\n");
),
TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
u32 buf_type),
TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
TP_STRUCT__entry(
__field(u8, vgpu_id)
__field(u8, ring_id)
__field(u32, ip_gma)
__field(u32, buf_type)
__field(u32, cmd_len)
__dynamic_array(u32, raw_cmd, cmd_len)
),
TP_fast_assign(
__entry->vgpu_id = vgpu_id;
__entry->ring_id = ring_id;
__entry->ip_gma = ip_gma;
__entry->buf_type = buf_type;
__entry->cmd_len = cmd_len;
memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
),
TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
__entry->vgpu_id,
__entry->ring_id,
__entry->buf_type,
__entry->ip_gma,
__print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
);
/*
 * Size of the fixed-length name buffers used by the tracepoints that copy a
 * caller-supplied string with snprintf(); longer names are truncated to
 * GVT_TEMP_STR_LEN - 1 characters plus the terminating NUL.
 */
#define GVT_TEMP_STR_LEN 10
/*
 * write_ir tracepoint.
 *
 * Records, per event: the caller-supplied id, a truncated copy of reg_name,
 * the reg value, the new and old values, and the changed flag.
 * Printed as "VM<id> write [<name>] <reg>, new <new>, old <old>, changed <c>".
 *
 * NOTE(review): 'changed' is stored as a bool but printed with %08x, and
 * 'id' is an int printed with %u - presumably intentional, confirm.
 */
TRACE_EVENT(write_ir,
TP_PROTO(int id, char *reg_name, unsigned int reg, unsigned int new_val,
unsigned int old_val, bool changed),
TP_ARGS(id, reg_name, reg, new_val, old_val, changed),
TP_STRUCT__entry(
__field(int, id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(unsigned int, reg)
__field(unsigned int, new_val)
__field(unsigned int, old_val)
__field(bool, changed)
),
TP_fast_assign(
__entry->id = id;
/* Bounded copy: the name is stored by value, not by pointer. */
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", reg_name);
__entry->reg = reg;
__entry->new_val = new_val;
__entry->old_val = old_val;
__entry->changed = changed;
),
TP_printk("VM%u write [%s] %x, new %08x, old %08x, changed %08x\n",
__entry->id, __entry->buf, __entry->reg, __entry->new_val,
__entry->old_val, __entry->changed)
);
TRACE_EVENT(propagate_event,
TP_PROTO(int id, const char *irq_name, int bit),
TP_ARGS(id, irq_name, bit),
TP_STRUCT__entry(
__field(int, id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(int, bit)
),
TP_printk("%s", __entry->cmd_str)
TP_fast_assign(
__entry->id = id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", irq_name);
__entry->bit = bit;
),
TP_printk("Set bit (%d) for (%s) for vgpu (%d)\n",
__entry->bit, __entry->buf, __entry->id)
);
/*
 * inject_msi tracepoint.
 *
 * Records the caller-supplied id together with the address and data values
 * passed in; all three are stored verbatim and printed as
 * "vgpu<id>:inject msi address <address> data <data>".
 */
TRACE_EVENT(inject_msi,
TP_PROTO(int id, unsigned int address, unsigned int data),
TP_ARGS(id, address, data),
TP_STRUCT__entry(
__field(int, id)
__field(unsigned int, address)
__field(unsigned int, data)
),
TP_fast_assign(
__entry->id = id;
__entry->address = address;
__entry->data = data;
),
TP_printk("vgpu%d:inject msi address %x data %x\n",
__entry->id, __entry->address, __entry->data)
);
/*
 * render_mmio tracepoint.
 *
 * Records one register update: the caller-supplied id, a truncated copy of
 * the action string (at most GVT_TEMP_STR_LEN - 1 characters), the reg value
 * and the old and new values.
 * Printed as "VM<id> <action> reg <reg>, old <old> new <new>".
 */
TRACE_EVENT(render_mmio,
	TP_PROTO(int id, char *action, unsigned int reg,
		 unsigned int old_val, unsigned int new_val),

	/*
	 * TP_ARGS must name the parameters in exactly the TP_PROTO order:
	 * they are forwarded positionally to the probe, so listing new_val
	 * before old_val would record the two values swapped.
	 */
	TP_ARGS(id, action, reg, old_val, new_val),

	TP_STRUCT__entry(
		__field(int, id)
		__array(char, buf, GVT_TEMP_STR_LEN)
		__field(unsigned int, reg)
		__field(unsigned int, old_val)
		__field(unsigned int, new_val)
	),

	TP_fast_assign(
		__entry->id = id;
		/* Bounded copy: the action is stored by value, not by pointer. */
		snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
		__entry->reg = reg;
		__entry->old_val = old_val;
		__entry->new_val = new_val;
	),

	TP_printk("VM%u %s reg %x, old %08x new %08x\n",
		  __entry->id, __entry->buf, __entry->reg,
		  __entry->old_val, __entry->new_val)
);
#endif /* _GVT_TRACE_H_ */
/* This part must be out of protection */
......
......@@ -501,9 +501,14 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
/* full GPU reset or device model level reset */
if (engine_mask == ALL_ENGINES || dmlr) {
intel_vgpu_reset_gtt(vgpu, dmlr);
intel_vgpu_reset_resource(vgpu);
intel_vgpu_reset_mmio(vgpu);
/*fence will not be reset during virtual reset */
if (dmlr)
intel_vgpu_reset_resource(vgpu);
intel_vgpu_reset_mmio(vgpu, dmlr);
populate_pvinfo_page(vgpu);
intel_vgpu_reset_display(vgpu);
......
......@@ -1670,12 +1670,22 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
seq_printf(m, "FBC disabled: %s\n",
dev_priv->fbc.no_fbc_reason);
if (intel_fbc_is_active(dev_priv) && INTEL_GEN(dev_priv) >= 7) {
uint32_t mask = INTEL_GEN(dev_priv) >= 8 ?
BDW_FBC_COMPRESSION_MASK :
IVB_FBC_COMPRESSION_MASK;
seq_printf(m, "Compressing: %s\n",
yesno(I915_READ(FBC_STATUS2) & mask));
if (intel_fbc_is_active(dev_priv)) {
u32 mask;
if (INTEL_GEN(dev_priv) >= 8)
mask = I915_READ(IVB_FBC_STATUS2) & BDW_FBC_COMP_SEG_MASK;
else if (INTEL_GEN(dev_priv) >= 7)
mask = I915_READ(IVB_FBC_STATUS2) & IVB_FBC_COMP_SEG_MASK;
else if (INTEL_GEN(dev_priv) >= 5)
mask = I915_READ(ILK_DPFC_STATUS) & ILK_DPFC_COMP_SEG_MASK;
else if (IS_G4X(dev_priv))
mask = I915_READ(DPFC_STATUS) & DPFC_COMP_SEG_MASK;
else
mask = I915_READ(FBC_STATUS) & (FBC_STAT_COMPRESSING |
FBC_STAT_COMPRESSED);
seq_printf(m, "Compressing: %s\n", yesno(mask));
}
mutex_unlock(&dev_priv->fbc.lock);
......@@ -1684,7 +1694,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
return 0;
}
static int i915_fbc_fc_get(void *data, u64 *val)
static int i915_fbc_false_color_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;
......@@ -1696,7 +1706,7 @@ static int i915_fbc_fc_get(void *data, u64 *val)
return 0;
}
static int i915_fbc_fc_set(void *data, u64 val)
static int i915_fbc_false_color_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
u32 reg;
......@@ -1717,8 +1727,8 @@ static int i915_fbc_fc_set(void *data, u64 val)
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops,
i915_fbc_fc_get, i915_fbc_fc_set,
DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_false_color_fops,
i915_fbc_false_color_get, i915_fbc_false_color_set,
"%llu\n");
static int i915_ips_status(struct seq_file *m, void *unused)
......@@ -1988,6 +1998,12 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, '\n');
}
seq_printf(m,
"\tvma hashtable size=%u (actual %lu), count=%u\n",
ctx->vma_lut.ht_size,
BIT(ctx->vma_lut.ht_bits),
ctx->vma_lut.ht_count);
seq_putc(m, '\n');
}
......@@ -4289,26 +4305,27 @@ i915_drop_caches_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_device *dev = &dev_priv->drm;
int ret;
int ret = 0;
DRM_DEBUG("Dropping caches: 0x%08llx\n", val);
/* No need to check and wait for gpu resets, only libdrm auto-restarts
* on ioctls on -EAGAIN. */
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
if (val & DROP_ACTIVE) {
ret = i915_gem_wait_for_idle(dev_priv,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED);
if (val & (DROP_ACTIVE | DROP_RETIRE)) {
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto unlock;
}
return ret;
if (val & DROP_RETIRE)
i915_gem_retire_requests(dev_priv);
if (val & DROP_ACTIVE)
ret = i915_gem_wait_for_idle(dev_priv,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED);
if (val & DROP_RETIRE)
i915_gem_retire_requests(dev_priv);
mutex_unlock(&dev->struct_mutex);
}
lockdep_set_current_reclaim_state(GFP_KERNEL);
if (val & DROP_BOUND)
......@@ -4321,9 +4338,6 @@ i915_drop_caches_set(void *data, u64 val)
i915_gem_shrink_all(dev_priv);
lockdep_clear_current_reclaim_state();
unlock:
mutex_unlock(&dev->struct_mutex);
if (val & DROP_FREED) {
synchronize_rcu();
i915_gem_drain_freed_objects(dev_priv);
......@@ -4861,7 +4875,7 @@ static const struct i915_debugfs_files {
{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
{"i915_spr_wm_latency", &i915_spr_wm_latency_fops},
{"i915_cur_wm_latency", &i915_cur_wm_latency_fops},
{"i915_fbc_false_color", &i915_fbc_fc_fops},
{"i915_fbc_false_color", &i915_fbc_false_color_fops},
{"i915_dp_test_data", &i915_displayport_test_data_fops},
{"i915_dp_test_type", &i915_displayport_test_type_fops},
{"i915_dp_test_active", &i915_displayport_test_active_fops},
......
......@@ -139,6 +139,9 @@ static enum intel_pch intel_virt_detect_pch(struct drm_i915_private *dev_priv)
} else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
ret = PCH_SPT;
DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n");
} else if (IS_COFFEELAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) {
ret = PCH_CNP;
DRM_DEBUG_KMS("Assuming CannonPoint PCH\n");
}
return ret;
......@@ -170,24 +173,29 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) {
if (pch->vendor == PCI_VENDOR_ID_INTEL) {
unsigned short id = pch->device & INTEL_PCH_DEVICE_ID_MASK;
dev_priv->pch_id = id;
unsigned short id_ext = pch->device &
INTEL_PCH_DEVICE_ID_MASK_EXT;
if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_IBX;
DRM_DEBUG_KMS("Found Ibex Peak PCH\n");
WARN_ON(!IS_GEN5(dev_priv));
} else if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_CPT;
DRM_DEBUG_KMS("Found CougarPoint PCH\n");
WARN_ON(!(IS_GEN6(dev_priv) ||
IS_IVYBRIDGE(dev_priv)));
} else if (id == INTEL_PCH_PPT_DEVICE_ID_TYPE) {
/* PantherPoint is CPT compatible */
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_CPT;
DRM_DEBUG_KMS("Found PantherPoint PCH\n");
WARN_ON(!(IS_GEN6(dev_priv) ||
IS_IVYBRIDGE(dev_priv)));
} else if (id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_LPT;
DRM_DEBUG_KMS("Found LynxPoint PCH\n");
WARN_ON(!IS_HASWELL(dev_priv) &&
......@@ -195,6 +203,7 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
WARN_ON(IS_HSW_ULT(dev_priv) ||
IS_BDW_ULT(dev_priv));
} else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_LPT;
DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
WARN_ON(!IS_HASWELL(dev_priv) &&
......@@ -202,20 +211,35 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
WARN_ON(!IS_HSW_ULT(dev_priv) &&
!IS_BDW_ULT(dev_priv));
} else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_SPT;
DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
WARN_ON(!IS_SKYLAKE(dev_priv) &&
!IS_KABYLAKE(dev_priv));
} else if (id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) {
} else if (id_ext == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) {
dev_priv->pch_id = id_ext;
dev_priv->pch_type = PCH_SPT;
DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
WARN_ON(!IS_SKYLAKE(dev_priv) &&
!IS_KABYLAKE(dev_priv));
} else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_KBP;
DRM_DEBUG_KMS("Found KabyPoint PCH\n");
WARN_ON(!IS_SKYLAKE(dev_priv) &&
!IS_KABYLAKE(dev_priv));
} else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) {
dev_priv->pch_id = id;
dev_priv->pch_type = PCH_CNP;
DRM_DEBUG_KMS("Found CannonPoint PCH\n");
WARN_ON(!IS_CANNONLAKE(dev_priv) &&
!IS_COFFEELAKE(dev_priv));
} else if (id_ext == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE) {
dev_priv->pch_id = id_ext;
dev_priv->pch_type = PCH_CNP;
DRM_DEBUG_KMS("Found CannonPoint LP PCH\n");
WARN_ON(!IS_CANNONLAKE(dev_priv) &&
!IS_COFFEELAKE(dev_priv));
} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
(id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
......@@ -223,6 +247,7 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
PCI_SUBVENDOR_ID_REDHAT_QUMRANET &&
pch->subsystem_device ==
PCI_SUBDEVICE_ID_QEMU)) {
dev_priv->pch_id = id;
dev_priv->pch_type =
intel_virt_detect_pch(dev_priv);
} else
......@@ -351,6 +376,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
case I915_PARAM_HAS_EXEC_CAPTURE:
case I915_PARAM_HAS_EXEC_BATCH_FIRST:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
......@@ -358,6 +384,16 @@ static int i915_getparam(struct drm_device *dev, void *data,
*/
value = 1;
break;
case I915_PARAM_SLICE_MASK:
value = INTEL_INFO(dev_priv)->sseu.slice_mask;
if (!value)
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
if (!value)
return -ENODEV;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
......@@ -553,6 +589,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
intel_uc_fini_hw(dev_priv);
i915_gem_cleanup_engines(dev_priv);
i915_gem_context_fini(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
i915_gem_drain_freed_objects(dev_priv);
......@@ -997,6 +1034,8 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores));
intel_uc_sanitize_options(dev_priv);
intel_gvt_sanitize_options(dev_priv);
}
/**
......@@ -2459,9 +2498,6 @@ static int intel_runtime_resume(struct device *kdev)
intel_guc_resume(dev_priv);
if (IS_GEN6(dev_priv))
intel_init_pch_refclk(dev_priv);
if (IS_GEN9_LP(dev_priv)) {
bxt_disable_dc9(dev_priv);
bxt_display_core_init(dev_priv, true);
......
This diff is collapsed.
This diff is collapsed.
......@@ -114,12 +114,27 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_for_each_entry(obj, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (i915_gem_object_is_active(obj)) {
if (!reservation_object_test_signaled_rcu(obj->resv,
true))
struct reservation_object *resv = obj->resv;
if (!reservation_object_test_signaled_rcu(resv, true))
break;
i915_gem_retire_requests(pool->engine->i915);
GEM_BUG_ON(i915_gem_object_is_active(obj));
/*
* The object is now idle, clear the array of shared
* fences before we add a new request. Although, we
* remain on the same engine, we may be on a different
* timeline and so may continually grow the array,
* trapping a reference to all the old fences, rather
* than replace the existing fence.
*/
if (rcu_access_pointer(resv->fence)) {
reservation_object_lock(resv, NULL);
reservation_object_add_excl_fence(resv, NULL);
reservation_object_unlock(resv);
}
}
GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv,
......
......@@ -71,8 +71,6 @@ static const struct dma_fence_ops i915_clflush_ops = {
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
{
drm_clflush_sg(obj->mm.pages);
obj->cache_dirty = false;
intel_fb_obj_flush(obj, ORIGIN_CPU);
}
......@@ -81,9 +79,6 @@ static void i915_clflush_work(struct work_struct *work)
struct clflush *clflush = container_of(work, typeof(*clflush), work);
struct drm_i915_gem_object *obj = clflush->obj;
if (!obj->cache_dirty)
goto out;
if (i915_gem_object_pin_pages(obj)) {
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
goto out;
......@@ -131,10 +126,10 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* anything not backed by physical memory we consider to be always
* coherent and not need clflushing.
*/
if (!i915_gem_object_has_struct_page(obj))
if (!i915_gem_object_has_struct_page(obj)) {
obj->cache_dirty = false;
return;
obj->cache_dirty = true;
}
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
......@@ -144,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
return;
trace_i915_gem_object_clflush(obj);
......@@ -153,6 +148,8 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
if (!(flags & I915_CLFLUSH_SYNC))
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
if (clflush) {
GEM_BUG_ON(!obj->cache_dirty);
dma_fence_init(&clflush->dma,
&i915_clflush_ops,
&clflush_lock,
......@@ -180,4 +177,6 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
} else {
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
}
obj->cache_dirty = false;
}
......@@ -85,6 +85,7 @@
*
*/
#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
......@@ -92,6 +93,71 @@
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
/* Initial size (as log2) to preallocate the handle->object hashtable */
#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
static void resize_vma_ht(struct work_struct *work)
{
struct i915_gem_context_vma_lut *lut =
container_of(work, typeof(*lut), resize);
unsigned int bits, new_bits, size, i;
struct hlist_head *new_ht;
GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
bits = 1 + ilog2(4*lut->ht_count/3 + 1);
new_bits = min_t(unsigned int,
max(bits, VMA_HT_BITS),
sizeof(unsigned int) * BITS_PER_BYTE - 1);
if (new_bits == lut->ht_bits)
goto out;
new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
if (!new_ht)
new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
if (!new_ht)
/* Pretend resize succeeded and stop calling us for a bit! */
goto out;
size = BIT(lut->ht_bits);
for (i = 0; i < size; i++) {
struct i915_vma *vma;
struct hlist_node *tmp;
hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
hlist_add_head(&vma->ctx_node,
&new_ht[hash_32(vma->ctx_handle,
new_bits)]);
}
kvfree(lut->ht);
lut->ht = new_ht;
lut->ht_bits = new_bits;
out:
smp_store_release(&lut->ht_size, BIT(bits));
GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
}
/*
 * vma_lut_free - tear down a context's handle->vma lookup table.
 * @ctx: context whose vma_lut is being destroyed
 *
 * Cancels a resize that is still flagged as in progress, then walks every
 * bucket, detaching each vma from the context (clearing obj->vma_hashed and
 * vma->ctx) and dropping it with i915_vma_put(), and finally frees the
 * bucket array itself.
 */
static void vma_lut_free(struct i915_gem_context *ctx)
{
	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
	unsigned int i, size;

	/* The resize worker swaps lut->ht; make sure it is not running. */
	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
		cancel_work_sync(&lut->resize);

	size = BIT(lut->ht_bits);
	for (i = 0; i < size; i++) {
		struct i915_vma *vma;
		struct hlist_node *next;

		/*
		 * Use the _safe iterator: the next node is sampled before the
		 * body runs, so the walk never reads vma->ctx_node after
		 * i915_vma_put() has dropped the reference on that vma.
		 */
		hlist_for_each_entry_safe(vma, next, &lut->ht[i], ctx_node) {
			vma->obj->vma_hashed = NULL;
			vma->ctx = NULL;
			i915_vma_put(vma);
		}
	}

	kvfree(lut->ht);
}
void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
......@@ -101,6 +167,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
trace_i915_context_free(ctx);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
......@@ -118,6 +185,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
kfree(ctx->name);
put_pid(ctx->pid);
list_del(&ctx->link);
ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
......@@ -201,13 +269,24 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
ctx->vma_lut.ht_bits = VMA_HT_BITS;
ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
sizeof(*ctx->vma_lut.ht),
GFP_KERNEL);
if (!ctx->vma_lut.ht)
goto err_out;
INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
if (file_priv) {
ret = idr_alloc(&file_priv->context_idr, ctx,
DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
if (ret < 0)
goto err_out;
goto err_lut;
}
ctx->user_handle = ret;
......@@ -248,6 +327,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
err_pid:
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
kvfree(ctx->vma_lut.ht);
err_out:
context_close(ctx);
return ERR_PTR(ret);
......@@ -1034,9 +1115,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
if (args->flags || args->pad)
return -EINVAL;
if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
return -EPERM;
ret = i915_mutex_lock_interruptible(dev);
if (ret)
return ret;
......
......@@ -143,6 +143,32 @@ struct i915_gem_context {
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
/**
 * struct i915_gem_context_vma_lut - per-context handle->vma lookup table
 *
 * A simple chained hashtable of vma (linked through their ctx_node) plus the
 * work item used to resize it.
 */
struct i915_gem_context_vma_lut {
/** ht_size: last request size to allocate the hashtable for. */
unsigned int ht_size;
/*
 * Flag kept in ht_size while a resize is outstanding: the resize worker
 * asserts it is set on entry and clears it by storing the new size, and
 * teardown checks it to decide whether the worker must be cancelled.
 */
#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
/** ht_bits: real log2(size) of hashtable. */
unsigned int ht_bits;
/** ht_count: current number of entries inside the hashtable */
unsigned int ht_count;
/** ht: the array of buckets comprising the simple hashtable */
struct hlist_head *ht;
/**
 * resize: After an execbuf completes, we check the load factor
 * of the hashtable. If the hashtable is too full, or too empty,
 * we schedule a task to resize the hashtable. During the
 * resize, the entries are moved between different buckets and
 * so we cannot simultaneously read the hashtable as it is
 * being resized (unlike rhashtable). Therefore we treat the
 * active work as a strong barrier, pausing a subsequent
 * execbuf to wait for the resize worker to complete, if
 * required.
 */
struct work_struct resize;
} vma_lut;
/** engine: per-engine logical HW state */
struct intel_context {
struct i915_vma *state;
......
......@@ -50,6 +50,29 @@ static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
return true;
}
/*
 * ggtt_flush - idle the GPU so that GGTT space held only by active
 * references can be released.
 * @i915: device to flush
 *
 * Switches to the kernel context and then waits (interruptibly, with the
 * lock held by the caller) for the GPU to become idle.
 *
 * Returns 0 on success, or the error from whichever step failed first.
 */
static int ggtt_flush(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Some GGTT users are not tracked through a vma, so we cannot simply
	 * evict them on demand. Instead the GPU has to be idled and all
	 * outstanding requests retired, in the hope that contexts and similar
	 * objects kept alive only by their active reference can then go away.
	 */
	ret = i915_gem_switch_to_kernel_context(i915);
	if (ret)
		return ret;

	/* The outcome of the idle wait is the outcome of the flush. */
	return i915_gem_wait_for_idle(i915,
				      I915_WAIT_INTERRUPTIBLE |
				      I915_WAIT_LOCKED);
}
static bool
mark_free(struct drm_mm_scan *scan,
struct i915_vma *vma,
......@@ -59,13 +82,10 @@ mark_free(struct drm_mm_scan *scan,
if (i915_vma_is_pinned(vma))
return false;
if (WARN_ON(!list_empty(&vma->exec_list)))
return false;
if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))
return false;
list_add(&vma->exec_list, unwind);
list_add(&vma->evict_link, unwind);
return drm_mm_scan_add_block(scan, &vma->node);
}
......@@ -157,11 +177,9 @@ i915_gem_evict_something(struct i915_address_space *vm,
} while (*++phase);
/* Nothing found, clean up and bail out! */
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
ret = drm_mm_scan_remove_block(&scan, &vma->node);
BUG_ON(ret);
INIT_LIST_HEAD(&vma->exec_list);
}
/* Can we unpin some objects such as idle hw contents,
......@@ -180,19 +198,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
}
/* Not everything in the GGTT is tracked via vma (otherwise we
* could evict as required with minimal stalling) so we are forced
* to idle the GPU and explicitly retire outstanding requests in
* the hopes that we can then remove contexts and the like only
* bound by their active reference.
*/
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
ret = i915_gem_wait_for_idle(dev_priv,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED);
ret = ggtt_flush(dev_priv);
if (ret)
return ret;
......@@ -205,21 +211,16 @@ i915_gem_evict_something(struct i915_address_space *vm,
* calling unbind (which may remove the active reference
* of any of our objects, thus corrupting the list).
*/
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
if (drm_mm_scan_remove_block(&scan, &vma->node))
__i915_vma_pin(vma);
else
list_del_init(&vma->exec_list);
list_del(&vma->evict_link);
}
/* Unbinding will emit any required flushes */
ret = 0;
while (!list_empty(&eviction_list)) {
vma = list_first_entry(&eviction_list,
struct i915_vma,
exec_list);
list_del_init(&vma->exec_list);
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
......@@ -315,7 +316,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
}
/* Overlap of objects in the same batch? */
if (i915_vma_is_pinned(vma) || !list_empty(&vma->exec_list)) {
if (i915_vma_is_pinned(vma)) {
ret = -ENOSPC;
if (vma->exec_entry &&
vma->exec_entry->flags & EXEC_OBJECT_PINNED)
......@@ -332,11 +333,10 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* reference) another in our eviction list.
*/
__i915_vma_pin(vma);
list_add(&vma->exec_list, &eviction_list);
list_add(&vma->evict_link, &eviction_list);
}
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
list_del_init(&vma->exec_list);
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
......@@ -348,10 +348,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
/**
* i915_gem_evict_vm - Evict all idle vmas from a vm
* @vm: Address space to cleanse
* @do_idle: Boolean directing whether to idle first.
*
* This function evicts all idles vmas from a vm. If all unpinned vmas should be
* evicted the @do_idle needs to be set to true.
* This function evicts all vmas from a vm.
*
* This is used by the execbuf code as a last-ditch effort to defragment the
* address space.
......@@ -359,37 +357,50 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* To clarify: This is for freeing up virtual address space, not for freeing
* memory in e.g. the shrinker.
*/
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
int i915_gem_evict_vm(struct i915_address_space *vm)
{
struct list_head *phases[] = {
&vm->inactive_list,
&vm->active_list,
NULL
}, **phase;
struct list_head eviction_list;
struct i915_vma *vma, *next;
int ret;
lockdep_assert_held(&vm->i915->drm.struct_mutex);
trace_i915_gem_evict_vm(vm);
if (do_idle) {
struct drm_i915_private *dev_priv = vm->i915;
if (i915_is_ggtt(vm)) {
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
}
ret = i915_gem_wait_for_idle(dev_priv,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED);
/* Switch back to the default context in order to unpin
* the existing context objects. However, such objects only
* pin themselves inside the global GTT and performing the
* switch otherwise is ineffective.
*/
if (i915_is_ggtt(vm)) {
ret = ggtt_flush(vm->i915);
if (ret)
return ret;
WARN_ON(!list_empty(&vm->active_list));
}
list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
if (!i915_vma_is_pinned(vma))
WARN_ON(i915_vma_unbind(vma));
INIT_LIST_HEAD(&eviction_list);
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link) {
if (i915_vma_is_pinned(vma))
continue;
return 0;
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
}
} while (*++phase);
ret = 0;
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
}
return ret;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
......
This diff is collapsed.
......@@ -1884,7 +1884,7 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
* called on driver load and after a GPU reset, so you can place
* workarounds here even if they get overwritten by GPU reset.
*/
/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */
/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl */
if (IS_BROADWELL(dev_priv))
I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
else if (IS_CHERRYVIEW(dev_priv))
......@@ -3095,13 +3095,17 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
/*
 * i915_ggtt_enable_guc - switch GGTT invalidation over to the GuC variant.
 * @i915: device whose ggtt.invalidate hook is replaced
 *
 * Installs guc_ggtt_invalidate as the invalidate hook. The hook is expected
 * to be gen6_ggtt_invalidate beforehand; anything else trips GEM_BUG_ON.
 */
void i915_ggtt_enable_guc(struct drm_i915_private *i915)
{
/* Only the gen6 hook may be swapped for the GuC one. */
GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
i915->ggtt.invalidate = guc_ggtt_invalidate;
}
void i915_ggtt_disable_guc(struct drm_i915_private *i915)
{
if (i915->ggtt.invalidate == guc_ggtt_invalidate)
i915->ggtt.invalidate = gen6_ggtt_invalidate;
/* We should only be called after i915_ggtt_enable_guc() */
GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
i915->ggtt.invalidate = gen6_ggtt_invalidate;
}
void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
......@@ -3398,6 +3402,9 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
if (err != -ENOSPC)
return err;
if (flags & PIN_NOEVICT)
return -ENOSPC;
err = i915_gem_evict_for_node(vm, node, flags);
if (err == 0)
err = drm_mm_reserve_node(&vm->mm, node);
......@@ -3512,6 +3519,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (err != -ENOSPC)
return err;
if (flags & PIN_NOEVICT)
return -ENOSPC;
/* No free space, pick a slot at random.
*
* There is a pathological case here using a GTT shared between
......
......@@ -255,6 +255,7 @@ struct i915_address_space {
struct drm_i915_file_private *file;
struct list_head global_link;
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved; /* size addr space reserved */
bool closed;
......@@ -588,6 +589,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
#define PIN_MAPPABLE BIT(1)
#define PIN_ZONE_4G BIT(2)
#define PIN_NONFAULT BIT(3)
#define PIN_NOEVICT BIT(4)
#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
......
......@@ -188,9 +188,11 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &i915_gem_object_internal_ops);
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_coherent = i915_gem_object_is_coherent(obj);
obj->cache_dirty = !obj->cache_coherent;
return obj;
}
......@@ -68,9 +68,25 @@ struct drm_i915_gem_object {
const struct drm_i915_gem_object_ops *ops;
/** List of VMAs backed by this object */
/**
* @vma_list: List of VMAs backed by this object
*
* The VMA on this list are ordered by type, all GGTT vma are placed
* at the head and all ppGTT vma are placed at the tail. The different
* types of GGTT vma are unordered between themselves, use the
* @vma_tree (which has a defined order between all VMA) to find an
* exact match.
*/
struct list_head vma_list;
/**
* @vma_tree: Ordered tree of VMAs backed by this object
*
* All VMA created for this object are placed in the @vma_tree for
* fast retrieval via a binary search in i915_vma_instance().
* They are also added to @vma_list for easy iteration.
*/
struct rb_root vma_tree;
struct i915_vma *vma_hashed;
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
......@@ -85,9 +101,6 @@ struct drm_i915_gem_object {
*/
struct list_head userfault_link;
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
struct list_head batch_pool_link;
I915_SELFTEST_DECLARE(struct list_head st_link);
......@@ -106,6 +119,7 @@ struct drm_i915_gem_object {
unsigned long gt_ro:1;
unsigned int cache_level:3;
unsigned int cache_dirty:1;
unsigned int cache_coherent:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
......
......@@ -62,7 +62,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
return false;
intel_engine_enable_signaling(to_request(fence), true);
return true;
return !i915_fence_signaled(fence);
}
static signed long i915_fence_wait(struct dma_fence *fence,
......@@ -683,7 +683,6 @@ static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
u32 seqno;
int ret;
GEM_BUG_ON(to == from);
......@@ -707,18 +706,14 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret < 0 ? ret : 0;
}
seqno = i915_gem_request_global_seqno(from);
if (!seqno)
goto await_dma_fence;
if (to->engine->semaphore.sync_to) {
u32 seqno;
if (!to->engine->semaphore.sync_to) {
if (!__i915_gem_request_started(from, seqno))
goto await_dma_fence;
GEM_BUG_ON(!from->engine->semaphore.signal);
if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2))
seqno = i915_gem_request_global_seqno(from);
if (!seqno)
goto await_dma_fence;
} else {
GEM_BUG_ON(!from->engine->semaphore.signal);
if (seqno <= to->timeline->global_sync[from->engine->id])
return 0;
......@@ -729,10 +724,9 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret;
to->timeline->global_sync[from->engine->id] = seqno;
return 0;
}
return 0;
await_dma_fence:
ret = i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
......
......@@ -38,16 +38,21 @@
static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
{
switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
case MUTEX_TRYLOCK_FAILED:
return false;
case MUTEX_TRYLOCK_SUCCESS:
*unlock = true;
return true;
case MUTEX_TRYLOCK_RECURSIVE:
*unlock = false;
return true;
case MUTEX_TRYLOCK_FAILED:
do {
cpu_relax();
if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
case MUTEX_TRYLOCK_SUCCESS:
*unlock = true;
return true;
}
} while (!need_resched());
return false;
}
BUG();
......@@ -332,6 +337,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
sc->nr_to_scan - freed,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND);
if (freed < sc->nr_to_scan && current_is_kswapd()) {
intel_runtime_pm_get(dev_priv);
freed += i915_gem_shrink(dev_priv,
sc->nr_to_scan - freed,
I915_SHRINK_ACTIVE |
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND);
intel_runtime_pm_put(dev_priv);
}
shrinker_unlock(dev_priv, unlock);
......
......@@ -590,6 +590,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
if (i915_gem_object_pin_pages(obj))
goto cleanup;
......
......@@ -378,7 +378,7 @@ __i915_mm_struct_free(struct kref *kref)
mutex_unlock(&mm->i915->mm_lock);
INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
schedule_work(&mm->work);
queue_work(mm->i915->mm.userptr_wq, &mm->work);
}
static void
......@@ -598,7 +598,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
get_task_struct(work->task);
INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
schedule_work(&work->work);
queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
return ERR_PTR(-EAGAIN);
}
......@@ -802,9 +802,11 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
drm_gem_private_object_init(dev, &obj->base, args->user_size);
i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->cache_level = I915_CACHE_LLC;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_LLC;
obj->cache_coherent = i915_gem_object_is_coherent(obj);
obj->cache_dirty = !obj->cache_coherent;
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
......@@ -828,8 +830,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
return 0;
}
void i915_gem_init_userptr(struct drm_i915_private *dev_priv)
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
mutex_init(&dev_priv->mm_lock);
hash_init(dev_priv->mm_structs);
dev_priv->mm.userptr_wq =
alloc_workqueue("i915-userptr-acquire", WQ_HIGHPRI, 0);
if (!dev_priv->mm.userptr_wq)
return -ENOMEM;
return 0;
}
/*
 * i915_gem_cleanup_userptr - tear down the userptr workqueue
 * @dev_priv: i915 device private data
 *
 * Destroys dev_priv->mm.userptr_wq, the workqueue that
 * i915_gem_init_userptr() allocates.
 *
 * NOTE(review): the workqueue pointer is not checked here, so this
 * presumably must only run after i915_gem_init_userptr() succeeded -
 * confirm against the callers.
 */
void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->mm.userptr_wq);
}
......@@ -105,7 +105,7 @@ static int __reserve_doorbell(struct i915_guc_client *client)
end += offset;
}
id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset);
if (id == end)
return -ENOSPC;
......
......@@ -2548,7 +2548,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
I915_WRITE(SDEIIR, iir);
ret = IRQ_HANDLED;
if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv))
if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv) ||
HAS_PCH_CNP(dev_priv))
spt_irq_handler(dev_priv, iir);
else
cpt_irq_handler(dev_priv, iir);
......@@ -4289,7 +4290,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
dev->driver->disable_vblank = gen8_disable_vblank;
if (IS_GEN9_LP(dev_priv))
dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv))
else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_KBP(dev_priv) ||
HAS_PCH_CNP(dev_priv))
dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
else
dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup;
......
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_BDW_H__
#define __I915_OA_BDW_H__
/*
 * Declarations for the Broadwell ("bdw") OA metric-set code. Nothing is
 * defined in this header; the definitions live in another translation unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for bdw - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_bdw;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_bdw(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the bdw sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_bdw(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_bdw(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_BXT_H__
#define __I915_OA_BXT_H__
/*
 * Declarations for the Broxton ("bxt") OA metric-set code. Nothing is
 * defined in this header; the definitions live in another translation unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for bxt - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_bxt;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_bxt(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the bxt sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_bxt(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_bxt(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_CHV_H__
#define __I915_OA_CHV_H__
/*
 * Declarations for the Cherryview ("chv") OA metric-set code. Nothing is
 * defined in this header; the definitions live in another translation unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for chv - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_chv;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_chv(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the chv sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_chv(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_chv(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_GLK_H__
#define __I915_OA_GLK_H__
/*
 * Declarations for the Geminilake ("glk") OA metric-set code. Nothing is
 * defined in this header; the definitions live in another translation unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for glk - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_glk;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_glk(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the glk sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_glk(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_glk(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file, DO NOT EDIT manually!
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
......
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_KBLGT2_H__
#define __I915_OA_KBLGT2_H__
/*
 * Declarations for the Kabylake GT2 ("kblgt2") OA metric-set code. Nothing
 * is defined in this header; the definitions live in another translation
 * unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for kblgt2 - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_kblgt2;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_kblgt2(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the kblgt2 sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_kblgt2(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_kblgt2(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_KBLGT3_H__
#define __I915_OA_KBLGT3_H__
/*
 * Declarations for the Kabylake GT3 ("kblgt3") OA metric-set code. Nothing
 * is defined in this header; the definitions live in another translation
 * unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for kblgt3 - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_kblgt3;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_kblgt3(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the kblgt3 sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_kblgt3(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_kblgt3(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_SKLGT2_H__
#define __I915_OA_SKLGT2_H__
/*
 * Declarations for the Skylake GT2 ("sklgt2") OA metric-set code. Nothing
 * is defined in this header; the definitions live in another translation
 * unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for sklgt2 - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_sklgt2;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_sklgt2(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the sklgt2 sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_sklgt2(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_sklgt2(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_SKLGT3_H__
#define __I915_OA_SKLGT3_H__
/*
 * Declarations for the Skylake GT3 ("sklgt3") OA metric-set code. Nothing
 * is defined in this header; the definitions live in another translation
 * unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for sklgt3 - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_sklgt3;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_sklgt3(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the sklgt3 sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_sklgt3(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_sklgt3(struct drm_i915_private *dev_priv);
#endif
This diff is collapsed.
/*
* Autogenerated file by GPU Top : https://github.com/rib/gputop
* DO NOT EDIT manually!
*
*
* Copyright (c) 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_OA_SKLGT4_H__
#define __I915_OA_SKLGT4_H__
/*
 * Declarations for the Skylake GT4 ("sklgt4") OA metric-set code. Nothing
 * is defined in this header; the definitions live in another translation
 * unit.
 * NOTE(review): this file is marked autogenerated - mirror any comment
 * change in the generator.
 */
/* Presumably the number of built-in metric sets for sklgt4 - confirm at the definition. */
extern int i915_oa_n_builtin_metric_sets_sklgt4;
/* Returns int; presumably 0 or a negative errno - confirm at the definition. */
extern int i915_oa_select_metric_set_sklgt4(struct drm_i915_private *dev_priv);
/* Register/unregister pair for the sklgt4 sysfs entries; only register can report failure. */
extern int i915_perf_register_sysfs_sklgt4(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister_sysfs_sklgt4(struct drm_i915_private *dev_priv);
#endif
......@@ -312,16 +312,17 @@ static const struct intel_device_info intel_haswell_info = {
.has_full_48bit_ppgtt = 1, \
.has_64bit_reloc = 1
/*
 * Initializer fields shared by the Broadwell device-info tables:
 * BDW_FEATURES followed by gen 8 and the Broadwell platform id.
 * The expansion has no trailing comma, so users write "BDW_PLATFORM,".
 * Fields listed after BDW_FEATURES override any duplicate designator it
 * may contain, so keep this order.
 */
#define BDW_PLATFORM \
BDW_FEATURES, \
.gen = 8, \
.platform = INTEL_BROADWELL
static const struct intel_device_info intel_broadwell_info = {
BDW_FEATURES,
.gen = 8,
.platform = INTEL_BROADWELL,
BDW_PLATFORM,
};
static const struct intel_device_info intel_broadwell_gt3_info = {
BDW_FEATURES,
.gen = 8,
.platform = INTEL_BROADWELL,
BDW_PLATFORM,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
......@@ -347,22 +348,20 @@ static const struct intel_device_info intel_cherryview_info = {
CHV_COLORS,
};
/*
 * Initializer fields shared by the Skylake device-info tables:
 * BDW_FEATURES followed by gen 9, the Skylake platform id, has_csr,
 * has_guc and a ddb_size of 896.
 * The expansion has no trailing comma, so users write "SKL_PLATFORM,".
 * Fields listed after BDW_FEATURES override any duplicate designator it
 * may contain, so keep this order.
 */
#define SKL_PLATFORM \
BDW_FEATURES, \
.gen = 9, \
.platform = INTEL_SKYLAKE, \
.has_csr = 1, \
.has_guc = 1, \
.ddb_size = 896
static const struct intel_device_info intel_skylake_info = {
BDW_FEATURES,
.platform = INTEL_SKYLAKE,
.gen = 9,
.has_csr = 1,
.has_guc = 1,
.ddb_size = 896,
SKL_PLATFORM,
};
static const struct intel_device_info intel_skylake_gt3_info = {
BDW_FEATURES,
.platform = INTEL_SKYLAKE,
.gen = 9,
.has_csr = 1,
.has_guc = 1,
.ddb_size = 896,
SKL_PLATFORM,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
......@@ -401,28 +400,52 @@ static const struct intel_device_info intel_broxton_info = {
static const struct intel_device_info intel_geminilake_info = {
GEN9_LP_FEATURES,
.platform = INTEL_GEMINILAKE,
.is_alpha_support = 1,
.ddb_size = 1024,
.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
};
/*
 * Initializer fields shared by the Kabylake device-info tables:
 * BDW_FEATURES followed by gen 9, the Kabylake platform id, has_csr,
 * has_guc and a ddb_size of 896.
 * The expansion has no trailing comma, so users write "KBL_PLATFORM,".
 * Fields listed after BDW_FEATURES override any duplicate designator it
 * may contain, so keep this order.
 */
#define KBL_PLATFORM \
BDW_FEATURES, \
.gen = 9, \
.platform = INTEL_KABYLAKE, \
.has_csr = 1, \
.has_guc = 1, \
.ddb_size = 896
static const struct intel_device_info intel_kabylake_info = {
BDW_FEATURES,
.platform = INTEL_KABYLAKE,
.gen = 9,
.has_csr = 1,
.has_guc = 1,
.ddb_size = 896,
KBL_PLATFORM,
};
/*
 * Kabylake GT3 device info: the shared KBL_PLATFORM fields plus a
 * ring_mask that includes BSD2_RING alongside the render, BSD, BLT and
 * VEBOX rings. The PCI id table uses it for both the KBL GT3 and GT4 ids.
 */
static const struct intel_device_info intel_kabylake_gt3_info = {
KBL_PLATFORM,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
/*
 * Initializer fields shared by the Coffee Lake device-info tables:
 * is_alpha_support, then BDW_FEATURES, then gen 9, the Coffee Lake
 * platform id, has_csr, has_guc and a ddb_size of 896.
 * The expansion has no trailing comma, so users write "CFL_PLATFORM,".
 * NOTE(review): is_alpha_support is set before BDW_FEATURES, so a
 * duplicate designator inside BDW_FEATURES (not visible here) would
 * override it - confirm BDW_FEATURES does not set that field.
 */
#define CFL_PLATFORM \
.is_alpha_support = 1, \
BDW_FEATURES, \
.gen = 9, \
.platform = INTEL_COFFEELAKE, \
.has_csr = 1, \
.has_guc = 1, \
.ddb_size = 896
/*
 * Coffee Lake device info consisting of the shared CFL_PLATFORM fields
 * only. The PCI id table uses it for the CFL S and CFL H ids.
 */
static const struct intel_device_info intel_coffeelake_info = {
CFL_PLATFORM,
};
/*
 * Coffee Lake GT3 device info: the shared CFL_PLATFORM fields plus a
 * ring_mask that includes BSD2_RING alongside the render, BSD, BLT and
 * VEBOX rings. The PCI id table uses it for the CFL U ids.
 */
static const struct intel_device_info intel_coffeelake_gt3_info = {
CFL_PLATFORM,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
static const struct intel_device_info intel_cannonlake_info = {
BDW_FEATURES,
.platform = INTEL_KABYLAKE,
.gen = 9,
.is_alpha_support = 1,
.platform = INTEL_CANNONLAKE,
.gen = 10,
.ddb_size = 1024,
.has_csr = 1,
.has_guc = 1,
.ddb_size = 896,
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
};
/*
......@@ -469,6 +492,10 @@ static const struct pci_device_id pciidlist[] = {
INTEL_KBL_GT2_IDS(&intel_kabylake_info),
INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info),
INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info),
INTEL_CFL_S_IDS(&intel_coffeelake_info),
INTEL_CFL_H_IDS(&intel_coffeelake_info),
INTEL_CFL_U_IDS(&intel_coffeelake_gt3_info),
INTEL_CNL_IDS(&intel_cannonlake_info),
{0, 0, 0}
};
MODULE_DEVICE_TABLE(pci, pciidlist);
......
This diff is collapsed.
......@@ -36,10 +36,6 @@
#define VGT_VERSION_MAJOR 1
#define VGT_VERSION_MINOR 0
#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
#define INTEL_VGT_IF_VERSION \
INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
/*
* notifications from guest to vgpu device model
*/
......@@ -55,8 +51,8 @@ enum vgt_g2v_type {
struct vgt_if {
u64 magic; /* VGT_MAGIC */
uint16_t version_major;
uint16_t version_minor;
u16 version_major;
u16 version_minor;
u32 vgt_id; /* ID of vGT instance */
u32 rsv1[12]; /* pad to offset 0x40 */
/*
......
This diff is collapsed.
......@@ -99,6 +99,11 @@
__T; \
})
/*
 * u64_to_ptr - turn the u64 value @x into a pointer of type "T *".
 *
 * typecheck(u64, x) is there to reject an @x that is not a u64
 * (presumably at build time - see typecheck()'s definition). The value is
 * then converted through uintptr_t before the cast to "T *", so where
 * uintptr_t is narrower than 64 bits the upper bits of @x are discarded.
 * The statement expression evaluates to the resulting pointer.
 */
#define u64_to_ptr(T, x) ({ \
typecheck(u64, x); \
(T *)(uintptr_t)(x); \
})
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
......
......@@ -60,8 +60,8 @@
*/
void i915_check_vgpu(struct drm_i915_private *dev_priv)
{
uint64_t magic;
uint32_t version;
u64 magic;
u16 version_major;
BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
......@@ -69,10 +69,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
if (magic != VGT_MAGIC)
return;
version = INTEL_VGT_IF_VERSION_ENCODE(
__raw_i915_read16(dev_priv, vgtif_reg(version_major)),
__raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
if (version != INTEL_VGT_IF_VERSION) {
version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major));
if (version_major < VGT_VERSION_MAJOR) {
DRM_INFO("VGT interface version mismatch!\n");
return;
}
......@@ -92,6 +90,18 @@ struct _balloon_info_ {
static struct _balloon_info_ bl_info;
/*
 * vgt_deballoon_space - release one ballooned GGTT range
 * @ggtt: global GTT whose reserved-size accounting is adjusted
 * @node: drm_mm node describing the ballooned range
 *
 * Subtracts the node's size from ggtt->base.reserved and removes the node
 * from the address space.
 *
 * Nodes that are not currently allocated are ignored. The callers hand in
 * nodes without checking them: intel_vgt_deballoon() walks all four
 * balloon nodes, and the error unwinding in intel_vgt_balloon() releases
 * nodes whose reservation was conditional. Removing a node that was never
 * inserted would corrupt the reserved accounting and operate on an
 * unlinked drm_mm node, so the guard lives here and makes the helper safe
 * to call more than once on the same node.
 */
static void vgt_deballoon_space(struct i915_ggtt *ggtt,
				struct drm_mm_node *node)
{
	/* Never ballooned, or already released: nothing to undo. */
	if (!node->allocated)
		return;

	DRM_DEBUG_DRIVER("deballoon space: range [0x%llx - 0x%llx] %llu KiB.\n",
			 node->start,
			 node->start + node->size,
			 node->size / 1024);

	/* Undo the accounting done when the range was ballooned. */
	ggtt->base.reserved -= node->size;
	drm_mm_remove_node(node);
}
/**
* intel_vgt_deballoon - deballoon reserved graphics address trunks
* @dev_priv: i915 device private data
......@@ -108,12 +118,8 @@ void intel_vgt_deballoon(struct drm_i915_private *dev_priv)
DRM_DEBUG("VGT deballoon.\n");
for (i = 0; i < 4; i++) {
if (bl_info.space[i].allocated)
drm_mm_remove_node(&bl_info.space[i]);
}
memset(&bl_info, 0, sizeof(bl_info));
for (i = 0; i < 4; i++)
vgt_deballoon_space(&dev_priv->ggtt, &bl_info.space[i]);
}
static int vgt_balloon_space(struct i915_ggtt *ggtt,
......@@ -121,15 +127,20 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt,
unsigned long start, unsigned long end)
{
unsigned long size = end - start;
int ret;
if (start >= end)
return -EINVAL;
DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
start, end, size / 1024);
return i915_gem_gtt_reserve(&ggtt->base, node,
size, start, I915_COLOR_UNEVICTABLE,
0);
ret = i915_gem_gtt_reserve(&ggtt->base, node,
size, start, I915_COLOR_UNEVICTABLE,
0);
if (!ret)
ggtt->base.reserved += size;
return ret;
}
/**
......@@ -222,7 +233,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
ret = vgt_balloon_space(ggtt, &bl_info.space[3],
unmappable_end, ggtt_end);
if (ret)
goto err;
goto err_upon_mappable;
}
/* Mappable graphic memory ballooning */
......@@ -231,7 +242,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
0, mappable_base);
if (ret)
goto err;
goto err_upon_unmappable;
}
if (mappable_end < ggtt->mappable_end) {
......@@ -239,14 +250,19 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
mappable_end, ggtt->mappable_end);
if (ret)
goto err;
goto err_below_mappable;
}
DRM_INFO("VGT balloon successfully\n");
return 0;
err_below_mappable:
vgt_deballoon_space(ggtt, &bl_info.space[0]);
err_upon_unmappable:
vgt_deballoon_space(ggtt, &bl_info.space[3]);
err_upon_mappable:
vgt_deballoon_space(ggtt, &bl_info.space[2]);
err:
DRM_ERROR("VGT balloon fail\n");
intel_vgt_deballoon(dev_priv);
return ret;
}
......@@ -85,12 +85,12 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&vma->exec_list);
for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
init_request_active(&vma->last_read[i], i915_vma_retire);
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->obj = obj;
vma->resv = obj->resv;
vma->size = obj->base.size;
vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
......@@ -464,7 +464,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
return -E2BIG;
return -ENOSPC;
}
ret = i915_gem_object_pin_pages(obj);
......@@ -577,7 +577,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
return ret;
}
void i915_vma_destroy(struct i915_vma *vma)
static void i915_vma_destroy(struct i915_vma *vma)
{
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(i915_vma_is_active(vma));
......@@ -591,11 +591,33 @@ void i915_vma_destroy(struct i915_vma *vma)
kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}
/*
 * i915_vma_unlink_ctx - detach @vma from the context it is linked to
 * @vma: vma whose ->ctx link is removed
 *
 * Removes @vma from its context's vma_lut, clears the object's
 * vma_hashed pointer when @vma is a GGTT vma, resets vma->ctx and
 * finally calls i915_vma_put().
 *
 * vma->ctx is dereferenced without a NULL check, so callers must only
 * call this while vma->ctx is set (i915_vma_close() checks it first).
 */
void i915_vma_unlink_ctx(struct i915_vma *vma)
{
struct i915_gem_context *ctx = vma->ctx;
/*
 * A resize is flagged in ht_size: cancel (and wait for) the resize work
 * before touching the hash list, then clear the in-progress flag.
 */
if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
cancel_work_sync(&ctx->vma_lut.resize);
ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
}
/* Unlink from the context's hash list and keep its entry count in step. */
__hlist_del(&vma->ctx_node);
ctx->vma_lut.ht_count--;
/* A GGTT vma may be cached on the object; drop that cached pointer. */
if (i915_vma_is_ggtt(vma))
vma->obj->vma_hashed = NULL;
vma->ctx = NULL;
/* Presumably releases the reference held for the context link - confirm. */
i915_vma_put(vma);
}
void i915_vma_close(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_closed(vma));
vma->flags |= I915_VMA_CLOSED;
if (vma->ctx)
i915_vma_unlink_ctx(vma);
list_del(&vma->obj_link);
rb_erase(&vma->obj_node, &vma->obj->vma_tree);
......
......@@ -50,6 +50,7 @@ struct i915_vma {
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct drm_i915_fence_reg *fence;
struct reservation_object *resv; /** Alias of obj->resv */
struct sg_table *pages;
void __iomem *iomap;
u64 size;
......@@ -99,16 +100,25 @@ struct i915_vma {
struct list_head obj_link; /* Link in the object's VMA list */
struct rb_node obj_node;
struct hlist_node obj_hash;
/** This vma's place in the batchbuffer or on the eviction list */
struct list_head exec_list;
/** This vma's place in the execbuf reservation list */
struct list_head exec_link;
struct list_head reloc_link;
/** This vma's place in the eviction list */
struct list_head evict_link;
/**
* Used for performing relocations during execbuffer insertion.
*/
struct hlist_node exec_node;
unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
struct hlist_node exec_node;
u32 exec_handle;
struct i915_gem_context *ctx;
struct hlist_node ctx_node;
u32 ctx_handle;
};
struct i915_vma *
......@@ -232,8 +242,8 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
void i915_vma_unlink_ctx(struct i915_vma *vma);
void i915_vma_close(struct i915_vma *vma);
void i915_vma_destroy(struct i915_vma *vma);
int __i915_vma_do_pin(struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
......
This diff is collapsed.
......@@ -234,7 +234,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b)
mod_timer(&b->hangcheck, wait_timeout());
}
static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
......@@ -242,7 +242,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
return;
return false;
/* The breadcrumb irq will be disarmed on the interrupt after the
* waiters are signaled. This gives us a single interrupt window in
......@@ -260,7 +260,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
* implementation to call intel_engine_wakeup()
* itself when it wants to simulate a user interrupt,
*/
return;
return true;
}
/* Since we are waiting on a request, the GPU should be busy
......@@ -278,6 +278,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
}
enable_fake_irq(b);
return true;
}
static inline struct intel_wait *to_wait(struct rb_node *node)
......@@ -329,7 +330,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node **p, *parent, *completed;
bool first;
bool first, armed;
u32 seqno;
/* Insert the request into the retirement ordered list
......@@ -344,6 +345,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
* removing stale elements in the tree, we may be able to reduce the
* ping-pong between the old bottom-half and ourselves as first-waiter.
*/
armed = false;
first = true;
parent = NULL;
completed = NULL;
......@@ -399,7 +401,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
* in the unlocked read of b->irq_seqno_bh in the irq handler)
* and so we miss the wake up.
*/
__intel_breadcrumbs_enable_irq(b);
armed = __intel_breadcrumbs_enable_irq(b);
spin_unlock(&b->irq_lock);
}
......@@ -426,20 +428,24 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(!b->irq_armed);
GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);
return first;
return armed;
}
bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool first;
bool armed;
spin_lock_irq(&b->rb_lock);
first = __intel_engine_add_wait(engine, wait);
armed = __intel_engine_add_wait(engine, wait);
spin_unlock_irq(&b->rb_lock);
if (armed)
return armed;
return first;
/* Make the caller recheck if its request has already started. */
return i915_seqno_passed(intel_engine_get_seqno(engine),
wait->seqno - 1);
}
static inline bool chain_wakeup(struct rb_node *rb, int priority)
......@@ -672,8 +678,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
{
struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *parent, **p;
bool first;
u32 seqno;
/* Note that we may be called from an interrupt handler on another
......@@ -708,27 +712,36 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
*/
wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
/* Now insert ourselves into the retirement ordered list of signals
* on this engine. We track the oldest seqno as that will be the
* first signal to complete.
*/
parent = NULL;
first = true;
p = &b->signals.rb_node;
while (*p) {
parent = *p;
if (i915_seqno_passed(seqno,
to_signaler(parent)->signaling.wait.seqno)) {
p = &parent->rb_right;
first = false;
} else {
p = &parent->rb_left;
if (!__i915_gem_request_completed(request, seqno)) {
struct rb_node *parent, **p;
bool first;
/* Now insert ourselves into the retirement ordered list of
* signals on this engine. We track the oldest seqno as that
* will be the first signal to complete.
*/
parent = NULL;
first = true;
p = &b->signals.rb_node;
while (*p) {
parent = *p;
if (i915_seqno_passed(seqno,
to_signaler(parent)->signaling.wait.seqno)) {
p = &parent->rb_right;
first = false;
} else {
p = &parent->rb_left;
}
}
rb_link_node(&request->signaling.node, parent, p);
rb_insert_color(&request->signaling.node, &b->signals);
if (first)
rcu_assign_pointer(b->first_signal, request);
} else {
__intel_engine_remove_wait(engine, &request->signaling.wait);
i915_gem_request_put(request);
wakeup = false;
}
rb_link_node(&request->signaling.node, parent, p);
rb_insert_color(&request->signaling.node, &b->signals);
if (first)
rcu_assign_pointer(b->first_signal, request);
spin_unlock(&b->rb_lock);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -262,7 +262,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev,
ironlake_set_fifo_underrun_reporting(dev, pipe, enable);
else if (IS_GEN7(dev_priv))
ivybridge_set_fifo_underrun_reporting(dev, pipe, enable, old);
else if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
else if (INTEL_GEN(dev_priv) >= 8)
broadwell_set_fifo_underrun_reporting(dev, pipe, enable);
return old;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment