Commit 5a4c6f1b authored by Chris Wilson's avatar Chris Wilson

drm/i915: The return of i915_gpu_info to debugfs

Once upon a time before we had automated GPU state capture upon hangs,
we had intel_gpu_dump. Now we come almost full circle and reinstate that
view of the current GPU queues and registers by using the error capture
facility to snapshot the GPU state when debugfs/.../i915_gpu_info is
opened - which should provided useful debugging to both the error
capture routines (without having to cause a hang and avoid the error
state being eaten by igt) and generally.

v2: Rename drm_i915_error_state to i915_gpu_state to alleviate some name
collisions between the error state dump and inspecting the gpu state.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170214164611.11381-1-chris@chris-wilson.co.uk
parent 65300b1f
......@@ -988,89 +988,93 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
}
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
static ssize_t
i915_error_state_write(struct file *filp,
const char __user *ubuf,
size_t cnt,
loff_t *ppos)
static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
size_t count, loff_t *pos)
{
struct i915_error_state_file_priv *error_priv = filp->private_data;
struct i915_gpu_state *error = file->private_data;
struct drm_i915_error_state_buf str;
ssize_t ret;
loff_t tmp;
DRM_DEBUG_DRIVER("Resetting error state\n");
i915_destroy_error_state(error_priv->i915);
return cnt;
}
static int i915_error_state_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *dev_priv = inode->i_private;
struct i915_error_state_file_priv *error_priv;
if (!error)
return 0;
error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
if (!error_priv)
return -ENOMEM;
ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
if (ret)
return ret;
error_priv->i915 = dev_priv;
ret = i915_error_state_to_str(&str, error);
if (ret)
goto out;
i915_error_state_get(&dev_priv->drm, error_priv);
tmp = 0;
ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
if (ret < 0)
goto out;
file->private_data = error_priv;
*pos = str.start + ret;
out:
i915_error_state_buf_release(&str);
return ret;
}
static int gpu_state_release(struct inode *inode, struct file *file)
{
i915_gpu_state_put(file->private_data);
return 0;
}
static int i915_error_state_release(struct inode *inode, struct file *file)
static int i915_gpu_info_open(struct inode *inode, struct file *file)
{
struct i915_error_state_file_priv *error_priv = file->private_data;
struct i915_gpu_state *gpu;
i915_error_state_put(error_priv);
kfree(error_priv);
gpu = i915_capture_gpu_state(inode->i_private);
if (!gpu)
return -ENOMEM;
file->private_data = gpu;
return 0;
}
static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
size_t count, loff_t *pos)
static const struct file_operations i915_gpu_info_fops = {
.owner = THIS_MODULE,
.open = i915_gpu_info_open,
.read = gpu_state_read,
.llseek = default_llseek,
.release = gpu_state_release,
};
static ssize_t
i915_error_state_write(struct file *filp,
const char __user *ubuf,
size_t cnt,
loff_t *ppos)
{
struct i915_error_state_file_priv *error_priv = file->private_data;
struct drm_i915_error_state_buf error_str;
loff_t tmp_pos = 0;
ssize_t ret_count = 0;
int ret;
struct i915_gpu_state *error = filp->private_data;
ret = i915_error_state_buf_init(&error_str, error_priv->i915,
count, *pos);
if (ret)
return ret;
if (!error)
return 0;
ret = i915_error_state_to_str(&error_str, error_priv);
if (ret)
goto out;
DRM_DEBUG_DRIVER("Resetting error state\n");
i915_reset_error_state(error->i915);
ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
error_str.buf,
error_str.bytes);
return cnt;
}
if (ret_count < 0)
ret = ret_count;
else
*pos = error_str.start + ret_count;
out:
i915_error_state_buf_release(&error_str);
return ret ?: ret_count;
static int i915_error_state_open(struct inode *inode, struct file *file)
{
file->private_data = i915_first_error_state(inode->i_private);
return 0;
}
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
.read = i915_error_state_read,
.read = gpu_state_read,
.write = i915_error_state_write,
.llseek = default_llseek,
.release = i915_error_state_release,
.release = gpu_state_release,
};
#endif
static int
......@@ -4811,6 +4815,7 @@ static const struct i915_debugfs_files {
{"i915_gem_drop_caches", &i915_drop_caches_fops},
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
{"i915_error_state", &i915_error_state_fops},
{"i915_gpu_info", &i915_gpu_info_fops},
#endif
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
......
......@@ -1370,7 +1370,7 @@ void i915_driver_unload(struct drm_device *dev)
/* Free error state after interrupts are fully disabled. */
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_destroy_error_state(dev_priv);
i915_reset_error_state(dev_priv);
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
......
......@@ -897,7 +897,7 @@ struct intel_device_info {
struct intel_display_error_state;
struct drm_i915_error_state {
struct i915_gpu_state {
struct kref ref;
struct timeval time;
struct timeval boottime;
......@@ -917,7 +917,7 @@ struct drm_i915_error_state {
u32 eir;
u32 pgtbl_er;
u32 ier;
u32 gtier[4];
u32 gtier[4], ngtier;
u32 ccid;
u32 derrmr;
u32 forcewake;
......@@ -931,6 +931,7 @@ struct drm_i915_error_state {
u32 gab_ctl;
u32 gfx_mode;
u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
......@@ -1512,11 +1513,6 @@ struct drm_i915_error_state_buf {
loff_t pos;
};
struct i915_error_state_file_priv {
struct drm_i915_private *i915;
struct drm_i915_error_state *error;
};
#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */
......@@ -1533,7 +1529,7 @@ struct i915_gpu_error {
/* For reset and error_state handling. */
spinlock_t lock;
/* Protected by the above dev->gpu_error.lock. */
struct drm_i915_error_state *first_error;
struct i915_gpu_state *first_error;
unsigned long missed_irq_rings;
......@@ -3574,7 +3570,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
const struct i915_error_state_file_priv *error);
const struct i915_gpu_state *gpu);
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
struct drm_i915_private *i915,
size_t count, loff_t pos);
......@@ -3583,13 +3579,28 @@ static inline void i915_error_state_buf_release(
{
kfree(eb->buf);
}
struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *dev_priv,
u32 engine_mask,
const char *error_msg);
void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv);
void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
void i915_destroy_error_state(struct drm_i915_private *dev_priv);
static inline struct i915_gpu_state *
i915_gpu_state_get(struct i915_gpu_state *gpu)
{
kref_get(&gpu->ref);
return gpu;
}
void __i915_gpu_state_free(struct kref *kref);
static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
{
if (gpu)
kref_put(&gpu->ref, __i915_gpu_state_free);
}
struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
#else
......@@ -3599,7 +3610,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
{
}
static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv)
static inline struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
return NULL;
}
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
......@@ -3747,7 +3764,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
extern struct intel_display_error_state *
intel_display_capture_error_state(struct drm_i915_private *dev_priv);
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
struct drm_i915_private *dev_priv,
struct intel_display_error_state *error);
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
......
This diff is collapsed.
......@@ -522,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct drm_device *dev = &dev_priv->drm;
struct i915_error_state_file_priv error_priv;
struct drm_i915_error_state_buf error_str;
ssize_t ret_count = 0;
int ret;
memset(&error_priv, 0, sizeof(error_priv));
struct i915_gpu_state *gpu;
ssize_t ret;
ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off);
ret = i915_error_state_buf_init(&error_str, dev_priv, count, off);
if (ret)
return ret;
error_priv.i915 = dev_priv;
i915_error_state_get(dev, &error_priv);
ret = i915_error_state_to_str(&error_str, &error_priv);
gpu = i915_first_error_state(dev_priv);
ret = i915_error_state_to_str(&error_str, gpu);
if (ret)
goto out;
ret_count = count < error_str.bytes ? count : error_str.bytes;
ret = count < error_str.bytes ? count : error_str.bytes;
memcpy(buf, error_str.buf, ret);
memcpy(buf, error_str.buf, ret_count);
out:
i915_error_state_put(&error_priv);
i915_gpu_state_put(gpu);
i915_error_state_buf_release(&error_str);
return ret ?: ret_count;
return ret;
}
static ssize_t error_state_write(struct file *file, struct kobject *kobj,
......@@ -559,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj,
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
DRM_DEBUG_DRIVER("Resetting error state\n");
i915_destroy_error_state(dev_priv);
i915_reset_error_state(dev_priv);
return count;
}
......
......@@ -15838,9 +15838,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv)
void
intel_display_print_error_state(struct drm_i915_error_state_buf *m,
struct drm_i915_private *dev_priv,
struct intel_display_error_state *error)
{
struct drm_i915_private *dev_priv = m->i915;
int i;
if (!error)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment