Commit 3da3c5c1 authored by Chris Wilson's avatar Chris Wilson Committed by Rodrigo Vivi

drm/i915: Exclude low pages (128KiB) of stolen from use

The GPU is trashing the low pages of its reserved memory upon reset. If
we are using this memory for ringbuffers, then we will dutiful resubmit
the trashed rings after the reset causing further resets, and worse. We
must exclude this range from our own use. The value of 128KiB was found
by empirical measurement (and verified now with a selftest) on gen9.
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201019165005.18128-2-chris@chris-wilson.co.uk
(cherry picked from commit d3606757)
Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent b8cff311
...@@ -153,6 +153,7 @@ config DRM_I915_SELFTEST ...@@ -153,6 +153,7 @@ config DRM_I915_SELFTEST
select DRM_EXPORT_FOR_TESTS if m select DRM_EXPORT_FOR_TESTS if m
select FAULT_INJECTION select FAULT_INJECTION
select PRIME_NUMBERS select PRIME_NUMBERS
select CRC32
help help
Choose this option to allow the driver to perform selftests upon Choose this option to allow the driver to perform selftests upon
loading; also requires the i915.selftest=1 module parameter. To loading; also requires the i915.selftest=1 module parameter. To
......
...@@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915, ...@@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size, struct drm_mm_node *node, u64 size,
unsigned alignment) unsigned alignment)
{ {
return i915_gem_stolen_insert_node_in_range(i915, node, size, return i915_gem_stolen_insert_node_in_range(i915, node,
alignment, 0, U64_MAX); size, alignment,
I915_GEM_STOLEN_BIAS,
U64_MAX);
} }
void i915_gem_stolen_remove_node(struct drm_i915_private *i915, void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
......
...@@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv ...@@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
resource_size_t stolen_offset, resource_size_t stolen_offset,
resource_size_t size); resource_size_t size);
#define I915_GEM_STOLEN_BIAS SZ_128K
#endif /* __I915_GEM_STOLEN_H__ */ #endif /* __I915_GEM_STOLEN_H__ */
...@@ -3,9 +3,203 @@ ...@@ -3,9 +3,203 @@
* Copyright © 2018 Intel Corporation * Copyright © 2018 Intel Corporation
*/ */
#include <linux/crc32.h>
#include "gem/i915_gem_stolen.h"
#include "i915_memcpy.h"
#include "i915_selftest.h" #include "i915_selftest.h"
#include "selftests/igt_reset.h" #include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h" #include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"
static int
__igt_reset_stolen(struct intel_gt *gt,
intel_engine_mask_t mask,
const char *msg)
{
struct i915_ggtt *ggtt = &gt->i915->ggtt;
const struct resource *dsm = &gt->i915->dsm;
resource_size_t num_pages, page;
struct intel_engine_cs *engine;
intel_wakeref_t wakeref;
enum intel_engine_id id;
struct igt_spinner spin;
long max, count;
void *tmp;
u32 *crc;
int err;
if (!drm_mm_node_allocated(&ggtt->error_capture))
return 0;
num_pages = resource_size(dsm) >> PAGE_SHIFT;
if (!num_pages)
return 0;
crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
if (!crc)
return -ENOMEM;
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!tmp) {
err = -ENOMEM;
goto err_crc;
}
igt_global_reset_lock(gt);
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
err = igt_spinner_init(&spin, gt);
if (err)
goto err_lock;
for_each_engine(engine, gt, id) {
struct intel_context *ce;
struct i915_request *rq;
if (!(mask & engine->mask))
continue;
if (!intel_engine_can_store_dword(engine))
continue;
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto err_spin;
}
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_spin;
}
i915_request_add(rq);
}
for (page = 0; page < num_pages; page++) {
dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
void __iomem *s;
void *in;
ggtt->vm.insert_page(&ggtt->vm, dma,
ggtt->error_capture.start,
I915_CACHE_NONE, 0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,
ggtt->error_capture.start,
PAGE_SIZE);
if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
page << PAGE_SHIFT,
((page + 1) << PAGE_SHIFT) - 1))
memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
in = s;
if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
in = tmp;
crc[page] = crc32_le(0, in, PAGE_SIZE);
io_mapping_unmap(s);
}
mb();
ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
if (mask == ALL_ENGINES) {
intel_gt_reset(gt, mask, NULL);
} else {
for_each_engine(engine, gt, id) {
if (mask & engine->mask)
intel_engine_reset(engine, NULL);
}
}
max = -1;
count = 0;
for (page = 0; page < num_pages; page++) {
dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
void __iomem *s;
void *in;
u32 x;
ggtt->vm.insert_page(&ggtt->vm, dma,
ggtt->error_capture.start,
I915_CACHE_NONE, 0);
mb();
s = io_mapping_map_wc(&ggtt->iomap,
ggtt->error_capture.start,
PAGE_SIZE);
in = s;
if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
in = tmp;
x = crc32_le(0, in, PAGE_SIZE);
if (x != crc[page] &&
!__drm_mm_interval_first(&gt->i915->mm.stolen,
page << PAGE_SHIFT,
((page + 1) << PAGE_SHIFT) - 1)) {
pr_debug("unused stolen page %pa modified by GPU reset\n",
&page);
if (count++ == 0)
igt_hexdump(in, PAGE_SIZE);
max = page;
}
io_mapping_unmap(s);
}
mb();
ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
if (count > 0) {
pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
msg, count, max);
}
if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
msg, I915_GEM_STOLEN_BIAS);
err = -EINVAL;
}
err_spin:
igt_spinner_fini(&spin);
err_lock:
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt);
kfree(tmp);
err_crc:
kfree(crc);
return err;
}
static int igt_reset_device_stolen(void *arg)
{
return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}
static int igt_reset_engines_stolen(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err;
if (!intel_has_reset_engine(gt))
return 0;
for_each_engine(engine, gt, id) {
err = __igt_reset_stolen(gt, engine->mask, engine->name);
if (err)
return err;
}
return 0;
}
static int igt_global_reset(void *arg) static int igt_global_reset(void *arg)
{ {
...@@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) ...@@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
{ {
static const struct i915_subtest tests[] = { static const struct i915_subtest tests[] = {
SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_global_reset), /* attempt to recover GPU first */
SUBTEST(igt_reset_device_stolen),
SUBTEST(igt_reset_engines_stolen),
SUBTEST(igt_wedged_reset), SUBTEST(igt_wedged_reset),
SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_reset),
SUBTEST(igt_atomic_engine_reset), SUBTEST(igt_atomic_engine_reset),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment