Commit 6ca9a2be authored by Chris Wilson

drm/i915: Unwind i915_gem_init() failure

Since Michal introduced new user-controllable errors other than -EIO
during i915_gem_init(), we need to actually unwind on the error path as
we have to abort the module load (and we expect to do so cleanly!).

As we now tear down key state and then mark the driver as wedged (on
EIO), we have to be careful not to allow ourselves to resume and
unwedge, thus attempting to use the uninitialised driver.

v2: Try not to free driver state for the suppressed EIO
v3: Use load-fault-injection to test both error/recovery paths.

References: 8620eb1d ("drm/i915/uc: Don't use -EIO to report missing firmware")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171213134347.4608-1-chris@chris-wilson.co.uk
parent 31c70f97
...@@ -4865,7 +4865,8 @@ void i915_gem_resume(struct drm_i915_private *i915) ...@@ -4865,7 +4865,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
i915_gem_restore_gtt_mappings(i915); i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915); i915_gem_restore_fences(i915);
/* As we didn't flush the kernel context before suspend, we cannot /*
* As we didn't flush the kernel context before suspend, we cannot
* guarantee that the context image is complete. So let's just reset * guarantee that the context image is complete. So let's just reset
* it and start again. * it and start again.
*/ */
...@@ -4886,8 +4887,10 @@ void i915_gem_resume(struct drm_i915_private *i915) ...@@ -4886,8 +4887,10 @@ void i915_gem_resume(struct drm_i915_private *i915)
return; return;
err_wedged: err_wedged:
DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); if (!i915_terminally_wedged(&i915->gpu_error)) {
i915_gem_set_wedged(i915); DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
i915_gem_set_wedged(i915);
}
goto out_unlock; goto out_unlock;
} }
...@@ -5170,22 +5173,28 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ...@@ -5170,22 +5173,28 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
ret = i915_gem_init_ggtt(dev_priv); ret = i915_gem_init_ggtt(dev_priv);
if (ret) if (ret) {
goto out_unlock; GEM_BUG_ON(ret == -EIO);
goto err_unlock;
}
ret = i915_gem_contexts_init(dev_priv); ret = i915_gem_contexts_init(dev_priv);
if (ret) if (ret) {
goto out_unlock; GEM_BUG_ON(ret == -EIO);
goto err_ggtt;
}
ret = intel_engines_init(dev_priv); ret = intel_engines_init(dev_priv);
if (ret) if (ret) {
goto out_unlock; GEM_BUG_ON(ret == -EIO);
goto err_context;
}
intel_init_gt_powersave(dev_priv); intel_init_gt_powersave(dev_priv);
ret = i915_gem_init_hw(dev_priv); ret = i915_gem_init_hw(dev_priv);
if (ret) if (ret)
goto out_unlock; goto err_pm;
/* /*
* Despite its name intel_init_clock_gating applies both display * Despite its name intel_init_clock_gating applies both display
...@@ -5199,9 +5208,53 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ...@@ -5199,9 +5208,53 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_init_clock_gating(dev_priv); intel_init_clock_gating(dev_priv);
ret = __intel_engines_record_defaults(dev_priv); ret = __intel_engines_record_defaults(dev_priv);
out_unlock: if (ret)
goto err_init_hw;
if (i915_inject_load_failure()) {
ret = -ENODEV;
goto err_init_hw;
}
if (i915_inject_load_failure()) {
ret = -EIO;
goto err_init_hw;
}
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
/*
* Unwinding is complicated by the fact that we want to handle -EIO to
* mean disable GPU submission but keep KMS alive. We want to mark the
* HW as irreversibly wedged, but keep enough state around that the
* driver doesn't explode during runtime.
*/
err_init_hw:
i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
i915_gem_contexts_lost(dev_priv);
intel_uc_fini_hw(dev_priv);
err_pm:
if (ret != -EIO) {
intel_cleanup_gt_powersave(dev_priv);
i915_gem_cleanup_engines(dev_priv);
}
err_context:
if (ret != -EIO)
i915_gem_contexts_fini(dev_priv);
err_ggtt:
err_unlock:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
if (ret != -EIO)
i915_gem_cleanup_userptr(dev_priv);
if (ret == -EIO) { if (ret == -EIO) {
/* Allow engine initialisation to fail by marking the GPU as /*
* Allow engine initialisation to fail by marking the GPU as
* wedged. But we only want to do this where the GPU is angry, * wedged. But we only want to do this where the GPU is angry,
* for all other failure, such as an allocation failure, bail. * for all other failure, such as an allocation failure, bail.
*/ */
...@@ -5211,9 +5264,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ...@@ -5211,9 +5264,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
} }
ret = 0; ret = 0;
} }
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
i915_gem_drain_freed_objects(dev_priv);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment