Commit f1892138 authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: return -ENODEV to user space when vram is lost v2

The following ioctls will return -ENODEV:
amdgpu_cs_ioctl
amdgpu_cs_wait_ioctl
amdgpu_cs_wait_fences_ioctl
amdgpu_gem_va_ioctl
amdgpu_info_ioctl

v2: only for map and replace cases in amdgpu_gem_va_ioctl
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 0c49e0b8
...@@ -824,6 +824,7 @@ struct amdgpu_fpriv { ...@@ -824,6 +824,7 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock; struct mutex bo_list_lock;
struct idr bo_list_handles; struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr; struct amdgpu_ctx_mgr ctx_mgr;
u32 vram_lost_counter;
}; };
/* /*
...@@ -1528,6 +1529,7 @@ struct amdgpu_device { ...@@ -1528,6 +1529,7 @@ struct amdgpu_device {
atomic64_t num_bytes_moved; atomic64_t num_bytes_moved;
atomic64_t num_evictions; atomic64_t num_evictions;
atomic_t gpu_reset_counter; atomic_t gpu_reset_counter;
atomic_t vram_lost_counter;
/* data for buffer migration throttling */ /* data for buffer migration throttling */
struct { struct {
...@@ -1914,6 +1916,8 @@ static inline bool amdgpu_has_atpx(void) { return false; } ...@@ -1914,6 +1916,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
extern const int amdgpu_max_kms_ioctl; extern const int amdgpu_max_kms_ioctl;
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv);
int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_unload_kms(struct drm_device *dev);
void amdgpu_driver_lastclose_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev);
......
...@@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ...@@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{ {
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {}; struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false; bool reserved_buffers = false;
...@@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working) if (!adev->accel_working)
return -EBUSY; return -EBUSY;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
parser.adev = adev; parser.adev = adev;
parser.filp = filp; parser.filp = filp;
...@@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ...@@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
{ {
union drm_amdgpu_wait_cs *wait = data; union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL; struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
struct dma_fence *fence; struct dma_fence *fence;
long r; long r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring); wait->in.ring, &ring);
if (r) if (r)
...@@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, ...@@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp) struct drm_file *filp)
{ {
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_wait_fences *wait = data; union drm_amdgpu_wait_fences *wait = data;
uint32_t fence_count = wait->in.fence_count; uint32_t fence_count = wait->in.fence_count;
struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences; struct drm_amdgpu_fence *fences;
int r; int r;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
/* Get the fences from userspace */ /* Get the fences from userspace */
fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
GFP_KERNEL); GFP_KERNEL);
......
...@@ -2839,8 +2839,10 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) ...@@ -2839,8 +2839,10 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
if (r) if (r)
goto out; goto out;
vram_lost = amdgpu_check_vram_lost(adev); vram_lost = amdgpu_check_vram_lost(adev);
if (vram_lost) if (vram_lost) {
DRM_ERROR("VRAM is lost!\n"); DRM_ERROR("VRAM is lost!\n");
atomic_inc(&adev->vram_lost_counter);
}
r = amdgpu_ttm_recover_gart(adev); r = amdgpu_ttm_recover_gart(adev);
if (r) if (r)
goto out; goto out;
......
...@@ -597,6 +597,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -597,6 +597,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->operation); args->operation);
return -EINVAL; return -EINVAL;
} }
if ((args->operation == AMDGPU_VA_OP_MAP) ||
(args->operation == AMDGPU_VA_OP_REPLACE)) {
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
}
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
if ((args->operation != AMDGPU_VA_OP_CLEAR) && if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
......
...@@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, ...@@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{ {
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_amdgpu_info *info = data; struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info; struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer; void __user *out = (void __user *)(uintptr_t)info->return_pointer;
...@@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ...@@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (!info->return_size || !info->return_pointer) if (!info->return_size || !info->return_pointer)
return -EINVAL; return -EINVAL;
if (amdgpu_kms_vram_lost(adev, fpriv))
return -ENODEV;
switch (info->query) { switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING: case AMDGPU_INFO_ACCEL_WORKING:
...@@ -747,6 +750,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) ...@@ -747,6 +750,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
vga_switcheroo_process_delayed_switch(); vga_switcheroo_process_delayed_switch();
} }
bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
}
/** /**
* amdgpu_driver_open_kms - drm callback for open * amdgpu_driver_open_kms - drm callback for open
* *
...@@ -799,6 +808,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) ...@@ -799,6 +808,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
file_priv->driver_priv = fpriv; file_priv->driver_priv = fpriv;
out_suspend: out_suspend:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment