Commit 7a41ed8b authored by Alex Deucher

drm/amdgpu: add new INFO ioctl query for the last GPU page fault

Add an interface to query the last GPU page fault for the process.
Useful for debugging context-lost errors.

v2: split vmhub representation between kernel and userspace
v3: add locking when fetching fault info in INFO IOCTL

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
libdrm MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238

Cc: samuel.pitoiset@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9cff0879
...@@ -113,9 +113,10 @@ ...@@ -113,9 +113,10 @@
* gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
* 3.53.0 - Support for GFX11 CP GFX shadowing * 3.53.0 - Support for GFX11 CP GFX shadowing
* 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
* 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
*/ */
#define KMS_DRIVER_MAJOR 3 #define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 54 #define KMS_DRIVER_MINOR 55
#define KMS_DRIVER_PATCHLEVEL 0 #define KMS_DRIVER_PATCHLEVEL 0
/* /*
......
...@@ -1224,6 +1224,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -1224,6 +1224,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, max_ibs, return copy_to_user(out, max_ibs,
min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0; min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
} }
case AMDGPU_INFO_GPUVM_FAULT: {
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
unsigned long flags;
if (!vm)
return -EINVAL;
memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
xa_lock_irqsave(&adev->vm_manager.pasids, flags);
gpuvm_fault.addr = vm->fault_info.addr;
gpuvm_fault.status = vm->fault_info.status;
gpuvm_fault.vmhub = vm->fault_info.vmhub;
xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
return copy_to_user(out, &gpuvm_fault,
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
}
default: default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query); DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL; return -EINVAL;
......
...@@ -2756,7 +2756,21 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, ...@@ -2756,7 +2756,21 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
if (vm) { if (vm) {
vm->fault_info.addr = addr; vm->fault_info.addr = addr;
vm->fault_info.status = status; vm->fault_info.status = status;
vm->fault_info.vmhub = vmhub; if (AMDGPU_IS_GFXHUB(vmhub)) {
vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
vm->fault_info.vmhub |=
(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
} else if (AMDGPU_IS_MMHUB0(vmhub)) {
vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
vm->fault_info.vmhub |=
(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
} else if (AMDGPU_IS_MMHUB1(vmhub)) {
vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
vm->fault_info.vmhub |=
(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
} else {
WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
}
} }
xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
} }
......
...@@ -124,9 +124,16 @@ struct amdgpu_mem_stats; ...@@ -124,9 +124,16 @@ struct amdgpu_mem_stats;
* layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
*/ */
#define AMDGPU_MAX_VMHUBS 13 #define AMDGPU_MAX_VMHUBS 13
#define AMDGPU_GFXHUB(x) (x) #define AMDGPU_GFXHUB_START 0
#define AMDGPU_MMHUB0(x) (8 + x) #define AMDGPU_MMHUB0_START 8
#define AMDGPU_MMHUB1(x) (8 + 4 + x) #define AMDGPU_MMHUB1_START 12
#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START)
#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START)
#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)
/* Reserve 2MB at top/bottom of address space for kernel use */ /* Reserve 2MB at top/bottom of address space for kernel use */
#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
......
...@@ -906,6 +906,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { ...@@ -906,6 +906,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
/* Query the max number of IBs per gang per submission */ /* Query the max number of IBs per gang per submission */
#define AMDGPU_INFO_MAX_IBS 0x22 #define AMDGPU_INFO_MAX_IBS 0x22
/* query last page fault info */
#define AMDGPU_INFO_GPUVM_FAULT 0x23
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
...@@ -1231,6 +1233,20 @@ struct drm_amdgpu_info_video_caps { ...@@ -1231,6 +1233,20 @@ struct drm_amdgpu_info_video_caps {
struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
}; };
/*
 * Encoding of the vmhub field of struct drm_amdgpu_info_gpuvm_fault:
 * the hub type occupies bits 0-7 (TYPE_MASK/TYPE_SHIFT) and the index of
 * the hub within that type occupies bits 8-15 (IDX_MASK/IDX_SHIFT).
 */
#define AMDGPU_VMHUB_TYPE_MASK 0xff
#define AMDGPU_VMHUB_TYPE_SHIFT 0
/* Hub type values stored in the type bits */
#define AMDGPU_VMHUB_TYPE_GFX 0
#define AMDGPU_VMHUB_TYPE_MM0 1
#define AMDGPU_VMHUB_TYPE_MM1 2
#define AMDGPU_VMHUB_IDX_MASK 0xff00
#define AMDGPU_VMHUB_IDX_SHIFT 8
/*
 * Data returned to userspace by the AMDGPU_INFO_GPUVM_FAULT query: the
 * last page fault information cached for the calling file's VM.
 */
struct drm_amdgpu_info_gpuvm_fault {
/* Address recorded for the fault */
__u64 addr;
/* Fault status value as cached by the driver (presumably hardware-specific; confirm against the fault handlers) */
__u32 status;
/* Hub the fault was recorded on, encoded with the AMDGPU_VMHUB_* type/index fields */
__u32 vmhub;
};
/* /*
* Supported GPU families * Supported GPU families
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment