Commit 12976e6a authored by Jonathan Kim's avatar Jonathan Kim Committed by Alex Deucher

drm/amdkfd: add debug device snapshot operation

Similar to queue snapshot, return an array of device information using
an entry_size check and return.
Unlike queue snapshots, the debugger needs to pass the correct number of
devices that exist.  If it fails to do so, the KFD will return the
number of actual devices so that the debugger can make a subsequent
successful call.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b17bd5db
...@@ -3060,8 +3060,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v ...@@ -3060,8 +3060,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
&args->queue_snapshot.entry_size); &args->queue_snapshot.entry_size);
break; break;
case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
pr_warn("Debug op %i not supported yet\n", args->op); r = kfd_dbg_trap_device_snapshot(target,
r = -EACCES; args->device_snapshot.exception_mask,
(void __user *)args->device_snapshot.snapshot_buf_ptr,
&args->device_snapshot.num_devices,
&args->device_snapshot.entry_size);
break; break;
default: default:
pr_err("Invalid option: %i\n", args->op); pr_err("Invalid option: %i\n", args->op);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "kfd_debug.h" #include "kfd_debug.h"
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
#include "kfd_topology.h"
#include <linux/file.h> #include <linux/file.h>
#include <uapi/linux/kfd_ioctl.h> #include <uapi/linux/kfd_ioctl.h>
...@@ -1010,6 +1011,78 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target, ...@@ -1010,6 +1011,78 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
return r; return r;
} }
/**
 * kfd_dbg_trap_device_snapshot - copy per-device debug info to user space
 * @target: process whose per-device data (pdds) is reported
 * @exception_clear_mask: exception status bits to clear on each device after
 *	its status has been captured; 0 leaves the status untouched
 * @user_info: user buffer receiving an array of
 *	struct kfd_dbg_device_info_entry records
 * @number_of_device_infos: in: number of entries the caller's buffer can
 *	hold; out: target->n_pdds, so an undersized caller can retry
 * @entry_size: in: size of one entry as the caller understands it;
 *	out: min(caller's size, sizeof(struct kfd_dbg_device_info_entry)),
 *	i.e. the number of bytes written per entry
 *
 * At most min(*number_of_device_infos, target->n_pdds) entries are written.
 * Consecutive entries are placed at the stride the caller passed in
 * @entry_size, while only the clamped size is copied for each one.
 *
 * Return: 0 on success (including when no entries were requested or exist),
 * -EINVAL if any pointer argument is NULL, -EFAULT if a copy to user space
 * fails.
 */
int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
		uint64_t exception_clear_mask,
		void __user *user_info,
		uint32_t *number_of_device_infos,
		uint32_t *entry_size)
{
	struct kfd_dbg_device_info_entry device_info;
	uint32_t tmp_entry_size, tmp_num_devices;
	uint32_t i;
	int r = 0;

	if (!(target && user_info && number_of_device_infos && entry_size))
		return -EINVAL;

	/*
	 * Read the caller's stride only after entry_size has been validated;
	 * reading it in the declaration would dereference a NULL pointer
	 * before the check above could reject it.
	 */
	tmp_entry_size = *entry_size;

	tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
	/* Always report the real device count and the per-entry copy size. */
	*number_of_device_infos = target->n_pdds;
	*entry_size = min_t(size_t, *entry_size, sizeof(device_info));

	if (!tmp_num_devices)
		return 0;

	/* Zero once so bytes not assigned below are never leaked to user space. */
	memset(&device_info, 0, sizeof(device_info));

	mutex_lock(&target->event_mutex);

	/* Run over all pdd of the process */
	for (i = 0; i < tmp_num_devices; i++) {
		struct kfd_process_device *pdd = target->pdds[i];
		/*
		 * NOTE(review): the lookup result is used unchecked; confirm
		 * kfd_topology_device_by_id() cannot return NULL for a device
		 * that is attached to this process.
		 */
		struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);

		device_info.gpu_id = pdd->dev->id;
		device_info.exception_status = pdd->exception_status;
		device_info.lds_base = pdd->lds_base;
		device_info.lds_limit = pdd->lds_limit;
		device_info.scratch_base = pdd->scratch_base;
		device_info.scratch_limit = pdd->scratch_limit;
		device_info.gpuvm_base = pdd->gpuvm_base;
		device_info.gpuvm_limit = pdd->gpuvm_limit;
		device_info.location_id = topo_dev->node_props.location_id;
		device_info.vendor_id = topo_dev->node_props.vendor_id;
		device_info.device_id = topo_dev->node_props.device_id;
		device_info.revision_id = pdd->dev->adev->pdev->revision;
		device_info.subsystem_vendor_id = pdd->dev->adev->pdev->subsystem_vendor;
		device_info.subsystem_device_id = pdd->dev->adev->pdev->subsystem_device;
		device_info.fw_version = pdd->dev->kfd->mec_fw_version;
		device_info.gfx_target_version =
			topo_dev->node_props.gfx_target_version;
		device_info.simd_count = topo_dev->node_props.simd_count;
		device_info.max_waves_per_simd =
			topo_dev->node_props.max_waves_per_simd;
		device_info.array_count = topo_dev->node_props.array_count;
		device_info.simd_arrays_per_engine =
			topo_dev->node_props.simd_arrays_per_engine;
		device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
		device_info.capability = topo_dev->node_props.capability;
		device_info.debug_prop = topo_dev->node_props.debug_prop;

		/* The status was captured above; now drop the requested bits. */
		if (exception_clear_mask)
			pdd->exception_status &= ~exception_clear_mask;

		if (copy_to_user(user_info, &device_info, *entry_size)) {
			r = -EFAULT;
			break;
		}

		/* Advance by the caller's stride, not by the clamped copy size. */
		user_info += tmp_entry_size;
	}

	mutex_unlock(&target->event_mutex);

	return r;
}
void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
uint64_t exception_set_mask) uint64_t exception_set_mask)
{ {
......
...@@ -81,6 +81,11 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev) ...@@ -81,6 +81,11 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
} }
void debug_event_write_work_handler(struct work_struct *work); void debug_event_write_work_handler(struct work_struct *work);
/*
 * Copy a snapshot of per-device debug information for @target into the user
 * buffer @user_info, one struct kfd_dbg_device_info_entry per device.
 *
 * @exception_clear_mask: exception status bits cleared on each reported
 *	device after its status is captured (0 clears nothing).
 * @number_of_device_infos: in: entries the caller's buffer can hold;
 *	out: the process's actual device count (target->n_pdds).
 * @entry_size: in: caller's per-entry size, also used as the stride between
 *	entries; out: that size clamped to the kernel's entry size.
 *
 * Returns 0 on success, -EINVAL if any pointer argument is NULL, or -EFAULT
 * if copying an entry to user space fails.
 */
int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
uint64_t exception_clear_mask,
void __user *user_info,
uint32_t *number_of_device_infos,
uint32_t *entry_size);
void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
uint64_t exception_set_mask); uint64_t exception_set_mask);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment