Commit 01541a87 authored by Dave Airlie

Merge tag 'amd-drm-fixes-6.12-2024-10-16' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.12-2024-10-16:

amdgpu:
- SR-IOV fix
- CS chunk handling fix
- MES fixes
- SMU13 fixes

amdkfd:
- VRAM usage reporting fix

radeon:
- Fix possible_clones handling
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016200514.3520286-1-alexander.deucher@amd.com
parents 4cd33d97 ec1aab78
@@ -265,7 +265,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 		/* Only a single BO list is allowed to simplify handling. */
 		if (p->bo_list)
-			ret = -EINVAL;
+			goto free_partial_kdata;

 		ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
 		if (ret)
...
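Side note on the hunk above: amdgpu_cs_pass1() uses the kernel's usual goto-based unwinding, where a failure inside the chunk-parsing loop jumps to free_partial_kdata so the chunks copied so far are released. A minimal sketch of that pattern, with hypothetical names, not code from the patch:

    /* Illustrative only (needs <linux/slab.h>): goto-based unwinding of
     * partially parsed chunks. */
    struct chunk { const void *data; size_t len; };

    static int parse_chunks(const struct chunk *chunks, int n)
    {
            void **kdata;
            int i, ret;

            kdata = kcalloc(n, sizeof(*kdata), GFP_KERNEL);
            if (!kdata)
                    return -ENOMEM;

            for (i = 0; i < n; i++) {
                    kdata[i] = kmemdup(chunks[i].data, chunks[i].len, GFP_KERNEL);
                    if (!kdata[i]) {
                            ret = -ENOMEM;
                            goto free_partial;      /* free what was copied so far */
                    }
            }

            /* success: in this sketch, just release everything again */
            for (i = 0; i < n; i++)
                    kfree(kdata[i]);
            kfree(kdata);
            return 0;

    free_partial:
            while (i--)
                    kfree(kdata[i]);
            kfree(kdata);
            return ret;
    }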
@@ -1635,11 +1635,9 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
 {
 	int r;

-	if (!amdgpu_sriov_vf(adev)) {
-		r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
-		if (r)
-			return r;
-	}
+	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+	if (r)
+		return r;

 	r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
 	if (r)
@@ -1650,8 +1648,7 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)

 void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
 {
-	if (!amdgpu_sriov_vf(adev))
-		device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
 	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
 }
...
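For context on the two hunks above: the fix drops the !amdgpu_sriov_vf() guard, so the enforce_isolation attribute is created and removed on SR-IOV virtual functions as well. dev_attr_enforce_isolation and dev_attr_run_cleaner_shader are ordinary device attributes handled with device_create_file()/device_remove_file(); their real show/store handlers live alongside amdgpu_gfx_sysfs_isolation_shader_init() and are not reproduced here. A minimal sketch of the pattern with a made-up read-only attribute:

    /* Illustrative only (needs <linux/device.h> and <linux/sysfs.h>). */
    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
    {
            return sysfs_emit(buf, "%d\n", 1);
    }
    static DEVICE_ATTR_RO(example);         /* defines dev_attr_example */

    static int example_sysfs_init(struct device *dev)
    {
            /* creates /sys/.../example; must be paired with device_remove_file() */
            return device_create_file(dev, &dev_attr_example);
    }

    static void example_sysfs_fini(struct device *dev)
    {
            device_remove_file(dev, &dev_attr_example);
    }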
@@ -1203,8 +1203,10 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
-	if (r)
+	if (r) {
+		amdgpu_mes_unlock(&adev->mes);
 		goto clean_up_memory;
+	}

 	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
@@ -1237,7 +1239,6 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 	amdgpu_ring_fini(ring);
 clean_up_memory:
 	kfree(ring);
-	amdgpu_mes_unlock(&adev->mes);
 	return r;
 }
...
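Reading the two hunks above together: the MES lock is now released in the branch that handles the amdgpu_ring_init() failure, and the shared clean_up_memory label no longer unlocks, presumably because the later error paths reach that label after the lock has already been dropped. A sketch of that shape with hypothetical names, not the driver's code:

    /* Illustrative only (needs <linux/mutex.h> and <linux/slab.h>): drop a lock
     * before jumping to a cleanup label shared with unlocked paths. */
    struct ctx { struct mutex lock; };

    static int add_object(struct ctx *ctx, int (*setup_locked)(struct ctx *),
                          int (*setup_unlocked)(struct ctx *))
    {
            void *obj;
            int r;

            obj = kzalloc(64, GFP_KERNEL);
            if (!obj)
                    return -ENOMEM;

            mutex_lock(&ctx->lock);
            r = setup_locked(ctx);
            if (r) {
                    mutex_unlock(&ctx->lock);       /* unlock here, not in the label */
                    goto free_obj;
            }
            mutex_unlock(&ctx->lock);

            r = setup_unlocked(ctx);
            if (r)
                    goto free_obj;                  /* reached with the lock dropped */

            kfree(obj);                             /* simplified success path */
            return 0;

    free_obj:
            kfree(obj);
            return r;
    }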
@@ -621,7 +621,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
 	if (amdgpu_mes_log_enable) {
 		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
-		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE;
 	}

 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
@@ -1336,7 +1336,7 @@ static int mes_v12_0_sw_init(void *handle)
 	adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
 	adev->mes.enable_legacy_queue_map = true;
-	adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;
+	adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE;

 	r = amdgpu_mes_init(adev);
 	if (r)
...
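The two MES 12.0 hunks above pair up: with unified MES the event-log allocation is sized for all pipes, and each pipe's log pointer is offset into that one buffer, so each pipe logs into its own slot rather than the shared base address. The addressing as a sketch; SLOT_SIZE and MAX_PIPES are stand-ins with assumed values, not the driver's constants:

    /* Illustrative only; stand-ins for AMDGPU_MES_LOG_BUFFER_SIZE and
     * AMDGPU_MAX_MES_PIPES (values assumed). */
    #define MAX_PIPES 2
    #define SLOT_SIZE 4096

    static inline u64 log_buffer_size(bool uni_mes)
    {
            return uni_mes ? MAX_PIPES * SLOT_SIZE : SLOT_SIZE;
    }

    static inline u64 log_slot_addr(u64 base, int pipe)
    {
            return base + (u64)pipe * SLOT_SIZE;    /* pipe N gets slot N */
    }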
@@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}

 	mutex_unlock(&p->mutex);
@@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	kfd_process_device_remove_obj_handle(
 		pdd, GET_IDR_HANDLE(args->handle));

-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);

 err_unlock:
 err_pdd:
@@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}

 	return 0;
 }
...
@@ -775,7 +775,7 @@ struct kfd_process_device {
 	enum kfd_pdd_bound bound;
 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
...
@@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_sdma);
@@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
-	pdd->vram_usage = 0;
+	atomic64_set(&pdd->vram_usage, 0);
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
...
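The vram_usage hunks above all make one change: the per-process VRAM counter becomes an atomic64_t, so concurrent allocations, frees and the procfs reader no longer race through a plain uint64_t guarded only by READ_ONCE/WRITE_ONCE, which does not make the read-modify-write of the addition atomic. The API in a minimal sketch with a made-up wrapper struct:

    /* Illustrative only (needs <linux/atomic.h>); mirrors the atomic64_t API
     * now used for pdd->vram_usage. */
    struct usage_counter {
            atomic64_t bytes;               /* bytes currently allocated */
    };

    static void usage_init(struct usage_counter *c)
    {
            atomic64_set(&c->bytes, 0);
    }

    static void usage_alloc(struct usage_counter *c, u64 size)
    {
            atomic64_add(size, &c->bytes);  /* atomic read-modify-write */
    }

    static void usage_free(struct usage_counter *c, u64 size)
    {
            atomic64_sub(size, &c->bytes);
    }

    static u64 usage_read(struct usage_counter *c)
    {
            return atomic64_read(&c->bytes);
    }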
@@ -405,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * The forked child process takes svm_bo device pages ref, svm_bo could be
+		 * released after parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -532,6 +553,7 @@ int
 svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			bool clear)
 {
+	struct kfd_process_device *pdd;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
@@ -623,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 	list_add(&prange->svm_bo_list, &svm_bo->range_list);
 	spin_unlock(&svm_bo->list_lock);
+
+	pdd = svm_range_get_pdd_by_node(prange, node);
+	if (pdd)
+		atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);

 	return 0;

 reserve_bo_failed:
...
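The svm_range_bo_release() hunk additionally has to cope with the BO outliving the process (a forked child can hold the last device-pages reference, per the comment in the patch), so it only looks up the process and adjusts the counter after taking an mm_struct reference with mmget_not_zero(). Roughly this shape, as a hypothetical helper rather than the driver's code:

    /* Illustrative only (needs <linux/sched/mm.h> and <linux/atomic.h>). */
    static void account_vram_release(struct mm_struct *mm,
                                     atomic64_t *vram_usage, u64 size)
    {
            if (!mmget_not_zero(mm))        /* owning process already exited */
                    return;
            atomic64_sub(size, vram_usage);
            mmput(mm);
    }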
@@ -1264,7 +1264,11 @@ static int smu_sw_init(void *handle)
 	smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4;
 	smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
 	smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
-	smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+
+	if (smu->is_apu)
+		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+	else
+		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];

 	smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
 	smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
@@ -2226,7 +2230,7 @@ static int smu_bump_power_profile_mode(struct smu_context *smu,
 static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 					  enum amd_dpm_forced_level level,
 					  bool skip_display_settings,
-					  bool force_update)
+					  bool init)
 {
 	int ret = 0;
 	int index = 0;
@@ -2255,7 +2259,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		}
 	}

-	if (force_update || smu_dpm_ctx->dpm_level != level) {
+	if (smu_dpm_ctx->dpm_level != level) {
 		ret = smu_asic_set_performance_level(smu, level);
 		if (ret) {
 			dev_err(smu->adev->dev, "Failed to set performance level!");
@@ -2272,7 +2276,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
 		workload[0] = smu->workload_setting[index];

-		if (force_update || smu->power_profile_mode != workload[0])
+		if (init || smu->power_profile_mode != workload[0])
 			smu_bump_power_profile_mode(smu, workload, 0);
 	}
...
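On the smu_sw_init() hunk above: workload_mask is a bitmask indexed by the per-profile priority values assigned just before it, so 1 << priority selects a single default profile (bootup-default on APUs, fullscreen-3D on dGPUs after this change) and further bits can be OR'ed in later, as the SMU13 hunk below does. A small sketch of that encoding; the priority values here are invented, not the driver's table:

    /* Illustrative only; priorities invented for the sketch. */
    enum { PRIO_BOOTUP_DEFAULT = 0, PRIO_FULLSCREEN3D = 1, PRIO_POWERSAVING = 2 };

    static u32 default_workload_mask(bool is_apu)
    {
            return 1u << (is_apu ? PRIO_BOOTUP_DEFAULT : PRIO_FULLSCREEN3D);
    }

    static u32 add_workload(u32 mask, int prio)
    {
            return mask | (1u << prio);     /* e.g. OR in PRIO_POWERSAVING */
    }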
@@ -2555,18 +2555,16 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 	workload_mask = 1 << workload_type;

 	/* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */
-	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
-		if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
-		     ((smu->adev->pm.fw_version == 0x004e6601) ||
-		      (smu->adev->pm.fw_version >= 0x004e7300))) ||
-		    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
-		     smu->adev->pm.fw_version >= 0x00504500)) {
-			workload_type = smu_cmn_to_asic_specific_index(smu,
-								       CMN2ASIC_MAPPING_WORKLOAD,
-								       PP_SMC_POWER_PROFILE_POWERSAVING);
-			if (workload_type >= 0)
-				workload_mask |= 1 << workload_type;
-		}
+	if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+	     ((smu->adev->pm.fw_version == 0x004e6601) ||
+	      (smu->adev->pm.fw_version >= 0x004e7300))) ||
+	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+	     smu->adev->pm.fw_version >= 0x00504500)) {
+		workload_type = smu_cmn_to_asic_specific_index(smu,
+							       CMN2ASIC_MAPPING_WORKLOAD,
+							       PP_SMC_POWER_PROFILE_POWERSAVING);
+		if (workload_type >= 0)
+			workload_mask |= 1 << workload_type;
 	}

 	ret = smu_cmn_send_smc_msg_with_param(smu,
...
@@ -43,7 +43,7 @@ static uint32_t radeon_encoder_clones(struct drm_encoder *encoder)
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_encoder *clone_encoder;
-	uint32_t index_mask = 0;
+	uint32_t index_mask = drm_encoder_mask(encoder);
 	int count;

 	/* DIG routing gets problematic */
...
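On the radeon hunk above: drm_encoder_mask() is the DRM core helper that turns an encoder into its bit in possible_clones-style masks, and the core expects every encoder's possible_clones to include the encoder itself, which is why index_mask now starts from drm_encoder_mask(encoder) instead of 0. Roughly what the helper evaluates to, shown only for illustration:

    /* Illustrative only; see drm_encoder_mask() in <drm/drm_encoder.h>. */
    static inline u32 encoder_bit(const struct drm_encoder *encoder)
    {
            return 1 << encoder->index;
    }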