Commit 7cee6a68, authored by Jonathan Kim, committed by Alex Deucher

drm/amdgpu: add configurable grace period for unmap queues

The HWS schedule allows a grace period for wave completion prior to
preemption for better performance by avoiding CWSR on waves that can
potentially complete quickly. The debugger, on the other hand, will
want to inspect wave status immediately after it actively triggers
preemption (a suspend function to be provided).

To minimize latency between preemption and debugger wave inspection, allow
immediate preemption by setting the grace period to 0.

Note that setting the preemption grace period to 0 will result in an
infinite grace period being set due to a CP FW bug, so set it to 1 for now.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 33f3437a
...@@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { ...@@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap, .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_aldebaran_disable_debug_trap, .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
}; };
...@@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { ...@@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_set_vm_context_page_table_base, kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_arcturus_enable_debug_trap, .enable_debug_trap = kgd_arcturus_enable_debug_trap,
.disable_debug_trap = kgd_arcturus_disable_debug_trap, .disable_debug_trap = kgd_arcturus_disable_debug_trap,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
}; };
...@@ -803,6 +803,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, ...@@ -803,6 +803,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
return 0; return 0;
} }
/*
 * kgd_gfx_v10_get_iq_wait_times() - read the current mmCP_IQ_WAIT_TIME2 value.
 * @adev: device handle; not named in the body, presumably consumed by the
 *        register access macros
 * @wait_times: output, receives the raw 32-bit register contents
 *
 * Only mmCP_IQ_WAIT_TIME2 is read here. The wait counts this register
 * family is described as holding are:
 *   ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
 *   atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
 *   wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
 *   gws_wait_time            -- Wait Count for Global Wave Syncs.
 *   que_sleep_wait_time      -- Wait Count for Dequeue Retry.
 *   sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
 *   sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
 *   deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
 *
 * NOTE(review): which of these fields live in TIME2 rather than TIME1 is
 * not visible from this function, and the deq_retry description repeats
 * the gws one -- confirm both against the register specification.
 */
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
		uint32_t *wait_times)
{
	const uint32_t iq_wait_time2 =
		SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);

	*wait_times = RREG32(iq_wait_time2);
}
/*
 * kgd_gfx_v10_build_grace_period_packet_info() - compute the register
 * offset/value pair that programs a new grace period.
 * @adev: device handle; not named in the body, presumably consumed by the
 *        register offset macro
 * @wait_times: base CP_IQ_WAIT_TIME2 value the new field is merged into
 * @grace_period: requested grace period; 0 is replaced by 1 (see below)
 * @reg_offset: output, offset of mmCP_IQ_WAIT_TIME2
 * @reg_data: output, @wait_times with its SCH_WAVE field replaced
 *
 * NOTE(review): a @grace_period wider than the SCH_WAVE field is presumably
 * truncated by REG_SET_FIELD -- confirm callers bound the value.
 */
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
		uint32_t wait_times,
		uint32_t grace_period,
		uint32_t *reg_offset,
		uint32_t *reg_data)
{
	/*
	 * The CP cannot handle a 0 grace period input: it would result in
	 * an infinite grace period being set, so use 1 instead.
	 */
	const uint32_t sch_wave = grace_period ? grace_period : 1;

	*reg_data = REG_SET_FIELD(wait_times,
			CP_IQ_WAIT_TIME2,
			SCH_WAVE,
			sch_wave);

	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
static void program_trap_handler_settings(struct amdgpu_device *adev, static void program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst) uint32_t inst)
...@@ -848,5 +889,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { ...@@ -848,5 +889,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.set_vm_context_page_table_base = set_vm_context_page_table_base, .set_vm_context_page_table_base = set_vm_context_page_table_base,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap, .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.program_trap_handler_settings = program_trap_handler_settings, .program_trap_handler_settings = program_trap_handler_settings,
}; };
...@@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, ...@@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled, bool keep_trap_enabled,
uint32_t vmid); uint32_t vmid);
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);
...@@ -672,6 +672,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { ...@@ -672,6 +672,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap .disable_debug_trap = kgd_gfx_v10_disable_debug_trap
}; };
...@@ -739,6 +739,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, ...@@ -739,6 +739,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
return 0; return 0;
} }
/*
 * kgd_gfx_v9_get_iq_wait_times() - read the current mmCP_IQ_WAIT_TIME2 value.
 * @adev: device handle; not named in the body, presumably consumed by the
 *        register access macros
 * @wait_times: output, receives the raw 32-bit register contents
 *
 * Only mmCP_IQ_WAIT_TIME2 is read here. The wait counts this register
 * family is described as holding are:
 *   ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
 *   atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
 *   wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
 *   gws_wait_time            -- Wait Count for Global Wave Syncs.
 *   que_sleep_wait_time      -- Wait Count for Dequeue Retry.
 *   sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
 *   sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
 *   deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
 *
 * NOTE(review): which of these fields live in TIME2 rather than TIME1 is
 * not visible from this function, and the deq_retry description repeats
 * the gws one -- confirm both against the register specification.
 */
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
		uint32_t *wait_times)
{
	const uint32_t iq_wait_time2 =
		SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);

	*wait_times = RREG32(iq_wait_time2);
}
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base) uint32_t vmid, uint64_t page_table_base)
{ {
...@@ -926,6 +944,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, ...@@ -926,6 +944,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
adev->gfx.cu_info.max_waves_per_simd; adev->gfx.cu_info.max_waves_per_simd;
} }
/*
 * kgd_gfx_v9_build_grace_period_packet_info() - compute the register
 * offset/value pair that programs a new grace period.
 * @adev: device handle; not named in the body, presumably consumed by the
 *        register offset macro
 * @wait_times: base CP_IQ_WAIT_TIME2 value the new field is merged into
 * @grace_period: requested grace period; 0 is replaced by 1 (see below)
 * @reg_offset: output, offset of mmCP_IQ_WAIT_TIME2
 * @reg_data: output, @wait_times with its SCH_WAVE field replaced
 *
 * NOTE(review): a @grace_period wider than the SCH_WAVE field is presumably
 * truncated by REG_SET_FIELD -- confirm callers bound the value.
 */
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
		uint32_t wait_times,
		uint32_t grace_period,
		uint32_t *reg_offset,
		uint32_t *reg_data)
{
	/*
	 * The CP cannot handle a 0 grace period input: it would result in
	 * an infinite grace period being set, so use 1 instead.
	 */
	const uint32_t sch_wave = grace_period ? grace_period : 1;

	*reg_data = REG_SET_FIELD(wait_times,
			CP_IQ_WAIT_TIME2,
			SCH_WAVE,
			sch_wave);

	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst) uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
{ {
...@@ -969,6 +1010,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { ...@@ -969,6 +1010,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_gfx_v9_enable_debug_trap, .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_disable_debug_trap, .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
}; };
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config, uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
...@@ -73,3 +71,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev, ...@@ -73,3 +71,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled, bool keep_trap_enabled,
uint32_t vmid); uint32_t vmid);
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);
...@@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, ...@@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
static int execute_queues_cpsch(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param); uint32_t filter_param,
uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm, static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset); uint32_t filter_param,
uint32_t grace_period,
bool reset);
static int map_queues_cpsch(struct device_queue_manager *dqm); static int map_queues_cpsch(struct device_queue_manager *dqm);
...@@ -866,7 +869,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, ...@@ -866,7 +869,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = unmap_queues_cpsch(dqm, retval = unmap_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
else if (prev_active) else if (prev_active)
retval = remove_queue_mes(dqm, q, &pdd->qpd); retval = remove_queue_mes(dqm, q, &pdd->qpd);
...@@ -1042,7 +1045,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1042,7 +1045,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
retval = execute_queues_cpsch(dqm, retval = execute_queues_cpsch(dqm,
qpd->is_debug ? qpd->is_debug ?
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD);
out: out:
dqm_unlock(dqm); dqm_unlock(dqm);
...@@ -1182,8 +1186,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1182,8 +1186,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
} }
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm, retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
atomic64_add(eviction_duration, &pdd->evict_duration_counter); atomic64_add(eviction_duration, &pdd->evict_duration_counter);
vm_not_acquired: vm_not_acquired:
...@@ -1525,6 +1528,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) ...@@ -1525,6 +1528,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
init_sdma_bitmaps(dqm); init_sdma_bitmaps(dqm);
if (dqm->dev->kfd2kgd->get_iq_wait_times)
dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
&dqm->wait_times);
return 0; return 0;
} }
...@@ -1563,8 +1569,9 @@ static int start_cpsch(struct device_queue_manager *dqm) ...@@ -1563,8 +1569,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm->is_hws_hang = false; dqm->is_hws_hang = false;
dqm->is_resetting = false; dqm->is_resetting = false;
dqm->sched_running = true; dqm->sched_running = true;
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm); dqm_unlock(dqm);
return 0; return 0;
...@@ -1589,7 +1596,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) ...@@ -1589,7 +1596,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
if (!dqm->is_hws_hang) { if (!dqm->is_hws_hang) {
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
else else
remove_all_queues_mes(dqm); remove_all_queues_mes(dqm);
} }
...@@ -1631,7 +1638,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, ...@@ -1631,7 +1638,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
list_add(&kq->list, &qpd->priv_queue_list); list_add(&kq->list, &qpd->priv_queue_list);
increment_queue_count(dqm, qpd, kq->queue); increment_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = true; qpd->is_debug = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm); dqm_unlock(dqm);
return 0; return 0;
...@@ -1645,7 +1653,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, ...@@ -1645,7 +1653,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
list_del(&kq->list); list_del(&kq->list);
decrement_queue_count(dqm, qpd, kq->queue); decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false; qpd->is_debug = false;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD);
/* /*
* Unconditionally decrement this counter, regardless of the queue's * Unconditionally decrement this counter, regardless of the queue's
* type. * type.
...@@ -1722,7 +1731,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, ...@@ -1722,7 +1731,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm, retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
else else
retval = add_queue_mes(dqm, q, qpd); retval = add_queue_mes(dqm, q, qpd);
if (retval) if (retval)
...@@ -1811,7 +1820,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) ...@@ -1811,7 +1820,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
/* dqm->lock mutex has to be locked before calling this function */ /* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm, static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset) uint32_t filter_param,
uint32_t grace_period,
bool reset)
{ {
int retval = 0; int retval = 0;
struct mqd_manager *mqd_mgr; struct mqd_manager *mqd_mgr;
...@@ -1823,6 +1834,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1823,6 +1834,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist) if (!dqm->active_runlist)
return retval; return retval;
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
if (retval)
return retval;
}
retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval) if (retval)
return retval; return retval;
...@@ -1855,6 +1872,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1855,6 +1872,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return -ETIME; return -ETIME;
} }
/* We need to reset the grace period value for this device */
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
if (pm_update_grace_period(&dqm->packet_mgr,
USE_DEFAULT_GRACE_PERIOD))
pr_err("Failed to reset grace period\n");
}
pm_release_ib(&dqm->packet_mgr); pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false; dqm->active_runlist = false;
...@@ -1870,7 +1894,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1870,7 +1894,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
dqm_lock(dqm); dqm_lock(dqm);
retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
pasid, true); pasid, USE_DEFAULT_GRACE_PERIOD, true);
dqm_unlock(dqm); dqm_unlock(dqm);
return retval; return retval;
...@@ -1879,13 +1903,14 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, ...@@ -1879,13 +1903,14 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
/* dqm->lock mutex has to be locked before calling this function */ /* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param) uint32_t filter_param,
uint32_t grace_period)
{ {
int retval; int retval;
if (dqm->is_hws_hang) if (dqm->is_hws_hang)
return -EIO; return -EIO;
retval = unmap_queues_cpsch(dqm, filter, filter_param, false); retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
if (retval) if (retval)
return retval; return retval;
...@@ -1943,7 +1968,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, ...@@ -1943,7 +1968,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if (!dqm->dev->kfd->shared_resources.enable_mes) { if (!dqm->dev->kfd->shared_resources.enable_mes) {
decrement_queue_count(dqm, qpd, q); decrement_queue_count(dqm, qpd, q);
retval = execute_queues_cpsch(dqm, retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
USE_DEFAULT_GRACE_PERIOD);
if (retval == -ETIME) if (retval == -ETIME)
qpd->reset_wavefronts = true; qpd->reset_wavefronts = true;
} else { } else {
...@@ -2228,7 +2254,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, ...@@ -2228,7 +2254,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
} }
if (!dqm->dev->kfd->shared_resources.enable_mes) if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm, filter, 0); retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
...@@ -2589,7 +2615,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) ...@@ -2589,7 +2615,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
return r; return r;
} }
dqm->active_runlist = true; dqm->active_runlist = true;
r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
0, USE_DEFAULT_GRACE_PERIOD);
dqm_unlock(dqm); dqm_unlock(dqm);
return r; return r;
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#define KFD_MES_PROCESS_QUANTUM 100000 #define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000 #define KFD_MES_GANG_QUANTUM 10000
#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node { struct device_process_node {
struct qcm_process_device *qpd; struct qcm_process_device *qpd;
...@@ -259,6 +260,8 @@ struct device_queue_manager { ...@@ -259,6 +260,8 @@ struct device_queue_manager {
/* used for GFX 9.4.3 only */ /* used for GFX 9.4.3 only */
uint32_t current_logical_xcc_start; uint32_t current_logical_xcc_start;
uint32_t wait_times;
}; };
void device_queue_manager_init_cik( void device_queue_manager_init_cik(
......
...@@ -370,6 +370,38 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, ...@@ -370,6 +370,38 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
return retval; return retval;
} }
/*
 * pm_update_grace_period() - submit a grace-period update packet on the
 * packet manager's kernel queue.
 * @pm: packet manager whose priv_queue and function table are used
 * @grace_period: value passed through to the set_grace_period() builder
 *
 * If the function table reports a set_grace_period_size of 0, no packet is
 * built or submitted and 0 is returned. Otherwise a buffer of that size is
 * acquired, filled by set_grace_period(), and either submitted (builder
 * returned 0) or rolled back (builder returned an error).
 *
 * pm->lock is held for the whole operation.
 *
 * Return: 0 on success or when nothing had to be sent, -ENOMEM when no
 * packet buffer could be acquired, otherwise the builder's error code.
 */
int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
{
	int retval = 0;
	/*
	 * Start from NULL: the acquire call's return value is not used, so
	 * the !buffer test below must not depend on the callee having
	 * written its out-parameter on failure.
	 */
	uint32_t *buffer = NULL;
	uint32_t size;

	size = pm->pmf->set_grace_period_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t),
			(unsigned int **)&buffer);

		if (!buffer) {
			pr_err("Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
		if (!retval)
			kq_submit_packet(pm->priv_queue);
		else
			kq_rollback_packet(pm->priv_queue);
	}

out:
	mutex_unlock(&pm->lock);
	return retval;
}
int pm_send_unmap_queue(struct packet_manager *pm, int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset) uint32_t filter_param, bool reset)
......
...@@ -262,6 +262,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, ...@@ -262,6 +262,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0; return 0;
} }
/*
 * pm_set_grace_period_v9() - fill @buffer with a WRITE_DATA packet that
 * programs the grace-period register.
 * @pm: packet manager; its dqm supplies the device, the kfd2kgd callbacks
 *      and the saved wait_times value
 * @buffer: packet buffer, at least sizeof(struct pm4_mec_write_data_mmio)
 *          bytes (that many bytes are zeroed here)
 * @grace_period: requested grace period, or USE_DEFAULT_GRACE_PERIOD to
 *                write back the saved wait_times value unchanged
 *
 * The register offset and data come from the ASIC-specific
 * build_grace_period_packet_info() callback. That callback is not checked
 * anywhere else before use, while its sibling get_iq_wait_times() is
 * treated as optional, so a missing callback is reported as an error here
 * instead of being dereferenced.
 *
 * Return: 0 on success, -EOPNOTSUPP if the device provides no
 * build_grace_period_packet_info() callback.
 */
static int pm_set_grace_period_v9(struct packet_manager *pm,
		uint32_t *buffer,
		uint32_t grace_period)
{
	struct pm4_mec_write_data_mmio *packet;
	uint32_t reg_offset = 0;
	uint32_t reg_data = 0;

	if (!pm->dqm->dev->kfd2kgd->build_grace_period_packet_info)
		return -EOPNOTSUPP;

	/* Called even for the default case: it is what yields reg_offset. */
	pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
			pm->dqm->dev->adev,
			pm->dqm->wait_times,
			grace_period,
			&reg_offset,
			&reg_data);

	/* Restoring the default means writing the saved value untouched. */
	if (grace_period == USE_DEFAULT_GRACE_PERIOD)
		reg_data = pm->dqm->wait_times;

	packet = (struct pm4_mec_write_data_mmio *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));

	packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
					sizeof(struct pm4_mec_write_data_mmio));

	/* Single write to a memory-mapped register, no address increment. */
	packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register;
	packet->bitfields2.addr_incr =
			addr_incr___write_data__do_not_increment_address;

	packet->bitfields3.dst_mmreg_addr = reg_offset;

	packet->data = reg_data;

	return 0;
}
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset) uint32_t filter_param, bool reset)
...@@ -345,6 +380,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { ...@@ -345,6 +380,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9, .set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9, .map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9, .unmap_queues = pm_unmap_queues_v9,
.set_grace_period = pm_set_grace_period_v9,
.query_status = pm_query_status_v9, .query_status = pm_query_status_v9,
.release_mem = NULL, .release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process), .map_process_size = sizeof(struct pm4_mes_map_process),
...@@ -352,6 +388,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { ...@@ -352,6 +388,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources), .set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues), .map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status), .query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0, .release_mem_size = 0,
}; };
...@@ -362,6 +399,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { ...@@ -362,6 +399,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9, .set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9, .map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9, .unmap_queues = pm_unmap_queues_v9,
.set_grace_period = pm_set_grace_period_v9,
.query_status = pm_query_status_v9, .query_status = pm_query_status_v9,
.release_mem = NULL, .release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
...@@ -369,6 +407,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { ...@@ -369,6 +407,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources), .set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues), .map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status), .query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0, .release_mem_size = 0,
}; };
...@@ -303,6 +303,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { ...@@ -303,6 +303,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi, .set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi, .map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi, .unmap_queues = pm_unmap_queues_vi,
.set_grace_period = NULL,
.query_status = pm_query_status_vi, .query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi, .release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process), .map_process_size = sizeof(struct pm4_mes_map_process),
...@@ -310,6 +311,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { ...@@ -310,6 +311,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources), .set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues), .map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.set_grace_period_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status), .query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem) .release_mem_size = sizeof(struct pm4_mec_release_mem)
}; };
...@@ -584,6 +584,71 @@ struct pm4_mec_release_mem { ...@@ -584,6 +584,71 @@ struct pm4_mec_release_mem {
#endif #endif
/*
 * WRITE_DATA packet definitions: field value enums plus the packet layout
 * for a single write to a memory-mapped register. Guarded so the
 * definitions are emitted only once.
 */
#ifndef PM4_MEC_WRITE_DATA_DEFINED
#define PM4_MEC_WRITE_DATA_DEFINED
/* Values for bitfields2.dst_sel: the destination of the write. */
enum WRITE_DATA_dst_sel_enum {
dst_sel___write_data__mem_mapped_register = 0,
dst_sel___write_data__tc_l2 = 2,
dst_sel___write_data__gds = 3,
dst_sel___write_data__memory = 5,
dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
};
/* Values for bitfields2.addr_incr. */
enum WRITE_DATA_addr_incr_enum {
addr_incr___write_data__increment_address = 0,
addr_incr___write_data__do_not_increment_address = 1
};
/* Values for bitfields2.wr_confirm. */
enum WRITE_DATA_wr_confirm_enum {
wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
wr_confirm___write_data__wait_for_write_confirmation = 1
};
/* Values for bitfields2.cache_policy. */
enum WRITE_DATA_cache_policy_enum {
cache_policy___write_data__lru = 0,
cache_policy___write_data__stream = 1
};
/*
 * WRITE_DATA packet targeting a memory-mapped register. It consists of
 * five members in order: header, control bits, destination register
 * address, a reserved word and the data word. Each union overlays a
 * bitfield view with a raw "ordinalN" view of the same word.
 */
struct pm4_mec_write_data_mmio {
/* Word 1: packet header. */
union {
union PM4_MES_TYPE_3_HEADER header; /*header */
unsigned int ordinal1;
};
/* Word 2: control bits; the declared widths below add up to 32. */
union {
struct {
unsigned int reserved1:8;
unsigned int dst_sel:4;
unsigned int reserved2:4;
unsigned int addr_incr:1;
unsigned int reserved3:2;
unsigned int resume_vf:1;
unsigned int wr_confirm:1;
unsigned int reserved4:4;
unsigned int cache_policy:2;
unsigned int reserved5:5;
} bitfields2;
unsigned int ordinal2;
};
/* Word 3: 18-bit destination register address plus 14 reserved bits. */
union {
struct {
unsigned int dst_mmreg_addr:18;
unsigned int reserved6:14;
} bitfields3;
unsigned int ordinal3;
};
/* Word 4: reserved. */
uint32_t reserved7;
/* Word 5: the value to write. */
uint32_t data;
};
#endif
enum { enum {
CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
}; };
......
...@@ -1350,6 +1350,8 @@ struct packet_manager_funcs { ...@@ -1350,6 +1350,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode, enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset); uint32_t filter_param, bool reset);
int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
uint32_t grace_period);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer, int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value); uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
...@@ -1360,6 +1362,7 @@ struct packet_manager_funcs { ...@@ -1360,6 +1362,7 @@ struct packet_manager_funcs {
int set_resources_size; int set_resources_size;
int map_queues_size; int map_queues_size;
int unmap_queues_size; int unmap_queues_size;
int set_grace_period_size;
int query_status_size; int query_status_size;
int release_mem_size; int release_mem_size;
}; };
...@@ -1382,6 +1385,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, ...@@ -1382,6 +1385,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm); void pm_release_ib(struct packet_manager *pm);
int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
/* Following PM funcs can be shared among VI and AI */ /* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment