Commit cbaa99ed authored by Oded Gabbay

habanalabs: perform accounting for active CS

This patch adds accounting for active command submissions (CS). Active means
that the CS has been submitted to the H/W queues and has not completed yet.

This is necessary to support suspend operation. Because the device will be
reset upon suspend, we can only suspend after all active CS have been
completed. Hence, we need to perform accounting on their number.
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent d12a5e24
...@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref) ...@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref)
/* We also need to update CI for internal queues */ /* We also need to update CI for internal queues */
if (cs->submitted) { if (cs->submitted) {
int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
WARN_ONCE((cs_cnt < 0),
"hl%d: error in CS active cnt %d\n",
hdev->id, cs_cnt);
hl_int_hw_queue_update_ci(cs); hl_int_hw_queue_update_ci(cs);
spin_lock(&hdev->hw_queues_mirror_lock); spin_lock(&hdev->hw_queues_mirror_lock);
......
...@@ -218,6 +218,7 @@ static int device_early_init(struct hl_device *hdev) ...@@ -218,6 +218,7 @@ static int device_early_init(struct hl_device *hdev)
spin_lock_init(&hdev->hw_queues_mirror_lock); spin_lock_init(&hdev->hw_queues_mirror_lock);
atomic_set(&hdev->in_reset, 0); atomic_set(&hdev->in_reset, 0);
atomic_set(&hdev->fd_open_cnt, 0); atomic_set(&hdev->fd_open_cnt, 0);
atomic_set(&hdev->cs_active_cnt, 0);
return 0; return 0;
......
...@@ -1056,13 +1056,15 @@ struct hl_device_reset_work { ...@@ -1056,13 +1056,15 @@ struct hl_device_reset_work {
* @cb_pool_lock: protects the CB pool. * @cb_pool_lock: protects the CB pool.
* @user_ctx: current user context executing. * @user_ctx: current user context executing.
* @dram_used_mem: current DRAM memory consumption. * @dram_used_mem: current DRAM memory consumption.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @fd_open_cnt: number of open user processes.
* @timeout_jiffies: device CS timeout value. * @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This * @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, KMD will restore this * value is saved so in case of hard-reset, KMD will restore this
* value and update the F/W after the re-initialization * value and update the F/W after the re-initialization
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @fd_open_cnt: number of open user processes.
* @cs_active_cnt: number of active command submissions on this device (active
* means already in H/W queues)
* @major: habanalabs KMD major. * @major: habanalabs KMD major.
* @high_pll: high PLL profile frequency. * @high_pll: high PLL profile frequency.
* @soft_reset_cnt: number of soft reset since KMD loading. * @soft_reset_cnt: number of soft reset since KMD loading.
...@@ -1128,11 +1130,12 @@ struct hl_device { ...@@ -1128,11 +1130,12 @@ struct hl_device {
struct hl_ctx *user_ctx; struct hl_ctx *user_ctx;
atomic64_t dram_used_mem; atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
atomic_t in_reset; atomic_t in_reset;
atomic_t curr_pll_profile; atomic_t curr_pll_profile;
atomic_t fd_open_cnt; atomic_t fd_open_cnt;
u64 timeout_jiffies; atomic_t cs_active_cnt;
u64 max_power;
u32 major; u32 major;
u32 high_pll; u32 high_pll;
u32 soft_reset_cnt; u32 soft_reset_cnt;
......
...@@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) ...@@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
spin_unlock(&hdev->hw_queues_mirror_lock); spin_unlock(&hdev->hw_queues_mirror_lock);
} }
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) { atomic_inc(&hdev->cs_active_cnt);
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
if (job->ext_queue) if (job->ext_queue)
ext_hw_queue_schedule_job(job); ext_hw_queue_schedule_job(job);
else else
int_hw_queue_schedule_job(job); int_hw_queue_schedule_job(job);
}
cs->submitted = true; cs->submitted = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment