Commit 1fa185c6 authored by Omer Shpigelman, committed by Oded Gabbay

habanalabs: re-factor H/W queues initialization

We want to remove the following restrictions/assumptions in our driver:
1. The H/W queue index is also the completion queue index.
2. The H/W queue index is also the IRQ number of the completion queue.
3. All queues of the same type have consecutive indexes.

Therefore, we add support for H/W queues of the same type with
non-consecutive indexes, and for a completion queue index and IRQ number
that differ from the H/W queue index.
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 76cedc73
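To make this possible, the patch introduces a per-ASIC get_queue_id_for_cq() callback plus cq_id and msi_vec fields in struct hl_hw_queue; Goya keeps the identity mapping, so its behavior does not change. As a rough illustration only (the function name, table and index values below are hypothetical and not part of this commit), an ASIC whose external queues sit at non-consecutive indexes could implement the callback with a small lookup table:

/*
 * Hypothetical sketch: map each completion queue to the H/W queue it
 * serves on an imaginary ASIC whose external queues are at indexes
 * 3, 5 and 8.  Goya's real implementation simply returns cq_idx.
 */
static const u32 example_cq_to_queue_id[] = {
	[0] = 3,	/* CQ 0 completes jobs of H/W queue 3 */
	[1] = 5,	/* CQ 1 completes jobs of H/W queue 5 */
	[2] = 8,	/* CQ 2 completes jobs of H/W queue 8 */
};

static u32 example_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return example_cq_to_queue_id[cq_idx];
}

hl_device_init() feeds the id returned by this callback into hl_cq_init(), so each completion queue is bound to the correct H/W queue even when the two index spaces no longer coincide.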
@@ -1062,7 +1062,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
  */
 int hl_device_init(struct hl_device *hdev, struct class *hclass)
 {
-	int i, rc, cq_ready_cnt;
+	int i, rc, cq_cnt, cq_ready_cnt;
 	char *name;
 	bool add_cdev_sysfs_on_err = false;
@@ -1120,14 +1120,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto sw_fini;
 	}
 
+	cq_cnt = hdev->asic_prop.completion_queues_count;
+
 	/*
 	 * Initialize the completion queues. Must be done before hw_init,
 	 * because there the addresses of the completion queues are being
 	 * passed as arguments to request_irq
 	 */
-	hdev->completion_queue =
-			kcalloc(hdev->asic_prop.completion_queues_count,
-			sizeof(*hdev->completion_queue), GFP_KERNEL);
+	hdev->completion_queue = kcalloc(cq_cnt,
+			sizeof(*hdev->completion_queue),
+			GFP_KERNEL);
 
 	if (!hdev->completion_queue) {
 		dev_err(hdev->dev, "failed to allocate completion queues\n");
@@ -1135,10 +1137,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto hw_queues_destroy;
 	}
 
-	for (i = 0, cq_ready_cnt = 0;
-			i < hdev->asic_prop.completion_queues_count;
-			i++, cq_ready_cnt++) {
-		rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
+	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
+		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
+				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
 		if (rc) {
 			dev_err(hdev->dev,
 				"failed to initialize completion queue\n");
...
@@ -890,6 +890,7 @@ void goya_init_dma_qmans(struct hl_device *hdev)
 	q = &hdev->kernel_queues[0];
 
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
+		q->cq_id = q->msi_vec = i;
 		goya_init_dma_qman(hdev, i, q->bus_address);
 		goya_init_dma_ch(hdev, i);
 	}
@@ -5273,6 +5274,11 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
 	return RREG32(mmHW_STATE);
 }
 
+u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
+{
+	return cq_idx;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5332,7 +5338,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
 	.halt_coresight = goya_halt_coresight,
-	.get_clk_rate = goya_get_clk_rate
+	.get_clk_rate = goya_get_clk_rate,
+	.get_queue_id_for_cq = goya_get_queue_id_for_cq
 };
 
 /*
...
@@ -234,5 +234,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
 
 int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
 
 #endif /* GOYAP_H_ */
@@ -365,6 +365,8 @@ struct hl_cs_job;
  * @pi: holds the queue's pi value.
  * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
  * @hw_queue_id: the id of the H/W queue.
+ * @cq_id: the id for the corresponding CQ for this H/W queue.
+ * @msi_vec: the IRQ number of the H/W queue.
  * @int_queue_len: length of internal queue (number of entries).
  * @valid: is the queue valid (we have array of 32 queues, not all of them
  *		exists).
@@ -377,6 +379,8 @@ struct hl_hw_queue {
 	u32			pi;
 	u32			ci;
 	u32			hw_queue_id;
+	u32			cq_id;
+	u32			msi_vec;
 	u16			int_queue_len;
 	u8			valid;
 };
@@ -534,6 +538,7 @@ enum hl_pll_frequency {
  * @wreg: Write a register. Needed for simulator support.
  * @halt_coresight: stop the ETF and ETR traces.
  * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
+ * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -620,6 +625,7 @@ struct hl_asic_funcs {
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
 	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+	u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 };
...
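The new cq_id and msi_vec fields are what the submission path below consumes. As a hedged sketch only (the function name, the dense CQ allocation and the "+ 1" IRQ offset are illustrative, and it assumes the driver's existing queue_type field, QUEUE_TYPE_EXT and HL_MAX_QUEUES; only Goya's identity assignment exists in this commit), a future ASIC could populate them like this:

/*
 * Hypothetical sketch: give external H/W queues densely packed CQ ids
 * and a shifted MSI vector range, regardless of where the queues sit
 * in the kernel_queues[] array.
 */
static void example_set_cq_and_irq_mapping(struct hl_device *hdev)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[0];
	u32 i, cq_idx = 0;

	for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
		if (q->queue_type != QUEUE_TYPE_EXT)
			continue;	/* only external queues complete into a CQ */

		q->cq_id = cq_idx;		/* CQs allocated densely */
		q->msi_vec = cq_idx + 1;	/* e.g. vector 0 kept for events */
		cq_idx++;
	}
}

With such a mapping in place, ext_queue_schedule_job() below writes the completion entry into completion_queue[q->cq_id] and signals q->msi_vec, so neither value has to equal q->hw_queue_id any more.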
@@ -111,7 +111,7 @@ static int ext_queue_sanity_checks(struct hl_device *hdev,
 					bool reserve_cq_entry)
 {
 	atomic_t *free_slots =
-			&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;
+			&hdev->completion_queue[q->cq_id].free_slots_cnt;
 	int free_slots_cnt;
 
 	/* Check we have enough space in the queue */
@@ -194,7 +194,7 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
 					int num_of_entries)
 {
 	atomic_t *free_slots =
-			&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;
+			&hdev->completion_queue[q->cq_id].free_slots_cnt;
 
 	/*
 	 * Check we have enough space in the completion queue.
@@ -308,13 +308,13 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	 * No need to check if CQ is full because it was already
 	 * checked in ext_queue_sanity_checks
 	 */
-	cq = &hdev->completion_queue[q->hw_queue_id];
+	cq = &hdev->completion_queue[q->cq_id];
 	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
 
 	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
 						cq_addr,
 						le32_to_cpu(cq_pkt.data),
-						q->hw_queue_id);
+						q->msi_vec);
 
 	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
@@ -401,7 +401,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	 * No need to check if CQ is full because it was already
 	 * checked in hw_queue_sanity_checks
 	 */
-	cq = &hdev->completion_queue[q->hw_queue_id];
+	cq = &hdev->completion_queue[q->cq_id];
 	cq->pi = hl_cq_inc_ptr(cq->pi);
 
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
...