Commit 838ac90d authored by Daniel Vetter

Merge tag 'drm-habanalabs-next-2023-04-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains additional habanalabs driver changes for v6.4:

- uAPI changes:
  - Add a definition of a new Gaudi2 server type. Userspace uses it to
    determine the connectivity between the accelerators inside the server
    (a short usage sketch follows the commit metadata below).

- New features and improvements:
  - Speed up the h/w queues test in Gaudi2 to reduce device initialization time.

- Firmware related fixes:
  - Fixes to the handshake protocol during f/w initialization.
  - Synchronize the f/w events interrupt during hard reset to avoid a warning message.
  - Improvements to extraction of the firmware version.

- Misc bug fixes and code cleanups. Notable fixes are:
  - Multiple fixes for interrupt handling in Gaudi2.
  - Unmap mapped memory in case TLB invalidation fails.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230410124637.GA2441888@ogabbay-vm-u20.habana-labs.com
parents 4d877b1a 56499c46
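
For the uAPI bullet above, the following is a minimal userspace sketch (not part of this commit) of how the new server type might be consumed once the driver reports it, e.g. via the INFO ioctl. The enum values are copied from the uAPI header hunk at the end of this diff; the local enum definition and the main() wrapper exist only to keep the sketch self-contained.

/*
 * Minimal sketch: map the server type reported by the habanalabs driver to a
 * human-readable description. The values mirror enum hl_server_type from the
 * uAPI header; they are redefined locally only so the example compiles on
 * its own.
 */
#include <stdio.h>
#include <stdlib.h>

enum {
	HL_SERVER_GAUDI_HLS1H	= 2,
	HL_SERVER_GAUDI_TYPE1	= 3,
	HL_SERVER_GAUDI_TYPE2	= 4,
	HL_SERVER_GAUDI2_HLS2	= 5,
	HL_SERVER_GAUDI2_TYPE1	= 7,	/* new server type added in this merge */
};

static const char *server_type_name(unsigned int type)
{
	switch (type) {
	case HL_SERVER_GAUDI_HLS1H:	return "Gaudi HLS1-H";
	case HL_SERVER_GAUDI_TYPE1:	return "Gaudi type-1 server";
	case HL_SERVER_GAUDI_TYPE2:	return "Gaudi type-2 server";
	case HL_SERVER_GAUDI2_HLS2:	return "Gaudi2 HLS2";
	case HL_SERVER_GAUDI2_TYPE1:	return "Gaudi2 type-1 server";
	default:			return "unknown/other server type";
	}
}

int main(int argc, char **argv)
{
	/* In real code the value comes from the driver; here it is taken from
	 * the command line purely for illustration.
	 */
	unsigned int type = (argc > 1) ?
			(unsigned int)strtoul(argv[1], NULL, 0) : HL_SERVER_GAUDI2_TYPE1;

	printf("server type %u: %s\n", type, server_type_name(type));
	return 0;
}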
@@ -45,20 +45,29 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 	}
 
 	mutex_lock(&hdev->mmu_lock);
 	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
-		goto err_va_umap;
+		goto err_va_pool_free;
 	}
 
 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
+	if (rc)
+		goto err_mmu_unmap;
 
 	mutex_unlock(&hdev->mmu_lock);
 
 	cb->is_mmu_mapped = true;
-	return rc;
 
-err_va_umap:
+	return 0;
+
+err_mmu_unmap:
+	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
+err_va_pool_free:
 	mutex_unlock(&hdev->mmu_lock);
 	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 
 	return rc;
 }
...
@@ -43,48 +43,46 @@ static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status)
 		intr_source[2], intr_source[3], intr_source[4], intr_source[5]);
 }
 
-static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id)
+static void dec_abnrm_intr_work(struct work_struct *work)
 {
+	struct hl_dec *dec = container_of(work, struct hl_dec, abnrm_intr_work);
+	struct hl_device *hdev = dec->hdev;
+	u32 irq_status, event_mask = 0;
 	bool reset_required = false;
-	u32 irq_status, event_mask;
 
-	irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
+	irq_status = RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET);
 
-	dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, core_id);
+	dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, dec->core_id);
 
 	dec_print_abnrm_intr_source(hdev, irq_status);
 
 	/* Clear the interrupt */
-	WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);
+	WREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);
 
 	/* Flush the interrupt clear */
-	RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
+	RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET);
 
 	if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) {
 		reset_required = true;
-		event_mask = HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
-	} else if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK) {
-		event_mask = HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
-	} else {
-		event_mask = HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 	}
 
+	if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK)
+		event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
+
+	if (irq_status & (VCMD_IRQ_STATUS_ENDCMD_MASK |
+			VCMD_IRQ_STATUS_BUSERR_MASK |
+			VCMD_IRQ_STATUS_ABORT_MASK))
+		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+
 	if (reset_required) {
 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
 		hl_device_cond_reset(hdev, 0, event_mask);
-	} else {
+	} else if (event_mask) {
 		hl_notifier_event_send_all(hdev, event_mask);
 	}
 }
 
-static void dec_completion_abnrm(struct work_struct *work)
-{
-	struct hl_dec *dec = container_of(work, struct hl_dec, completion_abnrm_work);
-	struct hl_device *hdev = dec->hdev;
-
-	dec_error_intr_work(hdev, dec->base_addr, dec->core_id);
-}
-
 void hl_dec_fini(struct hl_device *hdev)
 {
 	kfree(hdev->dec);
@@ -108,7 +106,7 @@ int hl_dec_init(struct hl_device *hdev)
 		dec = hdev->dec + j;
 
 		dec->hdev = hdev;
-		INIT_WORK(&dec->completion_abnrm_work, dec_completion_abnrm);
+		INIT_WORK(&dec->abnrm_intr_work, dec_abnrm_intr_work);
 		dec->core_id = j;
 		dec->base_addr = hdev->asic_funcs->get_dec_base_addr(hdev, j);
 		if (!dec->base_addr) {
...
@@ -1271,7 +1271,6 @@ int hl_device_resume(struct hl_device *hdev)
 	return 0;
 
 disable_device:
-	pci_clear_master(hdev->pdev);
 	pci_disable_device(hdev->pdev);
 
 	return rc;
@@ -1381,6 +1380,34 @@ static void device_disable_open_processes(struct hl_device *hdev, bool control_d
 	mutex_unlock(fd_lock);
 }
 
+static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
+{
+	/* If reset is due to heartbeat, device CPU is no responsive in
+	 * which case no point sending PCI disable message to it.
+	 */
+	if ((flags & HL_DRV_RESET_HARD) &&
+			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
+		/* Disable PCI access from device F/W so he won't send
+		 * us additional interrupts. We disable MSI/MSI-X at
+		 * the halt_engines function and we can't have the F/W
+		 * sending us interrupts after that. We need to disable
+		 * the access here because if the device is marked
+		 * disable, the message won't be send. Also, in case
+		 * of heartbeat, the device CPU is marked as disable
+		 * so this message won't be sent
+		 */
+		if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) {
+			dev_warn(hdev->dev, "Failed to disable FW's PCI access\n");
+			return;
+		}
+
+		/* verify that last EQs are handled before disabled is set */
+		if (hdev->cpu_queues_enable)
+			synchronize_irq(pci_irq_vector(hdev->pdev,
+					hdev->asic_prop.eq_interrupt_id));
+	}
+}
+
 static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 {
 	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
@@ -1419,28 +1446,6 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 	} else {
 		hdev->reset_info.reset_trigger_repeated = 1;
 	}
-
-	/* If reset is due to heartbeat, device CPU is no responsive in
-	 * which case no point sending PCI disable message to it.
-	 *
-	 * If F/W is performing the reset, no need to send it a message to disable
-	 * PCI access
-	 */
-	if ((flags & HL_DRV_RESET_HARD) &&
-			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
-		/* Disable PCI access from device F/W so he won't send
-		 * us additional interrupts. We disable MSI/MSI-X at
-		 * the halt_engines function and we can't have the F/W
-		 * sending us interrupts after that. We need to disable
-		 * the access here because if the device is marked
-		 * disable, the message won't be send. Also, in case
-		 * of heartbeat, the device CPU is marked as disable
-		 * so this message won't be sent
-		 */
-		if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
-			dev_warn(hdev->dev,
-				"Failed to disable FW's PCI access\n");
-	}
 }
 
 /*
@@ -1561,6 +1566,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 escalate_reset_flow:
 	handle_reset_trigger(hdev, flags);
+	send_disable_pci_access(hdev, flags);
 
 	/* This also blocks future CS/VM/JOB completion operations */
 	hdev->disabled = true;
@@ -1823,9 +1829,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 			dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
 			flags = hdev->reset_info.hard_reset_schedule_flags;
 			hdev->reset_info.hard_reset_schedule_flags = 0;
-			hdev->disabled = true;
 			hard_reset = true;
-			handle_reset_trigger(hdev, flags);
 			goto escalate_reset_flow;
 		}
 	}
...
@@ -71,7 +71,7 @@ static char *extract_fw_ver_from_str(const char *fw_str)
 	return NULL;
 }
 
-static int extract_fw_sub_versions(struct hl_device *hdev, char *preboot_ver)
+static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver)
 {
 	char major[8], minor[8], *first_dot, *second_dot;
 	int rc;
@@ -86,7 +86,7 @@ static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver)
 	if (rc) {
 		dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc);
-		goto out;
+		return rc;
 	}
 
 	/* skip the first dot */
@@ -102,9 +102,6 @@
 	if (rc)
 		dev_err(hdev->dev, "Error %d parsing preboot minor version\n", rc);
 
-out:
-	kfree(preboot_ver);
 	return rc;
 }
@@ -1263,7 +1260,7 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
 				COMMS_RST_DEV, 0, false,
 				hdev->fw_loader.cpu_timeout);
 		if (rc)
-			dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n");
+			dev_err(hdev->dev, "Failed sending COMMS_RST_DEV\n");
 	} else {
 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_RST_DEV);
 	}
@@ -1281,10 +1278,10 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
 	/* Stop device CPU to make sure nothing bad happens */
 	if (hdev->asic_prop.dynamic_fw_load) {
 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
-				COMMS_GOTO_WFE, 0, true,
+				COMMS_GOTO_WFE, 0, false,
 				hdev->fw_loader.cpu_timeout);
 		if (rc)
-			dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
+			dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
 	} else {
 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
 		msleep(static_loader->cpu_reset_wait_msec);
@@ -2181,8 +2178,8 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
 		dev_info(hdev->dev, "preboot version %s\n", preboot_ver);
 
-		/* This function takes care of freeing preboot_ver */
-		rc = extract_fw_sub_versions(hdev, preboot_ver);
+		rc = hl_get_preboot_major_minor(hdev, preboot_ver);
+		kfree(preboot_ver);
 		if (rc)
 			return rc;
 	}
...
@@ -662,7 +662,7 @@ struct hl_hints_range {
  * @user_interrupt_count: number of user interrupts.
  * @user_dec_intr_count: number of decoder interrupts exposed to user.
  * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
- * @unexpected_user_error_interrupt_id: interrupt id used to indicate an unexpected user error.
+ * @eq_interrupt_id: interrupt id for EQ, uses to synchronize EQ interrupts in hard-reset.
  * @cache_line_size: device cache line size.
  * @server_type: Server type that the ASIC is currently installed in.
  *               The value is according to enum hl_server_type in uapi file.
@@ -793,7 +793,7 @@ struct asic_fixed_properties {
 	u16				user_interrupt_count;
 	u16				user_dec_intr_count;
 	u16				tpc_interrupt_id;
-	u16				unexpected_user_error_interrupt_id;
+	u16				eq_interrupt_id;
 	u16				cache_line_size;
 	u16				server_type;
 	u8				completion_queues_count;
@@ -1211,15 +1211,15 @@ struct hl_eq {
 /**
  * struct hl_dec - describes a decoder sw instance.
  * @hdev: pointer to the device structure.
- * @completion_abnrm_work: workqueue object to run when decoder generates an error interrupt
+ * @abnrm_intr_work: workqueue work item to run when decoder generates an error interrupt.
  * @core_id: ID of the decoder.
 * @base_addr: base address of the decoder.
 */
 struct hl_dec {
 	struct hl_device		*hdev;
-	struct work_struct		completion_abnrm_work;
+	struct work_struct		abnrm_intr_work;
 	u32				core_id;
 	u32				base_addr;
 };
 
 /**
...
@@ -415,8 +415,8 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
 	struct hl_eq_entry *eq_base;
 	struct hl_eqe_work *handle_eqe_work;
 	bool entry_ready;
-	u32 cur_eqe;
-	u16 cur_eqe_index;
+	u32 cur_eqe, ctl;
+	u16 cur_eqe_index, event_type;
 
 	eq_base = eq->kernel_address;
@@ -449,7 +449,10 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
 		dma_rmb();
 
 		if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
-			dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
+			ctl = le32_to_cpu(eq_entry->hdr.ctl);
+			event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
+			dev_warn(hdev->dev,
+				"Device disabled but received an EQ event (%u)\n", event_type);
 			goto skip_irq;
 		}
@@ -486,7 +489,7 @@ irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg)
 {
 	struct hl_dec *dec = arg;
 
-	schedule_work(&dec->completion_abnrm_work);
+	schedule_work(&dec->abnrm_intr_work);
 
 	return IRQ_HANDLED;
 }
...
@@ -605,6 +605,7 @@ static u64 get_va_block(struct hl_device *hdev,
 	bool is_align_pow_2 = is_power_of_2(va_range->page_size);
 	bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
 	bool force_hint = flags & HL_MEM_FORCE_HINT;
+	int rc;
 
 	if (is_align_pow_2)
 		align_mask = ~((u64)va_block_align - 1);
@@ -722,9 +723,13 @@
 		kfree(new_va_block);
 	}
 
-	if (add_prev)
-		add_va_block_locked(hdev, &va_range->list, prev_start,
-				prev_end);
+	if (add_prev) {
+		rc = add_va_block_locked(hdev, &va_range->list, prev_start, prev_end);
+		if (rc) {
+			reserved_valid_start = 0;
+			goto out;
+		}
+	}
 
 	print_va_list_locked(hdev, &va_range->list);
 
 out:
...
@@ -679,7 +679,9 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
 	rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
 	if (rc)
-		dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n");
+		dev_err_ratelimited(hdev->dev,
+				"%s cache invalidation failed, rc=%d\n",
+				flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc);
 
 	return rc;
 }
@@ -692,7 +694,9 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 	rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags,
 								asid, va, size);
 	if (rc)
-		dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n");
+		dev_err_ratelimited(hdev->dev,
+				"%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
+				flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc);
 
 	return rc;
 }
...
@@ -420,7 +420,6 @@ int hl_pci_init(struct hl_device *hdev)
 unmap_pci_bars:
 	hl_pci_bars_unmap(hdev);
 disable_device:
-	pci_clear_master(pdev);
 	pci_disable_device(pdev);
 
 	return rc;
@@ -436,6 +435,5 @@ void hl_pci_fini(struct hl_device *hdev)
 {
 	hl_pci_bars_unmap(hdev);
 
-	pci_clear_master(hdev->pdev);
 	pci_disable_device(hdev->pdev);
 }
@@ -497,10 +497,14 @@ int hl_sysfs_init(struct hl_device *hdev)
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to add groups to device, error %d\n", rc);
-		return rc;
+		goto remove_groups;
 	}
 
 	return 0;
+
+remove_groups:
+	device_remove_groups(hdev->dev, hl_dev_attr_groups);
+	return rc;
 }
 
 void hl_sysfs_fini(struct hl_device *hdev)
...
@@ -682,6 +682,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 	prop->first_available_user_interrupt = USHRT_MAX;
 	prop->tpc_interrupt_id = USHRT_MAX;
 
+	/* single msi */
+	prop->eq_interrupt_id = 0;
+
 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
 		prop->first_available_cq[i] = USHRT_MAX;
@@ -2017,38 +2020,6 @@ static int gaudi_enable_msi_single(struct hl_device *hdev)
 	return rc;
 }
 
-static int gaudi_enable_msi_multi(struct hl_device *hdev)
-{
-	int cq_cnt = hdev->asic_prop.completion_queues_count;
-	int rc, i, irq_cnt_init, irq;
-
-	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
-		irq = gaudi_pci_irq_vector(hdev, i, false);
-		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
-				&hdev->completion_queue[i]);
-		if (rc) {
-			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
-			goto free_irqs;
-		}
-	}
-
-	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
-	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
-				&hdev->event_queue);
-	if (rc) {
-		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
-		goto free_irqs;
-	}
-
-	return 0;
-
-free_irqs:
-	for (i = 0 ; i < irq_cnt_init ; i++)
-		free_irq(gaudi_pci_irq_vector(hdev, i, false),
-				&hdev->completion_queue[i]);
-	return rc;
-}
-
 static int gaudi_enable_msi(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
@@ -2063,14 +2034,7 @@ static int gaudi_enable_msi(struct hl_device *hdev)
 		return rc;
 	}
 
-	if (rc < NUMBER_OF_INTERRUPTS) {
-		gaudi->multi_msi_mode = false;
-		rc = gaudi_enable_msi_single(hdev);
-	} else {
-		gaudi->multi_msi_mode = true;
-		rc = gaudi_enable_msi_multi(hdev);
-	}
-
+	rc = gaudi_enable_msi_single(hdev);
 	if (rc)
 		goto free_pci_irq_vectors;
@@ -2086,47 +2050,23 @@
 static void gaudi_sync_irqs(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
 
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
 		return;
 
 	/* Wait for all pending IRQs to be finished */
-	if (gaudi->multi_msi_mode) {
-		for (i = 0 ; i < cq_cnt ; i++)
-			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
-
-		synchronize_irq(gaudi_pci_irq_vector(hdev,
-						GAUDI_EVENT_QUEUE_MSI_IDX,
-						true));
-	} else {
-		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
-	}
+	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
 }
 
 static void gaudi_disable_msi(struct hl_device *hdev)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
 
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
 		return;
 
 	gaudi_sync_irqs(hdev);
-
-	if (gaudi->multi_msi_mode) {
-		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
-				true);
-		free_irq(irq, &hdev->event_queue);
-
-		for (i = 0 ; i < cq_cnt ; i++) {
-			irq = gaudi_pci_irq_vector(hdev, i, false);
-			free_irq(irq, &hdev->completion_queue[i]);
-		}
-	} else {
-		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
-	}
+	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
 
 	pci_free_irq_vectors(hdev->pdev);
 
 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
@@ -3921,11 +3861,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
 
-	if (gaudi->multi_msi_mode)
-		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
-	else
-		WREG32(mmCPU_IF_QUEUE_INIT,
-			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
+	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
 
 	irq_handler_offset = prop->gic_interrupts_enable ?
 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
@@ -5602,7 +5538,6 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_add
 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
 				u32 msi_vec, bool eb)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	struct packet_msg_prot *cq_pkt;
 	struct packet_nop *cq_padding;
 	u64 msi_addr;
@@ -5632,12 +5567,7 @@
 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 	cq_pkt->ctl = cpu_to_le32(tmp);
 	cq_pkt->value = cpu_to_le32(1);
 
-	if (gaudi->multi_msi_mode)
-		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
-	else
-		msi_addr = mmPCIE_CORE_MSI_REQ;
-
+	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
+
 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
 }
...
@@ -28,20 +28,8 @@
 #define NUMBER_OF_COLLECTIVE_QUEUES	12
 #define NUMBER_OF_SOBS_IN_GRP		11
 
-/*
- * Number of MSI interrupts IDS:
- * Each completion queue has 1 ID
- * The event queue has 1 ID
- */
-#define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + \
-						NUMBER_OF_CPU_HW_QUEUES)
-
 #define GAUDI_STREAM_MASTER_ARR_SIZE	8
 
-#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
-#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
-#endif
-
 #define CORESIGHT_TIMEOUT_USEC		100000		/* 100 ms */
 
 #define GAUDI_MAX_CLK_FREQ		2200000000ull	/* 2200 MHz */
@@ -324,8 +312,6 @@ struct gaudi_internal_qman_info {
 *                      signal we can use this engine in later code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
- * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
- *                  Multi MSI is possible only with IOMMU enabled.
 * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
 *                    8-bit value so use u8.
 */
@@ -345,7 +331,6 @@ struct gaudi_device {
 	u32				events_stat[GAUDI_EVENT_SIZE];
 	u32				events_stat_aggregate[GAUDI_EVENT_SIZE];
 	u32				hw_cap_initialized;
-	u8				multi_msi_mode;
 	u8				mmu_cache_inv_pi;
 };
...
This diff is collapsed.
@@ -240,6 +240,8 @@
 #define GAUDI2_SOB_INCREMENT_BY_ONE	(FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
 					FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
 
+#define GAUDI2_NUM_TESTED_QS		(GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
+
 #define GAUDI2_NUM_OF_GLBL_ERR_CAUSE	8
 
 enum gaudi2_reserved_sob_id {
@@ -452,6 +454,17 @@ struct dup_block_ctx {
 	unsigned int instances;
 };
 
+/**
+ * struct gaudi2_queues_test_info - Holds the address of a the messages used for testing the
+ * device queues.
+ * @dma_addr: the address used by the HW for accessing the message.
+ * @kern_addr: The address used by the driver for accessing the message.
+ */
+struct gaudi2_queues_test_info {
+	dma_addr_t dma_addr;
+	void *kern_addr;
+};
+
 /**
  * struct gaudi2_device - ASIC specific manage structure.
  * @cpucp_info_get: get information on device from CPU-CP
@@ -510,6 +523,7 @@ struct dup_block_ctx {
 * @flush_db_fifo: flag to force flush DB FIFO after a write.
 * @hbm_cfg: HBM subsystem settings
 * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
+ * @queues_test_info: information used by the driver when testing the HW queues.
 */
 struct gaudi2_device {
 	int (*cpucp_info_get)(struct hl_device *hdev);
@@ -537,6 +551,9 @@ struct gaudi2_device {
 	u32				events_stat[GAUDI2_EVENT_SIZE];
 	u32				events_stat_aggregate[GAUDI2_EVENT_SIZE];
 	u32				num_of_valid_hw_events;
+
+	/* Queue testing */
+	struct gaudi2_queues_test_info	queues_test_info[GAUDI2_NUM_TESTED_QS];
 };
 
 /*
...
@@ -473,6 +473,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->first_available_user_interrupt = USHRT_MAX;
 	prop->tpc_interrupt_id = USHRT_MAX;
+	prop->eq_interrupt_id = GOYA_EVENT_QUEUE_MSIX_IDX;
 
 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
 		prop->first_available_cq[i] = USHRT_MAX;
...
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2020-2022 HabanaLabs, Ltd.
+ * Copyright 2020-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -543,6 +543,8 @@
 #define HBM_MC_SPI_IEEE1500_COMP_MASK	BIT(3)
 #define HBM_MC_SPI_IEEE1500_PAUSED_MASK	BIT(4)
 
+#define ARC_FARM_OFFSET		(mmARC_FARM_ARC1_AUX_BASE - mmARC_FARM_ARC0_AUX_BASE)
+
 #include "nic0_qpc0_regs.h"
 #include "nic0_qm0_regs.h"
 #include "nic0_qm_arc_aux0_regs.h"
...
@@ -708,7 +708,8 @@ enum hl_server_type {
 	HL_SERVER_GAUDI_HLS1H = 2,
 	HL_SERVER_GAUDI_TYPE1 = 3,
 	HL_SERVER_GAUDI_TYPE2 = 4,
-	HL_SERVER_GAUDI2_HLS2 = 5
+	HL_SERVER_GAUDI2_HLS2 = 5,
+	HL_SERVER_GAUDI2_TYPE1 = 7
 };
 
 /*
...