Commit 838ac90d authored by Daniel Vetter

Merge tag 'drm-habanalabs-next-2023-04-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains additional habanalabs driver changes for v6.4:

- uAPI changes:
  - Add a definition of a new Gaudi2 server type. Userspace uses it to
    determine the connectivity between the accelerators inside the server.

- New features and improvements:
  - Speed up the h/w queues test in Gaudi2 to reduce device initialization time.

- Firmware related fixes:
  - Fixes to the handshake protocol during f/w initialization.
  - Sync the f/w events interrupt in hard reset to avoid a warning message.
  - Improvements to extraction of the firmware version.

- Misc bug fixes and code cleanups. Notable fixes are:
  - Multiple fixes for interrupt handling in Gaudi2.
  - Unmap mapped memory in case TLB invalidation fails.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230410124637.GA2441888@ogabbay-vm-u20.habana-labs.com
parents 4d877b1a 56499c46
......@@ -45,20 +45,29 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
}
mutex_lock(&hdev->mmu_lock);
rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
if (rc) {
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
goto err_va_umap;
goto err_va_pool_free;
}
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
if (rc)
goto err_mmu_unmap;
mutex_unlock(&hdev->mmu_lock);
cb->is_mmu_mapped = true;
return rc;
err_va_umap:
return 0;
err_mmu_unmap:
hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
err_va_pool_free:
mutex_unlock(&hdev->mmu_lock);
gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
return rc;
}
......
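Since the hunk above interleaves the removed and the added lines, here is a sketch of how the tail of cb_map_mem() reads with the patch applied (identifiers are exactly those in the hunk; surrounding declarations come from the driver source, so this is a fragment, not a standalone file):

        rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
        if (rc)
                goto err_mmu_unmap;

        mutex_unlock(&hdev->mmu_lock);
        cb->is_mmu_mapped = true;

        return 0;

err_mmu_unmap:
        /* New step: undo the MMU mapping when the cache invalidation fails */
        hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
err_va_pool_free:
        mutex_unlock(&hdev->mmu_lock);
        gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
        return rc;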
......@@ -43,48 +43,46 @@ static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status)
intr_source[2], intr_source[3], intr_source[4], intr_source[5]);
}
static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id)
static void dec_abnrm_intr_work(struct work_struct *work)
{
struct hl_dec *dec = container_of(work, struct hl_dec, abnrm_intr_work);
struct hl_device *hdev = dec->hdev;
u32 irq_status, event_mask = 0;
bool reset_required = false;
u32 irq_status, event_mask;
irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
irq_status = RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET);
dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, core_id);
dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %d\n", irq_status, dec->core_id);
dec_print_abnrm_intr_source(hdev, irq_status);
/* Clear the interrupt */
WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);
WREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);
/* Flush the interrupt clear */
RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);
RREG32(dec->base_addr + VCMD_IRQ_STATUS_OFFSET);
if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) {
reset_required = true;
event_mask = HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
} else if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK) {
event_mask = HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
} else {
event_mask = HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
}
if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK)
event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
if (irq_status & (VCMD_IRQ_STATUS_ENDCMD_MASK |
VCMD_IRQ_STATUS_BUSERR_MASK |
VCMD_IRQ_STATUS_ABORT_MASK))
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
if (reset_required) {
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, 0, event_mask);
} else {
} else if (event_mask) {
hl_notifier_event_send_all(hdev, event_mask);
}
}
static void dec_completion_abnrm(struct work_struct *work)
{
struct hl_dec *dec = container_of(work, struct hl_dec, completion_abnrm_work);
struct hl_device *hdev = dec->hdev;
dec_error_intr_work(hdev, dec->base_addr, dec->core_id);
}
void hl_dec_fini(struct hl_device *hdev)
{
kfree(hdev->dec);
......@@ -108,7 +106,7 @@ int hl_dec_init(struct hl_device *hdev)
dec = hdev->dec + j;
dec->hdev = hdev;
INIT_WORK(&dec->completion_abnrm_work, dec_completion_abnrm);
INIT_WORK(&dec->abnrm_intr_work, dec_abnrm_intr_work);
dec->core_id = j;
dec->base_addr = hdev->asic_funcs->get_dec_base_addr(hdev, j);
if (!dec->base_addr) {
......
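Assembled from the interleaved hunk above, the classification logic in the new dec_abnrm_intr_work() accumulates an event mask instead of selecting a single event (sketch of a fragment; the masks and helpers are the ones shown in the hunk):

        if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) {
                reset_required = true;
                event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
        }

        if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK)
                event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;

        if (irq_status & (VCMD_IRQ_STATUS_ENDCMD_MASK |
                        VCMD_IRQ_STATUS_BUSERR_MASK |
                        VCMD_IRQ_STATUS_ABORT_MASK))
                event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;

        if (reset_required) {
                event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
                hl_device_cond_reset(hdev, 0, event_mask);
        } else if (event_mask) {
                /* Notify only when at least one error class was flagged */
                hl_notifier_event_send_all(hdev, event_mask);
        }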
......@@ -1271,7 +1271,6 @@ int hl_device_resume(struct hl_device *hdev)
return 0;
disable_device:
pci_clear_master(hdev->pdev);
pci_disable_device(hdev->pdev);
return rc;
......@@ -1381,6 +1380,34 @@ static void device_disable_open_processes(struct hl_device *hdev, bool control_d
mutex_unlock(fd_lock);
}
static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
{
/* If the reset is due to a heartbeat failure, the device CPU is not
* responsive, in which case there is no point in sending it the PCI
* disable message.
*/
if ((flags & HL_DRV_RESET_HARD) &&
!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
/* Disable PCI access from the device F/W so it won't send
* us additional interrupts. We disable MSI/MSI-X in
* the halt_engines function, and we can't have the F/W
* sending us interrupts after that. The access must be
* disabled here because once the device is marked as
* disabled, the message won't be sent. Also, in case
* of a heartbeat failure, the device CPU is marked as
* disabled, so this message would not be sent either.
*/
if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) {
dev_warn(hdev->dev, "Failed to disable FW's PCI access\n");
return;
}
/* Verify that the last EQ interrupts are handled before 'disabled' is set */
if (hdev->cpu_queues_enable)
synchronize_irq(pci_irq_vector(hdev->pdev,
hdev->asic_prop.eq_interrupt_id));
}
}
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
......@@ -1419,28 +1446,6 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
} else {
hdev->reset_info.reset_trigger_repeated = 1;
}
/* If reset is due to heartbeat, device CPU is no responsive in
* which case no point sending PCI disable message to it.
*
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if ((flags & HL_DRV_RESET_HARD) &&
!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
/* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
* sending us interrupts after that. We need to disable
* the access here because if the device is marked
* disable, the message won't be send. Also, in case
* of heartbeat, the device CPU is marked as disable
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
dev_warn(hdev->dev,
"Failed to disable FW's PCI access\n");
}
}
/*
......@@ -1561,6 +1566,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
escalate_reset_flow:
handle_reset_trigger(hdev, flags);
send_disable_pci_access(hdev, flags);
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
......@@ -1823,9 +1829,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
flags = hdev->reset_info.hard_reset_schedule_flags;
hdev->reset_info.hard_reset_schedule_flags = 0;
hdev->disabled = true;
hard_reset = true;
handle_reset_trigger(hdev, flags);
goto escalate_reset_flow;
}
}
......
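Condensed from the hunks above, the hard-reset entry in hl_device_reset() now reads roughly as follows (fragment; unrelated logic elided). Disabling the firmware's PCI access, including the EQ interrupt sync, is grouped into one helper and runs before the device is marked disabled:

escalate_reset_flow:
        handle_reset_trigger(hdev, flags);
        send_disable_pci_access(hdev, flags);

        /* This also blocks future CS/VM/JOB completion operations */
        hdev->disabled = true;

The scheduled-hard-reset path further down now jumps back to escalate_reset_flow instead of duplicating the trigger handling and the disable step.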
......@@ -71,7 +71,7 @@ static char *extract_fw_ver_from_str(const char *fw_str)
return NULL;
}
static int extract_fw_sub_versions(struct hl_device *hdev, char *preboot_ver)
static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver)
{
char major[8], minor[8], *first_dot, *second_dot;
int rc;
......@@ -86,7 +86,7 @@ static int extract_fw_sub_versions(struct hl_device *hdev, char *preboot_ver)
if (rc) {
dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc);
goto out;
return rc;
}
/* skip the first dot */
......@@ -102,9 +102,6 @@ static int extract_fw_sub_versions(struct hl_device *hdev, char *preboot_ver)
if (rc)
dev_err(hdev->dev, "Error %d parsing preboot minor version\n", rc);
out:
kfree(preboot_ver);
return rc;
}
......@@ -1263,7 +1260,7 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
COMMS_RST_DEV, 0, false,
hdev->fw_loader.cpu_timeout);
if (rc)
dev_warn(hdev->dev, "Failed sending COMMS_RST_DEV\n");
dev_err(hdev->dev, "Failed sending COMMS_RST_DEV\n");
} else {
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_RST_DEV);
}
......@@ -1281,10 +1278,10 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
/* Stop device CPU to make sure nothing bad happens */
if (hdev->asic_prop.dynamic_fw_load) {
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
COMMS_GOTO_WFE, 0, true,
COMMS_GOTO_WFE, 0, false,
hdev->fw_loader.cpu_timeout);
if (rc)
dev_warn(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
} else {
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
msleep(static_loader->cpu_reset_wait_msec);
......@@ -2181,8 +2178,8 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
dev_info(hdev->dev, "preboot version %s\n", preboot_ver);
/* This function takes care of freeing preboot_ver */
rc = extract_fw_sub_versions(hdev, preboot_ver);
rc = hl_get_preboot_major_minor(hdev, preboot_ver);
kfree(preboot_ver);
if (rc)
return rc;
}
......
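A condensed view of the caller after this patch (pieced from the hunk above): hl_get_preboot_major_minor() only parses the version string, and the buffer is freed by the caller rather than inside the helper:

        dev_info(hdev->dev, "preboot version %s\n", preboot_ver);

        rc = hl_get_preboot_major_minor(hdev, preboot_ver);
        kfree(preboot_ver);
        if (rc)
                return rc;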
......@@ -662,7 +662,7 @@ struct hl_hints_range {
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
* @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
* @unexpected_user_error_interrupt_id: interrupt id used to indicate an unexpected user error.
* @eq_interrupt_id: interrupt id for EQ, used to synchronize EQ interrupts in hard-reset.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
......@@ -793,7 +793,7 @@ struct asic_fixed_properties {
u16 user_interrupt_count;
u16 user_dec_intr_count;
u16 tpc_interrupt_id;
u16 unexpected_user_error_interrupt_id;
u16 eq_interrupt_id;
u16 cache_line_size;
u16 server_type;
u8 completion_queues_count;
......@@ -1211,15 +1211,15 @@ struct hl_eq {
/**
* struct hl_dec - describes a decoder sw instance.
* @hdev: pointer to the device structure.
* @completion_abnrm_work: workqueue object to run when decoder generates an error interrupt
* @abnrm_intr_work: workqueue work item to run when decoder generates an error interrupt.
* @core_id: ID of the decoder.
* @base_addr: base address of the decoder.
*/
struct hl_dec {
struct hl_device *hdev;
struct work_struct completion_abnrm_work;
u32 core_id;
u32 base_addr;
struct hl_device *hdev;
struct work_struct abnrm_intr_work;
u32 core_id;
u32 base_addr;
};
/**
......
......@@ -415,8 +415,8 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
struct hl_eq_entry *eq_base;
struct hl_eqe_work *handle_eqe_work;
bool entry_ready;
u32 cur_eqe;
u16 cur_eqe_index;
u32 cur_eqe, ctl;
u16 cur_eqe_index, event_type;
eq_base = eq->kernel_address;
......@@ -449,7 +449,10 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
dma_rmb();
if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
ctl = le32_to_cpu(eq_entry->hdr.ctl);
event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
dev_warn(hdev->dev,
"Device disabled but received an EQ event (%u)\n", event_type);
goto skip_irq;
}
......@@ -486,7 +489,7 @@ irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg)
{
struct hl_dec *dec = arg;
schedule_work(&dec->completion_abnrm_work);
schedule_work(&dec->abnrm_intr_work);
return IRQ_HANDLED;
}
......
......@@ -605,6 +605,7 @@ static u64 get_va_block(struct hl_device *hdev,
bool is_align_pow_2 = is_power_of_2(va_range->page_size);
bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
bool force_hint = flags & HL_MEM_FORCE_HINT;
int rc;
if (is_align_pow_2)
align_mask = ~((u64)va_block_align - 1);
......@@ -722,9 +723,13 @@ static u64 get_va_block(struct hl_device *hdev,
kfree(new_va_block);
}
if (add_prev)
add_va_block_locked(hdev, &va_range->list, prev_start,
prev_end);
if (add_prev) {
rc = add_va_block_locked(hdev, &va_range->list, prev_start, prev_end);
if (rc) {
reserved_valid_start = 0;
goto out;
}
}
print_va_list_locked(hdev, &va_range->list);
out:
......
......@@ -679,7 +679,9 @@ int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
if (rc)
dev_err_ratelimited(hdev->dev, "MMU cache invalidation failed\n");
dev_err_ratelimited(hdev->dev,
"%s cache invalidation failed, rc=%d\n",
flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", rc);
return rc;
}
......@@ -692,7 +694,9 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, is_hard, flags,
asid, va, size);
if (rc)
dev_err_ratelimited(hdev->dev, "MMU cache range invalidation failed\n");
dev_err_ratelimited(hdev->dev,
"%s cache range invalidation failed: va=%#llx, size=%llu, rc=%d",
flags == VM_TYPE_USERPTR ? "PMMU" : "HMMU", va, size, rc);
return rc;
}
......
......@@ -420,7 +420,6 @@ int hl_pci_init(struct hl_device *hdev)
unmap_pci_bars:
hl_pci_bars_unmap(hdev);
disable_device:
pci_clear_master(pdev);
pci_disable_device(pdev);
return rc;
......@@ -436,6 +435,5 @@ void hl_pci_fini(struct hl_device *hdev)
{
hl_pci_bars_unmap(hdev);
pci_clear_master(hdev->pdev);
pci_disable_device(hdev->pdev);
}
......@@ -497,10 +497,14 @@ int hl_sysfs_init(struct hl_device *hdev)
if (rc) {
dev_err(hdev->dev,
"Failed to add groups to device, error %d\n", rc);
return rc;
goto remove_groups;
}
return 0;
remove_groups:
device_remove_groups(hdev->dev, hl_dev_attr_groups);
return rc;
}
void hl_sysfs_fini(struct hl_device *hdev)
......
......@@ -682,6 +682,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->first_available_user_interrupt = USHRT_MAX;
prop->tpc_interrupt_id = USHRT_MAX;
/* single msi */
prop->eq_interrupt_id = 0;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
......@@ -2017,38 +2020,6 @@ static int gaudi_enable_msi_single(struct hl_device *hdev)
return rc;
}
static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
int cq_cnt = hdev->asic_prop.completion_queues_count;
int rc, i, irq_cnt_init, irq;
for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
irq = gaudi_pci_irq_vector(hdev, i, false);
rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
&hdev->completion_queue[i]);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_irqs;
}
}
irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
&hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_irqs;
}
return 0;
free_irqs:
for (i = 0 ; i < irq_cnt_init ; i++)
free_irq(gaudi_pci_irq_vector(hdev, i, false),
&hdev->completion_queue[i]);
return rc;
}
static int gaudi_enable_msi(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
......@@ -2063,14 +2034,7 @@ static int gaudi_enable_msi(struct hl_device *hdev)
return rc;
}
if (rc < NUMBER_OF_INTERRUPTS) {
gaudi->multi_msi_mode = false;
rc = gaudi_enable_msi_single(hdev);
} else {
gaudi->multi_msi_mode = true;
rc = gaudi_enable_msi_multi(hdev);
}
rc = gaudi_enable_msi_single(hdev);
if (rc)
goto free_pci_irq_vectors;
......@@ -2086,47 +2050,23 @@ static int gaudi_enable_msi(struct hl_device *hdev)
static void gaudi_sync_irqs(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
int i, cq_cnt = hdev->asic_prop.completion_queues_count;
if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
return;
/* Wait for all pending IRQs to be finished */
if (gaudi->multi_msi_mode) {
for (i = 0 ; i < cq_cnt ; i++)
synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
synchronize_irq(gaudi_pci_irq_vector(hdev,
GAUDI_EVENT_QUEUE_MSI_IDX,
true));
} else {
synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}
synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}
static void gaudi_disable_msi(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
return;
gaudi_sync_irqs(hdev);
if (gaudi->multi_msi_mode) {
irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
true);
free_irq(irq, &hdev->event_queue);
for (i = 0 ; i < cq_cnt ; i++) {
irq = gaudi_pci_irq_vector(hdev, i, false);
free_irq(irq, &hdev->completion_queue[i]);
}
} else {
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
}
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
pci_free_irq_vectors(hdev->pdev);
gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
......@@ -3921,11 +3861,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
WREG32(mmCPU_IF_PF_PQ_PI, 0);
if (gaudi->multi_msi_mode)
WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
else
WREG32(mmCPU_IF_QUEUE_INIT,
PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
irq_handler_offset = prop->gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
......@@ -5602,7 +5538,6 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_add
u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
u32 msi_vec, bool eb)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct packet_msg_prot *cq_pkt;
struct packet_nop *cq_padding;
u64 msi_addr;
......@@ -5632,12 +5567,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_add
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
cq_pkt->ctl = cpu_to_le32(tmp);
cq_pkt->value = cpu_to_le32(1);
if (gaudi->multi_msi_mode)
msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
else
msi_addr = mmPCIE_CORE_MSI_REQ;
msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
......
......@@ -28,20 +28,8 @@
#define NUMBER_OF_COLLECTIVE_QUEUES 12
#define NUMBER_OF_SOBS_IN_GRP 11
/*
* Number of MSI interrupts IDS:
* Each completion queue has 1 ID
* The event queue has 1 ID
*/
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \
NUMBER_OF_CPU_HW_QUEUES)
#define GAUDI_STREAM_MASTER_ARR_SIZE 8
#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */
......@@ -324,8 +312,6 @@ struct gaudi_internal_qman_info {
* signal we can use this engine in later code paths.
* Each bit is cleared upon reset of its corresponding H/W
* engine.
* @multi_msi_mode: whether we are working in multi MSI single MSI mode.
* Multi MSI is possible only with IOMMU enabled.
* @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
* 8-bit value so use u8.
*/
......@@ -345,7 +331,6 @@ struct gaudi_device {
u32 events_stat[GAUDI_EVENT_SIZE];
u32 events_stat_aggregate[GAUDI_EVENT_SIZE];
u32 hw_cap_initialized;
u8 multi_msi_mode;
u8 mmu_cache_inv_pi;
};
......
This diff is collapsed.
......@@ -240,6 +240,8 @@
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id {
......@@ -452,6 +454,17 @@ struct dup_block_ctx {
unsigned int instances;
};
/**
* struct gaudi2_queues_test_info - Holds the addresses of the messages used for testing the
* device queues.
* @dma_addr: The address used by the HW for accessing the message.
* @kern_addr: The address used by the driver for accessing the message.
*/
struct gaudi2_queues_test_info {
dma_addr_t dma_addr;
void *kern_addr;
};
/**
* struct gaudi2_device - ASIC specific manage structure.
* @cpucp_info_get: get information on device from CPU-CP
......@@ -510,6 +523,7 @@ struct dup_block_ctx {
* @flush_db_fifo: flag to force flush DB FIFO after a write.
* @hbm_cfg: HBM subsystem settings
* @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
* @queues_test_info: information used by the driver when testing the HW queues.
*/
struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev);
......@@ -537,6 +551,9 @@ struct gaudi2_device {
u32 events_stat[GAUDI2_EVENT_SIZE];
u32 events_stat_aggregate[GAUDI2_EVENT_SIZE];
u32 num_of_valid_hw_events;
/* Queue testing */
struct gaudi2_queues_test_info queues_test_info[GAUDI2_NUM_TESTED_QS];
};
/*
......
......@@ -473,6 +473,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->first_available_user_interrupt = USHRT_MAX;
prop->tpc_interrupt_id = USHRT_MAX;
prop->eq_interrupt_id = GOYA_EVENT_QUEUE_MSIX_IDX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2020-2022 HabanaLabs, Ltd.
* Copyright 2020-2023 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -543,6 +543,8 @@
#define HBM_MC_SPI_IEEE1500_COMP_MASK BIT(3)
#define HBM_MC_SPI_IEEE1500_PAUSED_MASK BIT(4)
#define ARC_FARM_OFFSET (mmARC_FARM_ARC1_AUX_BASE - mmARC_FARM_ARC0_AUX_BASE)
#include "nic0_qpc0_regs.h"
#include "nic0_qm0_regs.h"
#include "nic0_qm_arc_aux0_regs.h"
......
......@@ -708,7 +708,8 @@ enum hl_server_type {
HL_SERVER_GAUDI_HLS1H = 2,
HL_SERVER_GAUDI_TYPE1 = 3,
HL_SERVER_GAUDI_TYPE2 = 4,
HL_SERVER_GAUDI2_HLS2 = 5
HL_SERVER_GAUDI2_HLS2 = 5,
HL_SERVER_GAUDI2_TYPE1 = 7
};
/*
......
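The uAPI hunk above adds HL_SERVER_GAUDI2_TYPE1 to enum hl_server_type. A hypothetical userspace helper (not part of this patch) shows how the value might be consumed; server_type is assumed to have already been obtained through the driver's INFO query, and only the values visible in the hunk are handled:

static const char *hl_server_type_str(enum hl_server_type type)
{
        switch (type) {
        case HL_SERVER_GAUDI_TYPE1:
                return "Gaudi type-1 server";
        case HL_SERVER_GAUDI_TYPE2:
                return "Gaudi type-2 server";
        case HL_SERVER_GAUDI2_HLS2:
                return "Gaudi2 HLS2 server";
        case HL_SERVER_GAUDI2_TYPE1:
                return "Gaudi2 type-1 server";  /* added by this update */
        default:
                return "unknown server type";
        }
}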