Commit e3e3eaab authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2021-02-08' of...

Merge tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains the following changes for 5.12-rc1:

- Improve communication protocol with device CPU CP application.
  The change prevents random (rare) out-of-sync errors.

- Notify F/W to start sending events only after initialization of
  device is done. This fixes the issue where fatal events were received
  but ignored.

- Fix integer handling (static analysis warning).

- Always fetch HBM ECC errors from F/W (if available).

- Minor fix in GAUDI-specific initialization code.

* tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs/gaudi: don't enable clock gating on DMA5
  habanalabs: return block size + block ID
  habanalabs: update security map after init CPU Qs
  habanalabs: enable F/W events after init done
  habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
  habanalabs: support fetching first available user CQ
  habanalabs: improve communication protocol with cpucp
  habanalabs: fix integer handling issue
parents 47ddb856 da5dfbb9
...@@ -1159,12 +1159,20 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, ...@@ -1159,12 +1159,20 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
atomic_set(&hdev->in_reset, 0); atomic_set(&hdev->in_reset, 0);
hdev->needs_reset = false; hdev->needs_reset = false;
if (hard_reset) dev_notice(hdev->dev, "Successfully finished resetting the device\n");
if (hard_reset) {
hdev->hard_reset_cnt++; hdev->hard_reset_cnt++;
else
hdev->soft_reset_cnt++;
dev_warn(hdev->dev, "Successfully finished resetting the device\n"); /* After reset is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events
* and if those events are fatal, we won't know about it and
* the device will be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
} else {
hdev->soft_reset_cnt++;
}
return 0; return 0;
...@@ -1415,6 +1423,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -1415,6 +1423,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->init_done = true; hdev->init_done = true;
/* After initialization is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events and if
* those events are fatal, we won't know about it and the device will
* be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
return 0; return 0;
release_ctx: release_ctx:
......
...@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) ...@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result) u16 len, u32 timeout, u64 *result)
{ {
struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct cpucp_packet *pkt; struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr; dma_addr_t pkt_dma_addr;
u32 tmp; u32 tmp, expected_ack_val;
int rc = 0; int rc = 0;
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
...@@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, ...@@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out; goto out;
} }
/* set fence to a non valid value */
pkt->fence = UINT_MAX;
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
if (rc) { if (rc) {
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
goto out; goto out;
} }
if (hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi;
else
expected_ack_val = CPUCP_PACKET_FENCE_VAL;
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
(tmp == CPUCP_PACKET_FENCE_VAL), 1000, (tmp == expected_ack_val), 1000,
timeout, true); timeout, true);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
......
...@@ -411,6 +411,7 @@ struct hl_mmu_properties { ...@@ -411,6 +411,7 @@ struct hl_mmu_properties {
* @first_available_user_mon: first monitor available for the user * @first_available_user_mon: first monitor available for the user
* @first_available_user_msix_interrupt: first available msix interrupt * @first_available_user_msix_interrupt: first available msix interrupt
* reserved for the user * reserved for the user
* @first_available_cq: first available CQ for the user.
* @tpc_enabled_mask: which TPCs are enabled. * @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues. * @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware, * @fw_security_disabled: true if security measures are disabled in firmware,
...@@ -473,6 +474,7 @@ struct asic_fixed_properties { ...@@ -473,6 +474,7 @@ struct asic_fixed_properties {
u16 first_available_user_sob[HL_MAX_DCORES]; u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt; u16 first_available_user_msix_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
u8 tpc_enabled_mask; u8 tpc_enabled_mask;
u8 completion_queues_count; u8 completion_queues_count;
u8 fw_security_disabled; u8 fw_security_disabled;
...@@ -855,12 +857,18 @@ enum div_select_defs { ...@@ -855,12 +857,18 @@ enum div_select_defs {
* and place them in the relevant cs jobs * and place them in the relevant cs jobs
* @collective_wait_create_jobs: allocate collective wait cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs
* @scramble_addr: Routine to scramble the address prior of mapping it * @scramble_addr: Routine to scramble the address prior of mapping it
* in the MMU. * in the MMU.
* @descramble_addr: Routine to de-scramble the address prior of * @descramble_addr: Routine to de-scramble the address prior of
* showing it to users. * showing it to users.
* @ack_protection_bits_errors: ack and dump all security violations * @ack_protection_bits_errors: ack and dump all security violations
* @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
* also returns the size of the block if caller supplies
* a valid pointer for it
* @hw_block_mmap: mmap a HW block with a given id. * @hw_block_mmap: mmap a HW block with a given id.
* @enable_events_from_fw: send interrupt to firmware to notify them the
* driver is ready to receive asynchronous events. This
* function should be called during the first init and
* after every hard-reset of the device
*/ */
struct hl_asic_funcs { struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev); int (*early_init)(struct hl_device *hdev);
...@@ -974,9 +982,10 @@ struct hl_asic_funcs { ...@@ -974,9 +982,10 @@ struct hl_asic_funcs {
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev); void (*ack_protection_bits_errors)(struct hl_device *hdev);
int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr, int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
u32 *block_id); u32 *block_size, u32 *block_id);
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size); u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
}; };
......
...@@ -397,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -397,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
prop->first_available_user_sob[args->dcore_id]; prop->first_available_user_sob[args->dcore_id];
sm_info.first_available_monitor = sm_info.first_available_monitor =
prop->first_available_user_mon[args->dcore_id]; prop->first_available_user_mon[args->dcore_id];
sm_info.first_available_cq =
prop->first_available_cq[args->dcore_id];
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size, return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
sizeof(sm_info))) ? -EFAULT : 0; sizeof(sm_info))) ? -EFAULT : 0;
......
...@@ -1289,12 +1289,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, ...@@ -1289,12 +1289,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc; return rc;
} }
static int map_block(struct hl_device *hdev, u64 address, u64 *handle) static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
u32 *size)
{ {
u32 block_id = 0; u32 block_id = 0;
int rc; int rc;
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id); rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
*handle = block_id | HL_MMAP_TYPE_BLOCK; *handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT; *handle <<= PAGE_SHIFT;
...@@ -1371,7 +1372,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) ...@@ -1371,7 +1372,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx; struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0; u64 block_handle, device_addr = 0;
u32 handle = 0; u32 handle = 0, block_size;
int rc; int rc;
switch (args->in.op) { switch (args->in.op) {
...@@ -1416,8 +1417,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) ...@@ -1416,8 +1417,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
case HL_MEM_OP_MAP_BLOCK: case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr, rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle); &block_handle, &block_size);
args->out.handle = block_handle; args->out.block_handle = block_handle;
args->out.block_size = block_size;
break; break;
default: default:
...@@ -1437,7 +1439,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1437,7 +1439,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx; struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0; u64 block_handle, device_addr = 0;
u32 handle = 0; u32 handle = 0, block_size;
int rc; int rc;
if (!hl_device_operational(hdev, &status)) { if (!hl_device_operational(hdev, &status)) {
...@@ -1524,8 +1526,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1524,8 +1526,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
case HL_MEM_OP_MAP_BLOCK: case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr, rc = map_block(hdev, args->in.map_block.block_addr,
&block_handle); &block_handle, &block_size);
args->out.handle = block_handle; args->out.block_handle = block_handle;
args->out.block_size = block_size;
break; break;
default: default:
......
...@@ -507,7 +507,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, ...@@ -507,7 +507,7 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
p = (char *)p + hop0_shift_off; p = (char *)p + hop0_shift_off;
p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
hop_shift = *(u64 *)p; hop_shift = *(u64 *)p;
offset_mask = (1 << hop_shift) - 1; offset_mask = (1ull << hop_shift) - 1;
addr_mask = ~(offset_mask); addr_mask = ~(offset_mask);
*phys_addr = (tmp_phys_addr & addr_mask) | *phys_addr = (tmp_phys_addr & addr_mask) |
(virt_addr & offset_mask); (virt_addr & offset_mask);
......
...@@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) ...@@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->first_available_user_msix_interrupt = USHRT_MAX; prop->first_available_user_msix_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
/* disable fw security for now, set it in a later stage */ /* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true; prop->fw_security_disabled = true;
prop->fw_security_status_valid = false; prop->fw_security_status_valid = false;
...@@ -1379,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev) ...@@ -1379,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev)
return rc; return rc;
} }
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
rc = gaudi_fetch_psoc_frequency(hdev); rc = gaudi_fetch_psoc_frequency(hdev);
if (rc) { if (rc) {
dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
...@@ -3459,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) ...@@ -3459,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
enable = !!(hdev->clock_gating_mask & enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i]))); (BIT_ULL(gaudi_dma_assignment[i])));
/* GC sends work to DMA engine through Upper CP in DMA5 so
* we need to not enable clock gating in that DMA
*/
if (i == GAUDI_HBM_DMA_4)
enable = 0;
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0); enable ? QMAN_CGM1_PWR_GATE_EN : 0);
...@@ -3725,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) ...@@ -3725,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{ {
struct gaudi_device *gaudi = hdev->asic_specific; struct gaudi_device *gaudi = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_eq *eq; struct hl_eq *eq;
u32 status; u32 status;
struct hl_hw_queue *cpu_pq = struct hl_hw_queue *cpu_pq =
...@@ -3781,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) ...@@ -3781,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
return -EIO; return -EIO;
} }
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0; return 0;
} }
...@@ -4438,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) ...@@ -4438,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
/* ring the doorbell */ /* ring the doorbell */
WREG32(db_reg_offset, db_value); WREG32(db_reg_offset, db_value);
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
/* make sure device CPU will read latest data from host */
mb();
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GAUDI_EVENT_PI_UPDATE); GAUDI_EVENT_PI_UPDATE);
}
} }
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
...@@ -7098,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, ...@@ -7098,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
int err = 0; int err = 0;
if (!hdev->asic_prop.fw_security_disabled) { if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
if (!hbm_ecc_data) { if (!hbm_ecc_data) {
dev_err(hdev->dev, "No FW ECC data"); dev_err(hdev->dev, "No FW ECC data");
return 0; return 0;
...@@ -7120,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, ...@@ -7120,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
dev_err(hdev->dev, dev_err(hdev->dev,
"HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
device, ch, type, wr_par, rd_par, ca_par, serr, derr); device, ch, wr_par, rd_par, ca_par, serr, derr);
dev_err(hdev->dev,
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
device, ch, hbm_ecc_data->first_addr, type,
hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
hbm_ecc_data->dec_cnt);
err = 1; err = 1;
return 0; return 0;
} }
if (!hdev->asic_prop.fw_security_disabled) {
dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
return 0;
}
base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
...@@ -8469,7 +8496,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev) ...@@ -8469,7 +8496,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
} }
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr, static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
u32 *block_id) u32 *block_size, u32 *block_id)
{ {
return -EPERM; return -EPERM;
} }
...@@ -8481,6 +8508,11 @@ static int gaudi_block_mmap(struct hl_device *hdev, ...@@ -8481,6 +8508,11 @@ static int gaudi_block_mmap(struct hl_device *hdev,
return -EPERM; return -EPERM;
} }
/*
 * gaudi_enable_events_from_fw() - GAUDI implementation of the
 * enable_events_from_fw ASIC callback.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Writes GAUDI_EVENT_INTS_REGISTER to mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
 * i.e. raises the interrupt that notifies the firmware the driver is ready
 * to receive asynchronous events. The write used to live in
 * gaudi_late_init(); it is a separate callback so common code can issue it
 * only after device init or a hard reset has fully completed.
 */
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
}
static const struct hl_asic_funcs gaudi_funcs = { static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init, .early_init = gaudi_early_init,
.early_fini = gaudi_early_fini, .early_fini = gaudi_early_fini,
...@@ -8562,7 +8594,8 @@ static const struct hl_asic_funcs gaudi_funcs = { ...@@ -8562,7 +8594,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.descramble_addr = hl_mmu_descramble_addr, .descramble_addr = hl_mmu_descramble_addr,
.ack_protection_bits_errors = gaudi_ack_protection_bits_errors, .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
.get_hw_block_id = gaudi_get_hw_block_id, .get_hw_block_id = gaudi_get_hw_block_id,
.hw_block_mmap = gaudi_block_mmap .hw_block_mmap = gaudi_block_mmap,
.enable_events_from_fw = gaudi_enable_events_from_fw
}; };
/** /**
......
...@@ -457,6 +457,9 @@ int goya_get_fixed_properties(struct hl_device *hdev) ...@@ -457,6 +457,9 @@ int goya_get_fixed_properties(struct hl_device *hdev)
prop->first_available_user_msix_interrupt = USHRT_MAX; prop->first_available_user_msix_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
/* disable fw security for now, set it in a later stage */ /* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true; prop->fw_security_disabled = true;
prop->fw_security_status_valid = false; prop->fw_security_status_valid = false;
...@@ -794,9 +797,6 @@ int goya_late_init(struct hl_device *hdev) ...@@ -794,9 +797,6 @@ int goya_late_init(struct hl_device *hdev)
return rc; return rc;
} }
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
return 0; return 0;
} }
...@@ -1188,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev) ...@@ -1188,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev)
int goya_init_cpu_queues(struct hl_device *hdev) int goya_init_cpu_queues(struct hl_device *hdev)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_eq *eq; struct hl_eq *eq;
u32 status; u32 status;
struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
...@@ -1240,6 +1241,10 @@ int goya_init_cpu_queues(struct hl_device *hdev) ...@@ -1240,6 +1241,10 @@ int goya_init_cpu_queues(struct hl_device *hdev)
return -EIO; return -EIO;
} }
/* update FW application security bits */
if (prop->fw_security_status_valid)
prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
goya->hw_cap_initialized |= HW_CAP_CPU_Q; goya->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0; return 0;
} }
...@@ -2806,9 +2811,12 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) ...@@ -2806,9 +2811,12 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
/* ring the doorbell */ /* ring the doorbell */
WREG32(db_reg_offset, db_value); WREG32(db_reg_offset, db_value);
if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
/* make sure device CPU will read latest data from host */
mb();
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_PI_UPDATE); GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
} }
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
...@@ -5382,7 +5390,7 @@ static void goya_ctx_fini(struct hl_ctx *ctx) ...@@ -5382,7 +5390,7 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
} }
static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr, static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
u32 *block_id) u32 *block_size, u32 *block_id)
{ {
return -EPERM; return -EPERM;
} }
...@@ -5393,6 +5401,12 @@ static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma, ...@@ -5393,6 +5401,12 @@ static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
return -EPERM; return -EPERM;
} }
/*
 * goya_enable_events_from_fw() - GOYA implementation of the
 * enable_events_from_fw ASIC callback.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Writes GOYA_ASYNC_EVENT_ID_INTS_REGISTER to
 * mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, i.e. raises the interrupt that
 * notifies the firmware the driver is ready to receive asynchronous events.
 * The write used to live in goya_late_init(); it is a separate callback so
 * common code can issue it only after device init or a hard reset has fully
 * completed.
 */
static void goya_enable_events_from_fw(struct hl_device *hdev)
{
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
}
static const struct hl_asic_funcs goya_funcs = { static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init, .early_init = goya_early_init,
.early_fini = goya_early_fini, .early_fini = goya_early_fini,
...@@ -5474,7 +5488,8 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5474,7 +5488,8 @@ static const struct hl_asic_funcs goya_funcs = {
.descramble_addr = hl_mmu_descramble_addr, .descramble_addr = hl_mmu_descramble_addr,
.ack_protection_bits_errors = goya_ack_protection_bits_errors, .ack_protection_bits_errors = goya_ack_protection_bits_errors,
.get_hw_block_id = goya_get_hw_block_id, .get_hw_block_id = goya_get_hw_block_id,
.hw_block_mmap = goya_block_mmap .hw_block_mmap = goya_block_mmap,
.enable_events_from_fw = goya_enable_events_from_fw
}; };
/* /*
......
...@@ -166,6 +166,10 @@ ...@@ -166,6 +166,10 @@
* FW handles HBM ECC indications. * FW handles HBM ECC indications.
* Initialized in: linux * Initialized in: linux
* *
* CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd
* is set to the PI counter.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the * This is a main indication that the
* running FW populates the device status * running FW populates the device status
...@@ -190,6 +194,7 @@ ...@@ -190,6 +194,7 @@
#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12) #define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) #define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14)
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status { enum cpu_boot_status {
......
...@@ -414,10 +414,13 @@ struct hl_pll_frequency_info { ...@@ -414,10 +414,13 @@ struct hl_pll_frequency_info {
* struct hl_info_sync_manager - sync manager information * struct hl_info_sync_manager - sync manager information
* @first_available_sync_object: first available sob * @first_available_sync_object: first available sob
* @first_available_monitor: first available monitor * @first_available_monitor: first available monitor
* @first_available_cq: first available cq
*/ */
struct hl_info_sync_manager { struct hl_info_sync_manager {
__u32 first_available_sync_object; __u32 first_available_sync_object;
__u32 first_available_monitor; __u32 first_available_monitor;
__u32 first_available_cq;
__u32 reserved;
}; };
/** /**
...@@ -779,10 +782,10 @@ struct hl_mem_in { ...@@ -779,10 +782,10 @@ struct hl_mem_in {
/* HL_MEM_OP_MAP_BLOCK - map a hw block */ /* HL_MEM_OP_MAP_BLOCK - map a hw block */
struct { struct {
/* /*
* HW block address to map, a handle will be returned * HW block address to map, a handle and size will be
* to the user and will be used to mmap the relevant * returned to the user and will be used to mmap the
* block. Only addresses from configuration space are * relevant block. Only addresses from configuration
* allowed. * space are allowed.
*/ */
__u64 block_addr; __u64 block_addr;
} map_block; } map_block;
...@@ -813,11 +816,26 @@ struct hl_mem_out { ...@@ -813,11 +816,26 @@ struct hl_mem_out {
__u64 device_virt_addr; __u64 device_virt_addr;
/* /*
* Used for HL_MEM_OP_ALLOC and HL_MEM_OP_MAP_BLOCK. * Used in HL_MEM_OP_ALLOC
* This is the assigned handle for the allocated memory * This is the assigned handle for the allocated memory
* or mapped block
*/ */
__u64 handle; __u64 handle;
struct {
/*
* Used in HL_MEM_OP_MAP_BLOCK.
* This is the assigned handle for the mapped block
*/
__u64 block_handle;
/*
* Used in HL_MEM_OP_MAP_BLOCK
* This is the size of the mapped block
*/
__u32 block_size;
__u32 pad;
};
}; };
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment