Commit 0acfbe9c authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-fixes-2020-12-30' of...

Merge tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus

Oded writes:

This tag contains the following fixes for 5.11-rc2:

- Fixes that are needed for supporting the new F/W with security features:
  - Correctly fetch PLL information in GOYA when security is enabled in F/W
  - Fix hard-reset support when F/W is in its preboot stage
  - Disable clock gating when initializing the H/W
  - Fix hard-reset procedure
  - Fix PCI controller initialization
- Remove setting of Engine-Barrier in collective wait operations. This
  barrier created a drop in performance
- Retry loading the TPC firmware in case of EINTR during loading
- Fix CS counters
- Register to PCI shutdown callback to fix handling of VM shutdown
- Fix order of status check
- Fix memory leak in reset procedure
- Fix and add comments and fix indentations

* tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: Fix memleak in hl_device_reset
  habanalabs: fix order of status check
  habanalabs: register to pci shutdown callback
  habanalabs: add validation cs counter, fix misplaced counters
  habanalabs/gaudi: retry loading TPC f/w on -EINTR
  habanalabs: adjust pci controller init to new firmware
  habanalabs: update comment in hl_boot_if.h
  habanalabs/gaudi: enhance reset message
  habanalabs: full FW hard reset support
  habanalabs/gaudi: disable CGM at HW initialization
  habanalabs: Revise comment to align with mirror list name
  habanalabs/gaudi: do not set EB in collective slave queues
  habanalabs: preboot hard reset support
  habanalabs: remove generic gaudi get_pll_freq function
  habanalabs: fetch PSOC PLL frequency from F/W in goya
  habanalabs: add comment for pll frequency ioctl opcode
  habanalabs: Fix a missing-braces warning
parents 5c8fe583 b000700d
...@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) ...@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{ {
enum hl_device_status status; enum hl_device_status status;
if (hdev->disabled) if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_MALFUNCTION;
else if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_IN_RESET; status = HL_DEVICE_STATUS_IN_RESET;
else if (hdev->needs_reset) else if (hdev->needs_reset)
status = HL_DEVICE_STATUS_NEEDS_RESET; status = HL_DEVICE_STATUS_NEEDS_RESET;
else if (hdev->disabled)
status = HL_DEVICE_STATUS_MALFUNCTION;
else else
status = HL_DEVICE_STATUS_OPERATIONAL; status = HL_DEVICE_STATUS_OPERATIONAL;
...@@ -1092,6 +1092,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, ...@@ -1092,6 +1092,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
GFP_KERNEL); GFP_KERNEL);
if (!hdev->kernel_ctx) { if (!hdev->kernel_ctx) {
rc = -ENOMEM; rc = -ENOMEM;
hl_mmu_fini(hdev);
goto out_err; goto out_err;
} }
...@@ -1103,6 +1104,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, ...@@ -1103,6 +1104,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
"failed to init kernel ctx in hard reset\n"); "failed to init kernel ctx in hard reset\n");
kfree(hdev->kernel_ctx); kfree(hdev->kernel_ctx);
hdev->kernel_ctx = NULL; hdev->kernel_ctx = NULL;
hl_mmu_fini(hdev);
goto out_err; goto out_err;
} }
} }
......
...@@ -627,23 +627,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, ...@@ -627,23 +627,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
security_status = RREG32(cpu_security_boot_status_reg); security_status = RREG32(cpu_security_boot_status_reg);
/* We read security status multiple times during boot: /* We read security status multiple times during boot:
* 1. preboot - we check if fw security feature is supported * 1. preboot - a. Check whether the security status bits are valid
* 2. boot cpu - we get boot cpu security status * b. Check whether fw security is enabled
* 3. FW application - we get FW application security status * c. Check whether hard reset is done by preboot
* 2. boot cpu - a. Fetch boot cpu security status
* b. Check whether hard reset is done by boot cpu
* 3. FW application - a. Fetch fw application security status
* b. Check whether hard reset is done by fw app
* *
* Preboot: * Preboot:
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
*/ */
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
hdev->asic_prop.fw_security_status_valid = 1; prop->fw_security_status_valid = 1;
prop->fw_security_disabled =
!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN); if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
prop->fw_security_disabled = false;
else
prop->fw_security_disabled = true;
if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
} else { } else {
hdev->asic_prop.fw_security_status_valid = 0; prop->fw_security_status_valid = 0;
prop->fw_security_disabled = true; prop->fw_security_disabled = true;
} }
dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "firmware-level security is %s\n", dev_info(hdev->dev, "firmware-level security is %s\n",
prop->fw_security_disabled ? "disabled" : "enabled"); prop->fw_security_disabled ? "disabled" : "enabled");
...@@ -655,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, ...@@ -655,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
{ {
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 status; u32 status;
int rc; int rc;
...@@ -723,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, ...@@ -723,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
/* Read U-Boot version now in case we will later fail */ /* Read U-Boot version now in case we will later fail */
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
/* Clear reset status since we need to read it again from boot CPU */
prop->hard_reset_done_by_fw = false;
/* Read boot_cpu security bits */ /* Read boot_cpu security bits */
if (hdev->asic_prop.fw_security_status_valid) if (prop->fw_security_status_valid) {
hdev->asic_prop.fw_boot_cpu_security_map = prop->fw_boot_cpu_security_map =
RREG32(cpu_security_boot_status_reg); RREG32(cpu_security_boot_status_reg);
if (prop->fw_boot_cpu_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
}
dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
if (rc) { if (rc) {
detect_cpu_boot_status(hdev, status); detect_cpu_boot_status(hdev, status);
rc = -EIO; rc = -EIO;
...@@ -796,18 +821,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, ...@@ -796,18 +821,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out; goto out;
} }
/* Clear reset status since we need to read again from app */
prop->hard_reset_done_by_fw = false;
/* Read FW application security bits */ /* Read FW application security bits */
if (hdev->asic_prop.fw_security_status_valid) { if (prop->fw_security_status_valid) {
hdev->asic_prop.fw_app_security_map = prop->fw_app_security_map =
RREG32(cpu_security_boot_status_reg); RREG32(cpu_security_boot_status_reg);
if (hdev->asic_prop.fw_app_security_map & if (prop->fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
hdev->asic_prop.hard_reset_done_by_fw = true; prop->hard_reset_done_by_fw = true;
} }
dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); prop->hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "Successfully loaded firmware to device\n"); dev_info(hdev->dev, "Successfully loaded firmware to device\n");
......
...@@ -944,7 +944,7 @@ struct hl_asic_funcs { ...@@ -944,7 +944,7 @@ struct hl_asic_funcs {
u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev);
u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id, u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
u32 size); u32 size, bool eb);
u32 (*gen_wait_cb)(struct hl_device *hdev, u32 (*gen_wait_cb)(struct hl_device *hdev,
struct hl_gen_wait_properties *prop); struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data); void (*reset_sob)(struct hl_device *hdev, void *data);
...@@ -1000,6 +1000,7 @@ struct hl_va_range { ...@@ -1000,6 +1000,7 @@ struct hl_va_range {
* @queue_full_drop_cnt: dropped due to queue full * @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset * @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
* @validation_drop_cnt: dropped due to error in validation
*/ */
struct hl_cs_counters_atomic { struct hl_cs_counters_atomic {
atomic64_t out_of_mem_drop_cnt; atomic64_t out_of_mem_drop_cnt;
...@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic { ...@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
atomic64_t queue_full_drop_cnt; atomic64_t queue_full_drop_cnt;
atomic64_t device_in_reset_drop_cnt; atomic64_t device_in_reset_drop_cnt;
atomic64_t max_cs_in_flight_drop_cnt; atomic64_t max_cs_in_flight_drop_cnt;
atomic64_t validation_drop_cnt;
}; };
/** /**
......
...@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = { ...@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
.id_table = ids, .id_table = ids,
.probe = hl_pci_probe, .probe = hl_pci_probe,
.remove = hl_pci_remove, .remove = hl_pci_remove,
.shutdown = hl_pci_remove,
.driver.pm = &hl_pm_ops, .driver.pm = &hl_pm_ops,
.err_handler = &hl_pci_err_handler, .err_handler = &hl_pci_err_handler,
}; };
......
...@@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
atomic64_read(&cntr->device_in_reset_drop_cnt); atomic64_read(&cntr->device_in_reset_drop_cnt);
cs_counters.total_max_cs_in_flight_drop_cnt = cs_counters.total_max_cs_in_flight_drop_cnt =
atomic64_read(&cntr->max_cs_in_flight_drop_cnt); atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
cs_counters.total_validation_drop_cnt =
atomic64_read(&cntr->validation_drop_cnt);
if (hpriv->ctx) { if (hpriv->ctx) {
cs_counters.ctx_out_of_mem_drop_cnt = cs_counters.ctx_out_of_mem_drop_cnt =
...@@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
cs_counters.ctx_max_cs_in_flight_drop_cnt = cs_counters.ctx_max_cs_in_flight_drop_cnt =
atomic64_read( atomic64_read(
&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt); &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
cs_counters.ctx_validation_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.validation_drop_cnt);
} }
return copy_to_user(out, &cs_counters, return copy_to_user(out, &cs_counters,
...@@ -406,7 +411,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, ...@@ -406,7 +411,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{ {
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_pll_frequency_info freq_info = {0}; struct hl_pll_frequency_info freq_info = { {0} };
u32 max_size = args->return_size; u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer; void __user *out = (void __user *) (uintptr_t) args->return_pointer;
int rc; int rc;
......
...@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev, ...@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev,
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
/* we set an EB since we must make sure all operations are done
* when sending the signal
*/
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
cs_cmpl->hw_sob->sob_id, 0); cs_cmpl->hw_sob->sob_id, 0, true);
kref_get(&hw_sob->kref); kref_get(&hw_sob->kref);
......
...@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) ...@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
return 0; return 0;
if (val & PCI_CONFIG_ELBI_STS_ERR) { if (val & PCI_CONFIG_ELBI_STS_ERR)
dev_err(hdev->dev, "Error writing to ELBI\n");
return -EIO; return -EIO;
}
if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
dev_err(hdev->dev, "ELBI write didn't finish in time\n"); dev_err(hdev->dev, "ELBI write didn't finish in time\n");
...@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) ...@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
dbi_offset = addr & 0xFFF; dbi_offset = addr & 0xFFF;
rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, * in case the firmware security is enabled
*/
hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
data); data);
if (rc) if (rc)
...@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, ...@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
/* Return the DBI window to the default location */ /* Return the DBI window to the default location
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); * in case the firmware security is enabled
*/
hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
if (rc) if (rc)
dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
...@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, ...@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
/* Enable */ /* Enable */
rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
/* Return the DBI window to the default location */ /* Return the DBI window to the default location
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); * in case the firmware security is enabled
*/
hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
return rc; return rc;
} }
......
...@@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { ...@@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
}; };
static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
[CPU_PLL] = mmPSOC_CPU_PLL_NR,
[PCI_PLL] = mmPSOC_PCI_PLL_NR,
[SRAM_PLL] = mmSRAM_W_PLL_NR,
[HBM_PLL] = mmPSOC_HBM_PLL_NR,
[NIC_PLL] = mmNIC0_PLL_NR,
[DMA_PLL] = mmDMA_W_PLL_NR,
[MESH_PLL] = mmMESH_W_PLL_NR,
[MME_PLL] = mmPSOC_MME_PLL_NR,
[TPC_PLL] = mmPSOC_TPC_PLL_NR,
[IF_PLL] = mmIF_W_PLL_NR
};
static inline bool validate_packet_id(enum packet_id id) static inline bool validate_packet_id(enum packet_id id)
{ {
switch (id) { switch (id) {
...@@ -374,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev); ...@@ -374,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size); u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev, static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop); struct hl_gen_wait_properties *prop);
...@@ -667,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev) ...@@ -667,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
if (rc) if (rc)
goto free_queue_props; goto free_queue_props;
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
/* Before continuing in the initialization, we need to read the preboot /* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware * version to determine whether we run with a security-enabled firmware
*/ */
...@@ -685,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev) ...@@ -685,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
goto pci_fini; goto pci_fini;
} }
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
return 0; return 0;
pci_fini: pci_fini:
...@@ -703,41 +690,25 @@ static int gaudi_early_fini(struct hl_device *hdev) ...@@ -703,41 +690,25 @@ static int gaudi_early_fini(struct hl_device *hdev)
} }
/** /**
* gaudi_fetch_pll_frequency - Fetch PLL frequency values * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
* *
* @hdev: pointer to hl_device structure * @hdev: pointer to hl_device structure
* @pll_index: index of the pll to fetch frequency from
* @pll_freq: pointer to store the pll frequency in MHz in each of the available
* outputs. if a certain output is not available a 0 will be set
* *
*/ */
static int gaudi_fetch_pll_frequency(struct hl_device *hdev, static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
enum gaudi_pll_index pll_index,
u16 *pll_freq_arr)
{ {
u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel, struct asic_fixed_properties *prop = &hdev->asic_prop;
pll_base_addr = gaudi_pll_base_addresses[pll_index]; u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
u16 freq = 0; u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
int i, rc; int rc;
if (hdev->asic_prop.fw_security_status_valid &&
(hdev->asic_prop.fw_app_security_map &
CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
if (rc) if (hdev->asic_prop.fw_security_disabled) {
return rc;
} else if (hdev->asic_prop.fw_security_disabled) {
/* Backward compatibility */ /* Backward compatibility */
nr = RREG32(pll_base_addr + PLL_NR_OFFSET); div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
nf = RREG32(pll_base_addr + PLL_NF_OFFSET); div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
od = RREG32(pll_base_addr + PLL_OD_OFFSET); nr = RREG32(mmPSOC_CPU_PLL_NR);
nf = RREG32(mmPSOC_CPU_PLL_NF);
for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) { od = RREG32(mmPSOC_CPU_PLL_OD);
div_fctr = RREG32(pll_base_addr +
PLL_DIV_FACTOR_0_OFFSET + i * 4);
div_sel = RREG32(pll_base_addr +
PLL_DIV_SEL_0_OFFSET + i * 4);
if (div_sel == DIV_SEL_REF_CLK || if (div_sel == DIV_SEL_REF_CLK ||
div_sel == DIV_SEL_DIVIDED_REF) { div_sel == DIV_SEL_DIVIDED_REF) {
...@@ -757,39 +728,22 @@ static int gaudi_fetch_pll_frequency(struct hl_device *hdev, ...@@ -757,39 +728,22 @@ static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
dev_warn(hdev->dev, dev_warn(hdev->dev,
"Received invalid div select value: %d", "Received invalid div select value: %d",
div_sel); div_sel);
} freq = 0;
pll_freq_arr[i] = freq;
} }
} else { } else {
dev_err(hdev->dev, "Failed to fetch PLL frequency values\n"); rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
return -EIO;
}
return 0;
}
/**
* gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
*
* @hdev: pointer to hl_device structure
*
*/
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u16 pll_freq[HL_PLL_NUM_OUTPUTS];
int rc;
rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
if (rc) if (rc)
return rc; return rc;
prop->psoc_timestamp_frequency = pll_freq[2]; freq = pll_freq_arr[2];
prop->psoc_pci_pll_nr = 0; }
prop->psoc_pci_pll_nf = 0;
prop->psoc_pci_pll_od = 0; prop->psoc_timestamp_frequency = freq;
prop->psoc_pci_pll_div_factor = 0; prop->psoc_pci_pll_nr = nr;
prop->psoc_pci_pll_nf = nf;
prop->psoc_pci_pll_od = od;
prop->psoc_pci_pll_div_factor = div_fctr;
return 0; return 0;
} }
...@@ -884,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev) ...@@ -884,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
size_t fw_size; size_t fw_size;
void *cpu_addr; void *cpu_addr;
dma_addr_t dma_handle; dma_addr_t dma_handle;
int rc; int rc, count = 5;
again:
rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
if (rc == -EINTR && count-- > 0) {
msleep(50);
goto again;
}
if (rc) { if (rc) {
dev_err(hdev->dev, "Firmware file %s is not found!\n", dev_err(hdev->dev, "Failed to load firmware file %s\n",
GAUDI_TPC_FW_FILE); GAUDI_TPC_FW_FILE);
goto out; goto out;
} }
...@@ -1110,7 +1070,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, ...@@ -1110,7 +1070,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
prop->collective_sob_id, queue_id); prop->collective_sob_id, queue_id);
cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
prop->collective_sob_id, cb_size); prop->collective_sob_id, cb_size, false);
} }
static void gaudi_collective_wait_init_cs(struct hl_cs *cs) static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
...@@ -2449,8 +2409,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) ...@@ -2449,8 +2409,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
gaudi_init_e2e(hdev); gaudi_init_e2e(hdev);
gaudi_init_hbm_cred(hdev); gaudi_init_hbm_cred(hdev);
hdev->asic_funcs->disable_clock_gating(hdev);
for (tpc_id = 0, tpc_offset = 0; for (tpc_id = 0, tpc_offset = 0;
tpc_id < TPC_NUMBER_OF_ENGINES; tpc_id < TPC_NUMBER_OF_ENGINES;
tpc_id++, tpc_offset += TPC_CFG_OFFSET) { tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
...@@ -3462,6 +3420,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) ...@@ -3462,6 +3420,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
if (hdev->in_debug) if (hdev->in_debug)
return; return;
if (!hdev->asic_prop.fw_security_disabled)
return;
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
enable = !!(hdev->clock_gating_mask & enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i]))); (BIT_ULL(gaudi_dma_assignment[i])));
...@@ -3513,7 +3474,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) ...@@ -3513,7 +3474,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
u32 qman_offset; u32 qman_offset;
int i; int i;
if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) if (!hdev->asic_prop.fw_security_disabled)
return; return;
for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
...@@ -3806,7 +3767,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) ...@@ -3806,7 +3767,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
static void gaudi_pre_hw_init(struct hl_device *hdev) static void gaudi_pre_hw_init(struct hl_device *hdev)
{ {
/* Perform read from the device to make sure device is up */ /* Perform read from the device to make sure device is up */
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); RREG32(mmHW_STATE);
if (hdev->asic_prop.fw_security_disabled) { if (hdev->asic_prop.fw_security_disabled) {
/* Set the access through PCI bars (Linux driver only) as /* Set the access through PCI bars (Linux driver only) as
...@@ -3847,6 +3808,13 @@ static int gaudi_hw_init(struct hl_device *hdev) ...@@ -3847,6 +3808,13 @@ static int gaudi_hw_init(struct hl_device *hdev)
return rc; return rc;
} }
/* In case the clock gating was enabled in preboot we need to disable
* it here before touching the MME/TPC registers.
* There is no need to take clk gating mutex because when this function
* runs, no other relevant code can run
*/
hdev->asic_funcs->disable_clock_gating(hdev);
/* SRAM scrambler must be initialized after CPU is running from HBM */ /* SRAM scrambler must be initialized after CPU is running from HBM */
gaudi_init_scrambler_sram(hdev); gaudi_init_scrambler_sram(hdev);
...@@ -3885,7 +3853,7 @@ static int gaudi_hw_init(struct hl_device *hdev) ...@@ -3885,7 +3853,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
} }
/* Perform read from the device to flush all configuration */ /* Perform read from the device to flush all configuration */
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); RREG32(mmHW_STATE);
return 0; return 0;
...@@ -3927,6 +3895,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) ...@@ -3927,6 +3895,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
/* I don't know what is the state of the CPU so make sure it is /* I don't know what is the state of the CPU so make sure it is
* stopped in any means necessary * stopped in any means necessary
*/ */
if (hdev->asic_prop.hard_reset_done_by_fw)
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
else
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
...@@ -3971,11 +3942,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) ...@@ -3971,11 +3942,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
}
dev_info(hdev->dev, dev_info(hdev->dev,
"Issued HARD reset command, going to wait %dms\n", "Issued HARD reset command, going to wait %dms\n",
reset_timeout_ms); reset_timeout_ms);
} else {
dev_info(hdev->dev,
"Firmware performs HARD reset, going to wait %dms\n",
reset_timeout_ms);
}
/* /*
* After hard reset, we can't poll the BTM_FSM register because the PSOC * After hard reset, we can't poll the BTM_FSM register because the PSOC
...@@ -7936,7 +7911,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) ...@@ -7936,7 +7911,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
} }
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size) u32 size, bool eb)
{ {
struct hl_cb *cb = (struct hl_cb *) data; struct hl_cb *cb = (struct hl_cb *) data;
struct packet_msg_short *pkt; struct packet_msg_short *pkt;
...@@ -7953,7 +7928,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, ...@@ -7953,7 +7928,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
......
...@@ -105,13 +105,6 @@ ...@@ -105,13 +105,6 @@
#define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE) #define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE)
#define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE) #define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
#define PLL_NR_OFFSET 0
#define PLL_NF_OFFSET (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
#define PLL_OD_OFFSET (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
#define PLL_DIV_FACTOR_0_OFFSET (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
mmPSOC_CPU_PLL_NR)
#define PLL_DIV_SEL_0_OFFSET (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
#define NUM_OF_SOB_IN_BLOCK \ #define NUM_OF_SOB_IN_BLOCK \
(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2) mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "../include/gaudi/gaudi_coresight.h" #include "../include/gaudi/gaudi_coresight.h"
#include "../include/gaudi/asic_reg/gaudi_regs.h" #include "../include/gaudi/asic_reg/gaudi_regs.h"
#include "../include/gaudi/gaudi_masks.h" #include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include <uapi/misc/habanalabs.h> #include <uapi/misc/habanalabs.h>
#define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET
...@@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) ...@@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data)
} }
/* Perform read from the device to flush all configuration */ /* Perform read from the device to flush all configuration */
RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); RREG32(mmHW_STATE);
return rc; return rc;
} }
......
...@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev) ...@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
if (rc) if (rc)
goto free_queue_props; goto free_queue_props;
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
/* Before continuing in the initialization, we need to read the preboot /* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware * version to determine whether we run with a security-enabled firmware
*/ */
...@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev) ...@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
goto pci_fini; goto pci_fini;
} }
if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_info(hdev->dev,
"H/W state is dirty, must reset before initializing\n");
hdev->asic_funcs->hw_fini(hdev, true);
}
if (!hdev->pldm) { if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
...@@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure) ...@@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
static void goya_fetch_psoc_frequency(struct hl_device *hdev) static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{ {
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 trace_freq = 0; u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
u32 pll_clk = 0; u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); int rc;
u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
u32 nr = RREG32(mmPSOC_PCI_PLL_NR); if (hdev->asic_prop.fw_security_disabled) {
u32 nf = RREG32(mmPSOC_PCI_PLL_NF); div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
u32 od = RREG32(mmPSOC_PCI_PLL_OD); div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
nr = RREG32(mmPSOC_PCI_PLL_NR);
if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { nf = RREG32(mmPSOC_PCI_PLL_NF);
od = RREG32(mmPSOC_PCI_PLL_OD);
if (div_sel == DIV_SEL_REF_CLK ||
div_sel == DIV_SEL_DIVIDED_REF) {
if (div_sel == DIV_SEL_REF_CLK) if (div_sel == DIV_SEL_REF_CLK)
trace_freq = PLL_REF_CLK; freq = PLL_REF_CLK;
else else
trace_freq = PLL_REF_CLK / (div_fctr + 1); freq = PLL_REF_CLK / (div_fctr + 1);
} else if (div_sel == DIV_SEL_PLL_CLK || } else if (div_sel == DIV_SEL_PLL_CLK ||
div_sel == DIV_SEL_DIVIDED_PLL) { div_sel == DIV_SEL_DIVIDED_PLL) {
pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); pll_clk = PLL_REF_CLK * (nf + 1) /
((nr + 1) * (od + 1));
if (div_sel == DIV_SEL_PLL_CLK) if (div_sel == DIV_SEL_PLL_CLK)
trace_freq = pll_clk; freq = pll_clk;
else else
trace_freq = pll_clk / (div_fctr + 1); freq = pll_clk / (div_fctr + 1);
} else { } else {
dev_warn(hdev->dev, dev_warn(hdev->dev,
"Received invalid div select value: %d", div_sel); "Received invalid div select value: %d",
div_sel);
freq = 0;
}
} else {
rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
if (rc)
return;
freq = pll_freq_arr[1];
} }
prop->psoc_timestamp_frequency = trace_freq; prop->psoc_timestamp_frequency = freq;
prop->psoc_pci_pll_nr = nr; prop->psoc_pci_pll_nr = nr;
prop->psoc_pci_pll_nf = nf; prop->psoc_pci_pll_nf = nf;
prop->psoc_pci_pll_od = od; prop->psoc_pci_pll_od = od;
...@@ -5324,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev) ...@@ -5324,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
} }
static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size) u32 size, bool eb)
{ {
return 0; return 0;
} }
......
...@@ -145,11 +145,15 @@ ...@@ -145,11 +145,15 @@
* implemented. This means that FW will * implemented. This means that FW will
* perform hard reset procedure on * perform hard reset procedure on
* receiving the halt-machine event. * receiving the halt-machine event.
* Initialized in: linux * Initialized in: preboot, u-boot, linux
* *
* CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled.
* Initialized in: linux * Initialized in: linux
* *
* CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled.
* FW initialized Clock Gating.
* Initialized in: preboot
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the * This is a main indication that the
* running FW populates the device status * running FW populates the device status
...@@ -171,6 +175,7 @@ ...@@ -171,6 +175,7 @@
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9) #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10) #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) #define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status { enum cpu_boot_status {
...@@ -204,6 +209,8 @@ enum kmd_msg { ...@@ -204,6 +209,8 @@ enum kmd_msg {
KMD_MSG_GOTO_WFE, KMD_MSG_GOTO_WFE,
KMD_MSG_FIT_RDY, KMD_MSG_FIT_RDY,
KMD_MSG_SKIP_BMC, KMD_MSG_SKIP_BMC,
RESERVED,
KMD_MSG_RST_DEV,
}; };
enum cpu_msg_status { enum cpu_msg_status {
......
...@@ -279,6 +279,7 @@ enum hl_device_status { ...@@ -279,6 +279,7 @@ enum hl_device_status {
* HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
*/ */
#define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1 #define HL_INFO_HW_EVENTS 1
...@@ -425,6 +426,8 @@ struct hl_info_sync_manager { ...@@ -425,6 +426,8 @@ struct hl_info_sync_manager {
* @ctx_device_in_reset_drop_cnt: context dropped due to device in reset * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
* @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
* @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
* @total_validation_drop_cnt: total dropped due to validation error
* @ctx_validation_drop_cnt: context dropped due to validation error
*/ */
struct hl_info_cs_counters { struct hl_info_cs_counters {
__u64 total_out_of_mem_drop_cnt; __u64 total_out_of_mem_drop_cnt;
...@@ -437,6 +440,8 @@ struct hl_info_cs_counters { ...@@ -437,6 +440,8 @@ struct hl_info_cs_counters {
__u64 ctx_device_in_reset_drop_cnt; __u64 ctx_device_in_reset_drop_cnt;
__u64 total_max_cs_in_flight_drop_cnt; __u64 total_max_cs_in_flight_drop_cnt;
__u64 ctx_max_cs_in_flight_drop_cnt; __u64 ctx_max_cs_in_flight_drop_cnt;
__u64 total_validation_drop_cnt;
__u64 ctx_validation_drop_cnt;
}; };
enum gaudi_dcores { enum gaudi_dcores {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment