Commit 20faaeec authored by Ohad Sharabi, committed by Oded Gabbay

habanalabs: add uapi to flush inbound HBM transactions

When doing p2p with a NIC device, the NIC needs to make sure all the
writes to the HBM (through the PCI bar of the Gaudi device) were
flushed.

It can be done by either the NIC or the host reading through the PCI
bar.

To support the host side, we supply a simple uapi to perform this flush
through the driver, because the user can't create such a transaction
by itself (the PCI bar isn't exposed to normal users).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent e65e175b
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
#define MAX_TS_ITER_NUM 10 #define MAX_TS_ITER_NUM 10
...@@ -1295,6 +1296,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) ...@@ -1295,6 +1296,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_UNRESERVE_SIGNALS; return CS_UNRESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
return CS_TYPE_ENGINE_CORE; return CS_TYPE_ENGINE_CORE;
else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
return CS_TYPE_FLUSH_PCI_HBW_WRITES;
else else
return CS_TYPE_DEFAULT; return CS_TYPE_DEFAULT;
} }
...@@ -2443,6 +2446,21 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, ...@@ -2443,6 +2446,21 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
return rc; return rc;
} }
/*
 * cs_ioctl_flush_pci_hbw_writes() - serve a "flush PCI HBW writes" CS request.
 * @hpriv: per-file private data; only its hdev pointer is used here.
 *
 * Issues a single read of the register stored in the ASIC fixed property
 * hbw_flush_reg and throws the value away; the read is done purely for its
 * side effect. A register value of 0 means the ASIC did not provide a flush
 * register, and the request is rejected.
 *
 * Return: 0 after the read was issued, -EOPNOTSUPP if hbw_flush_reg is 0.
 */
static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{
	/*
	 * NOTE(review): keep this local named "hdev" - RREG32() is invoked with
	 * only the register, so it presumably takes the device from scope.
	 */
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *fixed_props = &hdev->asic_prop;

	if (fixed_props->hbw_flush_reg) {
		/* The value read is irrelevant; only the read itself matters. */
		RREG32(fixed_props->hbw_flush_reg);
		return 0;
	}

	dev_dbg(hdev->dev, "HBW flush is not supported\n");
	return -EOPNOTSUPP;
}
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
union hl_cs_args *args = data; union hl_cs_args *args = data;
...@@ -2499,6 +2517,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2499,6 +2517,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
args->in.num_engine_cores, args->in.core_command); args->in.num_engine_cores, args->in.core_command);
break; break;
case CS_TYPE_FLUSH_PCI_HBW_WRITES:
rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
break;
default: default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags, args->in.cs_flags,
......
...@@ -375,7 +375,8 @@ enum hl_cs_type { ...@@ -375,7 +375,8 @@ enum hl_cs_type {
CS_TYPE_COLLECTIVE_WAIT, CS_TYPE_COLLECTIVE_WAIT,
CS_RESERVE_SIGNALS, CS_RESERVE_SIGNALS,
CS_UNRESERVE_SIGNALS, CS_UNRESERVE_SIGNALS,
CS_TYPE_ENGINE_CORE CS_TYPE_ENGINE_CORE,
CS_TYPE_FLUSH_PCI_HBW_WRITES,
}; };
/* /*
...@@ -644,6 +645,8 @@ struct hl_hints_range { ...@@ -644,6 +645,8 @@ struct hl_hints_range {
* (i.e. the DRAM supports multiple page sizes), otherwise * (i.e. the DRAM supports multiple page sizes), otherwise
* it shall be equal to dram_page_size. * it shall be equal to dram_page_size.
* @num_engine_cores: number of engine cpu cores * @num_engine_cores: number of engine cpu cores
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @collective_first_sob: first sync object available for collective use * @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use * @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use * @sync_stream_first_sob: first sync object available for sync stream use
...@@ -764,6 +767,7 @@ struct asic_fixed_properties { ...@@ -764,6 +767,7 @@ struct asic_fixed_properties {
u32 xbar_edge_enabled_mask; u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size; u32 device_mem_alloc_default_page_size;
u32 num_engine_cores; u32 num_engine_cores;
u32 hbw_flush_reg;
u16 collective_first_sob; u16 collective_first_sob;
u16 collective_first_mon; u16 collective_first_mon;
u16 sync_stream_first_sob; u16 sync_stream_first_sob;
......
...@@ -701,6 +701,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) ...@@ -701,6 +701,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->dma_mask = 48; prop->dma_mask = 48;
prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
return 0; return 0;
} }
......
...@@ -2071,6 +2071,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) ...@@ -2071,6 +2071,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->dma_mask = 64; prop->dma_mask = 64;
prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
return 0; return 0;
} }
......
...@@ -320,4 +320,6 @@ ...@@ -320,4 +320,6 @@
#define mmPSOC_TPC_PLL_NR 0xC73100 #define mmPSOC_TPC_PLL_NR 0xC73100
#define mmIF_W_PLL_NR 0x488100 #define mmIF_W_PLL_NR 0x488100
#define mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL 0xC01208
#endif /* ASIC_REG_GAUDI_REGS_H_ */ #endif /* ASIC_REG_GAUDI_REGS_H_ */
...@@ -1478,6 +1478,14 @@ struct hl_cs_chunk { ...@@ -1478,6 +1478,14 @@ struct hl_cs_chunk {
*/ */
#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 #define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000
/*
* The flush of HBW PCI writes is merged into the existing CS ioctls.
* It is used to flush all HBW PCI writes.
* This is a blocking operation, and for this reason the user shall not use
* the returned sequence number (which will be invalid anyway).
*/
#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000
#define HL_CS_STATUS_SUCCESS 0 #define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512 #define HL_MAX_JOBS_PER_CS 512
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment