Commit 64a7e295 authored by Omer Shpigelman, committed by Oded Gabbay

habanalabs: split the host MMU properties

Host memory may be allocated with huge pages.
A different virtual range may be used for mapping in this case.
Add Huge PCI MMU (HPMMU) properties to support it.
This patch is a prerequisite for supporting future ASICs and has no
effect on the Goya ASIC, as it currently uses a single virtual host
range for all page sizes.
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 240c92fd
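
For orientation before the diff: the patch replaces the four flat
va_space_* properties with per-MMU start_addr/end_addr fields and adds a
third property set, pmmu_huge, for host mappings backed by huge pages.
The C sketch below uses simplified, hypothetical types (not the driver's
own definitions) to illustrate the selection rule the patched
hl_mmu_map()/hl_mmu_unmap() paths apply.

/* Minimal sketch with stand-in types; the real driver keeps these as
 * struct hl_mmu_properties members of struct asic_fixed_properties.
 */
#include <stdbool.h>
#include <stdint.h>

struct mmu_props_sketch {
        uint64_t start_addr;   /* virtual start address of the region */
        uint64_t end_addr;     /* virtual end address of the region */
        uint32_t page_size;    /* page size used for this region */
};

struct props_sketch {
        struct mmu_props_sketch dmmu;      /* DRAM MMU */
        struct mmu_props_sketch pmmu;      /* host MMU, regular pages */
        struct mmu_props_sketch pmmu_huge; /* host MMU, huge pages */
};

/* Selection rule mirrored from the patch: DRAM addresses use dmmu; host
 * mappings whose size is a multiple of the huge page size use pmmu_huge;
 * everything else uses pmmu. Assumes pmmu_huge.page_size is non-zero.
 */
static const struct mmu_props_sketch *
choose_mmu_props(const struct props_sketch *prop, bool is_dram_addr,
                 uint32_t page_size)
{
        if (is_dram_addr)
                return &prop->dmmu;
        else if ((page_size % prop->pmmu_huge.page_size) == 0)
                return &prop->pmmu_huge;
        else
                return &prop->pmmu;
}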
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-						prop->va_space_dram_start_address,
-						prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 		goto out;
 
 	if (hdev->dram_supports_virtual_memory &&
-		addr >= prop->va_space_dram_start_address &&
-		addr < prop->va_space_dram_end_address)
+		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
 		return true;
 
-	if (addr >= prop->va_space_host_start_address &&
-		addr < prop->va_space_host_end_address)
+	if (addr >= prop->pmmu.start_addr &&
+		addr < prop->pmmu.end_addr)
+		return true;
+
+	if (addr >= prop->pmmu_huge.start_addr &&
+		addr < prop->pmmu_huge.end_addr)
 		return true;
 out:
 	return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 	}
 
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-						prop->va_space_dram_start_address,
-						prop->va_space_dram_end_address);
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	mutex_lock(&ctx->mmu_lock);
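The checks above lean on hl_mem_area_inside_range(). Assuming it returns
true when a size-byte area starting at a given address lies entirely
inside [range_start, range_end] without wrapping around, a standalone
sketch of that predicate looks like this (names are illustrative, not
the driver's exact implementation):

#include <stdbool.h>
#include <stdint.h>

/* True iff [address, address + size) fits inside the range and the end
 * does not wrap around past the top of the 64-bit address space.
 */
static bool mem_area_inside_range(uint64_t address, uint32_t size,
                                  uint64_t range_start, uint64_t range_end)
{
        uint64_t end_address = address + size;

        return (address >= range_start) && (end_address <= range_end) &&
               (end_address > address);
}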
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->dmmu.hop2_mask = HOP2_MASK;
 	prop->dmmu.hop3_mask = HOP3_MASK;
 	prop->dmmu.hop4_mask = HOP4_MASK;
-	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+	prop->dmmu.start_addr = VA_DDR_SPACE_START;
+	prop->dmmu.end_addr = VA_DDR_SPACE_END;
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
 
-	/* No difference between PMMU and DMMU except of page size */
+	/* shifts and masks are the same in PMMU and DMMU */
 	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
-	prop->dmmu.page_size = PAGE_SIZE_2MB;
+	prop->pmmu.start_addr = VA_HOST_SPACE_START;
+	prop->pmmu.end_addr = VA_HOST_SPACE_END;
 	prop->pmmu.page_size = PAGE_SIZE_4KB;
 
-	prop->va_space_host_start_address = VA_HOST_SPACE_START;
-	prop->va_space_host_end_address = VA_HOST_SPACE_END;
-	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
-	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
-	prop->dram_size_for_default_page_mapping =
-			prop->va_space_dram_end_address;
+	/* PMMU and HPMMU are the same except of page size */
+	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
 	prop->cfg_size = CFG_SIZE;
 	prop->max_asid = MAX_ASID;
 	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 	/*
 	 * WA for HW-23.
 	 * We can't allow user to read from Host using QMANs other than 1.
+	 * PMMU and HPMMU addresses are equal, check only one of them.
 	 */
 	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
 		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
 				le32_to_cpu(user_dma_pkt->tsize),
-				hdev->asic_prop.va_space_host_start_address,
-				hdev->asic_prop.va_space_host_end_address)) {
+				hdev->asic_prop.pmmu.start_addr,
+				hdev->asic_prop.pmmu.end_addr)) {
 		dev_err(hdev->dev,
 			"Can't DMA from host on queue other then 1\n");
 		return -EFAULT;
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
 	u64 range_start, range_end;
 
 	if (hdev->mmu_enable) {
-		range_start = prop->va_space_dram_start_address;
-		range_end = prop->va_space_dram_end_address;
+		range_start = prop->dmmu.start_addr;
+		range_end = prop->dmmu.end_addr;
 	} else {
 		range_start = prop->dram_user_base_address;
 		range_end = prop->dram_end_address;
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -132,6 +132,8 @@ enum hl_device_hw_state {
 
 /**
  * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
  * @hop0_shift: shift of hop 0 mask.
  * @hop1_shift: shift of hop 1 mask.
  * @hop2_shift: shift of hop 2 mask.
@@ -143,9 +145,10 @@ enum hl_device_hw_state {
  * @hop3_mask: mask to get the PTE address in hop 3.
  * @hop4_mask: mask to get the PTE address in hop 4.
  * @page_size: default page size used to allocate memory.
- * @huge_page_size: page size used to allocate memory with huge pages.
  */
 struct hl_mmu_properties {
+	u64	start_addr;
+	u64	end_addr;
 	u64	hop0_shift;
 	u64	hop1_shift;
 	u64	hop2_shift;
@@ -157,7 +160,6 @@ struct hl_mmu_properties {
 	u64	hop3_mask;
 	u64	hop4_mask;
 	u32	page_size;
-	u32	huge_page_size;
 };
 
 /**
@@ -169,6 +171,8 @@ struct hl_mmu_properties {
  * @preboot_ver: F/W Preboot version.
  * @dmmu: DRAM MMU address translation properties.
  * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ *             allocated with huge pages.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -178,14 +182,6 @@ struct hl_mmu_properties {
  * @dram_size: DRAM total size.
  * @dram_pci_bar_size: size of PCI bar towards DRAM.
  * @max_power_default: max power of the device after reset
- * @va_space_host_start_address: base address of virtual memory range for
- *                               mapping host memory.
- * @va_space_host_end_address: end address of virtual memory range for
- *                             mapping host memory.
- * @va_space_dram_start_address: base address of virtual memory range for
- *                               mapping DRAM memory.
- * @va_space_dram_end_address: end address of virtual memory range for
- *                             mapping DRAM memory.
  * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
  *                                      fault.
  * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
@@ -218,6 +214,7 @@ struct asic_fixed_properties {
 	char				preboot_ver[VERSION_MAX_LEN];
 	struct hl_mmu_properties	dmmu;
 	struct hl_mmu_properties	pmmu;
+	struct hl_mmu_properties	pmmu_huge;
 	u64				sram_base_address;
 	u64				sram_end_address;
 	u64				sram_user_base_address;
@@ -227,10 +224,6 @@ struct asic_fixed_properties {
 	u64				dram_size;
 	u64				dram_pci_bar_size;
 	u64				max_power_default;
-	u64				va_space_host_start_address;
-	u64				va_space_host_end_address;
-	u64				va_space_dram_start_address;
-	u64				va_space_dram_end_address;
 	u64				dram_size_for_default_page_mapping;
 	u64				pcie_dbi_base_address;
 	u64				pcie_aux_dbi_reg_addr;
@@ -658,6 +651,8 @@ struct hl_va_range {
  *		this hits 0l. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of DMA fence objects representing pending CS.
  * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ *                      with huge pages.
  * @dram_va_range: holds available virtual addresses for DRAM mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
@@ -688,8 +683,9 @@ struct hl_ctx {
 	struct hl_device	*hdev;
 	struct kref		refcount;
 	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
-	struct hl_va_range	host_va_range;
-	struct hl_va_range	dram_va_range;
+	struct hl_va_range	*host_va_range;
+	struct hl_va_range	*host_huge_va_range;
+	struct hl_va_range	*dram_va_range;
 	struct mutex		mem_hash_lock;
 	struct mutex		mmu_lock;
 	struct list_head	debugfs_list;
@@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts {
  *                        otherwise.
  * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
  * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ *                   huge pages.
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
@@ -1372,6 +1370,7 @@ struct hl_device {
 	u8				reset_on_lockup;
 	u8				dram_supports_virtual_memory;
 	u8				dram_default_page_mapping;
+	u8				pmmu_huge_range;
 	u8				init_done;
 	u8				device_cpu_disabled;
 	u8				dma_mask;
[One file's diff is collapsed and not shown here.]
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
 	return phys_hop_addr + pte_offset;
 }
 
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+					prop->dmmu.start_addr,
+					prop->dmmu.end_addr);
+}
+
 static int dram_default_mapping_init(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 		curr_pte;
 	bool is_huge, clear_hop3 = true;
 
+	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	hop0_addr = get_hop0_addr(ctx);
@@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	if (!hdev->mmu_enable)
 		return 0;
 
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;
 
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and unmap them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);
 
 		return -EFAULT;
 	}
@@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		hop4_new = false, is_huge;
 	int rc = -ENOMEM;
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
 	/*
 	 * This mapping function can map a page or a huge page. For huge page
 	 * there are only 3 hops rather than 4. Currently the DRAM allocation
@@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	 * one of the two page sizes. Since this is a common code for all the
 	 * three cases, we need this hugs page check.
 	 */
-	is_huge = page_size == mmu_prop->huge_page_size;
-
-	if (is_dram_addr && !is_huge) {
-		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
-		return -EFAULT;
+	if (is_dram_addr) {
+		mmu_prop = &prop->dmmu;
+		is_huge = true;
+	} else if (page_size == prop->pmmu_huge.page_size) {
+		mmu_prop = &prop->pmmu_huge;
+		is_huge = true;
+	} else {
+		mmu_prop = &prop->pmmu;
+		is_huge = false;
 	}
 
 	hop0_addr = get_hop0_addr(ctx);
@@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	if (!hdev->mmu_enable)
 		return 0;
 
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_dram_addr = is_dram_va(hdev, virt_addr);
 
-	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+	if (is_dram_addr)
+		mmu_prop = &prop->dmmu;
+	else if ((page_size % prop->pmmu_huge.page_size) == 0)
+		mmu_prop = &prop->pmmu_huge;
+	else
+		mmu_prop = &prop->pmmu;
 
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and map them separately.
 	 */
-	if ((page_size % mmu_prop->huge_page_size) == 0) {
-		real_page_size = mmu_prop->huge_page_size;
-	} else if ((page_size % mmu_prop->page_size) == 0) {
+	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
-			page_size,
-			mmu_prop->page_size >> 10,
-			mmu_prop->huge_page_size >> 20);
+			"page size of %u is not %uKB aligned, can't unmap\n",
+			page_size, mmu_prop->page_size >> 10);
 
 		return -EFAULT;
 	}
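As the comments in hl_mmu_map()/hl_mmu_unmap() note, the H/W maps fixed
page sizes, so a larger, aligned request is broken into real_page_size
chunks. A minimal sketch of that splitting loop follows; map_one() is a
hypothetical stand-in for the per-page _hl_mmu_map() call, and the real
driver additionally unwinds already-created mappings on failure.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the per-page mapping call. */
static int map_one(uint64_t virt, uint64_t phys, uint32_t size)
{
        printf("map 0x%llx -> 0x%llx (%u bytes)\n",
               (unsigned long long)virt, (unsigned long long)phys, size);
        return 0;
}

/* Split an aligned request into hardware-sized sub-pages and map each
 * one; the caller has already verified page_size % real_page_size == 0.
 */
static int map_range(uint64_t virt, uint64_t phys, uint32_t page_size,
                     uint32_t real_page_size)
{
        uint32_t npages = page_size / real_page_size;
        uint32_t i;
        int rc;

        for (i = 0; i < npages; i++) {
                rc = map_one(virt, phys, real_page_size);
                if (rc)
                        return rc;
                virt += real_page_size;
                phys += real_page_size;
        }

        return 0;
}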