Commit 64a7e295 authored by Omer Shpigelman's avatar Omer Shpigelman Committed by Oded Gabbay

habanalabs: split the host MMU properties

Host memory may be allocated with huge pages.
A different virtual range may be used for mapping in this case.
Add Huge PCI MMU (HPMMU) properties to support it.
This patch is a prerequisite for future ASICs support and has no effect on
Goya ASIC as currently a single virtual host range is used for all page
sizes.
Signed-off-by: default avatarOmer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent 240c92fd
......@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
prop->dmmu.start_addr,
prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
......@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
goto out;
if (hdev->dram_supports_virtual_memory &&
addr >= prop->va_space_dram_start_address &&
addr < prop->va_space_dram_end_address)
(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
return true;
if (addr >= prop->va_space_host_start_address &&
addr < prop->va_space_host_end_address)
if (addr >= prop->pmmu.start_addr &&
addr < prop->pmmu.end_addr)
return true;
if (addr >= prop->pmmu_huge.start_addr &&
addr < prop->pmmu_huge.end_addr)
return true;
out:
return false;
......@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
}
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
prop->dmmu.start_addr,
prop->dmmu.end_addr);
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock);
......
......@@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->dmmu.hop2_mask = HOP2_MASK;
prop->dmmu.hop3_mask = HOP3_MASK;
prop->dmmu.hop4_mask = HOP4_MASK;
prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
prop->dmmu.start_addr = VA_DDR_SPACE_START;
prop->dmmu.end_addr = VA_DDR_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
/* No difference between PMMU and DMMU except of page size */
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->pmmu.start_addr = VA_HOST_SPACE_START;
prop->pmmu.end_addr = VA_HOST_SPACE_END;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
prop->va_space_dram_end_address = VA_DDR_SPACE_END;
prop->dram_size_for_default_page_mapping =
prop->va_space_dram_end_address;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
......@@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
/*
* WA for HW-23.
* We can't allow user to read from Host using QMANs other than 1.
* PMMU and HPMMU addresses are equal, check only one of them.
*/
if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
le32_to_cpu(user_dma_pkt->tsize),
hdev->asic_prop.va_space_host_start_address,
hdev->asic_prop.va_space_host_end_address)) {
hdev->asic_prop.pmmu.start_addr,
hdev->asic_prop.pmmu.end_addr)) {
dev_err(hdev->dev,
"Can't DMA from host on queue other then 1\n");
return -EFAULT;
......
......@@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
u64 range_start, range_end;
if (hdev->mmu_enable) {
range_start = prop->va_space_dram_start_address;
range_end = prop->va_space_dram_end_address;
range_start = prop->dmmu.start_addr;
range_end = prop->dmmu.end_addr;
} else {
range_start = prop->dram_user_base_address;
range_end = prop->dram_end_address;
......
......@@ -132,6 +132,8 @@ enum hl_device_hw_state {
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @start_addr: virtual start address of the memory region.
* @end_addr: virtual end address of the memory region.
* @hop0_shift: shift of hop 0 mask.
* @hop1_shift: shift of hop 1 mask.
* @hop2_shift: shift of hop 2 mask.
......@@ -143,9 +145,10 @@ enum hl_device_hw_state {
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
* @page_size: default page size used to allocate memory.
* @huge_page_size: page size used to allocate memory with huge pages.
*/
struct hl_mmu_properties {
u64 start_addr;
u64 end_addr;
u64 hop0_shift;
u64 hop1_shift;
u64 hop2_shift;
......@@ -157,7 +160,6 @@ struct hl_mmu_properties {
u64 hop3_mask;
u64 hop4_mask;
u32 page_size;
u32 huge_page_size;
};
/**
......@@ -169,6 +171,8 @@ struct hl_mmu_properties {
* @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties.
* @pmmu_huge: PCI (host) MMU address translation properties for memory
* allocated with huge pages.
* @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access.
......@@ -178,14 +182,6 @@ struct hl_mmu_properties {
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset
* @va_space_host_start_address: base address of virtual memory range for
* mapping host memory.
* @va_space_host_end_address: end address of virtual memory range for
* mapping host memory.
* @va_space_dram_start_address: base address of virtual memory range for
* mapping DRAM memory.
* @va_space_dram_end_address: end address of virtual memory range for
* mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
* @pcie_dbi_base_address: Base address of the PCIE_DBI block.
......@@ -218,6 +214,7 @@ struct asic_fixed_properties {
char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu;
struct hl_mmu_properties pmmu;
struct hl_mmu_properties pmmu_huge;
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
......@@ -227,10 +224,6 @@ struct asic_fixed_properties {
u64 dram_size;
u64 dram_pci_bar_size;
u64 max_power_default;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 pcie_dbi_base_address;
u64 pcie_aux_dbi_reg_addr;
......@@ -658,6 +651,8 @@ struct hl_va_range {
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of DMA fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings.
* @host_huge_va_range: holds available virtual addresses for host mappings
* with huge pages.
* @dram_va_range: holds available virtual addresses for DRAM mappings.
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
......@@ -688,8 +683,9 @@ struct hl_ctx {
struct hl_device *hdev;
struct kref refcount;
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
struct hl_va_range host_va_range;
struct hl_va_range dram_va_range;
struct hl_va_range *host_va_range;
struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range;
struct mutex mem_hash_lock;
struct mutex mmu_lock;
struct list_head debugfs_list;
......@@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts {
* otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled.
* @pmmu_huge_range: is a different virtual addresses range used for PMMU with
* huge pages.
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
......@@ -1372,6 +1370,7 @@ struct hl_device {
u8 reset_on_lockup;
u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping;
u8 pmmu_huge_range;
u8 init_done;
u8 device_cpu_disabled;
u8 dma_mask;
......
This diff is collapsed.
......@@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
return phys_hop_addr + pte_offset;
}
static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->dmmu.start_addr,
prop->dmmu.end_addr);
}
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
......@@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
curr_pte;
bool is_huge, clear_hop3 = true;
/* shifts and masks are the same in PMMU and HPMMU, use one of them */
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
hop0_addr = get_hop0_addr(ctx);
......@@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
if (!hdev->mmu_enable)
return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
is_dram_addr = is_dram_va(hdev, virt_addr);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
if (is_dram_addr)
mmu_prop = &prop->dmmu;
else if ((page_size % prop->pmmu_huge.page_size) == 0)
mmu_prop = &prop->pmmu_huge;
else
mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
*/
if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % mmu_prop->page_size) == 0) {
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
"page size of %u is not %uKB aligned, can't unmap\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
......@@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_new = false, is_huge;
int rc = -ENOMEM;
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
/*
* This mapping function can map a page or a huge page. For huge page
* there are only 3 hops rather than 4. Currently the DRAM allocation
......@@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
* one of the two page sizes. Since this is a common code for all the
* three cases, we need this hugs page check.
*/
is_huge = page_size == mmu_prop->huge_page_size;
if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
return -EFAULT;
if (is_dram_addr) {
mmu_prop = &prop->dmmu;
is_huge = true;
} else if (page_size == prop->pmmu_huge.page_size) {
mmu_prop = &prop->pmmu_huge;
is_huge = true;
} else {
mmu_prop = &prop->pmmu;
is_huge = false;
}
hop0_addr = get_hop0_addr(ctx);
......@@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
if (!hdev->mmu_enable)
return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
is_dram_addr = is_dram_va(hdev, virt_addr);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
if (is_dram_addr)
mmu_prop = &prop->dmmu;
else if ((page_size % prop->pmmu_huge.page_size) == 0)
mmu_prop = &prop->pmmu_huge;
else
mmu_prop = &prop->pmmu;
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
*/
if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % mmu_prop->page_size) == 0) {
if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
"page size of %u is not %uKB aligned, can't unmap\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment