Commit e6128a8e, authored by Ard Biesheuvel, committed by Catalin Marinas

arm64: mm: Use 48-bit virtual addressing for the permanent ID map

Even though we support loading kernels anywhere in 48-bit addressable
physical memory, we create the ID maps based on the number of levels
that we happened to configure for the kernel VA and user VA spaces.

The reason for this is that the PGD/PUD/PMD based classification of
translation levels, along with the associated folding when the number of
levels is less than 5, does not permit creating a page table hierarchy
of a set number of levels. This means that, for instance, on 39-bit VA
kernels we need to configure an additional level above PGD level on the
fly, and 36-bit VA kernels still only support 47-bit virtual addressing
with this trick applied.

Now that we have a separate helper to populate page table hierarchies
that does not define the levels in terms of PUDS/PMDS/etc at all, let's
reuse it to create the permanent ID map with a fixed VA size of 48 bits.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20240214122845.2033971-64-ardb+git@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 97a6f43b
...@@ -35,6 +35,9 @@ ...@@ -35,6 +35,9 @@
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#endif #endif
#define IDMAP_VA_BITS 48
#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
/* /*
* A relocatable kernel may execute from an address that differs from the one at * A relocatable kernel may execute from an address that differs from the one at
......
...@@ -729,6 +729,11 @@ SYM_FUNC_START_LOCAL(__no_granule_support) ...@@ -729,6 +729,11 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
SYM_FUNC_END(__no_granule_support) SYM_FUNC_END(__no_granule_support)
SYM_FUNC_START_LOCAL(__primary_switch) SYM_FUNC_START_LOCAL(__primary_switch)
mrs x1, tcr_el1
mov x2, #64 - VA_BITS
tcr_set_t0sz x1, x2
msr tcr_el1, x1
adrp x1, reserved_pg_dir adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir adrp x2, init_idmap_pg_dir
bl __enable_mmu bl __enable_mmu
......
...@@ -1874,16 +1874,9 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) ...@@ -1874,16 +1874,9 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
/* /*
* The ID map may be configured to use an extended virtual address * The ID map is always configured for 48 bits of translation, which
* range. This is only the case if system RAM is out of range for the * may be fewer than the number of VA bits used by the regular kernel
* currently configured page size and VA_BITS_MIN, in which case we will * stage 1, when VA_BITS=52.
* also need the extended virtual range for the HYP ID map, or we won't
* be able to enable the EL2 MMU.
*
* However, in some cases the ID map may be configured for fewer than
* the number of VA bits used by the regular kernel stage 1. This
* happens when VA_BITS=52 and the kernel image is placed in PA space
* below 48 bits.
* *
* At EL2, there is only one TTBR register, and we can't switch between * At EL2, there is only one TTBR register, and we can't switch between
* translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
...@@ -1894,7 +1887,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits) ...@@ -1894,7 +1887,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
* 1 VA bits to assure that the hypervisor can both ID map its code page * 1 VA bits to assure that the hypervisor can both ID map its code page
* and map any kernel memory. * and map any kernel memory.
*/ */
idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); idmap_bits = IDMAP_VA_BITS;
kernel_bits = vabits_actual; kernel_bits = vabits_actual;
*hyp_va_bits = max(idmap_bits, kernel_bits); *hyp_va_bits = max(idmap_bits, kernel_bits);
......
...@@ -757,22 +757,21 @@ static void __init map_kernel(pgd_t *pgdp) ...@@ -757,22 +757,21 @@ static void __init map_kernel(pgd_t *pgdp)
kasan_copy_shadow(pgdp); kasan_copy_shadow(pgdp);
} }
void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
static void __init create_idmap(void) static void __init create_idmap(void)
{ {
u64 start = __pa_symbol(__idmap_text_start); u64 start = __pa_symbol(__idmap_text_start);
u64 size = __pa_symbol(__idmap_text_end) - start; u64 end = __pa_symbol(__idmap_text_end);
pgd_t *pgd = idmap_pg_dir; u64 ptep = __pa_symbol(idmap_ptes);
u64 pgd_phys;
__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
/* check if we need an additional level of translation */ IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { __phys_to_virt(ptep) - ptep);
pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
set_pgd(&idmap_pg_dir[start >> VA_BITS],
__pgd(pgd_phys | P4D_TYPE_TABLE));
pgd = __va(pgd_phys);
}
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
early_pgtable_alloc, 0);
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
extern u32 __idmap_kpti_flag; extern u32 __idmap_kpti_flag;
...@@ -782,8 +781,10 @@ static void __init create_idmap(void) ...@@ -782,8 +781,10 @@ static void __init create_idmap(void)
* The KPTI G-to-nG conversion code needs a read-write mapping * The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map. * of its synchronization flag in the ID map.
*/ */
__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, ptep = __pa_symbol(kpti_ptes);
early_pgtable_alloc, 0); __pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
} }
} }
...@@ -808,6 +809,7 @@ void __init paging_init(void) ...@@ -808,6 +809,7 @@ void __init paging_init(void)
memblock_allow_resize(); memblock_allow_resize();
create_idmap(); create_idmap();
idmap_t0sz = TCR_T0SZ(IDMAP_VA_BITS);
} }
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
......
...@@ -421,9 +421,9 @@ SYM_FUNC_START(__cpu_setup) ...@@ -421,9 +421,9 @@ SYM_FUNC_START(__cpu_setup)
mair .req x17 mair .req x17
tcr .req x16 tcr .req x16
mov_q mair, MAIR_EL1_SET mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS) | TCR_CACHE_FLAGS | \
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
tcr_clear_errata_bits tcr, x9, x5 tcr_clear_errata_bits tcr, x9, x5
...@@ -431,10 +431,7 @@ SYM_FUNC_START(__cpu_setup) ...@@ -431,10 +431,7 @@ SYM_FUNC_START(__cpu_setup)
sub x9, xzr, x0 sub x9, xzr, x0
add x9, x9, #64 add x9, x9, #64
tcr_set_t1sz tcr, x9 tcr_set_t1sz tcr, x9
#else
idmap_get_t0sz x9
#endif #endif
tcr_set_t0sz tcr, x9
/* /*
* Set the IPS bits in TCR_EL1. * Set the IPS bits in TCR_EL1.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment