Commit 270315b8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'riscv-for-linus-5.10-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:
 "A handful of cleanups and new features:

   - A handful of cleanups for our page fault handling

   - Improvements to how we fill out cacheinfo

   - Support for EFI-based systems"

* tag 'riscv-for-linus-5.10-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (22 commits)
  RISC-V: Add page table dump support for uefi
  RISC-V: Add EFI runtime services
  RISC-V: Add EFI stub support.
  RISC-V: Add PE/COFF header for EFI stub
  RISC-V: Implement late mapping page table allocation functions
  RISC-V: Add early ioremap support
  RISC-V: Move DT mapping outof fixmap
  RISC-V: Fix duplicate included thread_info.h
  riscv/mm/fault: Set FAULT_FLAG_INSTRUCTION flag in do_page_fault()
  riscv/mm/fault: Fix inline placement in vmalloc_fault() declaration
  riscv: Add cache information in AUX vector
  riscv: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO
  riscv: Set more data to cacheinfo
  riscv/mm/fault: Move access error check to function
  riscv/mm/fault: Move FAULT_FLAG_WRITE handling in do_page_fault()
  riscv/mm/fault: Simplify mm_fault_error()
  riscv/mm/fault: Move fault error handling to mm_fault_error()
  riscv/mm/fault: Simplify fault error handling
  riscv/mm/fault: Move vmalloc fault handling to vmalloc_fault()
  riscv/mm/fault: Move bad area handling to bad_area()
  ...
parents d3876ff7 de22d210
@@ -38,6 +38,7 @@ config RISCV
select GENERIC_ARCH_TOPOLOGY if SMP
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
select GENERIC_IOREMAP
select GENERIC_IRQ_MULTI_HANDLER
@@ -388,6 +389,28 @@ config CMDLINE_FORCE
endchoice
config EFI_STUB
bool
config EFI
bool "UEFI runtime support"
depends on OF
select LIBFDT
select UCS2_STRING
select EFI_PARAMS_FROM_FDT
select EFI_STUB
select EFI_GENERIC_STUB
select EFI_RUNTIME_WRAPPERS
select RISCV_ISA_C
depends on MMU
default y
help
This option provides support for runtime services provided
by UEFI firmware (such as non-volatile variables, realtime
clock, and platform reset). A UEFI stub is also provided to
allow the kernel to be booted as an EFI application. This
is only useful on systems that have UEFI firmware.
endmenu

config BUILTIN_DTB
@@ -400,3 +423,5 @@ menu "Power management options"
source "kernel/power/Kconfig"

endmenu
source "drivers/firmware/Kconfig"
@@ -80,6 +80,7 @@ head-y := arch/riscv/kernel/head.o
core-y += arch/riscv/
libs-y += arch/riscv/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a

PHONY += vdso_install
vdso_install:
......
@@ -130,3 +130,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_EFI=y
# SPDX-License-Identifier: GPL-2.0
generic-y += early_ioremap.h
generic-y += extable.h
generic-y += flat.h
generic-y += kvm_para.h
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020 SiFive
 */
#ifndef _ASM_RISCV_CACHEINFO_H
#define _ASM_RISCV_CACHEINFO_H
@@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops {
};

void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops);
uintptr_t get_cache_size(u32 level, enum cache_type type);
uintptr_t get_cache_geometry(u32 level, enum cache_type type);

#endif /* _ASM_RISCV_CACHEINFO_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*/
#ifndef _ASM_EFI_H
#define _ASM_EFI_H
#include <asm/csr.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_EFI
extern void efi_init(void);
#else
#define efi_init()
#endif
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
#define arch_efi_call_virt(p, f, args...) p->f(args)
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
/* on RISC-V, the FDT may be located anywhere in system RAM */
static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr)
{
return ULONG_MAX;
}
/* Load initrd at enough distance from DRAM start */
static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
{
return image_addr + SZ_256M;
}
#define alloc_screen_info(x...) (&screen_info)
static inline void free_screen_info(struct screen_info *si)
{
}
static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
{
}
void efi_virtmap_load(void);
void efi_virtmap_unload(void);
#endif /* _ASM_EFI_H */
@@ -11,6 +11,7 @@
#include <uapi/asm/elf.h>
#include <asm/auxvec.h>
#include <asm/byteorder.h>
#include <asm/cacheinfo.h>

/*
 * These are used to set parameters in the core dumps.
@@ -61,6 +62,18 @@ extern unsigned long elf_hwcap;
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
NEW_AUX_ENT(AT_L1I_CACHESIZE, \
get_cache_size(1, CACHE_TYPE_INST)); \
NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \
get_cache_geometry(1, CACHE_TYPE_INST)); \
NEW_AUX_ENT(AT_L1D_CACHESIZE, \
get_cache_size(1, CACHE_TYPE_DATA)); \
NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, \
get_cache_geometry(1, CACHE_TYPE_DATA)); \
NEW_AUX_ENT(AT_L2_CACHESIZE, \
get_cache_size(2, CACHE_TYPE_UNIFIED)); \
NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \
get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \
} while (0)

#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
......
@@ -22,14 +22,24 @@
 */
enum fixed_addresses {
FIX_HOLE,
#define FIX_FDT_SIZE SZ_1M
FIX_FDT_END,
FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
__end_of_permanent_fixed_addresses,
/*
* Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*/
#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
#define FIX_BTMAPS_SLOTS 7
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
__end_of_fixed_addresses
};
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/pgtable.h> #include <linux/pgtable.h>
#include <asm/mmiowb.h> #include <asm/mmiowb.h>
#include <asm/early_ioremap.h>
/* /*
* MMIO access functions are separated out to break dependency cycles * MMIO access functions are separated out to break dependency cycles
......
...@@ -20,6 +20,8 @@ typedef struct { ...@@ -20,6 +20,8 @@ typedef struct {
#endif #endif
} mm_context_t; } mm_context_t;
void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot);
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_MMU_H */ #endif /* _ASM_RISCV_MMU_H */
@@ -100,6 +100,10 @@
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
#define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) | _PAGE_EXEC)

#define PAGE_TABLE __pgprot(_PAGE_TABLE)
...@@ -464,6 +468,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl ...@@ -464,6 +468,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
#define kern_addr_valid(addr) (1) /* FIXME */ #define kern_addr_valid(addr) (1) /* FIXME */
extern void *dtb_early_va; extern void *dtb_early_va;
extern uintptr_t dtb_early_pa;
void setup_bootmem(void); void setup_bootmem(void);
void paging_init(void); void paging_init(void);
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*/
#ifndef __ASM_SECTIONS_H
#define __ASM_SECTIONS_H
#include <asm-generic/sections.h>
extern char _start[];
extern char _start_kernel[];
#endif /* __ASM_SECTIONS_H */
@@ -10,4 +10,28 @@
/* vDSO location */
#define AT_SYSINFO_EHDR 33
/*
* The set of entries below represent more extensive information
* about the caches, in the form of two entries per cache type,
* one entry containing the cache size in bytes, and the other
* containing the cache line size in bytes in the bottom 16 bits
* and the cache associativity in the next 16 bits.
*
* The associativity is such that if N is the 16-bit value, the
* cache is N-way set associative. A value of 0xffff means fully
* associative; a value of 1 means direct-mapped.
*
* For all these fields, a value of 0 means that the information
* is not known.
*/
#define AT_L1I_CACHESIZE 40
#define AT_L1I_CACHEGEOMETRY 41
#define AT_L1D_CACHESIZE 42
#define AT_L1D_CACHEGEOMETRY 43
#define AT_L2_CACHESIZE 44
#define AT_L2_CACHEGEOMETRY 45
/* entries in ARCH_DLINFO */
#define AT_VECTOR_SIZE_ARCH 7
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
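The geometry encoding described above can be consumed directly from userspace
through the aux vector. The following is a minimal sketch, not part of this
series, that assumes glibc's getauxval() and defines the new AT_* constants
locally in case the installed uapi headers predate them:

/* Hypothetical userspace sketch: decode the L1 data cache aux vector entries. */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_L1D_CACHESIZE
#define AT_L1D_CACHESIZE     42
#define AT_L1D_CACHEGEOMETRY 43
#endif

int main(void)
{
	unsigned long size = getauxval(AT_L1D_CACHESIZE);
	unsigned long geo  = getauxval(AT_L1D_CACHEGEOMETRY);
	unsigned int line_size = geo & 0xffff;         /* bottom 16 bits */
	unsigned int ways      = (geo >> 16) & 0xffff; /* next 16 bits   */

	if (!size || !geo)
		printf("L1D cache information not reported\n");
	else
		printf("L1D: %lu bytes, %u-byte lines, %u-way\n",
		       size, line_size, ways);
	return 0;
}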
@@ -55,4 +55,6 @@ obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o

obj-$(CONFIG_EFI) += efi.o

clean:
@@ -3,7 +3,6 @@
 * Copyright (C) 2017 SiFive
 */
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -25,12 +24,84 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
return NULL;
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
struct device_node *node,
enum cache_type type, unsigned int level)
static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id());
struct cacheinfo *this_leaf;
int index;
for (index = 0; index < this_cpu_ci->num_leaves; index++) {
this_leaf = this_cpu_ci->info_list + index;
if (this_leaf->level == level && this_leaf->type == type)
return this_leaf;
}
return NULL;
}
uintptr_t get_cache_size(u32 level, enum cache_type type)
{
struct cacheinfo *this_leaf = get_cacheinfo(level, type);
return this_leaf ? this_leaf->size : 0;
}
uintptr_t get_cache_geometry(u32 level, enum cache_type type)
{
struct cacheinfo *this_leaf = get_cacheinfo(level, type);
return this_leaf ? (this_leaf->ways_of_associativity << 16 |
this_leaf->coherency_line_size) :
0;
}
static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
unsigned int level, unsigned int size,
unsigned int sets, unsigned int line_size)
{
this_leaf->level = level;
this_leaf->type = type;
this_leaf->size = size;
this_leaf->number_of_sets = sets;
this_leaf->coherency_line_size = line_size;
/*
* If the cache is fully associative, there is no need to
* check the other properties.
*/
if (sets == 1)
return;
/*
* Set the number of ways for an n-way set-associative cache,
* making sure all properties are greater than zero.
*/
if (sets > 0 && size > 0 && line_size > 0)
this_leaf->ways_of_associativity = (size / sets) / line_size;
}
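As a worked example of the calculation above (not taken from the patch itself):
a 32 KiB cache described in the device tree with 64-byte cache blocks and 64
sets gives (32768 / 64) / 64 = 8, i.e. an 8-way set-associative cache, and
get_cache_geometry() would then report 8 in the upper 16 bits and 64 in the
lower 16 bits of the geometry value.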
static void fill_cacheinfo(struct cacheinfo **this_leaf,
struct device_node *node, unsigned int level)
{
unsigned int size, sets, line_size;
if (!of_property_read_u32(node, "cache-size", &size) &&
!of_property_read_u32(node, "cache-block-size", &line_size) &&
!of_property_read_u32(node, "cache-sets", &sets)) {
ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size);
}
if (!of_property_read_u32(node, "i-cache-size", &size) &&
!of_property_read_u32(node, "i-cache-sets", &sets) &&
!of_property_read_u32(node, "i-cache-block-size", &line_size)) {
ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size);
}
if (!of_property_read_u32(node, "d-cache-size", &size) &&
!of_property_read_u32(node, "d-cache-sets", &sets) &&
!of_property_read_u32(node, "d-cache-block-size", &line_size)) {
ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size);
}
}
static int __init_cache_level(unsigned int cpu)
@@ -83,29 +154,24 @@ static int __populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;

if (of_property_read_bool(np, "cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
if (of_property_read_bool(np, "i-cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
if (of_property_read_bool(np, "d-cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
/* Level 1 caches in cpu node */
fill_cacheinfo(&this_leaf, np, level);

/* Next level caches in cache nodes */
prev = np;
while ((np = of_find_next_cache_node(np))) {
of_node_put(prev);
prev = np;
if (!of_device_is_compatible(np, "cache"))
break;
if (of_property_read_u32(np, "cache-level", &level))
break;
if (level <= levels)
break;
if (of_property_read_bool(np, "cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
if (of_property_read_bool(np, "i-cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
if (of_property_read_bool(np, "d-cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
fill_cacheinfo(&this_leaf, np, level);
levels = level;
}
of_node_put(np);
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
* Adapted from arch/arm64/kernel/efi-header.S
*/
#include <linux/pe.h>
#include <linux/sizes.h>
.macro __EFI_PE_HEADER
.long PE_MAGIC
coff_header:
#ifdef CONFIG_64BIT
.short IMAGE_FILE_MACHINE_RISCV64 // Machine
#else
.short IMAGE_FILE_MACHINE_RISCV32 // Machine
#endif
.short section_count // NumberOfSections
.long 0 // TimeDateStamp
.long 0 // PointerToSymbolTable
.long 0 // NumberOfSymbols
.short section_table - optional_header // SizeOfOptionalHeader
.short IMAGE_FILE_DEBUG_STRIPPED | \
IMAGE_FILE_EXECUTABLE_IMAGE | \
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
optional_header:
#ifdef CONFIG_64BIT
.short PE_OPT_MAGIC_PE32PLUS // PE32+ format
#else
.short PE_OPT_MAGIC_PE32 // PE32 format
#endif
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __pecoff_text_end - efi_header_end // SizeOfCode
.long __pecoff_data_virt_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
.long efi_header_end - _start // BaseOfCode
#ifdef CONFIG_32BIT
.long __pecoff_text_end - _start // BaseOfData
#endif
extra_header_fields:
.quad 0 // ImageBase
.long PECOFF_SECTION_ALIGNMENT // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
.short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
.short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
.short 0 // MajorSubsystemVersion
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
.long _end - _start // SizeOfImage
// Everything before the kernel image is considered part of the header
.long efi_header_end - _start // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
.short 0 // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
.quad 0 // SizeOfHeapCommit
.long 0 // LoaderFlags
.long (section_table - .) / 8 // NumberOfRvaAndSizes
.quad 0 // ExportTable
.quad 0 // ImportTable
.quad 0 // ResourceTable
.quad 0 // ExceptionTable
.quad 0 // CertificationTable
.quad 0 // BaseRelocationTable
// Section table
section_table:
.ascii ".text\0\0\0"
.long __pecoff_text_end - efi_header_end // VirtualSize
.long efi_header_end - _start // VirtualAddress
.long __pecoff_text_end - efi_header_end // SizeOfRawData
.long efi_header_end - _start // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
.short 0 // NumberOfRelocations
.short 0 // NumberOfLineNumbers
.long IMAGE_SCN_CNT_CODE | \
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_EXECUTE // Characteristics
.ascii ".data\0\0\0"
.long __pecoff_data_virt_size // VirtualSize
.long __pecoff_text_end - _start // VirtualAddress
.long __pecoff_data_raw_size // SizeOfRawData
.long __pecoff_text_end - _start // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
.short 0 // NumberOfRelocations
.short 0 // NumberOfLineNumbers
.long IMAGE_SCN_CNT_INITIALIZED_DATA | \
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_WRITE // Characteristics
.set section_count, (. - section_table) / 40
.balign 0x1000
efi_header_end:
.endm
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
* Adapted from arch/arm64/kernel/efi.c
*/
#include <linux/efi.h>
#include <linux/init.h>
#include <asm/efi.h>
#include <asm/pgtable.h>
#include <asm/pgtable-bits.h>
/*
* Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
if (type == EFI_MEMORY_MAPPED_IO)
return PAGE_KERNEL;
/* R-- */
if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
(EFI_MEMORY_XP | EFI_MEMORY_RO))
return PAGE_KERNEL_READ;
/* R-X */
if (attr & EFI_MEMORY_RO)
return PAGE_KERNEL_READ_EXEC;
/* RW- */
if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
EFI_MEMORY_XP) ||
type != EFI_RUNTIME_SERVICES_CODE)
return PAGE_KERNEL;
/* RWX */
return PAGE_KERNEL_EXEC;
}
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) &
~(_PAGE_GLOBAL));
int i;
/* RISC-V maps one page at a time */
for (i = 0; i < md->num_pages; i++)
create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE,
md->phys_addr + i * PAGE_SIZE,
PAGE_SIZE, prot);
return 0;
}
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = READ_ONCE(*ptep);
unsigned long val;
if (md->attribute & EFI_MEMORY_RO) {
val = pte_val(pte) & ~_PAGE_WRITE;
val = pte_val(pte) | _PAGE_READ;
pte = __pte(val);
}
if (md->attribute & EFI_MEMORY_XP) {
val = pte_val(pte) & ~_PAGE_EXEC;
pte = __pte(val);
}
set_pte(ptep, pte);
return 0;
}
int __init efi_set_mapping_permissions(struct mm_struct *mm,
efi_memory_desc_t *md)
{
BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_DATA);
/*
* Calling apply_to_page_range() is only safe on regions that are
* guaranteed to be mapped down to pages. Since we are only called
* for regions that have been mapped using efi_create_mapping() above
* (and this is checked by the generic Memory Attributes table parsing
* routines), there is no need to check that again here.
*/
return apply_to_page_range(mm, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
set_permissions, md);
}
@@ -3,7 +3,6 @@
 * Copyright (C) 2012 Regents of the University of California
 */

#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/asm.h>
#include <linux/init.h>
@@ -13,6 +12,7 @@
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"

__HEAD
ENTRY(_start)
@@ -22,10 +22,18 @@ ENTRY(_start)
 * Do not modify it without modifying the structure and all bootloaders
 * that expects this header format!!
 */
#ifdef CONFIG_EFI
/*
* The 16-bit encoding of this instruction is 0x5A4D, whose
* little-endian bytes are the ASCII string "MZ" required by UEFI.
*/
c.li s4,-13
j _start_kernel
#else
/* jump to start kernel */
j _start_kernel
/* reserved */
.word 0
#endif
.balign 8
#if __riscv_xlen == 64
/* Image load offset(2MB) from start of RAM */
@@ -43,7 +51,14 @@ ENTRY(_start)
.ascii RISCV_IMAGE_MAGIC
.balign 4
.ascii RISCV_IMAGE_MAGIC2
#ifdef CONFIG_EFI
.word pe_head_start - _start
pe_head_start:
__EFI_PE_HEADER
#else
.word 0
#endif

.align 2
#ifdef CONFIG_MMU
@@ -259,7 +274,6 @@ clear_bss_done:
#endif
/* Start the kernel */
call soc_early_init
call parse_dtb
tail start_kernel

.Lsecondary_start:
......
...@@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); ...@@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
extern void *__cpu_up_stack_pointer[]; extern void *__cpu_up_stack_pointer[];
extern void *__cpu_up_task_pointer[]; extern void *__cpu_up_task_pointer[];
void __init parse_dtb(void);
#endif /* __ASM_HEAD_H */ #endif /* __ASM_HEAD_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
* Linker script variables to be set after section resolution, as
* ld.lld does not like variables assigned before SECTIONS is processed.
* Based on arch/arm64/kernel/image-vars.h
*/
#ifndef __RISCV_KERNEL_IMAGE_VARS_H
#define __RISCV_KERNEL_IMAGE_VARS_H
#ifndef LINKER_SCRIPT
#error This file should only be included in vmlinux.lds.S
#endif
#ifdef CONFIG_EFI
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
* Only include data symbols here, or text symbols of functions that are
* guaranteed to be safe when executed at another offset than they were
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
__efistub_memcmp = memcmp;
__efistub_memchr = memchr;
__efistub_memcpy = memcpy;
__efistub_memmove = memmove;
__efistub_memset = memset;
__efistub_strlen = strlen;
__efistub_strnlen = strnlen;
__efistub_strcmp = strcmp;
__efistub_strncmp = strncmp;
__efistub_strrchr = strrchr;
#ifdef CONFIG_KASAN
__efistub___memcpy = memcpy;
__efistub___memmove = memmove;
__efistub___memset = memset;
#endif
__efistub__start = _start;
__efistub__start_kernel = _start_kernel;
__efistub__end = _end;
__efistub__edata = _edata;
__efistub_screen_info = screen_info;
#endif
#endif /* __RISCV_KERNEL_IMAGE_VARS_H */
@@ -17,19 +17,22 @@
#include <linux/sched/task.h>
#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/efi.h>

#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
#include <asm/kasan.h>
#include <asm/efi.h>

#include "head.h"

#ifdef CONFIG_DUMMY_CONSOLE
struct screen_info screen_info = {
#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
struct screen_info screen_info __section(.data) = {
.orig_video_lines = 30,
.orig_video_cols = 80,
.orig_video_mode = 0,
@@ -48,8 +51,9 @@ atomic_t hart_lottery __section(.sdata);
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);

void __init parse_dtb(void)
static void __init parse_dtb(void)
{
/* Early scan of device tree from init memory */
if (early_init_dt_scan(dtb_early_va))
return;
@@ -62,6 +66,7 @@ void __init parse_dtb(void)
void __init setup_arch(char **cmdline_p)
{
parse_dtb();
init_mm.start_code = (unsigned long) _stext;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
@@ -69,14 +74,19 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;

early_ioremap_setup();
parse_early_param();

efi_init();

setup_bootmem();
paging_init();
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
unflatten_device_tree();
if (early_init_dt_verify(__va(dtb_early_pa)))
unflatten_device_tree();
else
pr_err("No DTB found in kernel mappings\n");
#endif
#ifdef CONFIG_SWIOTLB #ifdef CONFIG_SWIOTLB
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/set_memory.h> #include <asm/set_memory.h>
#include "image-vars.h"
#include <linux/sizes.h> #include <linux/sizes.h>
OUTPUT_ARCH(riscv) OUTPUT_ARCH(riscv)
...@@ -17,6 +18,9 @@ ENTRY(_start) ...@@ -17,6 +18,9 @@ ENTRY(_start)
jiffies = jiffies_64; jiffies = jiffies_64;
PECOFF_SECTION_ALIGNMENT = 0x1000;
PECOFF_FILE_ALIGNMENT = 0x200;
SECTIONS SECTIONS
{ {
/* Beginning of code and text segment */ /* Beginning of code and text segment */
...@@ -66,6 +70,11 @@ SECTIONS ...@@ -66,6 +70,11 @@ SECTIONS
_etext = .; _etext = .;
} }
#ifdef CONFIG_EFI
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
__pecoff_text_end = .;
#endif
INIT_DATA_SECTION(16) INIT_DATA_SECTION(16)
/* Start of data section */ /* Start of data section */
...@@ -84,16 +93,26 @@ SECTIONS ...@@ -84,16 +93,26 @@ SECTIONS
.sdata : { .sdata : {
__global_pointer$ = . + 0x800; __global_pointer$ = . + 0x800;
*(.sdata*) *(.sdata*)
/* End of data section */
_edata = .;
} }
#ifdef CONFIG_EFI
.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
#endif
/* End of data section */
_edata = .;
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
.rel.dyn : { .rel.dyn : {
*(.rel.dyn*) *(.rel.dyn*)
} }
#ifdef CONFIG_EFI
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
#endif
_end = .; _end = .;
STABS_DEBUG STABS_DEBUG
......
@@ -19,6 +19,167 @@
#include "../kernel/head.h"
static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
bust_spinlocks(1);
pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
die(regs, "Oops");
do_exit(SIGKILL);
}
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
if (fault & VM_FAULT_OOM) {
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
if (!user_mode(regs)) {
no_context(regs, addr);
return;
}
pagefault_out_of_memory();
return;
} else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
no_context(regs, addr);
return;
}
do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
}
BUG();
}
static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
return;
}
no_context(regs, addr);
}
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
p4d_t *p4d, *p4d_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
int index;
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
return do_trap(regs, SIGSEGV, code, addr);
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
*
* Do _not_ use "tsk->active_mm->pgd" here.
* We might be inside an interrupt in the middle
* of a task switch.
*/
index = pgd_index(addr);
pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k)) {
no_context(regs, addr);
return;
}
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, addr);
p4d_k = p4d_offset(pgd_k, addr);
if (!p4d_present(*p4d_k)) {
no_context(regs, addr);
return;
}
pud = pud_offset(p4d, addr);
pud_k = pud_offset(p4d_k, addr);
if (!pud_present(*pud_k)) {
no_context(regs, addr);
return;
}
/*
* Since the vmalloc area is global, it is unnecessary
* to copy individual PTEs
*/
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
if (!pmd_present(*pmd_k)) {
no_context(regs, addr);
return;
}
set_pmd(pmd, *pmd_k);
/*
* Make sure the actual PTE exists as well to
* catch kernel vmalloc-area accesses to non-mapped
* addresses. If we don't do this, this will just
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
if (!pte_present(*pte_k)) {
no_context(regs, addr);
return;
}
/*
* The kernel assumes that TLBs don't cache invalid
* entries, but in RISC-V, SFENCE.VMA specifies an
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
*/
local_flush_tlb_page(addr);
}
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
switch (cause) {
case EXC_INST_PAGE_FAULT:
if (!(vma->vm_flags & VM_EXEC)) {
return true;
}
break;
case EXC_LOAD_PAGE_FAULT:
if (!(vma->vm_flags & VM_READ)) {
return true;
}
break;
case EXC_STORE_PAGE_FAULT:
if (!(vma->vm_flags & VM_WRITE)) {
return true;
}
break;
default:
panic("%s: unhandled cause %lu", __func__, cause);
}
return false;
}
/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
@@ -48,8 +209,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
 * only copy the information from the master page table,
 * nothing more.
 */
if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
goto vmalloc_fault;
if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
vmalloc_fault(regs, code, addr);
return;
}
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
@@ -59,25 +222,37 @@
 * If we're in an interrupt, have no user context, or are running
 * in an atomic region, then we must not take the fault.
 */
if (unlikely(faulthandler_disabled() || !mm))
goto no_context;
if (unlikely(faulthandler_disabled() || !mm)) {
no_context(regs, addr);
return;
}

if (user_mode(regs))
flags |= FAULT_FLAG_USER;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

if (cause == EXC_STORE_PAGE_FAULT)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (unlikely(!vma))
goto bad_area;
if (unlikely(!vma)) {
bad_area(regs, mm, code, addr);
return;
}
if (likely(vma->vm_start <= addr))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
goto bad_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
bad_area(regs, mm, code, addr);
return;
}
if (unlikely(expand_stack(vma, addr)))
goto bad_area;
if (unlikely(expand_stack(vma, addr))) {
bad_area(regs, mm, code, addr);
return;
}
/*
 * Ok, we have a good vm_area for this memory access, so
@@ -86,22 +261,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
good_area:
code = SEGV_ACCERR;

switch (cause) {
case EXC_INST_PAGE_FAULT:
if (!(vma->vm_flags & VM_EXEC))
goto bad_area;
break;
case EXC_LOAD_PAGE_FAULT:
if (!(vma->vm_flags & VM_READ))
goto bad_area;
break;
case EXC_STORE_PAGE_FAULT:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
break;
default:
panic("%s: unhandled cause %lu", __func__, cause);
}
if (unlikely(access_error(cause, vma))) {
bad_area(regs, mm, code, addr);
return;
}
/*
@@ -119,144 +281,22 @@
if (fault_signal_pending(fault, regs))
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}

if (flags & FAULT_FLAG_ALLOW_RETRY) {
if (fault & VM_FAULT_RETRY) {
flags |= FAULT_FLAG_TRIED;

/*
 * No need to mmap_read_unlock(mm) as we would
 * have already released it in __lock_page_or_retry
 * in mm/filemap.c.
 */
goto retry;
}
}

if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
flags |= FAULT_FLAG_TRIED;
goto retry;
}

mmap_read_unlock(mm);
return;

if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, addr, fault);
return;
}

/*
 * Something tried to access memory that isn't in our memory map.
 * Fix it, but check if it's kernel or user first.
 */
*/
bad_area:
mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
return; return;
} }
no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
bust_spinlocks(1);
pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
die(regs, "Oops");
do_exit(SIGKILL);
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
out_of_memory:
mmap_read_unlock(mm);
if (!user_mode(regs))
goto no_context;
pagefault_out_of_memory();
return;
do_sigbus:
mmap_read_unlock(mm);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return; return;
vmalloc_fault:
{
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
p4d_t *p4d, *p4d_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
int index;
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
return do_trap(regs, SIGSEGV, code, addr);
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
*
* Do _not_ use "tsk->active_mm->pgd" here.
* We might be inside an interrupt in the middle
* of a task switch.
*/
index = pgd_index(addr);
pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
goto no_context;
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, addr);
p4d_k = p4d_offset(pgd_k, addr);
if (!p4d_present(*p4d_k))
goto no_context;
pud = pud_offset(p4d, addr);
pud_k = pud_offset(p4d_k, addr);
if (!pud_present(*pud_k))
goto no_context;
/*
* Since the vmalloc area is global, it is unnecessary
* to copy individual PTEs
*/
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
if (!pmd_present(*pmd_k))
goto no_context;
set_pmd(pmd, *pmd_k);
/*
* Make sure the actual PTE exists as well to
* catch kernel vmalloc-area accesses to non-mapped
* addresses. If we don't do this, this will just
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
if (!pte_present(*pte_k))
goto no_context;
/*
* The kernel assumes that TLBs don't cache invalid
* entries, but in RISC-V, SFENCE.VMA specifies an
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
*/
local_flush_tlb_page(addr);
return;
}
}
@@ -28,7 +28,18 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);

extern char _start[];
void *dtb_early_va;
#define DTB_EARLY_BASE_VA PGDIR_SIZE
void *dtb_early_va __initdata;
uintptr_t dtb_early_pa __initdata;
struct pt_alloc_ops {
pte_t *(*get_pte_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pte)(uintptr_t va);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t *(*get_pmd_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pmd)(uintptr_t va);
#endif
};
static void __init zone_sizes_init(void) static void __init zone_sizes_init(void)
{ {
...@@ -141,8 +152,6 @@ static void __init setup_initrd(void) ...@@ -141,8 +152,6 @@ static void __init setup_initrd(void)
} }
#endif /* CONFIG_BLK_DEV_INITRD */ #endif /* CONFIG_BLK_DEV_INITRD */
static phys_addr_t dtb_early_pa __initdata;
void __init setup_bootmem(void) void __init setup_bootmem(void)
{ {
phys_addr_t mem_size = 0; phys_addr_t mem_size = 0;
...@@ -194,6 +203,8 @@ void __init setup_bootmem(void) ...@@ -194,6 +203,8 @@ void __init setup_bootmem(void)
} }
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
static struct pt_alloc_ops pt_ops;
unsigned long va_pa_offset; unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset); EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base; unsigned long pfn_base;
...@@ -202,7 +213,6 @@ EXPORT_SYMBOL(pfn_base); ...@@ -202,7 +213,6 @@ EXPORT_SYMBOL(pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
static bool mmu_enabled;
#define MAX_EARLY_MAPPING_SIZE SZ_128M #define MAX_EARLY_MAPPING_SIZE SZ_128M
...@@ -224,27 +234,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) ...@@ -224,27 +234,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
local_flush_tlb_page(addr); local_flush_tlb_page(addr);
} }
static pte_t *__init get_pte_virt(phys_addr_t pa)
{
if (mmu_enabled) {
clear_fixmap(FIX_PTE);
return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
} else {
return (pte_t *)((uintptr_t)pa);
}
}

static phys_addr_t __init alloc_pte(uintptr_t va)
{
/*
 * We only create PMD or PGD early mappings so we
 * should never reach here with MMU disabled.
 */
BUG_ON(!mmu_enabled);

return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}

static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
{
return (pte_t *)((uintptr_t)pa);
}

static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_PTE);
return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
}

static inline pte_t *get_pte_virt_late(phys_addr_t pa)
{
return (pte_t *) __va(pa);
}

static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
{
/*
 * We only create PMD or PGD early mappings so we
 * should never reach here with MMU disabled.
 */
BUG();
}

static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
static phys_addr_t alloc_pte_late(uintptr_t va)
{
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
BUG();
return __pa(vaddr);
}
static void __init create_pte_mapping(pte_t *ptep, static void __init create_pte_mapping(pte_t *ptep,
uintptr_t va, phys_addr_t pa, uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot) phys_addr_t sz, pgprot_t prot)
...@@ -269,28 +298,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; ...@@ -269,28 +298,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif #endif
pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
static pmd_t *__init get_pmd_virt(phys_addr_t pa)
{
if (mmu_enabled) {
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
} else {
return (pmd_t *)((uintptr_t)pa);
}
}

static phys_addr_t __init alloc_pmd(uintptr_t va)
{
uintptr_t pmd_num;

if (mmu_enabled)
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);

pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
BUG_ON(pmd_num >= NUM_EARLY_PMDS);
return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
}

static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
return (pmd_t *)((uintptr_t)pa);
}

static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}

static pmd_t *get_pmd_virt_late(phys_addr_t pa)
{
return (pmd_t *) __va(pa);
}

static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
uintptr_t pmd_num;

pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
BUG_ON(pmd_num >= NUM_EARLY_PMDS);
return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
}
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
static phys_addr_t alloc_pmd_late(uintptr_t va)
{
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
BUG_ON(!vaddr);
return __pa(vaddr);
}
static void __init create_pmd_mapping(pmd_t *pmdp, static void __init create_pmd_mapping(pmd_t *pmdp,
uintptr_t va, phys_addr_t pa, uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot) phys_addr_t sz, pgprot_t prot)
...@@ -306,34 +353,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp, ...@@ -306,34 +353,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
} }
if (pmd_none(pmdp[pmd_idx])) {
pte_phys = alloc_pte(va);
pte_phys = pt_ops.alloc_pte(va);
pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
ptep = get_pte_virt(pte_phys);
ptep = pt_ops.get_pte_virt(pte_phys);
memset(ptep, 0, PAGE_SIZE);
} else {
pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
ptep = get_pte_virt(pte_phys);
ptep = pt_ops.get_pte_virt(pte_phys);
}

create_pte_mapping(ptep, va, pa, sz, prot);
}

#define pgd_next_t pmd_t
#define alloc_pgd_next(__va) alloc_pmd(__va)
#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pmd
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) alloc_pte(__va)
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pte
#endif

static void __init create_pgd_mapping(pgd_t *pgdp,
void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
@@ -389,10 +436,13 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
uintptr_t va, end_va;
uintptr_t va, pa, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t fix_bmap_spmd, fix_bmap_epmd;
#endif

va_pa_offset = PAGE_OFFSET - load_pa;
pfn_base = PFN_DOWN(load_pa);
@@ -408,6 +458,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((load_pa % map_size) != 0);
BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
pt_ops.alloc_pte = alloc_pte_early;
pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_early;
pt_ops.get_pmd_virt = get_pmd_virt_early;
#endif
/* Setup early PGD for fixmap */ /* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START, create_pgd_mapping(early_pg_dir, FIXADDR_START,
(uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
...@@ -438,17 +494,44 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) ...@@ -438,17 +494,44 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
load_pa + (va - PAGE_OFFSET), load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC); map_size, PAGE_KERNEL_EXEC);
/* Create fixed mapping for early FDT parsing */
end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
create_pte_mapping(fixmap_pte, va,
dtb_pa + (va - __fix_to_virt(FIX_FDT)),
PAGE_SIZE, PAGE_KERNEL);

/* Save pointer to DTB for early FDT parsing */
dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);

/* Create two consecutive PGD mappings for FDT early scan */
pa = dtb_pa & ~(PGDIR_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
pa, PGDIR_SIZE, PAGE_KERNEL);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));

/* Save physical address for memblock reservation */
dtb_early_pa = dtb_pa;
/*
* The boot-time fixmap can only handle PMD_SIZE mappings. Thus, the
* boot-time ioremap range cannot span multiple PMDs.
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#ifndef __PAGETABLE_PMD_FOLDED
/*
* The early ioremap fixmap is already created as it lies within the first
* 2 MB of the fixmap region. We always map PMD_SIZE, so both FIX_BTMAP_END
* and FIX_BTMAP_BEGIN should lie in the same PMD. Verify that and warn
* the user if not.
*/
fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
WARN_ON(1);
pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
#endif
} }
static void __init setup_vm_final(void) static void __init setup_vm_final(void)
...@@ -457,9 +540,16 @@ static void __init setup_vm_final(void) ...@@ -457,9 +540,16 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end; phys_addr_t pa, start, end;
u64 i; u64 i;
/* Set mmu_enabled flag */
mmu_enabled = true;
/*
 * MMU is enabled at this point. But page table setup is not complete yet.
 * fixmap page table alloc functions should be used at this point
 */
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
#endif
/* Setup swapper PGD for fixmap */ /* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START, create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next), __pa_symbol(fixmap_pgd_next),
@@ -488,6 +578,14 @@ static void __init setup_vm_final(void)
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
#endif
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* Copyright (C) 2019 SiFive * Copyright (C) 2019 SiFive
*/ */
#include <linux/efi.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
...@@ -49,6 +50,14 @@ struct addr_marker { ...@@ -49,6 +50,14 @@ struct addr_marker {
const char *name; const char *name;
}; };
/* Private information for debugfs */
struct ptd_mm_info {
struct mm_struct *mm;
const struct addr_marker *markers;
unsigned long base_addr;
unsigned long end;
};
static struct addr_marker address_markers[] = { static struct addr_marker address_markers[] = {
#ifdef CONFIG_KASAN #ifdef CONFIG_KASAN
{KASAN_SHADOW_START, "Kasan shadow start"}, {KASAN_SHADOW_START, "Kasan shadow start"},
...@@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = { ...@@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = {
{-1, NULL}, {-1, NULL},
}; };
static struct ptd_mm_info kernel_ptd_info = {
.mm = &init_mm,
.markers = address_markers,
.base_addr = KERN_VIRT_START,
.end = ULONG_MAX,
};
#ifdef CONFIG_EFI
static struct addr_marker efi_addr_markers[] = {
{ 0, "UEFI runtime start" },
{ SZ_1G, "UEFI runtime end" },
{ -1, NULL }
};
static struct ptd_mm_info efi_ptd_info = {
.mm = &efi_mm,
.markers = efi_addr_markers,
.base_addr = 0,
.end = SZ_2G,
};
#endif
/* Page Table Entry */ /* Page Table Entry */
struct prot_bits { struct prot_bits {
u64 mask; u64 mask;
@@ -245,22 +276,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr,
}
}

static void ptdump_walk(struct seq_file *s)
static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
{
struct pg_state st = {
.seq = s,
.marker = address_markers,
.marker = pinfo->markers,
.level = -1,
.ptdump = {
.note_page = note_page,
.range = (struct ptdump_range[]) {
{KERN_VIRT_START, ULONG_MAX},
{pinfo->base_addr, pinfo->end},
{0, 0}
}
}
};

ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
}
void ptdump_check_wx(void)
@@ -293,7 +324,7 @@ void ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk(m);
ptdump_walk(m, m->private);
return 0;
}
@@ -308,8 +339,13 @@ static int ptdump_init(void)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
pg_level[i].mask |= pte_bits[j].mask;

debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
&ptdump_fops);
#ifdef CONFIG_EFI
if (efi_enabled(EFI_RUNTIME_SERVICES))
debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
&ptdump_fops);
#endif
return 0; return 0;
} }
......
@@ -106,7 +106,7 @@ config EFI_GENERIC_STUB
config EFI_ARMSTUB_DTB_LOADER
bool "Enable the DTB loader"
depends on EFI_GENERIC_STUB
depends on EFI_GENERIC_STUB && !RISCV
default y
help
Select this config option to add support for the dtb= command
...@@ -123,6 +123,7 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER ...@@ -123,6 +123,7 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
bool "Enable the command line initrd loader" if !X86 bool "Enable the command line initrd loader" if !X86
depends on EFI_STUB && (EFI_GENERIC_STUB || X86) depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
default y default y
depends on !RISCV
help help
Select this config option to add support for the initrd= command Select this config option to add support for the initrd= command
line parameter, allowing an initrd that resides on the same volume line parameter, allowing an initrd that resides on the same volume
......
...@@ -36,6 +36,8 @@ fake_map-$(CONFIG_X86) += x86_fake_mem.o ...@@ -36,6 +36,8 @@ fake_map-$(CONFIG_X86) += x86_fake_mem.o
arm-obj-$(CONFIG_EFI) := efi-init.o arm-runtime.o arm-obj-$(CONFIG_EFI) := efi-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y) obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y) obj-$(CONFIG_ARM64) += $(arm-obj-y)
riscv-obj-$(CONFIG_EFI) := efi-init.o riscv-runtime.o
obj-$(CONFIG_RISCV) += $(riscv-obj-y)
obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
obj-$(CONFIG_EFI_EARLYCON) += earlycon.o obj-$(CONFIG_EFI_EARLYCON) += earlycon.o
obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o
......
...@@ -23,6 +23,8 @@ cflags-$(CONFIG_ARM64) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ ...@@ -23,6 +23,8 @@ cflags-$(CONFIG_ARM64) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
-fno-builtin -fpic \ -fno-builtin -fpic \
$(call cc-option,-mno-single-pic-base) $(call cc-option,-mno-single-pic-base)
cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
-fpic
cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt
...@@ -64,6 +66,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \ ...@@ -64,6 +66,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \
lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o
lib-$(CONFIG_X86) += x86-stub.o lib-$(CONFIG_X86) += x86-stub.o
lib-$(CONFIG_RISCV) += riscv-stub.o
CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# Even when -mbranch-protection=none is set, Clang will generate a # Even when -mbranch-protection=none is set, Clang will generate a
...@@ -112,6 +115,13 @@ STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ ...@@ -112,6 +115,13 @@ STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_ --prefix-symbols=__efistub_
STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
# For RISC-V, we don't need anything special beyond what arm64 already does.
# Keep all the symbols in the .init section and make sure that no absolute
# symbol references remain.
STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
$(obj)/%.stub.o: $(obj)/%.o FORCE $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,stubcopy) $(call if_changed,stubcopy)
......
...@@ -17,7 +17,10 @@ ...@@ -17,7 +17,10 @@
/* /*
* This is the base address at which to start allocating virtual memory ranges * This is the base address at which to start allocating virtual memory ranges
* for UEFI Runtime Services. This is in the low TTBR0 range so that we can use * for UEFI Runtime Services.
*
* For ARM/ARM64:
* This is in the low TTBR0 range so that we can use
* any allocation we choose, and eliminate the risk of a conflict after kexec. * any allocation we choose, and eliminate the risk of a conflict after kexec.
* The value chosen is the largest non-zero power of 2 suitable for this purpose * The value chosen is the largest non-zero power of 2 suitable for this purpose
* both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can
...@@ -25,6 +28,12 @@ ...@@ -25,6 +28,12 @@
* Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split, * Since 32-bit ARM could potentially execute with a 1G/3G user/kernel split,
* map everything below 1 GB. (512 MB is a reasonable upper bound for the * map everything below 1 GB. (512 MB is a reasonable upper bound for the
* entire footprint of the UEFI runtime services memory regions) * entire footprint of the UEFI runtime services memory regions)
*
* For RISC-V:
* There is no specific reason why this address (512MB) can't also serve as the
* EFI runtime virtual address base for RISC-V, and it works for EFI runtime
* services on both RV32 and RV64. Keep the same runtime virtual address for
* RISC-V as well to minimize code churn.
*/ */
#define EFI_RT_VIRTUAL_BASE SZ_512M #define EFI_RT_VIRTUAL_BASE SZ_512M
#define EFI_RT_VIRTUAL_SIZE SZ_512M #define EFI_RT_VIRTUAL_SIZE SZ_512M
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*/
#include <linux/efi.h>
#include <linux/libfdt.h>
#include <asm/efi.h>
#include <asm/sections.h>
#include "efistub.h"
/*
* RISC-V requires the kernel image to be placed at a 2 MB aligned base for
* 64-bit and a 4 MB aligned base for 32-bit.
*/
#ifdef CONFIG_64BIT
#define MIN_KIMG_ALIGN SZ_2M
#else
#define MIN_KIMG_ALIGN SZ_4M
#endif
typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
static u32 hartid;
static u32 get_boot_hartid_from_fdt(void)
{
const void *fdt;
int chosen_node, len;
const fdt32_t *prop;
fdt = get_efi_config_table(DEVICE_TREE_GUID);
if (!fdt)
return U32_MAX;
chosen_node = fdt_path_offset(fdt, "/chosen");
if (chosen_node < 0)
return U32_MAX;
prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len);
if (!prop || len != sizeof(u32))
return U32_MAX;
return fdt32_to_cpu(*prop);
}
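get_boot_hartid_from_fdt() only consumes the property; the previous boot stage (firmware or boot loader) is expected to have written a single 32-bit /chosen/boot-hartid cell into the device tree before handing over. A minimal, purely illustrative loader-side sketch using libfdt (which the stub already links against; fdt and boot_hartid are hypothetical locals):

	u32 hart = cpu_to_fdt32(boot_hartid);
	int node = fdt_path_offset(fdt, "/chosen");

	/* Advertise the boot hart to the EFI stub. */
	if (node >= 0)
		fdt_setprop(fdt, node, "boot-hartid", &hart, sizeof(hart));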
efi_status_t check_platform_features(void)
{
hartid = get_boot_hartid_from_fdt();
if (hartid == U32_MAX) {
efi_err("/chosen/boot-hartid missing or invalid!\n");
return EFI_UNSUPPORTED;
}
return EFI_SUCCESS;
}
void __noreturn efi_enter_kernel(unsigned long entrypoint, unsigned long fdt,
unsigned long fdt_size)
{
unsigned long stext_offset = _start_kernel - _start;
unsigned long kernel_entry = entrypoint + stext_offset;
jump_kernel_func jump_kernel = (jump_kernel_func)kernel_entry;
/*
* Jump to the real kernel here with the following constraints:
* 1. MMU should be disabled.
* 2. a0 should contain the hartid.
* 3. a1 should contain the DT address.
*/
csr_write(CSR_SATP, 0);
jump_kernel(hartid, fdt);
}
efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
efi_loaded_image_t *image)
{
unsigned long kernel_size = 0;
unsigned long preferred_addr;
efi_status_t status;
kernel_size = _edata - _start;
*image_addr = (unsigned long)_start;
*image_size = kernel_size + (_end - _edata);
/*
* The RISC-V kernel maps the PAGE_OFFSET virtual address to the physical
* address at which the kernel was booted, so the kernel should boot from as
* low an address as possible to avoid wasting memory. Currently, dram_base
* is occupied by the firmware, so the preferred boot address is the next
* aligned address. If the preferred address is not available,
* efi_relocate_kernel() falls back to efi_low_alloc_above() to allocate the
* lowest possible memory region, as long as the address and size meet the
* alignment constraints.
*/
preferred_addr = MIN_KIMG_ALIGN;
status = efi_relocate_kernel(image_addr, kernel_size, *image_size,
preferred_addr, MIN_KIMG_ALIGN, 0x0);
if (status != EFI_SUCCESS) {
efi_err("Failed to relocate kernel\n");
*image_size = 0;
}
return status;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Extensible Firmware Interface
*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*
* Based on Extensible Firmware Interface Specification version 2.4
* Adapted from drivers/firmware/efi/arm-runtime.c
*
*/
#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/mm_types.h>
#include <linux/preempt.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/efi.h>
#include <asm/mmu.h>
#include <asm/pgalloc.h>
static bool __init efi_virtmap_init(void)
{
efi_memory_desc_t *md;
efi_mm.pgd = pgd_alloc(&efi_mm);
mm_init_cpumask(&efi_mm);
init_new_context(NULL, &efi_mm);
for_each_efi_memory_desc(md) {
phys_addr_t phys = md->phys_addr;
int ret;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
if (md->virt_addr == 0)
return false;
ret = efi_create_mapping(&efi_mm, md);
if (ret) {
pr_warn(" EFI remap %pa: failed to create mapping (%d)\n",
&phys, ret);
return false;
}
}
if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
return false;
return true;
}
/*
* Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
* non-early mapping of the UEFI system table and virtual mappings for all
* EFI_MEMORY_RUNTIME regions.
*/
static int __init riscv_enable_runtime_services(void)
{
u64 mapsize;
if (!efi_enabled(EFI_BOOT)) {
pr_info("EFI services will not be available.\n");
return 0;
}
efi_memmap_unmap();
mapsize = efi.memmap.desc_size * efi.memmap.nr_map;
if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) {
pr_err("Failed to remap EFI memory map\n");
return 0;
}
if (efi_soft_reserve_enabled()) {
efi_memory_desc_t *md;
for_each_efi_memory_desc(md) {
int md_size = md->num_pages << EFI_PAGE_SHIFT;
struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP))
continue;
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (WARN_ON(!res))
break;
res->start = md->phys_addr;
res->end = md->phys_addr + md_size - 1;
res->name = "Soft Reserved";
res->flags = IORESOURCE_MEM;
res->desc = IORES_DESC_SOFT_RESERVED;
insert_resource(&iomem_resource, res);
}
}
if (efi_runtime_disabled()) {
pr_info("EFI runtime services will be disabled.\n");
return 0;
}
if (efi_enabled(EFI_RUNTIME_SERVICES)) {
pr_info("EFI runtime services access via paravirt.\n");
return 0;
}
pr_info("Remapping and enabling EFI services.\n");
if (!efi_virtmap_init()) {
pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
return -ENOMEM;
}
/* Set up runtime services function pointers */
efi_native_runtime_setup();
set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return 0;
}
early_initcall(riscv_enable_runtime_services);
void efi_virtmap_load(void)
{
preempt_disable();
switch_mm(current->active_mm, &efi_mm, NULL);
}
void efi_virtmap_unload(void)
{
switch_mm(&efi_mm, current->active_mm, NULL);
preempt_enable();
}
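efi_virtmap_load() and efi_virtmap_unload() exist so that each runtime-services call runs with efi_mm's page tables installed. On ARM the generic runtime wrappers reach them through the arch_efi_call_virt_setup()/arch_efi_call_virt_teardown() hooks, and the RISC-V asm/efi.h added elsewhere in this series is expected to wire them up along the same lines (sketch only, not part of this hunk):

#define arch_efi_call_virt_setup()	efi_virtmap_load()
#define arch_efi_call_virt_teardown()	efi_virtmap_unload()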