Commit 2dc10ad8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - "genirq: Introduce generic irq migration for cpu hotunplugged" patch
   merged from tip/irq/for-arm to allow the arm64-specific part to be
   upstreamed via the arm64 tree

 - CPU feature detection reworked to cope with heterogeneous systems
   where CPUs may not have exactly the same features.  The features
   reported by the kernel via internal data structures or ELF_HWCAP are
   delayed until all the CPUs are up (and before user space starts)

 - Support for 16KB pages, with the additional bonus of a 36-bit VA
   space, though the latter is only selectable with EXPERT

 - Implement native {relaxed, acquire, release} atomics for arm64

 - New ASID allocation algorithm which avoids IPI on roll-over, together
   with TLB invalidation optimisations (using local vs global where
   feasible)

 - KASan support for arm64

 - EFI_STUB clean-up and isolation for the kernel proper (required by
   KASan)

 - copy_{to,from,in}_user optimisations (sharing the memcpy template)

 - perf: moving arm64 to the arm32/64 shared PMU framework

 - L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware

 - Support for the contiguous PTE hint on kernel mapping (16 consecutive
   entries may be able to use a single TLB entry)

 - Generic CONFIG_HZ now used on arm64

 - defconfig updates

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits)
  arm64/efi: fix libstub build under CONFIG_MODVERSIONS
  ARM64: Enable multi-core scheduler support by default
  arm64/efi: move arm64 specific stub C code to libstub
  arm64: page-align sections for DEBUG_RODATA
  arm64: Fix build with CONFIG_ZONE_DMA=n
  arm64: Fix compat register mappings
  arm64: Increase the max granular size
  arm64: remove bogus TASK_SIZE_64 check
  arm64: make Timer Interrupt Frequency selectable
  arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED
  arm64: cachetype: fix definitions of ICACHEF_* flags
  arm64: cpufeature: declare enable_cpu_capabilities as static
  genirq: Make the cpuhotplug migration code less noisy
  arm64: Constify hwcap name string arrays
  arm64/kvm: Make use of the system wide safe values
  arm64/debug: Make use of the system wide safe value
  arm64: Move FP/ASIMD hwcap handling to common code
  arm64/HWCAP: Use system wide safe values
  arm64/capabilities: Make use of system wide safe value
  arm64: Delay cpu feature capability checks
  ...
parents e627078a f8f8bdc4
...@@ -58,5 +58,3 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI ...@@ -58,5 +58,3 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
linux,uefi-stub-kern-ver | string | Copy of linux_banner from build.
--------------------------------------------------------------------------------
...@@ -104,7 +104,12 @@ Header notes: ...@@ -104,7 +104,12 @@ Header notes:
- The flags field (introduced in v3.17) is a little-endian 64-bit field - The flags field (introduced in v3.17) is a little-endian 64-bit field
composed as follows: composed as follows:
Bit 0: Kernel endianness. 1 if BE, 0 if LE. Bit 0: Kernel endianness. 1 if BE, 0 if LE.
Bits 1-63: Reserved. Bits 1-2: Kernel Page size.
0 - Unspecified.
1 - 4K
2 - 16K
3 - 64K
Bits 3-63: Reserved.
- When image_size is zero, a bootloader should attempt to keep as much - When image_size is zero, a bootloader should attempt to keep as much
memory as possible free for use by the kernel immediately after the memory as possible free for use by the kernel immediately after the
......
...@@ -8,6 +8,8 @@ Required properties: ...@@ -8,6 +8,8 @@ Required properties:
- compatible : should be one of - compatible : should be one of
"arm,armv8-pmuv3" "arm,armv8-pmuv3"
"arm,cortex-a57-pmu"
"arm,cortex-a53-pmu"
"arm,cortex-a17-pmu" "arm,cortex-a17-pmu"
"arm,cortex-a15-pmu" "arm,cortex-a15-pmu"
"arm,cortex-a12-pmu" "arm,cortex-a12-pmu"
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
| alpha: | TODO | | alpha: | TODO |
| arc: | TODO | | arc: | TODO |
| arm: | TODO | | arm: | TODO |
| arm64: | TODO | | arm64: | ok |
| avr32: | TODO | | avr32: | TODO |
| blackfin: | TODO | | blackfin: | TODO |
| c6x: | TODO | | c6x: | TODO |
......
...@@ -823,12 +823,13 @@ F: arch/arm/include/asm/floppy.h ...@@ -823,12 +823,13 @@ F: arch/arm/include/asm/floppy.h
ARM PMU PROFILING AND DEBUGGING ARM PMU PROFILING AND DEBUGGING
M: Will Deacon <will.deacon@arm.com> M: Will Deacon <will.deacon@arm.com>
R: Mark Rutland <mark.rutland@arm.com>
S: Maintained S: Maintained
F: arch/arm/kernel/perf_* F: arch/arm*/kernel/perf_*
F: arch/arm/oprofile/common.c F: arch/arm/oprofile/common.c
F: arch/arm/kernel/hw_breakpoint.c F: arch/arm*/kernel/hw_breakpoint.c
F: arch/arm/include/asm/hw_breakpoint.h F: arch/arm*/include/asm/hw_breakpoint.h
F: arch/arm/include/asm/perf_event.h F: arch/arm*/include/asm/perf_event.h
F: drivers/perf/arm_pmu.c F: drivers/perf/arm_pmu.c
F: include/linux/perf/arm_pmu.h F: include/linux/perf/arm_pmu.h
......
...@@ -48,6 +48,7 @@ config ARM64 ...@@ -48,6 +48,7 @@ config ARM64
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRACEHOOK
...@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM ...@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS config PGTABLE_LEVELS
int int
default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
source "init/Kconfig" source "init/Kconfig"
...@@ -389,25 +392,37 @@ config ARM64_4K_PAGES ...@@ -389,25 +392,37 @@ config ARM64_4K_PAGES
help help
This feature enables 4KB pages support. This feature enables 4KB pages support.
config ARM64_16K_PAGES
bool "16KB"
help
This feature enables 16KB pages support. AArch32 emulation
requires applications compiled with 16K (or a multiple of 16K)
aligned segments.
config ARM64_64K_PAGES config ARM64_64K_PAGES
bool "64KB" bool "64KB"
help help
This feature enables 64KB pages support (4KB by default) This feature enables 64KB pages support (4KB by default)
allowing only two levels of page tables and faster TLB allowing only two levels of page tables and faster TLB
look-up. AArch32 emulation is not available when this feature look-up. AArch32 emulation requires applications compiled
is enabled. with 64K aligned segments.
endchoice endchoice
choice choice
prompt "Virtual address space size" prompt "Virtual address space size"
default ARM64_VA_BITS_39 if ARM64_4K_PAGES default ARM64_VA_BITS_39 if ARM64_4K_PAGES
default ARM64_VA_BITS_47 if ARM64_16K_PAGES
default ARM64_VA_BITS_42 if ARM64_64K_PAGES default ARM64_VA_BITS_42 if ARM64_64K_PAGES
help help
Allows choosing one of multiple possible virtual address Allows choosing one of multiple possible virtual address
space sizes. The level of translation table is determined by space sizes. The level of translation table is determined by
a combination of page size and virtual address space size. a combination of page size and virtual address space size.
config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_39 config ARM64_VA_BITS_39
bool "39-bit" bool "39-bit"
depends on ARM64_4K_PAGES depends on ARM64_4K_PAGES
...@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42 ...@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42
bool "42-bit" bool "42-bit"
depends on ARM64_64K_PAGES depends on ARM64_64K_PAGES
config ARM64_VA_BITS_47
bool "47-bit"
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_48 config ARM64_VA_BITS_48
bool "48-bit" bool "48-bit"
...@@ -423,8 +442,10 @@ endchoice ...@@ -423,8 +442,10 @@ endchoice
config ARM64_VA_BITS config ARM64_VA_BITS
int int
default 36 if ARM64_VA_BITS_36
default 39 if ARM64_VA_BITS_39 default 39 if ARM64_VA_BITS_39
default 42 if ARM64_VA_BITS_42 default 42 if ARM64_VA_BITS_42
default 47 if ARM64_VA_BITS_47
default 48 if ARM64_VA_BITS_48 default 48 if ARM64_VA_BITS_48
config CPU_BIG_ENDIAN config CPU_BIG_ENDIAN
...@@ -454,15 +475,13 @@ config NR_CPUS ...@@ -454,15 +475,13 @@ config NR_CPUS
config HOTPLUG_CPU config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs" bool "Support for hot-pluggable CPUs"
select GENERIC_IRQ_MIGRATION
help help
Say Y here to experiment with turning CPUs off and on. CPUs Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu. can be controlled through /sys/devices/system/cpu.
source kernel/Kconfig.preempt source kernel/Kconfig.preempt
source kernel/Kconfig.hz
config HZ
int
default 100
config ARCH_HAS_HOLES_MEMORYMODEL config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM def_bool y if SPARSEMEM
...@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID ...@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
config HW_PERF_EVENTS config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events" def_bool y
depends on PERF_EVENTS depends on ARM_PMU
default y
help
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
config SYS_SUPPORTS_HUGETLBFS config SYS_SUPPORTS_HUGETLBFS
def_bool y def_bool y
...@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB ...@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB
def_bool y def_bool y
config ARCH_WANT_HUGE_PMD_SHARE config ARCH_WANT_HUGE_PMD_SHARE
def_bool y if !ARM64_64K_PAGES def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
config HAVE_ARCH_TRANSPARENT_HUGEPAGE config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y def_bool y
...@@ -532,7 +547,25 @@ config XEN ...@@ -532,7 +547,25 @@ config XEN
config FORCE_MAX_ZONEORDER config FORCE_MAX_ZONEORDER
int int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
default "11" default "11"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
We make sure that we can allocate up to a HugePage size for each configuration.
Hence we have :
MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
menuconfig ARMV8_DEPRECATED menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions" bool "Emulate deprecated/obsolete ARMv8 instructions"
...@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt" ...@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt"
config COMPAT config COMPAT
bool "Kernel support for 32-bit EL0" bool "Kernel support for 32-bit EL0"
depends on !ARM64_64K_PAGES || EXPERT depends on ARM64_4K_PAGES || EXPERT
select COMPAT_BINFMT_ELF select COMPAT_BINFMT_ELF
select HAVE_UID16 select HAVE_UID16
select OLD_SIGSUSPEND3 select OLD_SIGSUSPEND3
...@@ -718,9 +751,9 @@ config COMPAT ...@@ -718,9 +751,9 @@ config COMPAT
the user helper functions, VFP support and the ptrace interface are the user helper functions, VFP support and the ptrace interface are
handled appropriately by the kernel. handled appropriately by the kernel.
If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you If you use a page size other than 4KB (i.e., 16KB or 64KB), please be aware
will only be able to execute AArch32 binaries that were compiled with that you will only be able to execute AArch32 binaries that were compiled
64k aligned segments. with page size aligned segments.
If you want to execute 32-bit userspace applications, say Y. If you want to execute 32-bit userspace applications, say Y.
......
...@@ -77,7 +77,7 @@ config DEBUG_RODATA ...@@ -77,7 +77,7 @@ config DEBUG_RODATA
If in doubt, say Y If in doubt, say Y
config DEBUG_ALIGN_RODATA config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA && !ARM64_64K_PAGES depends on DEBUG_RODATA && ARM64_4K_PAGES
bool "Align linker sections up to SECTION_SIZE" bool "Align linker sections up to SECTION_SIZE"
help help
If this option is enabled, sections that may potentially be marked as If this option is enabled, sections that may potentially be marked as
......
...@@ -55,6 +55,13 @@ else ...@@ -55,6 +55,13 @@ else
TEXT_OFFSET := 0x00080000 TEXT_OFFSET := 0x00080000
endif endif
# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
# in 32-bit arithmetic
KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
- (1 << (64 - 32 - 3)) )) )
export TEXT_OFFSET GZFLAGS export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/ core-y += arch/arm64/kernel/ arch/arm64/mm/
......
...@@ -91,17 +91,21 @@ A53_L2: l2-cache1 { ...@@ -91,17 +91,21 @@ A53_L2: l2-cache1 {
}; };
}; };
pmu { pmu_a57 {
compatible = "arm,armv8-pmuv3"; compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
<GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, interrupt-affinity = <&A57_0>,
<&A57_1>;
};
pmu_a53 {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>, interrupt-affinity = <&A53_0>,
<&A57_1>,
<&A53_0>,
<&A53_1>, <&A53_1>,
<&A53_2>, <&A53_2>,
<&A53_3>; <&A53_3>;
......
...@@ -91,17 +91,21 @@ A53_L2: l2-cache1 { ...@@ -91,17 +91,21 @@ A53_L2: l2-cache1 {
}; };
}; };
pmu { pmu_a57 {
compatible = "arm,armv8-pmuv3"; compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
<GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, interrupt-affinity = <&A57_0>,
<&A57_1>;
};
pmu_a53 {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>, interrupt-affinity = <&A53_0>,
<&A57_1>,
<&A53_0>,
<&A53_1>, <&A53_1>,
<&A53_2>, <&A53_2>,
<&A53_3>; <&A53_3>;
......
...@@ -51,6 +51,7 @@ CONFIG_PCI=y ...@@ -51,6 +51,7 @@ CONFIG_PCI=y
CONFIG_PCI_MSI=y CONFIG_PCI_MSI=y
CONFIG_PCI_XGENE=y CONFIG_PCI_XGENE=y
CONFIG_SMP=y CONFIG_SMP=y
CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y CONFIG_PREEMPT=y
CONFIG_KSM=y CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE=y
...@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y ...@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_SAMSUNG=y
CONFIG_SERIAL_SAMSUNG_UARTS_4=y
CONFIG_SERIAL_SAMSUNG_UARTS=4
CONFIG_SERIAL_SAMSUNG_CONSOLE=y
CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_MSM_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_OF_PLATFORM=y
...@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y ...@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SPI=y CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
CONFIG_MMC_DW_IDMAC=y
CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y CONFIG_LEDS_CLASS=y
CONFIG_LEDS_SYSCON=y CONFIG_LEDS_SYSCON=y
......
...@@ -193,4 +193,15 @@ lr .req x30 // link register ...@@ -193,4 +193,15 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym] str \src, [\tmp, :lo12:\sym]
.endm .endm
/*
* Annotate a function as position independent, i.e., safe to be called before
* the kernel virtual mapping is activated.
*/
#define ENDPIPROC(x) \
.globl __pi_##x; \
.type __pi_##x, %function; \
.set __pi_##x, x; \
.size __pi_##x, . - x; \
ENDPROC(x)
#endif /* __ASM_ASSEMBLER_H */ #endif /* __ASM_ASSEMBLER_H */
...@@ -55,13 +55,42 @@ ...@@ -55,13 +55,42 @@
#define atomic_read(v) READ_ONCE((v)->counter) #define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_add_return_acquire atomic_add_return_acquire
#define atomic_add_return_release atomic_add_return_release
#define atomic_add_return atomic_add_return
#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_sub_return_acquire atomic_sub_return_acquire
#define atomic_sub_return_release atomic_sub_return_release
#define atomic_sub_return atomic_sub_return
#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new)) #define atomic_xchg(v, new) xchg(&((v)->counter), (new))
#define atomic_cmpxchg_relaxed(v, old, new) \
cmpxchg_relaxed(&((v)->counter), (old), (new))
#define atomic_cmpxchg_acquire(v, old, new) \
cmpxchg_acquire(&((v)->counter), (old), (new))
#define atomic_cmpxchg_release(v, old, new) \
cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) #define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
#define atomic_inc(v) atomic_add(1, (v)) #define atomic_inc(v) atomic_add(1, (v))
#define atomic_dec(v) atomic_sub(1, (v)) #define atomic_dec(v) atomic_sub(1, (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
...@@ -75,13 +104,39 @@ ...@@ -75,13 +104,39 @@
#define ATOMIC64_INIT ATOMIC_INIT #define ATOMIC64_INIT ATOMIC_INIT
#define atomic64_read atomic_read #define atomic64_read atomic_read
#define atomic64_set atomic_set #define atomic64_set atomic_set
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_add_return_acquire atomic64_add_return_acquire
#define atomic64_add_return_release atomic64_add_return_release
#define atomic64_add_return atomic64_add_return
#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
#define atomic64_sub_return_acquire atomic64_sub_return_acquire
#define atomic64_sub_return_release atomic64_sub_return_release
#define atomic64_sub_return atomic64_sub_return
#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
#define atomic64_xchg atomic_xchg #define atomic64_xchg atomic_xchg
#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed
#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire
#define atomic64_cmpxchg_release atomic_cmpxchg_release
#define atomic64_cmpxchg atomic_cmpxchg #define atomic64_cmpxchg atomic_cmpxchg
#define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_inc(v) atomic64_add(1, (v))
#define atomic64_dec(v) atomic64_sub(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) #define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
......
...@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ ...@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
} \ } \
__LL_SC_EXPORT(atomic_##op); __LL_SC_EXPORT(atomic_##op);
#define ATOMIC_OP_RETURN(op, asm_op) \ #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE int \ __LL_SC_INLINE int \
__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
{ \ { \
unsigned long tmp; \ unsigned long tmp; \
int result; \ int result; \
\ \
asm volatile("// atomic_" #op "_return\n" \ asm volatile("// atomic_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \ "1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \ " " #asm_op " %w0, %w0, %w3\n" \
" stlxr %w1, %w0, %2\n" \ " st" #rel "xr %w1, %w0, %2\n" \
" cbnz %w1, 1b" \ " cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : "Ir" (i) \
: "memory"); \ : cl); \
\ \
smp_mb(); \
return result; \ return result; \
} \ } \
__LL_SC_EXPORT(atomic_##op##_return); __LL_SC_EXPORT(atomic_##op##_return##name);
#define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \
ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
#define ATOMIC_OPS(op, asm_op) \ #define ATOMIC_OPS_RLX(...) \
ATOMIC_OP(op, asm_op) \ ATOMIC_OPS(__VA_ARGS__) \
ATOMIC_OP_RETURN(op, asm_op) ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
ATOMIC_OPS(add, add) ATOMIC_OPS_RLX(add, add)
ATOMIC_OPS(sub, sub) ATOMIC_OPS_RLX(sub, sub)
ATOMIC_OP(and, and) ATOMIC_OP(and, and)
ATOMIC_OP(andnot, bic) ATOMIC_OP(andnot, bic)
ATOMIC_OP(or, orr) ATOMIC_OP(or, orr)
ATOMIC_OP(xor, eor) ATOMIC_OP(xor, eor)
#undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS #undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN #undef ATOMIC_OP_RETURN
#undef ATOMIC_OP #undef ATOMIC_OP
...@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ ...@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
} \ } \
__LL_SC_EXPORT(atomic64_##op); __LL_SC_EXPORT(atomic64_##op);
#define ATOMIC64_OP_RETURN(op, asm_op) \ #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE long \ __LL_SC_INLINE long \
__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
{ \ { \
long result; \ long result; \
unsigned long tmp; \ unsigned long tmp; \
\ \
asm volatile("// atomic64_" #op "_return\n" \ asm volatile("// atomic64_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \ "1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \ " " #asm_op " %0, %0, %3\n" \
" stlxr %w1, %0, %2\n" \ " st" #rel "xr %w1, %0, %2\n" \
" cbnz %w1, 1b" \ " cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : "Ir" (i) \
: "memory"); \ : cl); \
\ \
smp_mb(); \
return result; \ return result; \
} \ } \
__LL_SC_EXPORT(atomic64_##op##_return); __LL_SC_EXPORT(atomic64_##op##_return##name);
#define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \
ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
#define ATOMIC64_OPS(op, asm_op) \ #define ATOMIC64_OPS_RLX(...) \
ATOMIC64_OP(op, asm_op) \ ATOMIC64_OPS(__VA_ARGS__) \
ATOMIC64_OP_RETURN(op, asm_op) ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
ATOMIC64_OPS(add, add) ATOMIC64_OPS_RLX(add, add)
ATOMIC64_OPS(sub, sub) ATOMIC64_OPS_RLX(sub, sub)
ATOMIC64_OP(and, and) ATOMIC64_OP(and, and)
ATOMIC64_OP(andnot, bic) ATOMIC64_OP(andnot, bic)
ATOMIC64_OP(or, orr) ATOMIC64_OP(or, orr)
ATOMIC64_OP(xor, eor) ATOMIC64_OP(xor, eor)
#undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS #undef ATOMIC64_OPS
#undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP #undef ATOMIC64_OP
...@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) ...@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
} }
__LL_SC_EXPORT(atomic64_dec_if_positive); __LL_SC_EXPORT(atomic64_dec_if_positive);
#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \ #define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
__LL_SC_INLINE unsigned long \ __LL_SC_INLINE unsigned long \
__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
unsigned long old, \ unsigned long old, \
...@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ ...@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
\ \
asm volatile( \ asm volatile( \
" prfm pstl1strm, %[v]\n" \ " prfm pstl1strm, %[v]\n" \
"1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \ " cbnz %" #w "[tmp], 2f\n" \
" st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
...@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ ...@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
} \ } \
__LL_SC_EXPORT(__cmpxchg_case_##name); __LL_SC_EXPORT(__cmpxchg_case_##name);
__CMPXCHG_CASE(w, b, 1, , , ) __CMPXCHG_CASE(w, b, 1, , , , )
__CMPXCHG_CASE(w, h, 2, , , ) __CMPXCHG_CASE(w, h, 2, , , , )
__CMPXCHG_CASE(w, , 4, , , ) __CMPXCHG_CASE(w, , 4, , , , )
__CMPXCHG_CASE( , , 8, , , ) __CMPXCHG_CASE( , , 8, , , , )
__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory") __CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory") __CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory") __CMPXCHG_CASE(w, , acq_4, , a, , "memory")
__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory") __CMPXCHG_CASE( , , acq_8, , a, , "memory")
__CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
__CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
__CMPXCHG_CASE(w, , rel_4, , , l, "memory")
__CMPXCHG_CASE( , , rel_8, , , l, "memory")
__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE #undef __CMPXCHG_CASE
......
...@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v) ...@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
: "x30"); : "x30");
} }
static inline int atomic_add_return(int i, atomic_t *v) #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
{ static inline int atomic_add_return##name(int i, atomic_t *v) \
register int w0 asm ("w0") = i; { \
register atomic_t *x1 asm ("x1") = v; register int w0 asm ("w0") = i; \
register atomic_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC(add_return##name), \
/* LSE atomics */ \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return w0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN( ATOMIC_OP_ADD_RETURN(_relaxed, )
/* LL/SC */ ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
" nop\n" ATOMIC_OP_ADD_RETURN(_release, l, "memory")
__LL_SC_ATOMIC(add_return), ATOMIC_OP_ADD_RETURN( , al, "memory")
/* LSE atomics */
" ldaddal %w[i], w30, %[v]\n"
" add %w[i], %w[i], w30")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
return w0; #undef ATOMIC_OP_ADD_RETURN
}
static inline void atomic_and(int i, atomic_t *v) static inline void atomic_and(int i, atomic_t *v)
{ {
...@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v) ...@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
: "x30"); : "x30");
} }
static inline int atomic_sub_return(int i, atomic_t *v) #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
{ static inline int atomic_sub_return##name(int i, atomic_t *v) \
register int w0 asm ("w0") = i; { \
register atomic_t *x1 asm ("x1") = v; register int w0 asm ("w0") = i; \
register atomic_t *x1 asm ("x1") = v; \
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ asm volatile(ARM64_LSE_ATOMIC_INSN( \
" nop\n" /* LL/SC */ \
__LL_SC_ATOMIC(sub_return) " nop\n" \
" nop", __LL_SC_ATOMIC(sub_return##name) \
/* LSE atomics */ " nop", \
" neg %w[i], %w[i]\n" /* LSE atomics */ \
" ldaddal %w[i], w30, %[v]\n" " neg %w[i], %w[i]\n" \
" add %w[i], %w[i], w30") " ldadd" #mb " %w[i], w30, %[v]\n" \
: [i] "+r" (w0), [v] "+Q" (v->counter) " add %w[i], %w[i], w30") \
: "r" (x1) : [i] "+r" (w0), [v] "+Q" (v->counter) \
: "x30", "memory"); : "r" (x1) \
: "x30" , ##cl); \
return w0; \
return w0; \
} }
ATOMIC_OP_SUB_RETURN(_relaxed, )
ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
ATOMIC_OP_SUB_RETURN(_release, l, "memory")
ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN
#undef __LL_SC_ATOMIC #undef __LL_SC_ATOMIC
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
...@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v) ...@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
: "x30"); : "x30");
} }
static inline long atomic64_add_return(long i, atomic64_t *v) #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
{ static inline long atomic64_add_return##name(long i, atomic64_t *v) \
register long x0 asm ("x0") = i; { \
register atomic64_t *x1 asm ("x1") = v; register long x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC64(add_return##name), \
/* LSE atomics */ \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return x0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN( ATOMIC64_OP_ADD_RETURN(_relaxed, )
/* LL/SC */ ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
" nop\n" ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
__LL_SC_ATOMIC64(add_return), ATOMIC64_OP_ADD_RETURN( , al, "memory")
/* LSE atomics */
" ldaddal %[i], x30, %[v]\n"
" add %[i], %[i], x30")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
return x0; #undef ATOMIC64_OP_ADD_RETURN
}
static inline void atomic64_and(long i, atomic64_t *v) static inline void atomic64_and(long i, atomic64_t *v)
{ {
...@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v) ...@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
: "x30"); : "x30");
} }
static inline long atomic64_sub_return(long i, atomic64_t *v) #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
{ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
register long x0 asm ("x0") = i; { \
register atomic64_t *x1 asm ("x1") = v; register long x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC64(sub_return##name) \
" nop", \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return x0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN( ATOMIC64_OP_SUB_RETURN(_relaxed, )
/* LL/SC */ ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
" nop\n" ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
__LL_SC_ATOMIC64(sub_return) ATOMIC64_OP_SUB_RETURN( , al, "memory")
" nop",
/* LSE atomics */
" neg %[i], %[i]\n"
" ldaddal %[i], x30, %[v]\n"
" add %[i], %[i], x30")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
return x0; #undef ATOMIC64_OP_SUB_RETURN
}
static inline long atomic64_dec_if_positive(atomic64_t *v) static inline long atomic64_dec_if_positive(atomic64_t *v)
{ {
...@@ -337,6 +368,14 @@ __CMPXCHG_CASE(w, b, 1, ) ...@@ -337,6 +368,14 @@ __CMPXCHG_CASE(w, b, 1, )
__CMPXCHG_CASE(w, h, 2, ) __CMPXCHG_CASE(w, h, 2, )
__CMPXCHG_CASE(w, , 4, ) __CMPXCHG_CASE(w, , 4, )
__CMPXCHG_CASE(x, , 8, ) __CMPXCHG_CASE(x, , 8, )
__CMPXCHG_CASE(w, b, acq_1, a, "memory")
__CMPXCHG_CASE(w, h, acq_2, a, "memory")
__CMPXCHG_CASE(w, , acq_4, a, "memory")
__CMPXCHG_CASE(x, , acq_8, a, "memory")
__CMPXCHG_CASE(w, b, rel_1, l, "memory")
__CMPXCHG_CASE(w, h, rel_2, l, "memory")
__CMPXCHG_CASE(w, , rel_4, l, "memory")
__CMPXCHG_CASE(x, , rel_8, l, "memory")
__CMPXCHG_CASE(w, b, mb_1, al, "memory") __CMPXCHG_CASE(w, b, mb_1, al, "memory")
__CMPXCHG_CASE(w, h, mb_2, al, "memory") __CMPXCHG_CASE(w, h, mb_2, al, "memory")
__CMPXCHG_CASE(w, , mb_4, al, "memory") __CMPXCHG_CASE(w, , mb_4, al, "memory")
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include <asm/cachetype.h> #include <asm/cachetype.h>
#define L1_CACHE_SHIFT 6 #define L1_CACHE_SHIFT 7
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
/* /*
......
...@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, ...@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *); extern void flush_dcache_page(struct page *);
/*
* Invalidate the whole instruction cache with "ic iallu", the local
* counterpart of the inner-shareable "ic ialluis" issued by
* __flush_icache_all(). The non-shareable DSB waits for the invalidation
* to complete and the ISB then resynchronises the instruction stream.
*/
static inline void __local_flush_icache_all(void)
{
asm("ic iallu");
dsb(nsh);
isb();
}
static inline void __flush_icache_all(void) static inline void __flush_icache_all(void)
{ {
asm("ic ialluis"); asm("ic ialluis");
......
...@@ -34,8 +34,8 @@ ...@@ -34,8 +34,8 @@
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
#define ICACHEF_ALIASING BIT(0) #define ICACHEF_ALIASING 0
#define ICACHEF_AIVIVT BIT(1) #define ICACHEF_AIVIVT 1
extern unsigned long __icache_flags; extern unsigned long __icache_flags;
......
...@@ -25,154 +25,151 @@ ...@@ -25,154 +25,151 @@
#include <asm/barrier.h> #include <asm/barrier.h>
#include <asm/lse.h> #include <asm/lse.h>
static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) /*
{ * We need separate acquire parameters for ll/sc and lse, since the full
unsigned long ret, tmp; * barrier case is generated as release+dmb for the former and
* acquire+release for the latter.
switch (size) { */
case 1: #define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \
asm volatile(ARM64_LSE_ATOMIC_INSN( static inline unsigned long __xchg_case_##name(unsigned long x, \
/* LL/SC */ volatile void *ptr) \
" prfm pstl1strm, %2\n" { \
"1: ldxrb %w0, %2\n" unsigned long ret, tmp; \
" stlxrb %w1, %w3, %2\n" \
" cbnz %w1, 1b\n" asm volatile(ARM64_LSE_ATOMIC_INSN( \
" dmb ish", /* LL/SC */ \
/* LSE atomics */ " prfm pstl1strm, %2\n" \
" nop\n" "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \
" nop\n" " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \
" swpalb %w3, %w0, %2\n" " cbnz %w1, 1b\n" \
" nop\n" " " #mb, \
" nop") /* LSE atomics */ \
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) " nop\n" \
: "r" (x) " nop\n" \
: "memory"); " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
break; " nop\n" \
case 2: " " #nop_lse) \
asm volatile(ARM64_LSE_ATOMIC_INSN( : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \
/* LL/SC */ : "r" (x) \
" prfm pstl1strm, %2\n" : cl); \
"1: ldxrh %w0, %2\n" \
" stlxrh %w1, %w3, %2\n" return ret; \
" cbnz %w1, 1b\n" }
" dmb ish",
/* LSE atomics */ __XCHG_CASE(w, b, 1, , , , , , )
" nop\n" __XCHG_CASE(w, h, 2, , , , , , )
" nop\n" __XCHG_CASE(w, , 4, , , , , , )
" swpalh %w3, %w0, %2\n" __XCHG_CASE( , , 8, , , , , , )
" nop\n" __XCHG_CASE(w, b, acq_1, , , a, a, , "memory")
" nop") __XCHG_CASE(w, h, acq_2, , , a, a, , "memory")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) __XCHG_CASE(w, , acq_4, , , a, a, , "memory")
: "r" (x) __XCHG_CASE( , , acq_8, , , a, a, , "memory")
: "memory"); __XCHG_CASE(w, b, rel_1, , , , , l, "memory")
break; __XCHG_CASE(w, h, rel_2, , , , , l, "memory")
case 4: __XCHG_CASE(w, , rel_4, , , , , l, "memory")
asm volatile(ARM64_LSE_ATOMIC_INSN( __XCHG_CASE( , , rel_8, , , , , l, "memory")
/* LL/SC */ __XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory")
" prfm pstl1strm, %2\n" __XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory")
"1: ldxr %w0, %2\n" __XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory")
" stlxr %w1, %w3, %2\n" __XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
" cbnz %w1, 1b\n"
" dmb ish", #undef __XCHG_CASE
/* LSE atomics */
" nop\n" #define __XCHG_GEN(sfx) \
" nop\n" static inline unsigned long __xchg##sfx(unsigned long x, \
" swpal %w3, %w0, %2\n" volatile void *ptr, \
" nop\n" int size) \
" nop") { \
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) switch (size) { \
: "r" (x) case 1: \
: "memory"); return __xchg_case##sfx##_1(x, ptr); \
break; case 2: \
case 8: return __xchg_case##sfx##_2(x, ptr); \
asm volatile(ARM64_LSE_ATOMIC_INSN( case 4: \
/* LL/SC */ return __xchg_case##sfx##_4(x, ptr); \
" prfm pstl1strm, %2\n" case 8: \
"1: ldxr %0, %2\n" return __xchg_case##sfx##_8(x, ptr); \
" stlxr %w1, %3, %2\n" default: \
" cbnz %w1, 1b\n" BUILD_BUG(); \
" dmb ish", } \
/* LSE atomics */ \
" nop\n" unreachable(); \
" nop\n"
" swpal %3, %0, %2\n"
" nop\n"
" nop")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
: "r" (x)
: "memory");
break;
default:
BUILD_BUG();
}
return ret;
} }
#define xchg(ptr,x) \ __XCHG_GEN()
__XCHG_GEN(_acq)
__XCHG_GEN(_rel)
__XCHG_GEN(_mb)
#undef __XCHG_GEN
#define __xchg_wrapper(sfx, ptr, x) \
({ \ ({ \
__typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \ __ret = (__typeof__(*(ptr))) \
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \ __ret; \
}) })
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, /* xchg */
unsigned long new, int size) #define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__)
{ #define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__)
switch (size) { #define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
case 1: #define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
return __cmpxchg_case_1(ptr, (u8)old, new);
case 2: #define __CMPXCHG_GEN(sfx) \
return __cmpxchg_case_2(ptr, (u16)old, new); static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
case 4: unsigned long old, \
return __cmpxchg_case_4(ptr, old, new); unsigned long new, \
case 8: int size) \
return __cmpxchg_case_8(ptr, old, new); { \
default: switch (size) { \
BUILD_BUG(); case 1: \
} return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
case 2: \
unreachable(); return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
case 4: \
return __cmpxchg_case##sfx##_4(ptr, old, new); \
case 8: \
return __cmpxchg_case##sfx##_8(ptr, old, new); \
default: \
BUILD_BUG(); \
} \
\
unreachable(); \
} }
static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __CMPXCHG_GEN()
unsigned long new, int size) __CMPXCHG_GEN(_acq)
{ __CMPXCHG_GEN(_rel)
switch (size) { __CMPXCHG_GEN(_mb)
case 1:
return __cmpxchg_case_mb_1(ptr, (u8)old, new);
case 2:
return __cmpxchg_case_mb_2(ptr, (u16)old, new);
case 4:
return __cmpxchg_case_mb_4(ptr, old, new);
case 8:
return __cmpxchg_case_mb_8(ptr, old, new);
default:
BUILD_BUG();
}
unreachable();
}
#define cmpxchg(ptr, o, n) \ #undef __CMPXCHG_GEN
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#define cmpxchg_local(ptr, o, n) \ #define __cmpxchg_wrapper(sfx, ptr, o, n) \
({ \ ({ \
__typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \ __ret = (__typeof__(*(ptr))) \
__cmpxchg((ptr), (unsigned long)(o), \ __cmpxchg##sfx((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \ (unsigned long)(n), sizeof(*(ptr))); \
__ret; \ __ret; \
}) })
/* cmpxchg */
#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__)
#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__)
#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__)
#define cmpxchg_local cmpxchg_relaxed
/* cmpxchg64 */
#define cmpxchg64_relaxed cmpxchg_relaxed
#define cmpxchg64_acquire cmpxchg_acquire
#define cmpxchg64_release cmpxchg_release
#define cmpxchg64 cmpxchg
#define cmpxchg64_local cmpxchg_local
/* cmpxchg_double */
#define system_has_cmpxchg_double() 1 #define system_has_cmpxchg_double() 1
#define __cmpxchg_double_check(ptr1, ptr2) \ #define __cmpxchg_double_check(ptr1, ptr2) \
...@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, ...@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \ __ret; \
}) })
/* this_cpu_cmpxchg */
#define _protect_cmpxchg_local(pcp, o, n) \ #define _protect_cmpxchg_local(pcp, o, n) \
({ \ ({ \
typeof(*raw_cpu_ptr(&(pcp))) __ret; \ typeof(*raw_cpu_ptr(&(pcp))) __ret; \
...@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, ...@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \ __ret; \
}) })
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n))
#endif /* __ASM_CMPXCHG_H */ #endif /* __ASM_CMPXCHG_H */
...@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); ...@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
void cpuinfo_store_cpu(void); void cpuinfo_store_cpu(void);
void __init cpuinfo_store_boot_cpu(void); void __init cpuinfo_store_boot_cpu(void);
void __init init_cpu_features(struct cpuinfo_arm64 *info);
void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
struct cpuinfo_arm64 *boot);
#endif /* __ASM_CPU_H */ #endif /* __ASM_CPU_H */
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#define __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H
#include <asm/hwcap.h> #include <asm/hwcap.h>
#include <asm/sysreg.h>
/* /*
* In the arm64 world (as in the ARM world), elf_hwcap is used both internally * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
...@@ -35,11 +36,42 @@ ...@@ -35,11 +36,42 @@
#include <linux/kernel.h> #include <linux/kernel.h>
/* CPU feature register tracking */
/*
* Policy attached to each feature field (the "type" member of
* struct arm64_ftr_bits), naming which value of the field counts as safe.
*/
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
FTR_HIGHER_SAFE,/* Bigger value is safe */
};
#define FTR_STRICT true /* SANITY check strict matching required */
#define FTR_NONSTRICT false /* SANITY check ignored */
/* Describes one bit field of a CPU feature register. */
struct arm64_ftr_bits {
bool strict; /* CPU Sanity check: strict matching required ? */
enum ftr_type type; /* which value of the field counts as safe */
u8 shift; /* bit position of the field's least significant bit */
u8 width; /* number of bits in the field */
s64 safe_val; /* safe value for discrete features */
};
/*
* struct arm64_ftr_reg - Feature register
* @sys_id Identifier of the register (presumably its system register
* encoding - confirm against the users of this field)
* @name Human-readable name (presumably of the register - confirm)
* @strict_mask Bits which should match across all CPUs for sanity.
* @sys_val Safe value across the CPUs (system view)
* @ftr_bits Descriptors of the fields in this register (array length and
* termination are not visible here - confirm)
*/
struct arm64_ftr_reg {
u32 sys_id;
const char *name;
u64 strict_mask;
u64 sys_val;
struct arm64_ftr_bits *ftr_bits;
};
struct arm64_cpu_capabilities { struct arm64_cpu_capabilities {
const char *desc; const char *desc;
u16 capability; u16 capability;
bool (*matches)(const struct arm64_cpu_capabilities *); bool (*matches)(const struct arm64_cpu_capabilities *);
void (*enable)(void); void (*enable)(void *); /* Called on all active CPUs */
union { union {
struct { /* To be used for erratum handling only */ struct { /* To be used for erratum handling only */
u32 midr_model; u32 midr_model;
...@@ -47,8 +79,11 @@ struct arm64_cpu_capabilities { ...@@ -47,8 +79,11 @@ struct arm64_cpu_capabilities {
}; };
struct { /* Feature register checking */ struct { /* Feature register checking */
u32 sys_reg;
int field_pos; int field_pos;
int min_field_value; int min_field_value;
int hwcap_type;
unsigned long hwcap;
}; };
}; };
}; };
...@@ -76,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num) ...@@ -76,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num)
__set_bit(num, cpu_hwcaps); __set_bit(num, cpu_hwcaps);
} }
static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, static inline int __attribute_const__
int field) cpuid_feature_extract_field_width(u64 features, int field, int width)
{
return (s64)(features << (64 - width - field)) >> (64 - width);
}
/*
* Extract the 4-bit field whose least significant bit is at position
* @field of @features. This is cpuid_feature_extract_field_width() with the
* width fixed at 4, so the field is returned sign-extended.
*/
static inline int __attribute_const__
cpuid_feature_extract_field(u64 features, int field)
{
return cpuid_feature_extract_field_width(features, field, 4);
}
/*
* Return a mask with bits [shift, shift + width - 1] set, i.e. the bits
* occupied by the field that @ftrp describes.
*/
static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
/*
* Extract the field described by @ftrp from the register value @val,
* sign-extended. The extraction helper returns int, so the result passes
* through int before being widened to s64.
*/
static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
{
return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{ {
return (s64)(features << (64 - 4 - field)) >> (64 - 4); return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
} }
void __init setup_cpu_features(void);
void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info); const char *info);
void check_local_cpu_errata(void); void check_local_cpu_errata(void);
void check_local_cpu_features(void);
bool cpu_supports_mixed_endian_el0(void); #ifdef CONFIG_HOTPLUG_CPU
bool system_supports_mixed_endian_el0(void); void verify_local_cpu_capabilities(void);
#else
static inline void verify_local_cpu_capabilities(void)
{
}
#endif
u64 read_system_reg(u32 id);
/*
* Mixed-endian EL0 check on the ID_AA64MMFR0_EL1 value obtained through
* read_cpuid() - presumably the copy of the CPU executing this code
* (confirm), in contrast to system_supports_mixed_endian_el0().
*/
static inline bool cpu_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
/*
* Mixed-endian EL0 check on the value read_system_reg() returns for
* SYS_ID_AA64MMFR0_EL1 - presumably the sanitised system-wide view of the
* register rather than a single CPU's copy (confirm against
* read_system_reg()).
*/
static inline bool system_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
}
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
......
...@@ -75,15 +75,6 @@ ...@@ -75,15 +75,6 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \
(((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
#define ID_AA64MMFR0_BIGEND_SHIFT 8
#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT)
#define ID_AA64MMFR0_BIGEND(mmfr0) \
(((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
/* /*
...@@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) ...@@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{ {
return read_cpuid(CTR_EL0); return read_cpuid(CTR_EL0);
} }
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
}
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif #endif
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/sizes.h>
#include <asm/boot.h> #include <asm/boot.h>
#include <asm/page.h> #include <asm/page.h>
...@@ -55,11 +56,7 @@ enum fixed_addresses { ...@@ -55,11 +56,7 @@ enum fixed_addresses {
* Temporary boot-time mappings, used by early_ioremap(), * Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional. * before ioremap() is functional.
*/ */
#ifdef CONFIG_ARM64_64K_PAGES #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
#define NR_FIX_BTMAPS 4
#else
#define NR_FIX_BTMAPS 64
#endif
#define FIX_BTMAPS_SLOTS 7 #define FIX_BTMAPS_SLOTS 7
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#define __ASM_HW_BREAKPOINT_H #define __ASM_HW_BREAKPOINT_H
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/cpufeature.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp; ...@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
/* Determine number of BRP registers available. */ /* Determine number of BRP registers available. */
static inline int get_num_brps(void) static inline int get_num_brps(void)
{ {
return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; return 1 +
cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_BRPS_SHIFT);
} }
/* Determine number of WRP registers available. */ /* Determine number of WRP registers available. */
static inline int get_num_wrps(void) static inline int get_num_wrps(void)
{ {
return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; return 1 +
cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_WRPS_SHIFT);
} }
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -52,6 +52,14 @@ ...@@ -52,6 +52,14 @@
extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
#endif #endif
/*
* Kinds of hwcap word. NOTE(review): these look like the values stored in
* arm64_cpu_capabilities.hwcap_type to choose between elf_hwcap,
* compat_elf_hwcap and compat_elf_hwcap2 - confirm against the users.
* Numbering starts at 1; the compat entries exist only with CONFIG_COMPAT.
*/
enum {
CAP_HWCAP = 1,
#ifdef CONFIG_COMPAT
CAP_COMPAT_HWCAP,
CAP_COMPAT_HWCAP2,
#endif
};
extern unsigned long elf_hwcap; extern unsigned long elf_hwcap;
#endif #endif
#endif #endif
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
struct pt_regs; struct pt_regs;
extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
static inline void acpi_irq_init(void) static inline void acpi_irq_init(void)
......
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
#ifndef __ASSEMBLY__
#ifdef CONFIG_KASAN
#include <linux/linkage.h>
#include <asm/memory.h>
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
*/
#define KASAN_SHADOW_START (VA_START)
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
/*
* This value is used to map an address to the corresponding shadow
* address by the following formula:
* shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
*
* The (1ULL << 61) shadow addresses in the half-open range
* [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover the whole 64-bit virtual
* address space, so KASAN_SHADOW_OFFSET has to satisfy the following
* equation:
* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
*/
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
void kasan_init(void);
asmlinkage void kasan_early_init(void);
#else
/* Empty stand-in so callers need no #ifdef when CONFIG_KASAN is disabled. */
static inline void kasan_init(void) { }
#endif /* CONFIG_KASAN */
#endif /* __ASSEMBLY__ */
#endif /* __ASM_KASAN_H */
/*
* Kernel page table mapping
*
* Copyright (C) 2015 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
/*
* The linear mapping and the start of memory are both 2M aligned (per
* the arm64 booting.txt requirements). Hence we can use section mapping
* with 4K (section size = 2M) but not with 16K (section size = 32M) or
* 64K (section size = 512M).
*/
#ifdef CONFIG_ARM64_4K_PAGES
#define ARM64_SWAPPER_USES_SECTION_MAPS 1
#else
#define ARM64_SWAPPER_USES_SECTION_MAPS 0
#endif
/*
* The idmap and swapper page tables need some space reserved in the kernel
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
* for more information). Note that the number of ID map translation levels
* could be increased on the fly if system RAM is out of reach for the default
* VA range, so pages required to map highest possible PA are reserved in all
* cases.
*/
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
/* Initial memory map size */
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
#define SWAPPER_BLOCK_SIZE SECTION_SIZE
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
#else
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
#define SWAPPER_BLOCK_SIZE PAGE_SIZE
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
#endif
/* The size of the initial kernel direct mapping */
#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
/*
* Initial memory map attributes.
*/
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
#else
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
...@@ -42,12 +42,14 @@ ...@@ -42,12 +42,14 @@
* PAGE_OFFSET - the virtual address of the start of the kernel image (top * PAGE_OFFSET - the virtual address of the start of the kernel image (top
* (VA_BITS - 1)) * (VA_BITS - 1))
* VA_BITS - the maximum number of bits for virtual addresses. * VA_BITS - the maximum number of bits for virtual addresses.
* VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task. * TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
* The module space lives between the addresses given by TASK_SIZE * The module space lives between the addresses given by TASK_SIZE
* and PAGE_OFFSET - it must be within 128MB of the kernel text. * and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/ */
#define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) << VA_BITS)
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET) #define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M) #define MODULES_VADDR (MODULES_END - SZ_64M)
...@@ -68,10 +70,6 @@ ...@@ -68,10 +70,6 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
#if TASK_SIZE_64 > MODULES_VADDR
#error Top of 64-bit user space clashes with start of module space
#endif
/* /*
* Physical vs virtual RAM address space conversion. These are * Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h * private definitions which should NOT be used outside memory.h
......
...@@ -17,15 +17,16 @@ ...@@ -17,15 +17,16 @@
#define __ASM_MMU_H #define __ASM_MMU_H
typedef struct { typedef struct {
unsigned int id; atomic64_t id;
raw_spinlock_t id_lock;
void *vdso; void *vdso;
} mm_context_t; } mm_context_t;
#define INIT_MM_CONTEXT(name) \ /*
.context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), * This macro is only used by the TLBI code, which cannot race with an
* ASID change and therefore doesn't need to reload the counter using
#define ASID(mm) ((mm)->context.id & 0xffff) * atomic64_read.
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
extern void paging_init(void); extern void paging_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
......
...@@ -28,13 +28,6 @@ ...@@ -28,13 +28,6 @@
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#define MAX_ASID_BITS 16
extern unsigned int cpu_last_asid;
void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void __new_context(struct mm_struct *mm);
#ifdef CONFIG_PID_IN_CONTEXTIDR #ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next) static inline void contextidr_thread_switch(struct task_struct *next)
{ {
...@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void) ...@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void)
unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
} }
static inline void __cpu_set_tcr_t0sz(u64 t0sz) /*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
static inline void cpu_set_default_tcr_t0sz(void)
{ {
unsigned long tcr; unsigned long tcr;
if (__cpu_uses_extended_idmap()) if (!__cpu_uses_extended_idmap())
return;
asm volatile ( asm volatile (
" mrs %0, tcr_el1 ;" " mrs %0, tcr_el1 ;"
" bfi %0, %1, %2, %3 ;" " bfi %0, %1, %2, %3 ;"
" msr tcr_el1, %0 ;" " msr tcr_el1, %0 ;"
" isb" " isb"
: "=&r" (tcr) : "=&r" (tcr)
: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
} }
/* /*
* Set TCR.T0SZ to the value appropriate for activating the identity map. * It would be nice to return ASIDs back to the allocator, but unfortunately
* that introduces a race with a generation rollover where we could erroneously
* free an ASID allocated in a future generation. We could workaround this by
* freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
* but we'd then need to make sure that we didn't dirty any TLBs afterwards.
* Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
* take CPU migration into account.
*/ */
static inline void cpu_set_idmap_tcr_t0sz(void)
{
__cpu_set_tcr_t0sz(idmap_t0sz);
}
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
static inline void cpu_set_default_tcr_t0sz(void)
{
__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
}
static inline void switch_new_context(struct mm_struct *mm)
{
unsigned long flags;
__new_context(mm);
local_irq_save(flags);
cpu_switch_mm(mm->pgd, mm);
local_irq_restore(flags);
}
static inline void check_and_switch_context(struct mm_struct *mm,
struct task_struct *tsk)
{
/*
* Required during context switch to avoid speculative page table
* walking with the wrong TTBR.
*/
cpu_set_reserved_ttbr0();
if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
/*
* The ASID is from the current generation, just switch to the
* new pgd. This condition is only true for calls from
* context_switch() and interrupts are already disabled.
*/
cpu_switch_mm(mm->pgd, mm);
else if (irqs_disabled())
/*
* Defer the new ASID allocation until after the context
* switch critical region since __new_context() cannot be
* called with interrupts disabled.
*/
set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
else
/*
* That is a direct call to switch_mm() or activate_mm() with
* interrupts enabled and a new context.
*/
switch_new_context(mm);
}
#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0)
#define destroy_context(mm) do { } while(0) #define destroy_context(mm) do { } while(0)
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
#define finish_arch_post_lock_switch \ #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; })
finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void)
{
if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
struct mm_struct *mm = current->mm;
unsigned long flags;
__new_context(mm);
local_irq_save(flags);
cpu_switch_mm(mm->pgd, mm);
local_irq_restore(flags);
}
}
/* /*
* This is called when "tsk" is about to enter lazy TLB mode. * This is called when "tsk" is about to enter lazy TLB mode.
...@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, ...@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{ {
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
if (prev == next)
return;
/* /*
* init_mm.pgd does not contain any user mappings and it is always * init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0. * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
...@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, ...@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
return; return;
} }
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, cpu);
check_and_switch_context(next, tsk);
} }
#define deactivate_mm(tsk,mm) do { } while (0) #define deactivate_mm(tsk,mm) do { } while (0)
......
...@@ -20,31 +20,22 @@ ...@@ -20,31 +20,22 @@
#define __ASM_PAGE_H #define __ASM_PAGE_H
/* PAGE_SHIFT determines the page size */ /* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
#ifdef CONFIG_ARM64_64K_PAGES #ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16 #define PAGE_SHIFT 16
#define CONT_SHIFT 5
#elif defined(CONFIG_ARM64_16K_PAGES)
#define PAGE_SHIFT 14
#define CONT_SHIFT 7
#else #else
#define PAGE_SHIFT 12 #define PAGE_SHIFT 12
#define CONT_SHIFT 4
#endif #endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_MASK (~(PAGE_SIZE-1))
/* #define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
* The idmap and swapper page tables need some space reserved in the kernel #define CONT_MASK (~(CONT_SIZE-1))
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
* for more information). Note that the number of ID map translation levels
* could be increased on the fly if system RAM is out of reach for the default
* VA range, so 3 pages are reserved in all cases.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#define check_pgt_cache() do { } while (0) #define check_pgt_cache() do { } while (0)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2 #if CONFIG_PGTABLE_LEVELS > 2
......
...@@ -16,13 +16,46 @@ ...@@ -16,13 +16,46 @@
#ifndef __ASM_PGTABLE_HWDEF_H #ifndef __ASM_PGTABLE_HWDEF_H
#define __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
* bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
*
* levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
*
* where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
* We cannot include linux/kernel.h which defines DIV_ROUND_UP here
* due to build issues. So we open code DIV_ROUND_UP here:
*
* ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
*
* which gets simplified as :
*/
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
* Size mapped by an entry at level n ( 0 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 4. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
* ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
*
* Rearranging it a bit we get :
* (4 - n) * (PAGE_SHIFT - 3) + 3
*/
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) #define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
/* /*
* PMD_SHIFT determines the size a level 2 page table entry can map. * PMD_SHIFT determines the size a level 2 page table entry can map.
*/ */
#if CONFIG_PGTABLE_LEVELS > 2 #if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1)) #define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD PTRS_PER_PTE #define PTRS_PER_PMD PTRS_PER_PTE
...@@ -32,7 +65,7 @@ ...@@ -32,7 +65,7 @@
* PUD_SHIFT determines the size a level 1 page table entry can map. * PUD_SHIFT determines the size a level 1 page table entry can map.
*/ */
#if CONFIG_PGTABLE_LEVELS > 3 #if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1)) #define PUD_MASK (~(PUD_SIZE-1))
#define PTRS_PER_PUD PTRS_PER_PTE #define PTRS_PER_PUD PTRS_PER_PTE
...@@ -42,7 +75,7 @@ ...@@ -42,7 +75,7 @@
* PGDIR_SHIFT determines the size a top-level page table entry can map * PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2). * (depending on the configuration, this level can be 0, 1 or 2).
*/ */
#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
...@@ -54,6 +87,13 @@ ...@@ -54,6 +87,13 @@
#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) #define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
#define SECTION_MASK (~(SECTION_SIZE-1)) #define SECTION_MASK (~(SECTION_SIZE-1))
/*
* Contiguous page definitions.
*/
#define CONT_PTES (_AC(1, UL) << CONT_SHIFT)
/* The numerical offset of the PTE within a range of CONT_PTES */
#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
/* /*
* Hardware page table definitions. * Hardware page table definitions.
* *
...@@ -83,6 +123,7 @@ ...@@ -83,6 +123,7 @@
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
...@@ -105,6 +146,7 @@ ...@@ -105,6 +146,7 @@
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
......
...@@ -41,7 +41,14 @@ ...@@ -41,7 +41,14 @@
* fixed mappings and modules * fixed mappings and modules
*/ */
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS)
#ifndef CONFIG_KASAN
#define VMALLOC_START (VA_START)
#else
#include <asm/kasan.h>
#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
#endif
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
...@@ -74,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); ...@@ -74,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) #define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
...@@ -142,6 +150,7 @@ extern struct page *empty_zero_page; ...@@ -142,6 +150,7 @@ extern struct page *empty_zero_page;
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#ifdef CONFIG_ARM64_HW_AFDBM #ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
...@@ -204,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte) ...@@ -204,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
} }
/*
 * Return a copy of @pte with the PTE_CONT (contiguous range) bit set.
 * The caller's entry is not modified; @pte is passed by value.
 */
static inline pte_t pte_mkcont(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_CONT));

	return pte;
}
/*
 * Return a copy of @pte with the PTE_CONT (contiguous range) bit cleared.
 * The caller's entry is not modified; @pte is passed by value.
 */
static inline pte_t pte_mknoncont(pte_t pte)
{
	pte = clear_pte_bit(pte, __pgprot(PTE_CONT));

	return pte;
}
static inline void set_pte(pte_t *ptep, pte_t pte) static inline void set_pte(pte_t *ptep, pte_t pte)
{ {
*ptep = pte; *ptep = pte;
...@@ -648,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, ...@@ -648,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep) unsigned long addr, pte_t *ptep)
{ {
/* /*
* set_pte() does not have a DSB for user mappings, so make sure that * We don't do anything here, so there's a very small chance of
* the page table write is visible. * us retaking a user fault which we just fixed up. The alternative
* is doing a dsb(ishst), but that penalises the fastpath.
*/ */
dsb(ishst);
} }
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
#define kc_offset_to_vaddr(o) ((o) | VA_START)
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */ #endif /* __ASM_PGTABLE_H */
/*
* Based on arch/arm/include/asm/pmu.h
*
* Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PMU_H
#define __ASM_PMU_H
#ifdef CONFIG_HW_PERF_EVENTS
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*
* The events that are active on the PMU for the given index.
*/
struct perf_event **events;
/*
* A 1 bit for an index indicates that the counter is being used for
* an event. A 0 means that the counter can be used.
*/
unsigned long *used_mask;
/*
* Hardware lock to serialize accesses to PMU registers. Needed for the
* read/modify/write sequences.
*/
raw_spinlock_t pmu_lock;
};
/*
* ARM PMU descriptor. Embeds the generic struct pmu as its 'pmu' member
* so that to_arm_pmu() can recover the enclosing arm_pmu from a
* struct pmu pointer with container_of().
*/
struct arm_pmu {
/* Embedded generic PMU; must stay a direct member for to_arm_pmu(). */
struct pmu pmu;
cpumask_t active_irqs;
int *irq_affinity;
const char *name;
/* Interrupt handler callback; receives the IRQ number and a dev cookie. */
irqreturn_t (*handle_irq)(int irq_num, void *dev);
/* Per-event enable/disable callbacks; idx is a counter index. */
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
/* NOTE(review): presumably picks a free counter for hwc using hw_events->used_mask - confirm. */
int (*get_event_idx)(struct pmu_hw_events *hw_events,
struct hw_perf_event *hwc);
int (*set_event_filter)(struct hw_perf_event *evt,
struct perf_event_attr *attr);
/* Counter accessors: values are 32 bits wide, selected by index. */
u32 (*read_counter)(int idx);
void (*write_counter)(int idx, u32 val);
/* Whole-PMU control callbacks. */
void (*start)(void);
void (*stop)(void);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
u64 max_period;
struct platform_device *plat_device;
/* Returns the pmu_hw_events bookkeeping to use; takes no arguments. */
struct pmu_hw_events *(*get_hw_events)(void);
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
u64 armpmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx);
int armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx);
#endif /* CONFIG_HW_PERF_EVENTS */
#endif /* __ASM_PMU_H */
...@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x) ...@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
#endif #endif
void cpu_enable_pan(void); void cpu_enable_pan(void *__unused);
#endif /* __ASM_PROCESSOR_H */ #endif /* __ASM_PROCESSOR_H */
...@@ -83,14 +83,14 @@ ...@@ -83,14 +83,14 @@
#define compat_sp regs[13] #define compat_sp regs[13]
#define compat_lr regs[14] #define compat_lr regs[14]
#define compat_sp_hyp regs[15] #define compat_sp_hyp regs[15]
#define compat_sp_irq regs[16] #define compat_lr_irq regs[16]
#define compat_lr_irq regs[17] #define compat_sp_irq regs[17]
#define compat_sp_svc regs[18] #define compat_lr_svc regs[18]
#define compat_lr_svc regs[19] #define compat_sp_svc regs[19]
#define compat_sp_abt regs[20] #define compat_lr_abt regs[20]
#define compat_lr_abt regs[21] #define compat_sp_abt regs[21]
#define compat_sp_und regs[22] #define compat_lr_und regs[22]
#define compat_lr_und regs[23] #define compat_sp_und regs[23]
#define compat_r8_fiq regs[24] #define compat_r8_fiq regs[24]
#define compat_r9_fiq regs[25] #define compat_r9_fiq regs[25]
#define compat_r10_fiq regs[26] #define compat_r10_fiq regs[26]
......
...@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t); ...@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t); extern void *memcpy(void *, const void *, __kernel_size_t);
extern void *__memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *, const void *, __kernel_size_t); extern void *memmove(void *, const void *, __kernel_size_t);
extern void *__memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMCHR #define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *, int, __kernel_size_t); extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t); extern void *memset(void *, int, __kernel_size_t);
extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP #define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t); extern int memcmp(const void *, const void *, size_t);
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
/*
* For files that are not instrumented (e.g. mm/slub.c) we
* should use the non-instrumented versions of the mem* functions.
*/
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memmove(dst, src, len) __memmove(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
#endif
#endif #endif
...@@ -22,9 +22,6 @@ ...@@ -22,9 +22,6 @@
#include <asm/opcodes.h> #include <asm/opcodes.h>
#define SCTLR_EL1_CP15BEN (0x1 << 5)
#define SCTLR_EL1_SED (0x1 << 8)
/* /*
* ARMv8 ARM reserves the following encoding for system registers: * ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
...@@ -38,12 +35,162 @@ ...@@ -38,12 +35,162 @@
#define sys_reg(op0, op1, crn, crm, op2) \ #define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define SCTLR_EL1_SPAN (1 << 23)
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
(!!x)<<8 | 0x1f) (!!x)<<8 | 0x1f)
/* SCTLR_EL1 */
#define SCTLR_EL1_CP15BEN (0x1 << 5)
#define SCTLR_EL1_SED (0x1 << 8)
#define SCTLR_EL1_SPAN (0x1 << 23)
/* id_aa64isar0 */
#define ID_AA64ISAR0_RDM_SHIFT 28
#define ID_AA64ISAR0_ATOMICS_SHIFT 20
#define ID_AA64ISAR0_CRC32_SHIFT 16
#define ID_AA64ISAR0_SHA2_SHIFT 12
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64pfr0 */
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
#define ID_AA64PFR0_EL3_SHIFT 12
#define ID_AA64PFR0_EL2_SHIFT 8
#define ID_AA64PFR0_EL1_SHIFT 4
#define ID_AA64PFR0_EL0_SHIFT 0
#define ID_AA64PFR0_FP_NI 0xf
#define ID_AA64PFR0_FP_SUPPORTED 0x0
#define ID_AA64PFR0_ASIMD_NI 0xf
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
#define ID_AA64MMFR0_SNSMEM_SHIFT 12
#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
#define ID_AA64MMFR0_ASID_SHIFT 4
#define ID_AA64MMFR0_PARANGE_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
#define ID_AA64MMFR1_LOR_SHIFT 16
#define ID_AA64MMFR1_HPD_SHIFT 12
#define ID_AA64MMFR1_VHE_SHIFT 8
#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
#define ID_AA64MMFR1_HADBS_SHIFT 0
/* id_aa64dfr0 */
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
#define ID_AA64DFR0_BRPS_SHIFT 12
#define ID_AA64DFR0_PMUVER_SHIFT 8
#define ID_AA64DFR0_TRACEVER_SHIFT 4
#define ID_AA64DFR0_DEBUGVER_SHIFT 0
#define ID_ISAR5_RDM_SHIFT 24
#define ID_ISAR5_CRC32_SHIFT 16
#define ID_ISAR5_SHA2_SHIFT 12
#define ID_ISAR5_SHA1_SHIFT 8
#define ID_ISAR5_AES_SHIFT 4
#define ID_ISAR5_SEVL_SHIFT 0
#define MVFR0_FPROUND_SHIFT 28
#define MVFR0_FPSHVEC_SHIFT 24
#define MVFR0_FPSQRT_SHIFT 20
#define MVFR0_FPDIVIDE_SHIFT 16
#define MVFR0_FPTRAP_SHIFT 12
#define MVFR0_FPDP_SHIFT 8
#define MVFR0_FPSP_SHIFT 4
#define MVFR0_SIMD_SHIFT 0
#define MVFR1_SIMDFMAC_SHIFT 28
#define MVFR1_FPHP_SHIFT 24
#define MVFR1_SIMDHP_SHIFT 20
#define MVFR1_SIMDSP_SHIFT 16
#define MVFR1_SIMDINT_SHIFT 12
#define MVFR1_SIMDLS_SHIFT 8
#define MVFR1_FPDNAN_SHIFT 4
#define MVFR1_FPFTZ_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED
#elif defined(CONFIG_ARM64_64K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
#endif
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
......
...@@ -23,8 +23,10 @@ ...@@ -23,8 +23,10 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#ifndef CONFIG_ARM64_64K_PAGES #ifdef CONFIG_ARM64_4K_PAGES
#define THREAD_SIZE_ORDER 2 #define THREAD_SIZE_ORDER 2
#elif defined(CONFIG_ARM64_16K_PAGES)
#define THREAD_SIZE_ORDER 0
#endif #endif
#define THREAD_SIZE 16384 #define THREAD_SIZE 16384
...@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void) ...@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 20 #define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21 #define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */ #define TIF_32BIT 22 /* 32bit process */
#define TIF_SWITCH_MM 23 /* deferred switch_mm */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
......
...@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table) ...@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
static inline void tlb_flush(struct mmu_gather *tlb) static inline void tlb_flush(struct mmu_gather *tlb)
{ {
if (tlb->fullmm) {
flush_tlb_mm(tlb->mm);
} else {
struct vm_area_struct vma = { .vm_mm = tlb->mm, }; struct vm_area_struct vma = { .vm_mm = tlb->mm, };
/*
* The ASID allocator will either invalidate the ASID or mark
* it as used.
*/
if (tlb->fullmm)
return;
/* /*
* The intermediate page table levels are already handled by * The intermediate page table levels are already handled by
* the __(pte|pmd|pud)_free_tlb() functions, so last level * the __(pte|pmd|pud)_free_tlb() functions, so last level
* TLBI is sufficient here. * TLBI is sufficient here.
*/ */
__flush_tlb_range(&vma, tlb->start, tlb->end, true); __flush_tlb_range(&vma, tlb->start, tlb->end, true);
}
} }
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
......
...@@ -63,6 +63,14 @@ ...@@ -63,6 +63,14 @@
* only require the D-TLB to be invalidated. * only require the D-TLB to be invalidated.
* - kaddr - Kernel virtual memory address * - kaddr - Kernel virtual memory address
*/ */
/*
* Invalidate all stage 1 EL1 TLB entries on the calling CPU only.
*
* Both barriers and the TLBI use the non-shareable forms (nshst/nsh and
* "vmalle1" without the "is" suffix), so nothing is broadcast to other
* CPUs.
*/
static inline void local_flush_tlb_all(void)
{
/* Make prior page-table stores visible before invalidating. */
dsb(nshst);
asm("tlbi vmalle1");
/* Wait for the invalidation to complete on this CPU ... */
dsb(nsh);
/* ... and resynchronise the instruction stream. */
isb();
}
static inline void flush_tlb_all(void) static inline void flush_tlb_all(void)
{ {
dsb(ishst); dsb(ishst);
...@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void) ...@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void)
static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_mm(struct mm_struct *mm)
{ {
unsigned long asid = (unsigned long)ASID(mm) << 48; unsigned long asid = ASID(mm) << 48;
dsb(ishst); dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid)); asm("tlbi aside1is, %0" : : "r" (asid));
...@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) ...@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma, static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr) unsigned long uaddr)
{ {
unsigned long addr = uaddr >> 12 | unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
((unsigned long)ASID(vma->vm_mm) << 48);
dsb(ishst); dsb(ishst);
asm("tlbi vale1is, %0" : : "r" (addr)); asm("tlbi vale1is, %0" : : "r" (addr));
...@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, ...@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, unsigned long start, unsigned long end,
bool last_level) bool last_level)
{ {
unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long asid = ASID(vma->vm_mm) << 48;
unsigned long addr; unsigned long addr;
if ((end - start) > MAX_TLB_RANGE) { if ((end - start) > MAX_TLB_RANGE) {
...@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end ...@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
static inline void __flush_tlb_pgtable(struct mm_struct *mm, static inline void __flush_tlb_pgtable(struct mm_struct *mm,
unsigned long uaddr) unsigned long uaddr)
{ {
unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr)); asm("tlbi vae1is, %0" : : "r" (addr));
dsb(ish); dsb(ish);
} }
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_armv8_deprecated.o := -I$(src)
CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_ftrace.o = -pg
...@@ -20,6 +19,12 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ ...@@ -20,6 +19,12 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \ cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smp.o smp_spin_table.o topology.o
extra-$(CONFIG_EFI) := efi-entry.o
OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o entry32.o \ sys_compat.o entry32.o \
../../arm/kernel/opcodes.o ../../arm/kernel/opcodes.o
...@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o ...@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o arm64-obj-$(CONFIG_KGDB) += kgdb.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o arm64-obj-$(CONFIG_ACPI) += acpi.o
...@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o ...@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
obj-y += $(arm64-obj-y) vdso/ obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m) obj-m += $(arm64-obj-m)
head-y := head.o head-y := head.o
extra-y := $(head-y) vmlinux.lds extra-y += $(head-y) vmlinux.lds
# vDSO - this must be built first to generate the symbol offsets # vDSO - this must be built first to generate the symbol offsets
$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
......
...@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen); ...@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memcmp);
......
...@@ -60,7 +60,7 @@ int main(void) ...@@ -60,7 +60,7 @@ int main(void)
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK(); BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK(); BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
......
...@@ -97,5 +97,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ...@@ -97,5 +97,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
void check_local_cpu_errata(void) void check_local_cpu_errata(void)
{ {
check_cpu_capabilities(arm64_errata, "enabling workaround for"); update_cpu_capabilities(arm64_errata, "enabling workaround for");
} }
This diff is collapsed.
...@@ -24,8 +24,11 @@ ...@@ -24,8 +24,11 @@
#include <linux/bug.h> #include <linux/bug.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/personality.h>
#include <linux/preempt.h> #include <linux/preempt.h>
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/smp.h> #include <linux/smp.h>
/* /*
...@@ -35,7 +38,6 @@ ...@@ -35,7 +38,6 @@
*/ */
DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
static struct cpuinfo_arm64 boot_cpu_data; static struct cpuinfo_arm64 boot_cpu_data;
static bool mixed_endian_el0 = true;
static char *icache_policy_str[] = { static char *icache_policy_str[] = {
[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
...@@ -46,157 +48,148 @@ static char *icache_policy_str[] = { ...@@ -46,157 +48,148 @@ static char *icache_policy_str[] = {
unsigned long __icache_flags; unsigned long __icache_flags;
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static const char *const hwcap_str[] = {
"fp",
"asimd",
"evtstrm",
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
"atomics",
NULL
};
#ifdef CONFIG_COMPAT
static const char *const compat_hwcap_str[] = {
"swp",
"half",
"thumb",
"26bit",
"fastmult",
"fpa",
"vfp",
"edsp",
"java",
"iwmmxt",
"crunch",
"thumbee",
"neon",
"vfpv3",
"vfpv3d16",
"tls",
"vfpv4",
"idiva",
"idivt",
"vfpd32",
"lpae",
"evtstrm"
};
static const char *const compat_hwcap2_str[] = {
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
NULL
};
#endif /* CONFIG_COMPAT */
static int c_show(struct seq_file *m, void *v)
{ {
unsigned int cpu = smp_processor_id(); int i, j;
u32 l1ip = CTR_L1IP(info->reg_ctr);
for_each_online_cpu(i) {
struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
u32 midr = cpuinfo->reg_midr;
if (l1ip != ICACHE_POLICY_PIPT) {
/* /*
* VIPT caches are non-aliasing if the VA always equals the PA * glibc reads /proc/cpuinfo to determine the number of
* in all bit positions that are covered by the index. This is * online processors, looking for lines beginning with
* the case if the size of a way (# of sets * line size) does * "processor". Give glibc what it expects.
* not exceed PAGE_SIZE.
*/ */
u32 waysize = icache_get_numsets() * icache_get_linesize(); seq_printf(m, "processor\t: %d\n", i);
if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) /*
set_bit(ICACHEF_ALIASING, &__icache_flags); * Dump out the common processor features in a single line.
* Userspace should read the hwcaps with getauxval(AT_HWCAP)
* rather than attempting to parse this, but there's a body of
* software which does already (at least for 32-bit).
*/
seq_puts(m, "Features\t:");
if (personality(current->personality) == PER_LINUX32) {
#ifdef CONFIG_COMPAT
for (j = 0; compat_hwcap_str[j]; j++)
if (compat_elf_hwcap & (1 << j))
seq_printf(m, " %s", compat_hwcap_str[j]);
for (j = 0; compat_hwcap2_str[j]; j++)
if (compat_elf_hwcap2 & (1 << j))
seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
} else {
for (j = 0; hwcap_str[j]; j++)
if (elf_hwcap & (1 << j))
seq_printf(m, " %s", hwcap_str[j]);
}
seq_puts(m, "\n");
seq_printf(m, "CPU implementer\t: 0x%02x\n",
MIDR_IMPLEMENTOR(midr));
seq_printf(m, "CPU architecture: 8\n");
seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
} }
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
bool cpu_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
bool system_supports_mixed_endian_el0(void) return 0;
{
return mixed_endian_el0;
} }
static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) static void *c_start(struct seq_file *m, loff_t *pos)
{ {
mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); return *pos < 1 ? (void *)1 : NULL;
} }
static void update_cpu_features(struct cpuinfo_arm64 *info) static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{ {
update_mixed_endian_el0_support(info); ++*pos;
return NULL;
} }
static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) static void c_stop(struct seq_file *m, void *v)
{ {
if ((boot & mask) == (cur & mask))
return 0;
pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
name, (unsigned long)boot, cpu, (unsigned long)cur);
return 1;
} }
#define CHECK_MASK(field, mask, boot, cur, cpu) \ const struct seq_operations cpuinfo_op = {
check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) .start = c_start,
.next = c_next,
#define CHECK(field, boot, cur, cpu) \ .stop = c_stop,
CHECK_MASK(field, ~0ULL, boot, cur, cpu) .show = c_show
};
/* static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
* Verify that CPUs don't have unexpected differences that will cause problems.
*/
static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
{ {
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
struct cpuinfo_arm64 *boot = &boot_cpu_data; u32 l1ip = CTR_L1IP(info->reg_ctr);
unsigned int diff = 0;
/*
* The kernel can handle differing I-cache policies, but otherwise
* caches should look identical. Userspace JITs will make use of
* *minLine.
*/
diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
/*
* Userspace may perform DC ZVA instructions. Mismatched block sizes
* could result in too much or too little memory being zeroed if a
* process is preempted and migrated between CPUs.
*/
diff |= CHECK(dczid, boot, cur, cpu);
/* If different, timekeeping will be broken (especially with KVM) */
diff |= CHECK(cntfrq, boot, cur, cpu);
/*
* The kernel uses self-hosted debug features and expects CPUs to
* support identical debug features. We presently need CTX_CMPs, WRPs,
* and BRPs to be identical.
* ID_AA64DFR1 is currently RES0.
*/
diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
/*
* Even in big.LITTLE, processors should be identical instruction-set
* wise.
*/
diff |= CHECK(id_aa64isar0, boot, cur, cpu);
diff |= CHECK(id_aa64isar1, boot, cur, cpu);
/*
* Differing PARange support is fine as long as all peripherals and
* memory are mapped within the minimum PARange of all CPUs.
* Linux should not care about secure memory.
* ID_AA64MMFR1 is currently RES0.
*/
diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
/*
* EL3 is not our concern.
* ID_AA64PFR1 is currently RES0.
*/
diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
if (l1ip != ICACHE_POLICY_PIPT) {
/* /*
* If we have AArch32, we care about 32-bit features for compat. These * VIPT caches are non-aliasing if the VA always equals the PA
* registers should be RES0 otherwise. * in all bit positions that are covered by the index. This is
*/ * the case if the size of a way (# of sets * line size) does
diff |= CHECK(id_dfr0, boot, cur, cpu); * not exceed PAGE_SIZE.
diff |= CHECK(id_isar0, boot, cur, cpu);
diff |= CHECK(id_isar1, boot, cur, cpu);
diff |= CHECK(id_isar2, boot, cur, cpu);
diff |= CHECK(id_isar3, boot, cur, cpu);
diff |= CHECK(id_isar4, boot, cur, cpu);
diff |= CHECK(id_isar5, boot, cur, cpu);
/*
* Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
* ACTLR formats could differ across CPUs and therefore would have to
* be trapped for virtualization anyway.
*/ */
diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu); u32 waysize = icache_get_numsets() * icache_get_linesize();
diff |= CHECK(id_mmfr1, boot, cur, cpu);
diff |= CHECK(id_mmfr2, boot, cur, cpu);
diff |= CHECK(id_mmfr3, boot, cur, cpu);
diff |= CHECK(id_pfr0, boot, cur, cpu);
diff |= CHECK(id_pfr1, boot, cur, cpu);
diff |= CHECK(mvfr0, boot, cur, cpu); if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
diff |= CHECK(mvfr1, boot, cur, cpu); set_bit(ICACHEF_ALIASING, &__icache_flags);
diff |= CHECK(mvfr2, boot, cur, cpu); }
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
/* pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
"Unsupported CPU feature variation.\n");
} }
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
...@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) ...@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
cpuinfo_detect_icache_policy(info); cpuinfo_detect_icache_policy(info);
check_local_cpu_errata(); check_local_cpu_errata();
check_local_cpu_features();
update_cpu_features(info);
} }
void cpuinfo_store_cpu(void) void cpuinfo_store_cpu(void)
{ {
struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
__cpuinfo_store_cpu(info); __cpuinfo_store_cpu(info);
cpuinfo_sanity_check(info); update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
} }
void __init cpuinfo_store_boot_cpu(void) void __init cpuinfo_store_boot_cpu(void)
...@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void) ...@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void)
__cpuinfo_store_cpu(info); __cpuinfo_store_cpu(info);
boot_cpu_data = *info; boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
} }
...@@ -26,14 +26,16 @@ ...@@ -26,14 +26,16 @@
#include <linux/stat.h> #include <linux/stat.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/debug-monitors.h> #include <asm/cpufeature.h>
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/debug-monitors.h>
#include <asm/system_misc.h> #include <asm/system_misc.h>
/* Determine debug architecture. */ /* Determine debug architecture. */
u8 debug_monitors_arch(void) u8 debug_monitors_arch(void)
{ {
return read_cpuid(ID_AA64DFR0_EL1) & 0xf; return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_DEBUGVER_SHIFT);
} }
/* /*
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
* we want to be. The kernel image wants to be placed at TEXT_OFFSET * we want to be. The kernel image wants to be placed at TEXT_OFFSET
* from start of RAM. * from start of RAM.
*/ */
ENTRY(efi_stub_entry) ENTRY(entry)
/* /*
* Create a stack frame to save FP/LR with extra space * Create a stack frame to save FP/LR with extra space
* for image_addr variable passed to efi_entry(). * for image_addr variable passed to efi_entry().
...@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry) ...@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
* entries for the VA range of the current image, so no maintenance is * entries for the VA range of the current image, so no maintenance is
* necessary. * necessary.
*/ */
adr x0, efi_stub_entry adr x0, entry
adr x1, efi_stub_entry_end adr x1, entry_end
sub x1, x1, x0 sub x1, x1, x0
bl __flush_dcache_area bl __flush_dcache_area
...@@ -120,5 +120,5 @@ efi_load_fail: ...@@ -120,5 +120,5 @@ efi_load_fail:
ldp x29, x30, [sp], #32 ldp x29, x30, [sp], #32
ret ret
efi_stub_entry_end: entry_end:
ENDPROC(efi_stub_entry) ENDPROC(entry)
...@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = { ...@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = {
.mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
INIT_MM_CONTEXT(efi_mm)
}; };
static int __init is_normal_ram(efi_memory_desc_t *md) static int __init is_normal_ram(efi_memory_desc_t *md)
...@@ -335,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm) ...@@ -335,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm)
else else
cpu_switch_mm(mm->pgd, mm); cpu_switch_mm(mm->pgd, mm);
flush_tlb_all(); local_flush_tlb_all();
if (icache_is_aivivt()) if (icache_is_aivivt())
__flush_icache_all(); __local_flush_icache_all();
} }
void efi_virtmap_load(void) void efi_virtmap_load(void)
......
...@@ -430,6 +430,8 @@ el0_sync_compat: ...@@ -430,6 +430,8 @@ el0_sync_compat:
b.eq el0_fpsimd_acc b.eq el0_fpsimd_acc
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
......
...@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { } ...@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
*/ */
static int __init fpsimd_init(void) static int __init fpsimd_init(void)
{ {
u64 pfr = read_cpuid(ID_AA64PFR0_EL1); if (elf_hwcap & HWCAP_FP) {
fpsimd_pm_init();
if (pfr & (0xf << 16)) { fpsimd_hotplug_init();
} else {
pr_notice("Floating-point is not implemented\n"); pr_notice("Floating-point is not implemented\n");
return 0;
} }
elf_hwcap |= HWCAP_FP;
if (pfr & (0xf << 20)) if (!(elf_hwcap & HWCAP_ASIMD))
pr_notice("Advanced SIMD is not implemented\n"); pr_notice("Advanced SIMD is not implemented\n");
else
elf_hwcap |= HWCAP_ASIMD;
fpsimd_pm_init();
fpsimd_hotplug_init();
return 0; return 0;
} }
......
...@@ -29,11 +29,13 @@ ...@@ -29,11 +29,13 @@
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h> #include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/pgtable-hwdef.h> #include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h> #include <asm/virt.h>
#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
...@@ -46,31 +48,9 @@ ...@@ -46,31 +48,9 @@
#error TEXT_OFFSET must be less than 2MB #error TEXT_OFFSET must be less than 2MB
#endif #endif
#ifdef CONFIG_ARM64_64K_PAGES
#define BLOCK_SHIFT PAGE_SHIFT
#define BLOCK_SIZE PAGE_SIZE
#define TABLE_SHIFT PMD_SHIFT
#else
#define BLOCK_SHIFT SECTION_SHIFT
#define BLOCK_SIZE SECTION_SIZE
#define TABLE_SHIFT PUD_SHIFT
#endif
#define KERNEL_START _text #define KERNEL_START _text
#define KERNEL_END _end #define KERNEL_END _end
/*
* Initial memory map attributes.
*/
#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
#ifdef CONFIG_ARM64_64K_PAGES
#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
#else
#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
#endif
/* /*
* Kernel startup entry point. * Kernel startup entry point.
* --------------------------- * ---------------------------
...@@ -120,8 +100,8 @@ efi_head: ...@@ -120,8 +100,8 @@ efi_head:
#endif #endif
#ifdef CONFIG_EFI #ifdef CONFIG_EFI
.globl stext_offset .globl __efistub_stext_offset
.set stext_offset, stext - efi_head .set __efistub_stext_offset, stext - efi_head
.align 3 .align 3
pe_header: pe_header:
.ascii "PE" .ascii "PE"
...@@ -144,8 +124,8 @@ optional_header: ...@@ -144,8 +124,8 @@ optional_header:
.long _end - stext // SizeOfCode .long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData .long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData .long 0 // SizeOfUninitializedData
.long efi_stub_entry - efi_head // AddressOfEntryPoint .long __efistub_entry - efi_head // AddressOfEntryPoint
.long stext_offset // BaseOfCode .long __efistub_stext_offset // BaseOfCode
extra_header_fields: extra_header_fields:
.quad 0 // ImageBase .quad 0 // ImageBase
...@@ -162,7 +142,7 @@ extra_header_fields: ...@@ -162,7 +142,7 @@ extra_header_fields:
.long _end - efi_head // SizeOfImage .long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header // Everything before the kernel image is considered part of the header
.long stext_offset // SizeOfHeaders .long __efistub_stext_offset // SizeOfHeaders
.long 0 // CheckSum .long 0 // CheckSum
.short 0xa // Subsystem (EFI application) .short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics .short 0 // DllCharacteristics
...@@ -207,9 +187,9 @@ section_table: ...@@ -207,9 +187,9 @@ section_table:
.byte 0 .byte 0
.byte 0 // end of 0 padding of section name .byte 0 // end of 0 padding of section name
.long _end - stext // VirtualSize .long _end - stext // VirtualSize
.long stext_offset // VirtualAddress .long __efistub_stext_offset // VirtualAddress
.long _edata - stext // SizeOfRawData .long _edata - stext // SizeOfRawData
.long stext_offset // PointerToRawData .long __efistub_stext_offset // PointerToRawData
.long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables) .long 0 // PointerToLineNumbers (0 for executables)
...@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args) ...@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args)
*/ */
.macro create_pgd_entry, tbl, virt, tmp1, tmp2 .macro create_pgd_entry, tbl, virt, tmp1, tmp2
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS == 3 #if SWAPPER_PGTABLE_LEVELS > 3
create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif #endif
.endm .endm
...@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args) ...@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args)
* Corrupts: phys, start, end, pstate * Corrupts: phys, start, end, pstate
*/ */
.macro create_block_map, tbl, flags, phys, start, end .macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #BLOCK_SHIFT lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT
lsr \start, \start, #BLOCK_SHIFT lsr \start, \start, #SWAPPER_BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index and \start, \start, #PTRS_PER_PTE - 1 // table index
orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry
lsr \end, \end, #BLOCK_SHIFT lsr \end, \end, #SWAPPER_BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index and \end, \end, #PTRS_PER_PTE - 1 // table end index
9999: str \phys, [\tbl, \start, lsl #3] // store the entry 9999: str \phys, [\tbl, \start, lsl #3] // store the entry
add \start, \start, #1 // next entry add \start, \start, #1 // next entry
add \phys, \phys, #BLOCK_SIZE // next block add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
cmp \start, \end cmp \start, \end
b.ls 9999b b.ls 9999b
.endm .endm
...@@ -350,7 +333,7 @@ __create_page_tables: ...@@ -350,7 +333,7 @@ __create_page_tables:
cmp x0, x6 cmp x0, x6
b.lo 1b b.lo 1b
ldr x7, =MM_MMUFLAGS ldr x7, =SWAPPER_MM_MMUFLAGS
/* /*
* Create the identity mapping. * Create the identity mapping.
...@@ -444,6 +427,9 @@ __mmap_switched: ...@@ -444,6 +427,9 @@ __mmap_switched:
str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x21, __fdt_pointer, x5 // Save FDT pointer
str_l x24, memstart_addr, x6 // Save PHYS_OFFSET str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
mov x29, #0 mov x29, #0
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
b start_kernel b start_kernel
ENDPROC(__mmap_switched) ENDPROC(__mmap_switched)
...@@ -630,10 +616,17 @@ ENDPROC(__secondary_switched) ...@@ -630,10 +616,17 @@ ENDPROC(__secondary_switched)
* x0 = SCTLR_EL1 value for turning on the MMU. * x0 = SCTLR_EL1 value for turning on the MMU.
* x27 = *virtual* address to jump to upon completion * x27 = *virtual* address to jump to upon completion
* *
* other registers depend on the function called upon completion * Other registers depend on the function called upon completion.
*
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/ */
.section ".idmap.text", "ax" .section ".idmap.text", "ax"
__enable_mmu: __enable_mmu:
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
ldr x5, =vectors ldr x5, =vectors
msr vbar_el1, x5 msr vbar_el1, x5
msr ttbr0_el1, x25 // load TTBR0 msr ttbr0_el1, x25 // load TTBR0
...@@ -651,3 +644,8 @@ __enable_mmu: ...@@ -651,3 +644,8 @@ __enable_mmu:
isb isb
br x27 br x27
ENDPROC(__enable_mmu) ENDPROC(__enable_mmu)
__no_granule_support:
wfe
b __no_granule_support
ENDPROC(__no_granule_support)
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <asm/compat.h>
#include <asm/current.h> #include <asm/current.h>
#include <asm/debug-monitors.h> #include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h> #include <asm/hw_breakpoint.h>
...@@ -163,6 +164,20 @@ enum hw_breakpoint_ops { ...@@ -163,6 +164,20 @@ enum hw_breakpoint_ops {
HW_BREAKPOINT_RESTORE HW_BREAKPOINT_RESTORE
}; };
static int is_compat_bp(struct perf_event *bp)
{
struct task_struct *tsk = bp->hw.target;
/*
* tsk can be NULL for per-cpu (non-ptrace) breakpoints.
* In this case, use the native interface, since we don't have
* the notion of a "compat CPU" and could end up relying on
* deprecated behaviour if we use unaligned watchpoints in
* AArch64 state.
*/
return tsk && is_compat_thread(task_thread_info(tsk));
}
/** /**
* hw_breakpoint_slot_setup - Find and setup a perf slot according to * hw_breakpoint_slot_setup - Find and setup a perf slot according to
* operations * operations
...@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp) ...@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp)
* Watchpoints can be of length 1, 2, 4 or 8 bytes. * Watchpoints can be of length 1, 2, 4 or 8 bytes.
*/ */
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
if (is_compat_task()) { if (is_compat_bp(bp)) {
if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
info->ctrl.len != ARM_BREAKPOINT_LEN_4) info->ctrl.len != ARM_BREAKPOINT_LEN_4)
return -EINVAL; return -EINVAL;
...@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ...@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* AArch32 tasks expect some simple alignment fixups, so emulate * AArch32 tasks expect some simple alignment fixups, so emulate
* that here. * that here.
*/ */
if (is_compat_task()) { if (is_compat_bp(bp)) {
if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
alignment_mask = 0x7; alignment_mask = 0x7;
else else
......
...@@ -47,7 +47,10 @@ ...@@ -47,7 +47,10 @@
#define __HEAD_FLAG_BE 0 #define __HEAD_FLAG_BE 0
#endif #endif
#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) #define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
(__HEAD_FLAG_PAGE_SIZE << 1))
/* /*
* These will output as part of the Image header, which should be little-endian * These will output as part of the Image header, which should be little-endian
...@@ -59,4 +62,37 @@ ...@@ -59,4 +62,37 @@
_kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \
_kernel_flags_le = DATA_LE64(__HEAD_FLAGS); _kernel_flags_le = DATA_LE64(__HEAD_FLAGS);
#ifdef CONFIG_EFI
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
* Only include data symbols here, or text symbols of functions that are
* guaranteed to be safe when executed at another offset than they were
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
__efistub_memcmp = __pi_memcmp;
__efistub_memchr = __pi_memchr;
__efistub_memcpy = __pi_memcpy;
__efistub_memmove = __pi_memmove;
__efistub_memset = __pi_memset;
__efistub_strlen = __pi_strlen;
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub___flush_dcache_area = __pi___flush_dcache_area;
#ifdef CONFIG_KASAN
__efistub___memcpy = __pi_memcpy;
__efistub___memmove = __pi_memmove;
__efistub___memset = __pi_memset;
#endif
__efistub__text = _text;
__efistub__end = _end;
__efistub__edata = _edata;
#endif
#endif /* __ASM_IMAGE_H */ #endif /* __ASM_IMAGE_H */
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/irqchip.h> #include <linux/irqchip.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/ratelimit.h>
unsigned long irq_err_count; unsigned long irq_err_count;
...@@ -54,64 +53,3 @@ void __init init_IRQ(void) ...@@ -54,64 +53,3 @@ void __init init_IRQ(void)
if (!handle_arch_irq) if (!handle_arch_irq)
panic("No interrupt controller found."); panic("No interrupt controller found.");
} }
#ifdef CONFIG_HOTPLUG_CPU
static bool migrate_one_irq(struct irq_desc *desc)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
const struct cpumask *affinity = irq_data_get_affinity_mask(d);
struct irq_chip *c;
bool ret = false;
/*
* If this is a per-CPU interrupt, or the affinity does not
* include this CPU, then we have nothing to do.
*/
if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
return false;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
affinity = cpu_online_mask;
ret = true;
}
c = irq_data_get_irq_chip(d);
if (!c->irq_set_affinity)
pr_debug("IRQ%u: unable to set affinity\n", d->irq);
else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
cpumask_copy(irq_data_get_affinity_mask(d), affinity);
return ret;
}
/*
* The current CPU has been marked offline. Migrate IRQs off this CPU.
* If the affinity settings do not allow other CPUs, force them onto any
* available CPU.
*
* Note: we must iterate over all IRQs, whether they have an attached
* action structure or not, as we need to get chained interrupts too.
*/
void migrate_irqs(void)
{
unsigned int i;
struct irq_desc *desc;
unsigned long flags;
local_irq_save(flags);
for_each_irq_desc(i, desc) {
bool affinity_broken;
raw_spin_lock(&desc->lock);
affinity_broken = migrate_one_irq(desc);
raw_spin_unlock(&desc->lock);
if (affinity_broken)
pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
i, smp_processor_id());
}
local_irq_restore(flags);
}
#endif /* CONFIG_HOTPLUG_CPU */
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/moduleloader.h> #include <linux/moduleloader.h>
...@@ -34,9 +35,18 @@ ...@@ -34,9 +35,18 @@
void *module_alloc(unsigned long size) void *module_alloc(unsigned long size)
{ {
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, void *p;
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0)); NUMA_NO_NODE, __builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
return NULL;
}
return p;
} }
enum aarch64_reloc_op { enum aarch64_reloc_op {
......
This diff is collapsed.
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <linux/hw_breakpoint.h> #include <linux/hw_breakpoint.h>
#include <linux/personality.h> #include <linux/personality.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <trace/events/power.h>
#include <asm/compat.h> #include <asm/compat.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
...@@ -75,8 +76,10 @@ void arch_cpu_idle(void) ...@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
* This should do all the clock switching and wait for interrupt * This should do all the clock switching and wait for interrupt
* tricks * tricks
*/ */
trace_cpu_idle_rcuidle(1, smp_processor_id());
cpu_do_idle(); cpu_do_idle();
local_irq_enable(); local_irq_enable();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} }
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <linux/console.h> #include <linux/console.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/screen_info.h> #include <linux/screen_info.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/kexec.h> #include <linux/kexec.h>
...@@ -44,7 +43,6 @@ ...@@ -44,7 +43,6 @@
#include <linux/of_fdt.h> #include <linux/of_fdt.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/efi.h> #include <linux/efi.h>
#include <linux/personality.h>
#include <linux/psci.h> #include <linux/psci.h>
#include <asm/acpi.h> #include <asm/acpi.h>
...@@ -54,6 +52,7 @@ ...@@ -54,6 +52,7 @@
#include <asm/elf.h> #include <asm/elf.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/cpu_ops.h> #include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/smp_plat.h> #include <asm/smp_plat.h>
...@@ -64,23 +63,6 @@ ...@@ -64,23 +63,6 @@
#include <asm/efi.h> #include <asm/efi.h>
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
phys_addr_t __fdt_pointer __initdata; phys_addr_t __fdt_pointer __initdata;
/* /*
...@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void) ...@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void)
__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
} }
static void __init setup_processor(void)
{
u64 features;
s64 block;
u32 cwg;
int cls;
printk("CPU: AArch64 Processor [%08x] revision %d\n",
read_cpuid_id(), read_cpuid_id() & 15);
sprintf(init_utsname()->machine, ELF_PLATFORM);
elf_hwcap = 0;
cpuinfo_store_boot_cpu();
/*
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
cls = cache_line_size();
if (!cwg)
pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
cls);
if (L1_CACHE_BYTES < cls)
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
L1_CACHE_BYTES, cls);
/*
* ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
* The blocks we test below represent incremental functionality
* for non-negative values. Negative values are reserved.
*/
features = read_cpuid(ID_AA64ISAR0_EL1);
block = cpuid_feature_extract_field(features, 4);
if (block > 0) {
switch (block) {
default:
case 2:
elf_hwcap |= HWCAP_PMULL;
case 1:
elf_hwcap |= HWCAP_AES;
case 0:
break;
}
}
if (cpuid_feature_extract_field(features, 8) > 0)
elf_hwcap |= HWCAP_SHA1;
if (cpuid_feature_extract_field(features, 12) > 0)
elf_hwcap |= HWCAP_SHA2;
if (cpuid_feature_extract_field(features, 16) > 0)
elf_hwcap |= HWCAP_CRC32;
block = cpuid_feature_extract_field(features, 20);
if (block > 0) {
switch (block) {
default:
case 2:
elf_hwcap |= HWCAP_ATOMICS;
case 1:
/* RESERVED */
case 0:
break;
}
}
#ifdef CONFIG_COMPAT
/*
* ID_ISAR5_EL1 carries similar information as above, but pertaining to
* the AArch32 32-bit execution state.
*/
features = read_cpuid(ID_ISAR5_EL1);
block = cpuid_feature_extract_field(features, 4);
if (block > 0) {
switch (block) {
default:
case 2:
compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
case 1:
compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
case 0:
break;
}
}
if (cpuid_feature_extract_field(features, 8) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
if (cpuid_feature_extract_field(features, 12) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
if (cpuid_feature_extract_field(features, 16) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
#endif
}
static void __init setup_machine_fdt(phys_addr_t dt_phys) static void __init setup_machine_fdt(phys_addr_t dt_phys)
{ {
void *dt_virt = fixmap_remap_fdt(dt_phys); void *dt_virt = fixmap_remap_fdt(dt_phys);
...@@ -406,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; ...@@ -406,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p)
{ {
setup_processor(); pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
sprintf(init_utsname()->machine, ELF_PLATFORM);
init_mm.start_code = (unsigned long) _text; init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext; init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata; init_mm.end_data = (unsigned long) _edata;
...@@ -436,6 +321,9 @@ void __init setup_arch(char **cmdline_p) ...@@ -436,6 +321,9 @@ void __init setup_arch(char **cmdline_p)
paging_init(); paging_init();
relocate_initrd(); relocate_initrd();
kasan_init();
request_standard_resources(); request_standard_resources();
early_ioremap_reset(); early_ioremap_reset();
...@@ -493,124 +381,3 @@ static int __init topology_init(void) ...@@ -493,124 +381,3 @@ static int __init topology_init(void)
return 0; return 0;
} }
subsys_initcall(topology_init); subsys_initcall(topology_init);
/*
 * Names printed by c_show() for the native (non-PER_LINUX32) "Features" line.
 * Entry j is printed when bit j of elf_hwcap is set; c_show() stops scanning
 * at the NULL entry, so the terminator must stay last.
 */
static const char *hwcap_str[] = {
"fp",
"asimd",
"evtstrm",
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
"atomics",
NULL
};
#ifdef CONFIG_COMPAT
/*
 * Names printed by c_show() for PER_LINUX32 tasks: entry j is printed when
 * bit j of compat_elf_hwcap is set.
 *
 * c_show() walks this table until it reaches a NULL entry, so the array must
 * be NULL-terminated; without the terminator the scan runs past the end of
 * the array until it happens to find a zero pointer in adjacent memory.
 */
static const char *compat_hwcap_str[] = {
	"swp",
	"half",
	"thumb",
	"26bit",
	"fastmult",
	"fpa",
	"vfp",
	"edsp",
	"java",
	"iwmmxt",
	"crunch",
	"thumbee",
	"neon",
	"vfpv3",
	"vfpv3d16",
	"tls",
	"vfpv4",
	"idiva",
	"idivt",
	"vfpd32",
	"lpae",
	"evtstrm",
	NULL
};
/*
 * Second name table for PER_LINUX32 tasks: c_show() prints entry j when bit j
 * of compat_elf_hwcap2 is set, and stops at the NULL terminator.
 */
static const char *compat_hwcap2_str[] = {
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
NULL
};
#endif /* CONFIG_COMPAT */
/*
 * seq_file show callback: emit one record per online CPU.
 *
 * Each record holds a "processor" line, a "Features" line built from the
 * hwcap name tables, and the MIDR-derived implementer/variant/part/revision
 * fields. Always returns 0.
 */
static int c_show(struct seq_file *m, void *v)
{
	int cpu, bit;

	for_each_online_cpu(cpu) {
		u32 midr = per_cpu(cpu_data, cpu).reg_midr;

		/*
		 * glibc counts online processors by scanning for lines that
		 * start with "processor", so that line has to be present for
		 * every CPU.
		 */
		seq_printf(m, "processor\t: %d\n", cpu);

		/*
		 * All feature names go on one line. getauxval(AT_HWCAP) is
		 * the proper interface for userspace, but existing software
		 * (32-bit in particular) parses this text instead.
		 */
		seq_puts(m, "Features\t:");
		if (personality(current->personality) != PER_LINUX32) {
			/* Native task: one table, one bitmask. */
			for (bit = 0; hwcap_str[bit]; bit++)
				if (elf_hwcap & (1 << bit))
					seq_printf(m, " %s", hwcap_str[bit]);
		} else {
#ifdef CONFIG_COMPAT
			/* 32-bit personality: both compat tables, in order. */
			for (bit = 0; compat_hwcap_str[bit]; bit++)
				if (compat_elf_hwcap & (1 << bit))
					seq_printf(m, " %s", compat_hwcap_str[bit]);

			for (bit = 0; compat_hwcap2_str[bit]; bit++)
				if (compat_elf_hwcap2 & (1 << bit))
					seq_printf(m, " %s", compat_hwcap2_str[bit]);
#endif /* CONFIG_COMPAT */
		}
		seq_puts(m, "\n");

		/* Identification fields decoded from this CPU's saved MIDR. */
		seq_printf(m, "CPU implementer\t: 0x%02x\n",
			   MIDR_IMPLEMENTOR(midr));
		seq_printf(m, "CPU architecture: 8\n");
		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
	}

	return 0;
}
/*
 * seq_file start callback. The whole output is produced by a single c_show()
 * call, so only position 0 yields a (dummy, non-NULL) iterator token; any
 * later position ends the sequence.
 */
static void *c_start(struct seq_file *m, loff_t *pos)
{
	if (*pos >= 1)
		return NULL;

	return (void *)1;
}
/*
 * seq_file next callback: advance the position and report that there is no
 * further element (the single c_show() call already covered every CPU).
 */
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
	*pos += 1;

	return NULL;
}
/*
 * seq_file stop callback. Intentionally empty: c_start() only returns a
 * constant token and acquires nothing that would need releasing here.
 */
static void c_stop(struct seq_file *m, void *v)
{
}
/*
 * seq_file operations table tying together c_start/c_next/c_stop/c_show.
 * NOTE(review): presumably this backs /proc/cpuinfo (c_show's comments refer
 * to it); the registration site is not visible here -- confirm.
 */
const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
.show = c_show
};
...@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void) ...@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void)
*/ */
atomic_inc(&mm->mm_count); atomic_inc(&mm->mm_count);
current->active_mm = mm; current->active_mm = mm;
cpumask_set_cpu(cpu, mm_cpumask(mm));
set_my_cpu_offset(per_cpu_offset(smp_processor_id())); set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
printk("CPU%u: Booted secondary processor\n", cpu);
/* /*
* TTBR0 is only used for the identity mapping at this stage. Make it * TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries. * point to zero page to avoid speculatively fetching new entries.
*/ */
cpu_set_reserved_ttbr0(); cpu_set_reserved_ttbr0();
flush_tlb_all(); local_flush_tlb_all();
cpu_set_default_tcr_t0sz(); cpu_set_default_tcr_t0sz();
preempt_disable(); preempt_disable();
trace_hardirqs_off(); trace_hardirqs_off();
/*
* If the system has established the capabilities, make sure
* this CPU ticks all of those. If it doesn't, the CPU will
* fail to come online.
*/
verify_local_cpu_capabilities();
if (cpu_ops[cpu]->cpu_postboot) if (cpu_ops[cpu]->cpu_postboot)
cpu_ops[cpu]->cpu_postboot(); cpu_ops[cpu]->cpu_postboot();
...@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void) ...@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void)
* the CPU migration code to notice that the CPU is online * the CPU migration code to notice that the CPU is online
* before we continue. * before we continue.
*/ */
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
set_cpu_online(cpu, true); set_cpu_online(cpu, true);
complete(&cpu_running); complete(&cpu_running);
...@@ -232,12 +239,7 @@ int __cpu_disable(void) ...@@ -232,12 +239,7 @@ int __cpu_disable(void)
/* /*
* OK - migrate IRQs away from this CPU * OK - migrate IRQs away from this CPU
*/ */
migrate_irqs(); irq_migrate_all_off_this_cpu();
/*
* Remove this CPU from the vm mask set of all processes.
*/
clear_tasks_mm_cpumask(cpu);
return 0; return 0;
} }
...@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void) ...@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus) void __init smp_cpus_done(unsigned int max_cpus)
{ {
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
setup_cpu_features();
hyp_mode_check(); hyp_mode_check();
apply_alternatives_all(); apply_alternatives_all();
} }
void __init smp_prepare_boot_cpu(void) void __init smp_prepare_boot_cpu(void)
{ {
cpuinfo_store_boot_cpu();
set_my_cpu_offset(per_cpu_offset(smp_processor_id())); set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
} }
......
...@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ...@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* restoration before returning. * restoration before returning.
*/ */
cpu_set_reserved_ttbr0(); cpu_set_reserved_ttbr0();
flush_tlb_all(); local_flush_tlb_all();
cpu_set_default_tcr_t0sz(); cpu_set_default_tcr_t0sz();
if (mm != &init_mm) if (mm != &init_mm)
......
...@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, ...@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
set_fs(fs); set_fs(fs);
} }
static void dump_backtrace_entry(unsigned long where, unsigned long stack) static void dump_backtrace_entry(unsigned long where)
{ {
/*
* Note that 'where' can have a physical address, but it's not handled.
*/
print_ip_sym(where); print_ip_sym(where);
if (in_exception_text(where))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs), false);
} }
static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_instr(const char *lvl, struct pt_regs *regs)
...@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ...@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
pr_emerg("Call trace:\n"); pr_emerg("Call trace:\n");
while (1) { while (1) {
unsigned long where = frame.pc; unsigned long where = frame.pc;
unsigned long stack;
int ret; int ret;
dump_backtrace_entry(where);
ret = unwind_frame(&frame); ret = unwind_frame(&frame);
if (ret < 0) if (ret < 0)
break; break;
dump_backtrace_entry(where, frame.sp); stack = frame.sp;
if (in_exception_text(where))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs), false);
} }
} }
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
*/ */
#include <asm-generic/vmlinux.lds.h> #include <asm-generic/vmlinux.lds.h>
#include <asm/kernel-pgtable.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/memory.h> #include <asm/memory.h>
#include <asm/page.h> #include <asm/page.h>
...@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; ...@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#define PECOFF_EDATA_PADDING #define PECOFF_EDATA_PADDING
#endif #endif
#ifdef CONFIG_DEBUG_ALIGN_RODATA #if defined(CONFIG_DEBUG_ALIGN_RODATA)
#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); #define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO #define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
#elif defined(CONFIG_DEBUG_RODATA)
#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
#else #else
#define ALIGN_DEBUG_RO #define ALIGN_DEBUG_RO
#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); #define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
......
...@@ -22,6 +22,7 @@ config KVM_ARM_VGIC_V3 ...@@ -22,6 +22,7 @@ config KVM_ARM_VGIC_V3
config KVM config KVM
bool "Kernel-based Virtual Machine (KVM) support" bool "Kernel-based Virtual Machine (KVM) support"
depends on OF depends on OF
depends on !ARM64_16K_PAGES
select MMU_NOTIFIER select MMU_NOTIFIER
select PREEMPT_NOTIFIERS select PREEMPT_NOTIFIERS
select ANON_INODES select ANON_INODES
...@@ -37,6 +38,8 @@ config KVM ...@@ -37,6 +38,8 @@ config KVM
select KVM_ARM_VGIC_V3 select KVM_ARM_VGIC_V3
---help--- ---help---
Support hosting virtualized guest machines. Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
levels of fake page tables.
If unsure, say N. If unsure, say N.
......
...@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void) ...@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
{ {
u64 pfr0; u64 pfr0;
pfr0 = read_cpuid(ID_AA64PFR0_EL1); pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
return !!(pfr0 & 0x20); return !!(pfr0 & 0x20);
} }
......
...@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, ...@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
if (p->is_write) { if (p->is_write) {
return ignore_write(vcpu, p); return ignore_write(vcpu, p);
} else { } else {
u64 dfr = read_cpuid(ID_AA64DFR0_EL1); u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_cpuid(ID_AA64PFR0_EL1); u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
u32 el3 = !!((pfr >> 12) & 0xf); u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
(((dfr >> 12) & 0xf) << 24) | (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
(((dfr >> 28) & 0xf) << 20) | (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
(6 << 16) | (el3 << 14) | (el3 << 12)); (6 << 16) | (el3 << 14) | (el3 << 12));
return true; return true;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/assembler.h> #include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/sysreg.h> #include <asm/sysreg.h>
...@@ -31,49 +32,58 @@ ...@@ -31,49 +32,58 @@
* Returns: * Returns:
* x0 - bytes not copied * x0 - bytes not copied
*/ */
/*
 * Accessor macros expanded by copy_template.S for __copy_from_user.
 *
 * Single-register forms: \ptr is the data register, \regB the base address
 * register and \val the post-index increment applied to the base.
 * Pair forms (ldp1/stp1): \ptr and \regB are the data registers and \regC is
 * the base address register.
 *
 * Every load is wrapped in USER(9998f, ...); every store is a plain
 * instruction.
 * NOTE(review): USER() is defined elsewhere; presumably it routes a faulting
 * access to local label 9998 in the .fixup section -- confirm.
 */
.macro ldrb1 ptr, regB, val
USER(9998f, ldrb \ptr, [\regB], \val)
.endm
.macro strb1 ptr, regB, val
strb \ptr, [\regB], \val
.endm
.macro ldrh1 ptr, regB, val
USER(9998f, ldrh \ptr, [\regB], \val)
.endm
.macro strh1 ptr, regB, val
strh \ptr, [\regB], \val
.endm
.macro ldr1 ptr, regB, val
USER(9998f, ldr \ptr, [\regB], \val)
.endm
.macro str1 ptr, regB, val
str \ptr, [\regB], \val
.endm
.macro ldp1 ptr, regB, regC, val
USER(9998f, ldp \ptr, \regB, [\regC], \val)
.endm
.macro stp1 ptr, regB, regC, val
stp \ptr, \regB, [\regC], \val
.endm
/* x5 is aliased as `end`; the .fixup code compares and subtracts it against dst. */
end .req x5
ENTRY(__copy_from_user) ENTRY(__copy_from_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
add x5, x1, x2 // upper user buffer boundary add end, x0, x2
subs x2, x2, #16 #include "copy_template.S"
b.mi 1f
0:
USER(9f, ldp x3, x4, [x1], #16)
subs x2, x2, #16
stp x3, x4, [x0], #16
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
USER(9f, ldr x3, [x1], #8 )
sub x2, x2, #8
str x3, [x0], #8
2: adds x2, x2, #4
b.mi 3f
USER(9f, ldr w3, [x1], #4 )
sub x2, x2, #4
str w3, [x0], #4
3: adds x2, x2, #2
b.mi 4f
USER(9f, ldrh w3, [x1], #2 )
sub x2, x2, #2
strh w3, [x0], #2
4: adds x2, x2, #1
b.mi 5f
USER(9f, ldrb w3, [x1] )
strb w3, [x0]
5: mov x0, #0
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
mov x0, #0 // Nothing to copy
ret ret
ENDPROC(__copy_from_user) ENDPROC(__copy_from_user)
.section .fixup,"ax" .section .fixup,"ax"
.align 2 .align 2
9: sub x2, x5, x1 9998:
mov x3, x2 sub x0, end, dst
10: strb wzr, [x0], #1 // zero remaining buffer space 9999:
subs x3, x3, #1 strb wzr, [dst], #1 // zero remaining buffer space
b.ne 10b cmp dst, end
mov x0, x2 // bytes not copied b.lo 9999b
ret ret
.previous .previous
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/assembler.h> #include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/sysreg.h> #include <asm/sysreg.h>
...@@ -33,44 +34,52 @@ ...@@ -33,44 +34,52 @@
* Returns: * Returns:
* x0 - bytes not copied * x0 - bytes not copied
*/ */
/*
 * Accessor macros expanded by copy_template.S for __copy_in_user.
 *
 * Single-register forms: \ptr is the data register, \regB the base address
 * register and \val the post-index increment applied to the base.
 * Pair forms (ldp1/stp1): \ptr and \regB are the data registers and \regC is
 * the base address register.
 *
 * Both loads and stores are wrapped in USER(9998f, ...).
 * NOTE(review): USER() is defined elsewhere; presumably it routes a faulting
 * access to local label 9998 in the .fixup section -- confirm.
 */
.macro ldrb1 ptr, regB, val
USER(9998f, ldrb \ptr, [\regB], \val)
.endm
.macro strb1 ptr, regB, val
USER(9998f, strb \ptr, [\regB], \val)
.endm
.macro ldrh1 ptr, regB, val
USER(9998f, ldrh \ptr, [\regB], \val)
.endm
.macro strh1 ptr, regB, val
USER(9998f, strh \ptr, [\regB], \val)
.endm
.macro ldr1 ptr, regB, val
USER(9998f, ldr \ptr, [\regB], \val)
.endm
.macro str1 ptr, regB, val
USER(9998f, str \ptr, [\regB], \val)
.endm
.macro ldp1 ptr, regB, regC, val
USER(9998f, ldp \ptr, \regB, [\regC], \val)
.endm
.macro stp1 ptr, regB, regC, val
USER(9998f, stp \ptr, \regB, [\regC], \val)
.endm
/* x5 is aliased as `end`; the .fixup code returns end - dst as bytes not copied. */
end .req x5
ENTRY(__copy_in_user) ENTRY(__copy_in_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
add x5, x0, x2 // upper user buffer boundary add end, x0, x2
subs x2, x2, #16 #include "copy_template.S"
b.mi 1f
0:
USER(9f, ldp x3, x4, [x1], #16)
subs x2, x2, #16
USER(9f, stp x3, x4, [x0], #16)
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
USER(9f, ldr x3, [x1], #8 )
sub x2, x2, #8
USER(9f, str x3, [x0], #8 )
2: adds x2, x2, #4
b.mi 3f
USER(9f, ldr w3, [x1], #4 )
sub x2, x2, #4
USER(9f, str w3, [x0], #4 )
3: adds x2, x2, #2
b.mi 4f
USER(9f, ldrh w3, [x1], #2 )
sub x2, x2, #2
USER(9f, strh w3, [x0], #2 )
4: adds x2, x2, #1
b.mi 5f
USER(9f, ldrb w3, [x1] )
USER(9f, strb w3, [x0] )
5: mov x0, #0
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
mov x0, #0
ret ret
ENDPROC(__copy_in_user) ENDPROC(__copy_in_user)
.section .fixup,"ax" .section .fixup,"ax"
.align 2 .align 2
9: sub x0, x5, x0 // bytes not copied 9998: sub x0, end, dst // bytes not copied
ret ret
.previous .previous
/*
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
*
* Parameters:
* x0 - dest
* x1 - src
* x2 - n
* Returns:
* x0 - dest
*/
/*
 * Register aliases used by the copy template.
 *
 * dstin/src/count are the incoming arguments (x0/x1/x2). dst (x6) is the
 * working destination pointer, so dstin itself is never advanced.
 * tmp1/tmp2 are scratch (tmp1w/tmp2w are their 32-bit views).
 * A_l..D_h hold up to four 16-byte register pairs for the 64-byte block copy.
 * x5 is not aliased here.
 */
dstin .req x0
src .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
dst .req x6
A_l .req x7
A_h .req x8
B_l .req x9
B_h .req x10
C_l .req x11
C_h .req x12
D_l .req x13
D_h .req x14
/* Work on a copy of the destination pointer so dstin (x0) is left untouched. */
mov dst, dstin
cmp count, #16
/* When the length is less than 16, skip the alignment step; the accesses are not aligned. */
b.lo .Ltiny15
/* tmp2 = (-src) & 15: distance from src up to the next 16-byte boundary. */
neg tmp2, src
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned
sub count, count, tmp2
/*
* Copy the leading memory data from src to dst in increasing
* address order. This way, the risk of overwriting the source
* memory data is eliminated when the distance between src and
* dst is less than 16. The source accesses here are aligned:
* each set bit of tmp2 selects one 1/2/4/8-byte transfer, taken
* from the smallest size upwards.
*/
tbz tmp2, #0, 1f
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
1:
tbz tmp2, #1, 2f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
2:
tbz tmp2, #2, 3f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
3:
tbz tmp2, #3, .LSrcAligned
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
/* src is now 16-byte aligned; 64 bytes or more go to the block-copy paths. */
.LSrcAligned:
cmp count, #64
b.ge .Lcpy_over64
/*
* Deal with small copies quickly by dropping straight into the
* exit block.
*/
.Ltail63:
/*
* Copy up to 48 bytes of data. At this point we only need the
* bottom 6 bits of count to be accurate.
*/
/*
* tmp1 = count & 0x30 is 0, 16, 32 or 48. The three ldp1/stp1 pairs
* below form a fall-through chain: 48 enters at the top (three pairs),
* 32 enters at 1: (two pairs), 16 enters at 2: (one pair). The
* remaining 0-15 bytes are handled by .Ltiny15, which follows.
*/
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
1:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
2:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
.Ltiny15:
/*
* Prefer to break one ldp/stp into several loads/stores that access
* memory in increasing address order, rather than loading/storing 16
* bytes from (src-16) to (dst-16) and moving src back to an aligned
* address, which is what the original cortex memcpy does. If the
* original memcpy process were kept here, memmove would need to satisfy
* the precondition that the src address is at least 16 bytes bigger than
* the dst address, otherwise some source data would be overwritten when
* memmove calls memcpy directly. To make memmove simpler and decouple
* memcpy's dependency on memmove, the original process was withdrawn.
*/
/* Bits 3..0 of count select an 8-, 4-, 2- and 1-byte transfer respectively. */
tbz count, #3, 1f
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
1:
tbz count, #2, 2f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
2:
tbz count, #1, 3f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
3:
tbz count, #0, .Lexitfunc
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
b .Lexitfunc
.Lcpy_over64:
/*
* Bias count by -128. A non-negative result means at least 128 bytes
* remain and the pipelined loop is used. The subtraction leaves the low
* six bits of count unchanged, which is all the tail code relies on.
*/
subs count, count, #128
b.ge .Lcpy_body_large
/*
* Less than 128 bytes to copy, so handle 64 here and then jump
* to the tail.
*/
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
ldp1 D_l, D_h, src, #16
stp1 D_l, D_h, dst, #16
/* Any of the low six bits set means 1-63 bytes are still outstanding. */
tst count, #0x3f
b.ne .Ltail63
b .Lexitfunc
/*
* Critical loop. Start at a new cache line boundary. Assuming
* 64 bytes per line this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large:
/* Pre-load the first 64 bytes of data. */
ldp1 A_l, A_h, src, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
ldp1 D_l, D_h, src, #16
1:
/*
* Interleave the load of the next 64-byte block with the store of the
* previously loaded 64 bytes. count was biased by -128 before entry, so
* the loop continues while a further full block can still be loaded.
*/
stp1 A_l, A_h, dst, #16
ldp1 A_l, A_h, src, #16
stp1 B_l, B_h, dst, #16
ldp1 B_l, B_h, src, #16
stp1 C_l, C_h, dst, #16
ldp1 C_l, C_h, src, #16
stp1 D_l, D_h, dst, #16
ldp1 D_l, D_h, src, #16
subs count, count, #64
b.ge 1b
/* Drain: store the last 64 bytes that were loaded but not yet written. */
stp1 A_l, A_h, dst, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
stp1 D_l, D_h, dst, #16
/* Low six bits of count give the 0-63 bytes left for the tail code. */
tst count, #0x3f
b.ne .Ltail63
/* Common exit point; execution continues in the file that included this template. */
.Lexitfunc:
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <asm/alternative.h> #include <asm/alternative.h>
#include <asm/assembler.h> #include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/sysreg.h> #include <asm/sysreg.h>
...@@ -31,44 +32,52 @@ ...@@ -31,44 +32,52 @@
* Returns: * Returns:
* x0 - bytes not copied * x0 - bytes not copied
*/ */
/*
 * Accessor macros expanded by copy_template.S for __copy_to_user.
 *
 * Single-register forms: \ptr is the data register, \regB the base address
 * register and \val the post-index increment applied to the base.
 * Pair forms (ldp1/stp1): \ptr and \regB are the data registers and \regC is
 * the base address register.
 *
 * Every store is wrapped in USER(9998f, ...); every load is a plain
 * instruction.
 * NOTE(review): USER() is defined elsewhere; presumably it routes a faulting
 * access to local label 9998 in the .fixup section -- confirm.
 */
.macro ldrb1 ptr, regB, val
ldrb \ptr, [\regB], \val
.endm
.macro strb1 ptr, regB, val
USER(9998f, strb \ptr, [\regB], \val)
.endm
.macro ldrh1 ptr, regB, val
ldrh \ptr, [\regB], \val
.endm
.macro strh1 ptr, regB, val
USER(9998f, strh \ptr, [\regB], \val)
.endm
.macro ldr1 ptr, regB, val
ldr \ptr, [\regB], \val
.endm
.macro str1 ptr, regB, val
USER(9998f, str \ptr, [\regB], \val)
.endm
.macro ldp1 ptr, regB, regC, val
ldp \ptr, \regB, [\regC], \val
.endm
.macro stp1 ptr, regB, regC, val
USER(9998f, stp \ptr, \regB, [\regC], \val)
.endm
/* x5 is aliased as `end`; the .fixup code returns end - dst as bytes not copied. */
end .req x5
ENTRY(__copy_to_user) ENTRY(__copy_to_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
add x5, x0, x2 // upper user buffer boundary add end, x0, x2
subs x2, x2, #16 #include "copy_template.S"
b.mi 1f
0:
ldp x3, x4, [x1], #16
subs x2, x2, #16
USER(9f, stp x3, x4, [x0], #16)
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
ldr x3, [x1], #8
sub x2, x2, #8
USER(9f, str x3, [x0], #8 )
2: adds x2, x2, #4
b.mi 3f
ldr w3, [x1], #4
sub x2, x2, #4
USER(9f, str w3, [x0], #4 )
3: adds x2, x2, #2
b.mi 4f
ldrh w3, [x1], #2
sub x2, x2, #2
USER(9f, strh w3, [x0], #2 )
4: adds x2, x2, #1
b.mi 5f
ldrb w3, [x1]
USER(9f, strb w3, [x0] )
5: mov x0, #0
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) CONFIG_ARM64_PAN)
mov x0, #0
ret ret
ENDPROC(__copy_to_user) ENDPROC(__copy_to_user)
.section .fixup,"ax" .section .fixup,"ax"
.align 2 .align 2
9: sub x0, x5, x0 // bytes not copied 9998: sub x0, end, dst // bytes not copied
ret ret
.previous .previous
...@@ -41,4 +41,4 @@ ENTRY(memchr) ...@@ -41,4 +41,4 @@ ENTRY(memchr)
ret ret
2: mov x0, #0 2: mov x0, #0
ret ret
ENDPROC(memchr) ENDPIPROC(memchr)
...@@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 ) ...@@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 )
.Lret0: .Lret0:
mov result, #0 mov result, #0
ret ret
ENDPROC(memcmp) ENDPIPROC(memcmp)
This diff is collapsed.
...@@ -57,12 +57,14 @@ C_h .req x12 ...@@ -57,12 +57,14 @@ C_h .req x12
D_l .req x13 D_l .req x13
D_h .req x14 D_h .req x14
.weak memmove
ENTRY(__memmove)
ENTRY(memmove) ENTRY(memmove)
cmp dstin, src cmp dstin, src
b.lo memcpy b.lo __memcpy
add tmp1, src, count add tmp1, src, count
cmp dstin, tmp1 cmp dstin, tmp1
b.hs memcpy /* No overlap. */ b.hs __memcpy /* No overlap. */
add dst, dstin, count add dst, dstin, count
add src, src, count add src, src, count
...@@ -194,4 +196,5 @@ ENTRY(memmove) ...@@ -194,4 +196,5 @@ ENTRY(memmove)
tst count, #0x3f tst count, #0x3f
b.ne .Ltail63 b.ne .Ltail63
ret ret
ENDPROC(memmove) ENDPIPROC(memmove)
ENDPROC(__memmove)
...@@ -54,6 +54,8 @@ dst .req x8 ...@@ -54,6 +54,8 @@ dst .req x8
tmp3w .req w9 tmp3w .req w9
tmp3 .req x9 tmp3 .req x9
.weak memset
ENTRY(__memset)
ENTRY(memset) ENTRY(memset)
mov dst, dstin /* Preserve return value. */ mov dst, dstin /* Preserve return value. */
and A_lw, val, #255 and A_lw, val, #255
...@@ -213,4 +215,5 @@ ENTRY(memset) ...@@ -213,4 +215,5 @@ ENTRY(memset)
ands count, count, zva_bits_x ands count, count, zva_bits_x
b.ne .Ltail_maybe_long b.ne .Ltail_maybe_long
ret ret
ENDPROC(memset) ENDPIPROC(memset)
ENDPROC(__memset)
...@@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul ) ...@@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul )
lsr data1, data1, #56 lsr data1, data1, #56
sub result, data1, data2, lsr #56 sub result, data1, data2, lsr #56
ret ret
ENDPROC(strcmp) ENDPIPROC(strcmp)
...@@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ ...@@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
csinv data1, data1, xzr, le csinv data1, data1, xzr, le
csel data2, data2, data2a, le csel data2, data2, data2a, le
b .Lrealigned b .Lrealigned
ENDPROC(strlen) ENDPIPROC(strlen)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment