Commit d4b6f156 authored by Huacai Chen

LoongArch: Add Non-Uniform Memory Access (NUMA) support

Add Non-Uniform Memory Access (NUMA) support for LoongArch. LoongArch
has a 48-bit physical address space, but the HyperTransport I/O bus only
supports 40-bit addresses, so custom phys_to_dma() and dma_to_phys()
helpers are needed to extract the 4-bit node ID (bits 44-47) from
Loongson-3's 48-bit physical address and embed it into the 40-bit DMA
address. The node ID offset within the 40-bit DMA address can be read
from the LS7A_DMA_CFG register.
Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
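As a minimal illustration of the translation described above, here is a standalone sketch. The helpers example_phys_to_dma() and example_dma_to_phys() are hypothetical user-space mirrors of the new phys_to_dma()/dma_to_phys() in the dma.c hunk below, and the node ID offset of 36 is only an assumed sample value; on real hardware the kernel reads the offset from LS7A_DMA_CFG at boot.

#include <stdint.h>
#include <stdio.h>

/* Assumed node ID offset within the 40-bit DMA address; the kernel reads
 * the real value from LS7A_DMA_CFG, 36 is just an example here. */
static const int node_id_offset = 36;

static uint64_t example_phys_to_dma(uint64_t paddr)
{
	uint64_t nid = (paddr >> 44) & 0xf;	/* node ID sits in bits 44-47 */

	/* Clear bits 44-47 of the physical address, then re-embed the
	 * node ID at the lower DMA offset. */
	return ((nid << 44) ^ paddr) | (nid << node_id_offset);
}

static uint64_t example_dma_to_phys(uint64_t daddr)
{
	uint64_t nid = (daddr >> node_id_offset) & 0xf;

	return ((nid << node_id_offset) ^ daddr) | (nid << 44);
}

int main(void)
{
	uint64_t paddr = (2ULL << 44) | 0x80001000ULL;	/* an address on node 2 */
	uint64_t daddr = example_phys_to_dma(paddr);

	/* Round-trips back to the original 48-bit physical address. */
	printf("paddr %#llx -> dma %#llx -> paddr %#llx\n",
	       (unsigned long long)paddr, (unsigned long long)daddr,
	       (unsigned long long)example_dma_to_phys(daddr));
	return 0;
}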
parent 46859ac8
@@ -7,6 +7,7 @@ config LOONGARCH
	select ARCH_ENABLE_MEMORY_HOTPLUG
	select ARCH_ENABLE_MEMORY_HOTREMOVE
	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_PHYS_TO_DMA
	select ARCH_HAS_PTE_SPECIAL
	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
	select ARCH_INLINE_READ_LOCK if !PREEMPTION
@@ -41,6 +42,7 @@ config LOONGARCH
	select ARCH_SUPPORTS_ACPI
	select ARCH_SUPPORTS_ATOMIC_RMW
	select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_NUMA_BALANCING
	select ARCH_USE_BUILTIN_BSWAP
	select ARCH_USE_CMPXCHG_LOCKREF
	select ARCH_USE_QUEUED_RWLOCKS
@@ -91,12 +93,15 @@ config LOONGARCH
	select HAVE_PERF_EVENTS
	select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_RSEQ
select HAVE_SETUP_PER_CPU_AREA if NUMA
	select HAVE_SYSCALL_TRACEPOINTS
	select HAVE_TIF_NOHZ
	select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP
	select IRQ_FORCED_THREADING
	select IRQ_LOONGARCH_CPU
	select MODULES_USE_ELF_RELA if MODULES
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
	select OF
	select OF_EARLY_FLATTREE
	select PERF_USE_VMALLOC
@@ -105,6 +110,7 @@ config LOONGARCH
	select SYSCTL_EXCEPTION_TRACE
	select SWIOTLB
	select TRACE_IRQFLAGS_SUPPORT
select USE_PERCPU_NUMA_NODE_ID
	select ZONE_DMA32
config 32BIT
@@ -335,6 +341,20 @@ config NR_CPUS
	  This allows you to specify the maximum number of CPUs which this
	  kernel will support.
config NUMA
bool "NUMA Support"
select ACPI_NUMA if ACPI
help
Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
support. This option improves performance on systems with more
than one NUMA node; on single node systems it is generally better
to leave it disabled.
config NODES_SHIFT
int
default "6"
depends on NUMA
config FORCE_MAX_ZONEORDER
	int "Maximum zone order"
	range 14 64 if PAGE_SIZE_64KB
@@ -381,6 +401,7 @@ config ARCH_SELECT_MEMORY_MODEL
config ARCH_FLATMEM_ENABLE
	def_bool y
depends on !NUMA
config ARCH_SPARSEMEM_ENABLE
	def_bool y
......
@@ -13,6 +13,8 @@ const char *get_system_type(void);
extern void init_environ(void);
extern void memblock_init(void);
extern void platform_init(void);
extern void plat_swiotlb_setup(void);
extern int __init init_numa_memory(void);
struct loongson_board_info {
	int bios_size;
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#ifndef _LOONGARCH_DMA_DIRECT_H
#define _LOONGARCH_DMA_DIRECT_H
dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
#endif /* _LOONGARCH_DMA_DIRECT_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Author: Huacai Chen (chenhuacai@loongson.cn)
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#ifndef _ASM_MMZONE_H_
#define _ASM_MMZONE_H_
#include <asm/page.h>
#include <asm/numa.h>
extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[(nid)])
extern void setup_zero_pages(void);
#endif /* _ASM_MMZONE_H_ */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Author: Jianmin Lv <lvjianmin@loongson.cn>
* Huacai Chen <chenhuacai@loongson.cn>
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#ifndef _ASM_LOONGARCH_NUMA_H
#define _ASM_LOONGARCH_NUMA_H
#include <linux/nodemask.h>
#define NODE_ADDRSPACE_SHIFT 44
#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT)
#define nid_to_addrbase(nid) (_ULCAST_(nid) << NODE_ADDRSPACE_SHIFT)
#ifdef CONFIG_NUMA
extern int numa_off;
extern s16 __cpuid_to_node[CONFIG_NR_CPUS];
extern nodemask_t numa_nodes_parsed __initdata;
struct numa_memblk {
u64 start;
u64 end;
int nid;
};
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
struct numa_meminfo {
int nr_blks;
struct numa_memblk blk[NR_NODE_MEMBLKS];
};
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init early_numa_add_cpu(int cpuid, s16 node);
extern void numa_add_cpu(unsigned int cpu);
extern void numa_remove_cpu(unsigned int cpu);
static inline void numa_clear_node(int cpu)
{
}
static inline void set_cpuid_to_node(int cpuid, s16 node)
{
__cpuid_to_node[cpuid] = node;
}
extern int early_cpu_to_node(int cpu);
#else
static inline void early_numa_add_cpu(int cpuid, s16 node) { }
static inline void numa_add_cpu(unsigned int cpu) { }
static inline void numa_remove_cpu(unsigned int cpu) { }
static inline int early_cpu_to_node(int cpu)
{
return 0;
}
#endif /* CONFIG_NUMA */
#endif /* _ASM_LOONGARCH_NUMA_H */
@@ -541,6 +541,18 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_NUMA_BALANCING
static inline long pte_protnone(pte_t pte)
{
return (pte_val(pte) & _PAGE_PROTNONE);
}
static inline long pmd_protnone(pmd_t pmd)
{
return (pmd_val(pmd) & _PAGE_PROTNONE);
}
#endif /* CONFIG_NUMA_BALANCING */
/*
 * We provide our own get_unmapped area to cope with the virtual aliasing
 * constraints placed on us by the cache architecture.
......
@@ -7,6 +7,27 @@
#include <linux/smp.h>
#ifdef CONFIG_NUMA
extern cpumask_t cpus_on_node[];
#define cpumask_of_node(node) (&cpus_on_node[node])
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
#define cpumask_of_pcibus(bus) (cpu_online_mask)
extern unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
void numa_set_distance(int from, int to, int distance);
#define node_distance(from, to) (node_distances[(from)][(to)])
#else
#define pcibus_to_node(bus) 0
#endif
#ifdef CONFIG_SMP
#define topology_physical_package_id(cpu) (cpu_data[cpu].package)
#define topology_core_id(cpu) (cpu_data[cpu].core)
......
@@ -20,4 +20,6 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_NUMA) += numa.o
CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS)
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/serial_core.h>
#include <asm/io.h>
#include <asm/numa.h>
#include <asm/loongson.h>
int acpi_disabled;
@@ -199,6 +200,79 @@ int __init acpi_boot_init(void)
	return 0;
}
#ifdef CONFIG_ACPI_NUMA
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
}
/*
* Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
* I/O localities since SRAT does not list them. I/O localities are
* not supported at this point.
*/
unsigned int numa_distance_cnt;
static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit)
{
return slit->locality_count;
}
void __init numa_set_distance(int from, int to, int distance)
{
if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) {
pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
node_distances[from][to] = distance;
}
/* Callback for Proximity Domain -> CPUID mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
int pxm, node;
if (srat_disabled())
return;
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
bad_srat();
return;
}
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2) {
pxm |= (pa->proximity_domain_hi[0] << 8);
pxm |= (pa->proximity_domain_hi[1] << 16);
pxm |= (pa->proximity_domain_hi[2] << 24);
}
node = setup_node(pxm);
if (node < 0) {
pr_err("SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
return;
}
if (pa->apic_id >= CONFIG_NR_CPUS) {
pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n",
pxm, pa->apic_id, node);
return;
}
early_numa_add_cpu(pa->apic_id, node);
set_cpuid_to_node(pa->apic_id, node);
node_set(node, numa_nodes_parsed);
pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node);
}
void __init acpi_numa_arch_fixup(void) {}
#endif
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
	memblock_reserve(addr, size);
@@ -208,6 +282,22 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
#include <acpi/processor.h>
static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
nid = acpi_get_node(handle);
if (nid != NUMA_NO_NODE) {
set_cpuid_to_node(physid, nid);
node_set(nid, numa_nodes_parsed);
set_cpu_numa_node(cpu, nid);
cpumask_set_cpu(cpu, cpumask_of_node(nid));
}
#endif
return 0;
}
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu)
{
	int cpu;
@@ -218,6 +308,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu
		return cpu;
	}
acpi_map_cpu2node(handle, cpu, physid);
	*pcpu = cpu;
	return 0;
@@ -226,6 +318,9 @@ EXPORT_SYMBOL(acpi_map_cpu);
int acpi_unmap_cpu(int cpu)
{
#ifdef CONFIG_ACPI_NUMA
set_cpuid_to_node(cpu_logical_map(cpu), NUMA_NO_NODE);
#endif
	set_cpu_present(cpu, false);
	num_processors--;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/init.h>
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-map-ops.h>
#include <linux/swiotlb.h>
#include <asm/bootinfo.h>
#include <asm/dma.h>
#include <asm/loongson.h>
/*
* We extract 4bit node id (bit 44~47) from Loongson-3's
* 48bit physical address space and embed it into 40bit.
*/
static int node_id_offset;
dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
long nid = (paddr >> 44) & 0xf;
return ((nid << 44) ^ paddr) | (nid << node_id_offset);
}
phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
long nid = (daddr >> node_id_offset) & 0xf;
return ((nid << node_id_offset) ^ daddr) | (nid << 44);
}
void __init plat_swiotlb_setup(void)
{
swiotlb_init(true, SWIOTLB_VERBOSE);
node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF) + 36;
}
@@ -11,6 +11,7 @@
#include <linux/moduleloader.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/fs.h>
......
// SPDX-License-Identifier: GPL-2.0
/*
* Author: Xiang Gao <gaoxiang@loongson.cn>
* Huacai Chen <chenhuacai@loongson.cn>
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
#include <linux/swap.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <asm/bootinfo.h>
#include <asm/loongson.h>
#include <asm/numa.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/time.h>
int numa_off;
struct pglist_data *node_data[MAX_NUMNODES];
unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES];
EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(node_distances);
static struct numa_meminfo numa_meminfo;
cpumask_t cpus_on_node[MAX_NUMNODES];
cpumask_t phys_cpus_on_node[MAX_NUMNODES];
EXPORT_SYMBOL(cpus_on_node);
/*
* apicid, cpu, node mappings
*/
s16 __cpuid_to_node[CONFIG_NR_CPUS] = {
[0 ... CONFIG_NR_CPUS - 1] = NUMA_NO_NODE
};
EXPORT_SYMBOL(__cpuid_to_node);
nodemask_t numa_nodes_parsed __initdata;
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
static int __init pcpu_cpu_to_node(int cpu)
{
return early_cpu_to_node(cpu);
}
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
}
void __init pcpu_populate_pte(unsigned long addr)
{
pgd_t *pgd = pgd_offset_k(addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud;
pmd_t *pmd;
if (p4d_none(*p4d)) {
pud_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pgd_populate(&init_mm, pgd, new);
#ifndef __PAGETABLE_PUD_FOLDED
pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
#endif
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
pmd_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pud_populate(&init_mm, pud, new);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
#endif
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd)) {
pte_t *new;
new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd, new);
}
}
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
unsigned int cpu;
int rc = -EINVAL;
if (pcpu_chosen_fc == PCPU_FC_AUTO) {
if (nr_node_ids >= 8)
pcpu_chosen_fc = PCPU_FC_PAGE;
else
pcpu_chosen_fc = PCPU_FC_EMBED;
}
/*
* Always reserve area for module percpu variables. That's
* what the legacy allocator did.
*/
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
pcpu_cpu_distance, pcpu_cpu_to_node);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu)
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif
/*
* Get nodeid by logical cpu number.
* __cpuid_to_node maps phyical cpu id to node, so we
* should use cpu_logical_map(cpu) to index it.
*
* This routine is only used in early phase during
* booting, after setup_per_cpu_areas calling and numa_node
* initialization, cpu_to_node will be used instead.
*/
int early_cpu_to_node(int cpu)
{
int physid = cpu_logical_map(cpu);
if (physid < 0)
return NUMA_NO_NODE;
return __cpuid_to_node[physid];
}
void __init early_numa_add_cpu(int cpuid, s16 node)
{
int cpu = __cpu_number_map[cpuid];
if (cpu < 0)
return;
cpumask_set_cpu(cpu, &cpus_on_node[node]);
cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]);
}
void numa_add_cpu(unsigned int cpu)
{
int nid = cpu_to_node(cpu);
cpumask_set_cpu(cpu, &cpus_on_node[nid]);
}
void numa_remove_cpu(unsigned int cpu)
{
int nid = cpu_to_node(cpu);
cpumask_clear_cpu(cpu, &cpus_on_node[nid]);
}
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
struct numa_meminfo *mi)
{
/* ignore zero length blks */
if (start == end)
return 0;
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
nid, start, end - 1);
return 0;
}
if (mi->nr_blks >= NR_NODE_MEMBLKS) {
pr_err("NUMA: too many memblk ranges\n");
return -EINVAL;
}
mi->blk[mi->nr_blks].start = PFN_ALIGN(start);
mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1);
mi->blk[mi->nr_blks].nid = nid;
mi->nr_blks++;
return 0;
}
/**
* numa_add_memblk - Add one numa_memblk to numa_meminfo
* @nid: NUMA node ID of the new memblk
* @start: Start address of the new memblk
* @end: End address of the new memblk
*
* Add a new memblk to the default numa_meminfo.
*
* RETURNS:
* 0 on success, -errno on failure.
*/
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
static void __init alloc_node_data(int nid)
{
void *nd;
unsigned long nd_pa;
size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE);
nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid);
if (!nd_pa) {
pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid);
return;
}
nd = __va(nd_pa);
node_data[nid] = nd;
memset(nd, 0, sizeof(pg_data_t));
}
static void __init node_mem_init(unsigned int node)
{
unsigned long start_pfn, end_pfn;
unsigned long node_addrspace_offset;
node_addrspace_offset = nid_to_addrbase(node);
pr_info("Node%d's addrspace_offset is 0x%lx\n",
node, node_addrspace_offset);
get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
node, start_pfn, end_pfn);
alloc_node_data(node);
}
#ifdef CONFIG_ACPI_NUMA
/*
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{
int i;
u64 numaram, biosram;
numaram = 0;
for (i = 0; i < mi->nr_blks; i++) {
u64 s = mi->blk[i].start >> PAGE_SHIFT;
u64 e = mi->blk[i].end >> PAGE_SHIFT;
numaram += e - s;
numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
if ((s64)numaram < 0)
numaram = 0;
}
max_pfn = max_low_pfn;
biosram = max_pfn - absent_pages_in_range(0, max_pfn);
BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
return true;
}
static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type)
{
static unsigned long num_physpages;
num_physpages += (size >> PAGE_SHIFT);
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
node, type, start, size);
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages);
memblock_set_node(start, size, &memblock.memory, node);
}
/*
* add_numamem_region
*
* Add a uasable memory region described by BIOS. The
* routine gets each intersection between BIOS's region
* and node's region, and adds them into node's memblock
* pool.
*
*/
static void __init add_numamem_region(u64 start, u64 end, u32 type)
{
u32 i;
u64 ofs = start;
if (start >= end) {
pr_debug("Invalid region: %016llx-%016llx\n", start, end);
return;
}
for (i = 0; i < numa_meminfo.nr_blks; i++) {
struct numa_memblk *mb = &numa_meminfo.blk[i];
if (ofs > mb->end)
continue;
if (end > mb->end) {
add_node_intersection(mb->nid, ofs, mb->end - ofs, type);
ofs = mb->end;
} else {
add_node_intersection(mb->nid, ofs, end - ofs, type);
break;
}
}
}
static void __init init_node_memblock(void)
{
u32 mem_type;
u64 mem_end, mem_start, mem_size;
efi_memory_desc_t *md;
/* Parse memory information and activate */
for_each_efi_memory_desc(md) {
mem_type = md->type;
mem_start = md->phys_addr;
mem_size = md->num_pages << EFI_PAGE_SHIFT;
mem_end = mem_start + mem_size;
switch (mem_type) {
case EFI_LOADER_CODE:
case EFI_LOADER_DATA:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_PERSISTENT_MEMORY:
case EFI_CONVENTIONAL_MEMORY:
add_numamem_region(mem_start, mem_end, mem_type);
break;
case EFI_PAL_CODE:
case EFI_UNUSABLE_MEMORY:
case EFI_ACPI_RECLAIM_MEMORY:
add_numamem_region(mem_start, mem_end, mem_type);
fallthrough;
case EFI_RESERVED_TYPE:
case EFI_RUNTIME_SERVICES_CODE:
case EFI_RUNTIME_SERVICES_DATA:
case EFI_MEMORY_MAPPED_IO:
case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
mem_type, mem_start, mem_size);
break;
}
}
}
static void __init numa_default_distance(void)
{
int row, col;
for (row = 0; row < MAX_NUMNODES; row++)
for (col = 0; col < MAX_NUMNODES; col++) {
if (col == row)
node_distances[row][col] = LOCAL_DISTANCE;
else
/* We assume that one node per package here!
*
* A SLIT should be used for multiple nodes
* per package to override default setting.
*/
node_distances[row][col] = REMOTE_DISTANCE;
}
}
int __init init_numa_memory(void)
{
int i;
int ret;
int node;
for (i = 0; i < NR_CPUS; i++)
set_cpuid_to_node(i, NUMA_NO_NODE);
numa_default_distance();
nodes_clear(numa_nodes_parsed);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
/* Parse SRAT and SLIT if provided by firmware. */
ret = acpi_numa_init();
if (ret < 0)
return ret;
node_possible_map = numa_nodes_parsed;
if (WARN_ON(nodes_empty(node_possible_map)))
return -EINVAL;
init_node_memblock();
if (numa_meminfo_cover_memory(&numa_meminfo) == false)
return -EINVAL;
for_each_node_mask(node, node_possible_map) {
node_mem_init(node);
node_set_online(node);
}
max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
setup_nr_node_ids();
loongson_sysconf.nr_nodes = nr_node_ids;
loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]);
return 0;
}
EXPORT_SYMBOL(init_numa_memory);
#endif
void __init paging_init(void)
{
unsigned int node;
unsigned long zones_size[MAX_NR_ZONES] = {0, };
for_each_online_node(node) {
unsigned long start_pfn, end_pfn;
get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
if (end_pfn > max_low_pfn)
max_low_pfn = end_pfn;
}
#ifdef CONFIG_ZONE_DMA32
zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
zones_size[ZONE_NORMAL] = max_low_pfn;
free_area_init(zones_size);
}
void __init mem_init(void)
{
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
memblock_free_all();
setup_zero_pages(); /* This comes from node 0 */
}
int pcibus_to_node(struct pci_bus *bus)
{
return dev_to_node(&bus->dev);
}
EXPORT_SYMBOL(pcibus_to_node);
@@ -35,6 +35,7 @@
#include <asm/dma.h>
#include <asm/efi.h>
#include <asm/loongson.h>
#include <asm/numa.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -185,7 +186,10 @@ static int __init early_parse_mem(char *p)
		return -EINVAL;
	}
if (!IS_ENABLED(CONFIG_NUMA))
		memblock_add(start, size);
else
memblock_add_node(start, size, pa_to_nid(start), MEMBLOCK_NONE);
	return 0;
}
@@ -203,6 +207,9 @@ void __init platform_init(void)
	acpi_boot_init();
#endif
#ifdef CONFIG_NUMA
init_numa_memory();
#endif
	dmi_setup();
	smbios_parse();
	pr_info("The BIOS Version: %s\n", b_info.bios_version);
@@ -241,7 +248,7 @@ static void __init arch_mem_init(char **cmdline_p)
	sparse_init();
	memblock_set_bottom_up(true);
-	swiotlb_init(true, SWIOTLB_VERBOSE);
+	plat_swiotlb_setup();
	dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
......
@@ -25,6 +25,7 @@
#include <asm/idle.h>
#include <asm/loongson.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/time.h>
@@ -222,6 +223,9 @@ void loongson3_init_secondary(void)
	iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
#ifdef CONFIG_NUMA
numa_add_cpu(cpu);
#endif
	per_cpu(cpu_state, cpu) = CPU_ONLINE;
	cpu_data[cpu].core =
		cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
@@ -254,6 +258,9 @@ int loongson3_cpu_disable(void)
	if (io_master(cpu))
		return -EBUSY;
#ifdef CONFIG_NUMA
numa_remove_cpu(cpu);
#endif
	set_cpu_online(cpu, false);
	calculate_cpu_foreign_map();
	local_irq_save(flags);
@@ -478,14 +485,36 @@ void calculate_cpu_foreign_map(void)
/* Preload SMP state for boot cpu */
void smp_prepare_boot_cpu(void)
{
-	unsigned int cpu;
+	unsigned int cpu, node, rr_node;
	set_cpu_possible(0, true);
	set_cpu_online(0, true);
	set_my_cpu_offset(per_cpu_offset(0));
-	for_each_possible_cpu(cpu)
-		set_cpu_numa_node(cpu, 0);
+	rr_node = first_node(node_online_map);
+	for_each_possible_cpu(cpu) {
node = early_cpu_to_node(cpu);
/*
* The mapping between present cpus and nodes has been
* built during MADT and SRAT parsing.
*
* If possible cpus = present cpus here, early_cpu_to_node
* will return valid node.
*
* If possible cpus > present cpus here (e.g. some possible
* cpus will be added by cpu-hotplug later), for possible but
* not present cpus, early_cpu_to_node will return NUMA_NO_NODE,
* and we just map them to online nodes in round-robin way.
* Once hotplugged, new correct mapping will be built for them.
*/
if (node != NUMA_NO_NODE)
set_cpu_numa_node(cpu, node);
else {
set_cpu_numa_node(cpu, rr_node);
rr_node = next_node_in(rr_node, node_online_map);
}
}
}
/* called from main before smp_init() */
@@ -651,17 +680,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned l
		on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1);
	} else {
		unsigned int cpu;
-		int exec = vma->vm_flags & VM_EXEC;
		for_each_online_cpu(cpu) {
-			/*
-			 * flush_cache_range() will only fully flush icache if
-			 * the VMA is executable, otherwise we must invalidate
-			 * ASID without it appearing to has_valid_asid() as if
-			 * mm has been completely unused by that CPU.
-			 */
			if (cpu != smp_processor_id() && cpu_context(cpu, mm))
-				cpu_context(cpu, mm) = !exec;
+				cpu_context(cpu, mm) = 0;
		}
		local_flush_tlb_range(vma, start, end);
	}
@@ -706,14 +728,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
		unsigned int cpu;
		for_each_online_cpu(cpu) {
-			/*
-			 * flush_cache_page() only does partial flushes, so
-			 * invalidate ASID without it appearing to
-			 * has_valid_asid() as if mm has been completely unused
-			 * by that CPU.
-			 */
			if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
-				cpu_context(cpu, vma->vm_mm) = 1;
+				cpu_context(cpu, vma->vm_mm) = 0;
		}
		local_flush_tlb_page(vma, page);
	}
......
@@ -630,7 +630,7 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp)
	irqentry_exit(regs, state);
}
-extern void tlb_init(void);
+extern void tlb_init(int cpu);
extern void cache_error_setup(void);
unsigned long eentry;
@@ -669,7 +669,7 @@ void per_cpu_trap_init(int cpu)
	for (i = 0; i < 64; i++)
		set_handler(i * VECSIZE, handle_reserved, VECSIZE);
-	tlb_init();
+	tlb_init(cpu);
	cpu_cache_init();
}
......
@@ -84,6 +84,7 @@ int __ref page_is_ram(unsigned long pfn)
	return memblock_is_memory(addr) && !memblock_is_reserved(addr);
}
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -107,6 +108,7 @@ void __init mem_init(void)
	memblock_free_all();
	setup_zero_pages();	/* Setup zeroed pages. */
}
#endif /* !CONFIG_NUMA */
void __ref free_initmem(void)
{
@@ -129,6 +131,17 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
	return ret;
}
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 start)
{
int nid;
nid = pa_to_nid(start);
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#ifdef CONFIG_MEMORY_HOTREMOVE
void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
......
@@ -250,15 +250,18 @@ static void output_pgtable_bits_defines(void)
	pr_debug("\n");
}
-void setup_tlb_handler(void)
-{
-	static int run_once = 0;
+#ifdef CONFIG_NUMA
+static unsigned long pcpu_handlers[NR_CPUS];
+#endif
+extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
+void setup_tlb_handler(int cpu)
+{
	setup_ptwalker();
	output_pgtable_bits_defines();
	/* The tlb handlers are generated only once */
-	if (!run_once) {
+	if (cpu == 0) {
		memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
		local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
		set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
@@ -268,15 +271,35 @@ void setup_tlb_handler(void)
		set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
		set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
		set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
-		run_once++;
	}
#ifdef CONFIG_NUMA
else {
void *addr;
struct page *page;
const int vec_sz = sizeof(exception_handlers);
if (pcpu_handlers[cpu])
return;
page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz));
if (!page)
return;
addr = page_address(page);
pcpu_handlers[cpu] = virt_to_phys(addr);
memcpy((void *)addr, (void *)eentry, vec_sz);
local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
}
#endif
}
-void tlb_init(void)
+void tlb_init(int cpu)
{
	write_csr_pagesize(PS_DEFAULT_SIZE);
	write_csr_stlbpgsize(PS_DEFAULT_SIZE);
	write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE);
-	setup_tlb_handler();
+	setup_tlb_handler(cpu);
	local_flush_tlb_all();
}