Commit 68d00bbe authored by H. Peter Anvin

Merge remote-tracking branch 'origin/x86/mm' into x86/mm2

Explicitly merging these two branches due to nontrivial conflicts and
to allow further work.

Resolved Conflicts:
	arch/x86/kernel/head32.c
	arch/x86/kernel/head64.c
	arch/x86/mm/init_64.c
	arch/x86/realmode/init.c
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parents ac2cbab2 07f4207a
@@ -1253,10 +1253,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system. Increases memory reserved to accommodate various tables.
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
...
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
 #include <asm/numaq.h>
 
-extern void resume_map_numa_kva(pgd_t *pgd);
-
-#else /* !CONFIG_NUMA */
-
-static inline void resume_map_numa_kva(pgd_t *pgd) {}
-
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
...
@@ -48,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
  * case properly. Once all supported versions of gcc understand it, we can
  * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
  */
-#define __pa_symbol(x)	__pa(__phys_reloc_hide((unsigned long)(x)))
+#define __pa_symbol(x) \
+	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
...
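Note on the hunk above: __pa_symbol() now routes through a dedicated __phys_addr_symbol() helper, which only has to cover addresses inside the kernel image, instead of the general-purpose __pa() path. Below is a minimal user-space sketch of the two translations; the constants are illustrative stand-ins (not the kernel's real layout) and an LP64 host is assumed.

```c
/* Hedged sketch: kernel-image ("symbol") translation vs. direct-map
 * translation.  Constants are stand-ins for illustration only. */
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL /* kernel text mapping base */
#define PAGE_OFFSET_X    0xffff880000000000UL /* direct (linear) mapping base */

static unsigned long phys_base = 0x1000000UL; /* where the image was loaded */

/* Symbols live in the kernel image: translate against START_KERNEL_MAP. */
static unsigned long pa_symbol(unsigned long x)
{
	return x - START_KERNEL_MAP + phys_base;
}

/* Ordinary lowmem pointers live in the direct map: translate against
 * PAGE_OFFSET.  (The real __pa() on x86-64 also accepts image addresses;
 * the point of the split is that __pa_symbol() no longer needs to.) */
static unsigned long pa_directmap(unsigned long x)
{
	return x - PAGE_OFFSET_X;
}

int main(void)
{
	unsigned long text_sym = START_KERNEL_MAP + 0x2000; /* e.g. a _text-relative symbol */
	unsigned long heap_ptr = PAGE_OFFSET_X + 0x12345000;

	printf("__pa_symbol-style: %#lx -> %#lx\n", text_sym, pa_symbol(text_sym));
	printf("__pa-style:        %#lx -> %#lx\n", heap_ptr, pa_directmap(heap_ptr));
	return 0;
}
```

Outside CONFIG_DEBUG_VIRTUAL the symbol case reduces to exactly this subtract-and-add against phys_base, with no range check.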
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long);
 #else
 #define __phys_addr(x)		__phys_addr_nodebug(x)
 #endif
+#define __phys_addr_symbol(x)	__phys_addr(x)
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
 #ifdef CONFIG_FLATMEM
...
@@ -3,4 +3,40 @@
 #include <asm/page_64_types.h>
 
+#ifndef __ASSEMBLY__
+
+/* duplicated to the one in bootmem.h */
+extern unsigned long max_pfn;
+extern unsigned long phys_base;
+
+static inline unsigned long __phys_addr_nodebug(unsigned long x)
+{
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
+
+	return x;
+}
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern unsigned long __phys_addr(unsigned long);
+extern unsigned long __phys_addr_symbol(unsigned long);
+#else
+#define __phys_addr(x)		__phys_addr_nodebug(x)
+#define __phys_addr_symbol(x) \
+	((unsigned long)(x) - __START_KERNEL_map + phys_base)
+#endif
+
+#define __phys_reloc_hide(x)	(x)
+
+#ifdef CONFIG_FLATMEM
+#define pfn_valid(pfn)		((pfn) < max_pfn)
+#endif
+
+void clear_page(void *page);
+void copy_page(void *to, void *from);
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_PAGE_64_H */
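The new __phys_addr_nodebug() above avoids a branch on the address range by exploiting unsigned wraparound: y = x - __START_KERNEL_map underflows (sets the carry flag) exactly when x lies below the kernel-image mapping, so the comparison x > y distinguishes kernel-image addresses from direct-map addresses. A small self-contained sketch of the same arithmetic follows, with stand-in constants and an assumed LP64 host.

```c
/* Hedged sketch of the unsigned-wraparound ("carry flag") test used by
 * __phys_addr_nodebug(); constants are illustrative stand-ins. */
#include <assert.h>
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL
#define PAGE_OFFSET_X    0xffff880000000000UL

static unsigned long phys_base = 0x1000000UL;

static unsigned long phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - START_KERNEL_MAP; /* wraps if x < START_KERNEL_MAP */

	/* x > y  <=>  the subtraction did not wrap  <=>  x is a kernel-image
	 * address; otherwise x must be a direct-map address. */
	x = y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET_X));
	return x;
}

int main(void)
{
	unsigned long image_va  = START_KERNEL_MAP + 0x5000;
	unsigned long linear_va = PAGE_OFFSET_X + 0xabc000;

	assert(phys_addr_nodebug(image_va)  == 0x5000 + phys_base);
	assert(phys_addr_nodebug(linear_va) == 0xabc000);
	printf("both translations check out\n");
	return 0;
}
```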
@@ -50,26 +50,4 @@
 #define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
 #define KERNEL_IMAGE_START	_AC(0xffffffff80000000, UL)
 
-#ifndef __ASSEMBLY__
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-/* duplicated to the one in bootmem.h */
-extern unsigned long max_pfn;
-extern unsigned long phys_base;
-
-extern unsigned long __phys_addr(unsigned long);
-#define __phys_reloc_hide(x)	(x)
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
-extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-
-#endif	/* !__ASSEMBLY__ */
-
-#ifdef CONFIG_FLATMEM
-#define pfn_valid(pfn)		((pfn) < max_pfn)
-#endif
-
 #endif /* _ASM_X86_PAGE_64_DEFS_H */
...
@@ -390,6 +390,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 
 #ifndef __ASSEMBLY__
 #include <linux/mm_types.h>
+#include <linux/log2.h>
 
 static inline int pte_none(pte_t pte)
 {
@@ -783,6 +784,19 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 	memcpy(dst, src, count * sizeof(pgd_t));
 }
 
+#define PTE_SHIFT ilog2(PTRS_PER_PTE)
+static inline int page_level_shift(enum pg_level level)
+{
+	return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
+}
+static inline unsigned long page_level_size(enum pg_level level)
+{
+	return 1UL << page_level_shift(level);
+}
+static inline unsigned long page_level_mask(enum pg_level level)
+{
+	return ~(page_level_size(level) - 1);
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
...
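The page_level_shift/size/mask() helpers added above derive the geometry of each paging level from PTRS_PER_PTE instead of hard-coding PMD/PUD constants: with 512 entries per table PTE_SHIFT is 9, so PG_LEVEL_4K, PG_LEVEL_2M and PG_LEVEL_1G yield shifts of 12, 21 and 30. Here is a stand-alone sketch with x86-64-style parameters; it is an illustration, not the kernel header.

```c
/* Hedged sketch of the new page_level_*() helpers with 4 KiB base pages
 * and 512 entries per table.  Enum values mirror PG_LEVEL_4K/2M/1G. */
#include <stdio.h>

#define PAGE_SHIFT   12
#define PTRS_PER_PTE 512
#define PTE_SHIFT    9              /* ilog2(PTRS_PER_PTE) */

enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

static int page_level_shift(enum pg_level level)
{
	return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
}

static unsigned long page_level_size(enum pg_level level)
{
	return 1UL << page_level_shift(level);
}

static unsigned long page_level_mask(enum pg_level level)
{
	return ~(page_level_size(level) - 1);
}

int main(void)
{
	/* PG_LEVEL_4K -> shift 12, PG_LEVEL_2M -> 21, PG_LEVEL_1G -> 30 */
	printf("2M size = %lu, mask = %#lx\n",
	       page_level_size(PG_LEVEL_2M), page_level_mask(PG_LEVEL_2M));
	return 0;
}
```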
@@ -183,6 +183,11 @@ extern void cleanup_highmap(void);
 
 #define __HAVE_ARCH_PTE_SAME
 
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
+extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_64_H */
...
@@ -330,7 +330,7 @@ extern void native_pagetable_init(void);
 struct seq_file;
 extern void arch_report_meminfo(struct seq_file *m);
 
-enum {
+enum pg_level {
 	PG_LEVEL_NONE,
 	PG_LEVEL_4K,
 	PG_LEVEL_2M,
@@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
  * as a pte too.
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern phys_addr_t slow_virt_to_phys(void *__address);
 
 #endif	/* !__ASSEMBLY__ */
...
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
 
 #ifndef CONFIG_64BIT
 	header->pmode_entry = (u32)&wakeup_pmode_return;
-	header->pmode_cr3 = (u32)__pa(&initial_page_table);
+	header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
...
@@ -28,6 +28,7 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 #include <asm/apic_flat_64.h>
+#include <asm/pgtable.h>
 
 static int numachip_system __read_mostly;
...
@@ -167,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void)
 #ifdef CONFIG_X86_F00F_BUG
 static void __cpuinit trap_init_f00f_bug(void)
 {
-	__set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+	__set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
 
 	/*
 	 * Update the IDT descriptor and reload the IDT so that
...
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
 	 * kernel identity mapping to modify code.
 	 */
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa(ip));
+		ip = (unsigned long)__va(__pa_symbol(ip));
 
 	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
 }
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
 	 * kernel identity mapping to modify code.
 	 */
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa(ip));
+		ip = (unsigned long)__va(__pa_symbol(ip));
 
 	return probe_kernel_write((void *)ip, val, size);
 }
...
@@ -297,9 +297,9 @@ static void kvm_register_steal_time(void)
 	memset(st, 0, sizeof(*st));
 
-	wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
+	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
 	printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
-		cpu, __pa(st));
+		cpu, slow_virt_to_phys(st));
 }
 
 static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void)
 		return;
 
 	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
-		u64 pa = __pa(&__get_cpu_var(apf_reason));
+		u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
 
 #ifdef CONFIG_PREEMPT
 		pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void)
 		/* Size alignment is implied but just to make it explicit. */
 		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
 		__get_cpu_var(kvm_apic_eoi) = 0;
-		pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+		pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
+			| KVM_MSR_ENABLED;
 		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
 	}
...
@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt)
 	int low, high, ret;
 	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
 
-	low = (int)__pa(src) | 1;
-	high = ((u64)__pa(src) >> 32);
+	low = (int)slow_virt_to_phys(src) | 1;
+	high = ((u64)slow_virt_to_phys(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
 	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
 	       cpu, high, low, txt);
...
@@ -284,8 +284,8 @@ static void __init cleanup_highmap(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		memblock_reserve(__pa(_brk_start),
-				 __pa(_brk_end) - __pa(_brk_start));
+		memblock_reserve(__pa_symbol(_brk_start),
+				 _brk_end - _brk_start);
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -903,12 +903,12 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = _brk_end;
 
-	code_resource.start = virt_to_phys(_text);
-	code_resource.end = virt_to_phys(_etext)-1;
-	data_resource.start = virt_to_phys(_etext);
-	data_resource.end = virt_to_phys(_edata)-1;
-	bss_resource.start = virt_to_phys(&__bss_start);
-	bss_resource.end = virt_to_phys(&__bss_stop)-1;
+	code_resource.start = __pa_symbol(_text);
+	code_resource.end = __pa_symbol(_etext)-1;
+	data_resource.start = __pa_symbol(_etext);
+	data_resource.end = __pa_symbol(_edata)-1;
+	bss_resource.start = __pa_symbol(__bss_start);
+	bss_resource.end = __pa_symbol(__bss_stop)-1;
 
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
...
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(memmove);
 
+#ifndef CONFIG_DEBUG_VIRTUAL
+EXPORT_SYMBOL(phys_base);
+#endif
 EXPORT_SYMBOL(empty_zero_page);
 #ifndef CONFIG_PARAVIRT
 EXPORT_SYMBOL(native_load_gs_index);
...
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3)
 	current_cr3 = cr3;
 
 	/* These two page tables are simple, linear, and used during boot */
-	if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+	if (cr3 != __pa_symbol(swapper_pg_dir) &&
+	    cr3 != __pa_symbol(initial_page_table))
 		cr3_changed = true;
 }
...
@@ -804,12 +804,10 @@ void set_kernel_text_ro(void)
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
-	unsigned long rodata_start =
-		((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
 	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
-	unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
-	unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
-	unsigned long data_start = (unsigned long) &_sdata;
+	unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
+	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
 	unsigned long all_end = PFN_ALIGN(&_end);
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
@@ -835,12 +833,12 @@ void mark_rodata_ro(void)
 #endif
 
 	free_init_pages("unused kernel memory",
-			(unsigned long) page_address(virt_to_page(text_end)),
-			(unsigned long)
-			page_address(virt_to_page(rodata_start)));
+			(unsigned long) __va(__pa_symbol(text_end)),
+			(unsigned long) __va(__pa_symbol(rodata_start)));
 	free_init_pages("unused kernel memory",
-			(unsigned long) page_address(virt_to_page(rodata_end)),
-			(unsigned long) page_address(virt_to_page(data_start)));
+			(unsigned long) __va(__pa_symbol(rodata_end)),
+			(unsigned long) __va(__pa_symbol(_sdata)));
 }
 
 #endif
...
@@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 static void __init setup_node_data(int nid, u64 start, u64 end)
 {
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	bool remapped = false;
 	u64 nd_pa;
 	void *nd;
 	int tnid;
@@ -205,37 +204,28 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 	if (end && (end - start) < NODE_MIN_SIZE)
 		return;
 
-	/* initialize remap allocator before aligning to ZONE_ALIGN */
-	init_alloc_remap(nid, start, end);
-
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
 	       nid, start, end - 1);
 
 	/*
-	 * Allocate node data. Try remap allocator first, node-local
-	 * memory and then any node. Never allocate in DMA zone.
+	 * Allocate node data. Try node-local memory and then any node.
+	 * Never allocate in DMA zone.
 	 */
-	nd = alloc_remap(nid, nd_size);
-	if (nd) {
-		nd_pa = __pa(nd);
-		remapped = true;
-	} else {
-		nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
-		if (!nd_pa) {
-			pr_err("Cannot find %zu bytes in node %d\n",
-			       nd_size, nid);
-			return;
-		}
-		nd = __va(nd_pa);
+	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+	if (!nd_pa) {
+		pr_err("Cannot find %zu bytes in node %d\n",
+		       nd_size, nid);
+		return;
 	}
+	nd = __va(nd_pa);
 
 	/* report and initialize */
-	printk(KERN_INFO "  NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
-	       nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+	printk(KERN_INFO "  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+	       nd_pa, nd_pa + nd_size - 1);
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
-	if (!remapped && tnid != nid)
+	if (tnid != nid)
 		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
 
 	node_data[nid] = nd;
...
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 extern unsigned long highend_pfn, highstart_pfn;
 
-#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-
-static void *node_remap_start_vaddr[MAX_NUMNODES];
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-/*
- * Remap memory allocator
- */
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
-/**
- * alloc_remap - Allocate remapped memory
- * @nid: NUMA node to allocate memory from
- * @size: The size of allocation
- *
- * Allocate @size bytes from the remap area of NUMA node @nid.  The
- * size of the remap area is predetermined by init_alloc_remap() and
- * only the callers considered there should call this function.  For
- * more info, please read the comment on top of init_alloc_remap().
- *
- * The caller must be ready to handle allocation failure from this
- * function and fall back to regular memory allocator in such cases.
- *
- * CONTEXT:
- * Single CPU early boot context.
- *
- * RETURNS:
- * Pointer to the allocated memory on success, %NULL on failure.
- */
-void *alloc_remap(int nid, unsigned long size)
-{
-	void *allocation = node_remap_alloc_vaddr[nid];
-
-	size = ALIGN(size, L1_CACHE_BYTES);
-
-	if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
-		return NULL;
-
-	node_remap_alloc_vaddr[nid] += size;
-	memset(allocation, 0, size);
-
-	return allocation;
-}
-
-#ifdef CONFIG_HIBERNATION
-/**
- * resume_map_numa_kva - add KVA mapping to the temporary page tables created
- *                       during resume from hibernation
- * @pgd_base - temporary resume page directory
- */
-void resume_map_numa_kva(pgd_t *pgd_base)
-{
-	int node;
-
-	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, nr_pages, pfn;
-
-		start_va = (unsigned long)node_remap_start_vaddr[node];
-		start_pfn = node_remap_start_pfn[node];
-		nr_pages = (node_remap_end_vaddr[node] -
-			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
-
-		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
-
-		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
-			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
-			pgd_t *pgd = pgd_base + pgd_index(vaddr);
-			pud_t *pud = pud_offset(pgd, vaddr);
-			pmd_t *pmd = pmd_offset(pud, vaddr);
-
-			set_pmd(pmd, pfn_pmd(start_pfn + pfn,
-						PAGE_KERNEL_LARGE_EXEC));
-
-			printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
-				__func__, vaddr, start_pfn + pfn);
-		}
-	}
-}
-#endif
-
-/**
- * init_alloc_remap - Initialize remap allocator for a NUMA node
- * @nid: NUMA node to initizlie remap allocator for
- *
- * NUMA nodes may end up without any lowmem.  As allocating pgdat and
- * memmap on a different node with lowmem is inefficient, a special
- * remap allocator is implemented which can be used by alloc_remap().
- *
- * For each node, the amount of memory which will be necessary for
- * pgdat and memmap is calculated and two memory areas of the size are
- * allocated - one in the node and the other in lowmem; then, the area
- * in the node is remapped to the lowmem area.
- *
- * As pgdat and memmap must be allocated in lowmem anyway, this
- * doesn't waste lowmem address space; however, the actual lowmem
- * which gets remapped over is wasted.  The amount shouldn't be
- * problematic on machines this feature will be used.
- *
- * Initialization failure isn't fatal.  alloc_remap() is used
- * opportunistically and the callers will fall back to other memory
- * allocation mechanisms on failure.
- */
-void __init init_alloc_remap(int nid, u64 start, u64 end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long end_pfn = end >> PAGE_SHIFT;
-	unsigned long size, pfn;
-	u64 node_pa, remap_pa;
-	void *remap_va;
-
-	/*
-	 * The acpi/srat node info can show hot-add memroy zones where
-	 * memory could be added but not currently present.
-	 */
-	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-	       nid, start_pfn, end_pfn);
-
-	/* calculate the necessary space aligned to large page size */
-	size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
-	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-	size = ALIGN(size, LARGE_PAGE_BYTES);
-
-	/* allocate node memory and the lowmem remap area */
-	node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
-	if (!node_pa) {
-		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
-			   size, nid);
-		return;
-	}
-	memblock_reserve(node_pa, size);
-
-	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
-					  max_low_pfn << PAGE_SHIFT,
-					  size, LARGE_PAGE_BYTES);
-	if (!remap_pa) {
-		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
-			   size, nid);
-		memblock_free(node_pa, size);
-		return;
-	}
-	memblock_reserve(remap_pa, size);
-	remap_va = phys_to_virt(remap_pa);
-
-	/* perform actual remap */
-	for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
-		set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
-			    (node_pa >> PAGE_SHIFT) + pfn,
-			    PAGE_KERNEL_LARGE);
-
-	/* initialize remap allocator parameters */
-	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
-	node_remap_start_vaddr[nid] = remap_va;
-	node_remap_end_vaddr[nid] = remap_va + size;
-	node_remap_alloc_vaddr[nid] = remap_va;
-
-	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
-	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
-}
-
 void __init initmem_init(void)
 {
 	x86_numa_init();
...
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void);
 
 void __init x86_numa_init(void);
 
-#ifdef CONFIG_X86_64
-static inline void init_alloc_remap(int nid, u64 start, u64 end)	{ }
-#else
-void __init init_alloc_remap(int nid, u64 start, u64 end);
-#endif
-
 #ifdef CONFIG_NUMA_EMU
 void __init numa_emulation(struct numa_meminfo *numa_meminfo,
 			   int numa_dist_cnt);
...
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { }
 
 static inline unsigned long highmap_start_pfn(void)
 {
-	return __pa(_text) >> PAGE_SHIFT;
+	return __pa_symbol(_text) >> PAGE_SHIFT;
 }
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	 * The .rodata section needs to be read-only. Using the pfn
 	 * catches all aliases.
 	 */
-	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
-		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+	if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
@@ -363,6 +363,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 }
 EXPORT_SYMBOL_GPL(lookup_address);
 
+/*
+ * This is necessary because __pa() does not work on some
+ * kinds of memory, like vmalloc() or the alloc_remap()
+ * areas on 32-bit NUMA systems.  The percpu areas can
+ * end up in this kind of memory, for instance.
+ *
+ * This could be optimized, but it is only intended to be
+ * used at inititalization time, and keeping it
+ * unoptimized should increase the testing coverage for
+ * the more obscure platforms.
+ */
+phys_addr_t slow_virt_to_phys(void *__virt_addr)
+{
+	unsigned long virt_addr = (unsigned long)__virt_addr;
+	phys_addr_t phys_addr;
+	unsigned long offset;
+	enum pg_level level;
+	unsigned long psize;
+	unsigned long pmask;
+	pte_t *pte;
+
+	pte = lookup_address(virt_addr, &level);
+	BUG_ON(!pte);
+	psize = page_level_size(level);
+	pmask = page_level_mask(level);
+	offset = virt_addr & ~pmask;
+	phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+	return (phys_addr | offset);
+}
+EXPORT_SYMBOL_GPL(slow_virt_to_phys);
+
 /*
  * Set the new pmd in all the pgds we know about:
  */
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
-	unsigned int level;
+	enum pg_level level;
 
 	if (cpa->force_split)
 		return 1;
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 
 	switch (level) {
 	case PG_LEVEL_2M:
-		psize = PMD_PAGE_SIZE;
-		pmask = PMD_PAGE_MASK;
-		break;
 #ifdef CONFIG_X86_64
 	case PG_LEVEL_1G:
-		psize = PUD_PAGE_SIZE;
-		pmask = PUD_PAGE_MASK;
-		break;
 #endif
+		psize = page_level_size(level);
+		pmask = page_level_mask(level);
+		break;
 	default:
 		do_split = -EINVAL;
 		goto out_unlock;
...
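slow_virt_to_phys(), added in the hunk above, resolves a virtual address by walking the page tables through lookup_address() and then combining the pfn stored in the (possibly huge) PTE with the offset of the address inside that mapping. Below is a hedged, stand-alone sketch of just that composition step; the inputs are made up and stand in for what lookup_address() would return.

```c
/* Hedged sketch of the final step of slow_virt_to_phys(): combine the
 * mapping's pfn with the offset of the virtual address inside the
 * mapping.  Constants and inputs are illustrative only. */
#include <stdio.h>

#define PAGE_SHIFT 12

enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

static int page_level_shift(enum pg_level level)
{
	return 3 + level * 9;            /* 12 / 21 / 30, as in the new helpers */
}

static unsigned long page_level_mask(enum pg_level level)
{
	return ~((1UL << page_level_shift(level)) - 1);
}

/* phys = (pfn of the mapping) << PAGE_SHIFT, OR'd with the offset of the
 * virtual address inside that mapping. */
static unsigned long compose_phys(unsigned long vaddr, unsigned long pfn,
				  enum pg_level level)
{
	unsigned long offset = vaddr & ~page_level_mask(level);

	return (pfn << PAGE_SHIFT) | offset;
}

int main(void)
{
	/* a virtual address backed by a 2 MiB mapping whose first pfn is 0x80000 */
	unsigned long vaddr = 0xffffc90000234567UL;
	unsigned long pfn   = 0x80000;

	printf("phys = %#lx\n", compose_phys(vaddr, pfn, PG_LEVEL_2M));
	return 0;
}
```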
@@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
 {
 	unsigned long id_sz;
 
-	if (base >= __pa(high_memory))
+	if (base > __pa(high_memory-1))
 		return 0;
 
-	id_sz = (__pa(high_memory) < base + size) ?
+	id_sz = (__pa(high_memory-1) <= base + size) ?
 				__pa(high_memory) - base :
 				size;
...
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 	if (changed && dirty) {
 		*pmdp = entry;
 		pmd_update_defer(vma->vm_mm, address, pmdp);
-		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+		/*
+		 * We had a write-protection fault here and changed the pmd
+		 * to to more permissive. No need to flush the TLB for that,
+		 * #PF is architecturally guaranteed to do that and in the
+		 * worst-case we'll generate a spurious fault.
+		 */
 	}
 
 	return changed;
...
+#include <linux/bootmem.h>
 #include <linux/mmdebug.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -8,33 +9,54 @@
 
 #ifdef CONFIG_X86_64
 
+#ifdef CONFIG_DEBUG_VIRTUAL
 unsigned long __phys_addr(unsigned long x)
 {
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
-		x += phys_base;
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	if (unlikely(x > y)) {
+		x = y + phys_base;
+
+		VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
 	} else {
-		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
-		x -= PAGE_OFFSET;
-		VIRTUAL_BUG_ON(!phys_addr_valid(x));
+		x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+		/* carry flag will be set if starting x was >= PAGE_OFFSET */
+		VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x));
 	}
+
 	return x;
 }
 EXPORT_SYMBOL(__phys_addr);
 
+unsigned long __phys_addr_symbol(unsigned long x)
+{
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* only check upper bounds since lower bounds will trigger carry */
+	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+
+	return y + phys_base;
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
+#endif
+
 bool __virt_addr_valid(unsigned long x)
 {
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		if (x >= KERNEL_IMAGE_SIZE)
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	if (unlikely(x > y)) {
+		x = y + phys_base;
+
+		if (y >= KERNEL_IMAGE_SIZE)
 			return false;
-		x += phys_base;
 	} else {
-		if (x < PAGE_OFFSET)
-			return false;
-		x -= PAGE_OFFSET;
-		if (!phys_addr_valid(x))
+		x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+		/* carry flag will be set if starting x was >= PAGE_OFFSET */
+		if ((x > y) || !phys_addr_valid(x))
 			return false;
 	}
 
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid);
 #ifdef CONFIG_DEBUG_VIRTUAL
 unsigned long __phys_addr(unsigned long x)
 {
+	unsigned long phys_addr = x - PAGE_OFFSET;
 	/* VMALLOC_* aren't constants  */
 	VIRTUAL_BUG_ON(x < PAGE_OFFSET);
 	VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
-	return x - PAGE_OFFSET;
+	/* max_low_pfn is set early, but not _that_ early */
+	if (max_low_pfn) {
+		VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn);
+		BUG_ON(slow_virt_to_phys((void *)x) != phys_addr);
+	}
+	return phys_addr;
 }
 EXPORT_SYMBOL(__phys_addr);
 #endif
...
@@ -410,8 +410,8 @@ void __init efi_reserve_boot_services(void)
 		 * - Not within any part of the kernel
 		 * - Not the bios reserved area
 		 */
-		if ((start+size >= virt_to_phys(_text)
-				&& start <= virt_to_phys(_end)) ||
+		if ((start+size >= __pa_symbol(_text)
+				&& start <= __pa_symbol(_end)) ||
 			!e820_all_mapped(start, start+size, E820_RAM) ||
 			memblock_is_region_reserved(start, size)) {
 			/* Could not reserve, skip it */
...
@@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
 		}
 	}
 
-	resume_map_numa_kva(pgd_base);
-
 	return 0;
 }
...
@@ -70,9 +70,9 @@ void __init setup_real_mode(void)
 		__va(real_mode_header->trampoline_header);
 
 #ifdef CONFIG_X86_32
-	trampoline_header->start = __pa(startup_32_smp);
+	trampoline_header->start = __pa_symbol(startup_32_smp);
 	trampoline_header->gdt_limit = __BOOT_DS + 7;
-	trampoline_header->gdt_base = __pa(boot_gdt);
+	trampoline_header->gdt_base = __pa_symbol(boot_gdt);
 #else
 	/*
 	 * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
...