Commit 0c7fc30f authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:

 1) Fix section mismatches in some builds, from Paul Gortmaker.

 2) Need to count huge zero page mappings when doing TSB sizing, from
    Mike Kravetz (see the sizing sketch after this list).

 3) Fix handling of cpu_possible_mask when nr_cpus module option is
    specified, from Atish Patra.

 4) Don't allocate irq stacks until nr_irqs has been processed, also
    from Atish Patra.
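
For fix (2), a minimal user-space sketch of the sizing arithmetic involved, not kernel code: the shift values assume the 8MB HPAGE / 4MB REAL_HPAGE split used by sparc64, and huge_mappings is a made-up number chosen only for illustration.

/* Illustrative sketch: why huge mapping counts are scaled by
 * REAL_HPAGE_PER_HPAGE before sizing the huge-page TSB. */
#include <stdio.h>

#define HPAGE_SHIFT		23	/* assumed 8MB huge page */
#define REAL_HPAGE_SHIFT	22	/* assumed 4MB real TTEs backing it */
#define REAL_HPAGE_PER_HPAGE	(1UL << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))

int main(void)
{
	/* hypothetical count of hugetlb + THP + huge zero page mappings */
	unsigned long huge_mappings = 512;

	/* Each 8MB mapping is installed as two 4MB TTEs, so it consumes
	 * two TSB entries; sizing on the raw count undersizes the TSB. */
	unsigned long tsb_entries = huge_mappings * REAL_HPAGE_PER_HPAGE;

	printf("%lu huge mappings -> %lu TSB entries\n",
	       huge_mappings, tsb_entries);
	return 0;
}
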

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix non-SMP build.
  sparc64: Fix irq stack bootmem allocation.
  sparc64: Fix cpu_possible_mask if nr_cpus is set
  sparc64 mm: Fix more TSB sizing issues
  sparc64: fix section mismatch in find_numa_latencies_for_group
parents bb6bbc7c 2a0100d7
@@ -25,6 +25,7 @@
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif

 #ifndef __ASSEMBLY__
@@ -43,6 +43,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)

+void smp_fill_in_cpu_possible_map(void);
 void smp_fill_in_sib_core_maps(void);
 void cpu_play_dead(void);

@@ -72,6 +73,7 @@ void __cpu_die(unsigned int cpu);
 #define smp_fill_in_sib_core_maps() do { } while (0)
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)

 #endif /* !(CONFIG_SMP) */
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
+#include <linux/bootmem.h>

 #include <asm/io.h>
 #include <asm/processor.h>
@@ -50,6 +51,8 @@
 #include <asm/elf.h>
 #include <asm/mdesc.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>

 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -590,6 +593,22 @@ static void __init init_sparc64_elf_hwcap(void)
 	pause_patch();
 }

+void __init alloc_irqstack_bootmem(void)
+{
+	unsigned int i, node;
+
+	for_each_possible_cpu(i) {
+		node = cpu_to_node(i);
+
+		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+	}
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */
@@ -650,6 +669,13 @@ void __init setup_arch(char **cmdline_p)

 	paging_init();
 	init_sparc64_elf_hwcap();
+	smp_fill_in_cpu_possible_map();
+	/*
+	 * Once the OF device tree and MDESC have been setup and nr_cpus has
+	 * been parsed, we know the list of possible cpus. Therefore we can
+	 * allocate the IRQ stacks.
+	 */
+	alloc_irqstack_bootmem();
 }

 extern int stop_a_enabled;
@@ -1227,6 +1227,20 @@ void __init smp_setup_processor_id(void)
 		xcall_deliver_impl = hypervisor_xcall_deliver;
 }

+void __init smp_fill_in_cpu_possible_map(void)
+{
+	int possible_cpus = num_possible_cpus();
+	int i;
+
+	if (possible_cpus > nr_cpu_ids)
+		possible_cpus = nr_cpu_ids;
+
+	for (i = 0; i < possible_cpus; i++)
+		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
+}
+
 void smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
@@ -484,6 +484,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+	mm_rss *= REAL_HPAGE_PER_HPAGE;
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
 		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
...@@ -1160,7 +1160,7 @@ int __node_distance(int from, int to) ...@@ -1160,7 +1160,7 @@ int __node_distance(int from, int to)
return numa_latency[from][to]; return numa_latency[from][to];
} }
static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{ {
int i; int i;
...@@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) ...@@ -1173,8 +1173,8 @@ static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
return i; return i;
} }
static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
int index) u64 grp, int index)
{ {
u64 arc; u64 arc;
...@@ -2081,7 +2081,6 @@ void __init paging_init(void) ...@@ -2081,7 +2081,6 @@ void __init paging_init(void)
{ {
unsigned long end_pfn, shift, phys_base; unsigned long end_pfn, shift, phys_base;
unsigned long real_end, i; unsigned long real_end, i;
int node;
setup_page_offset(); setup_page_offset();
...@@ -2250,21 +2249,6 @@ void __init paging_init(void) ...@@ -2250,21 +2249,6 @@ void __init paging_init(void)
/* Setup bootmem... */ /* Setup bootmem... */
last_valid_pfn = end_pfn = bootmem_init(phys_base); last_valid_pfn = end_pfn = bootmem_init(phys_base);
/* Once the OF device tree and MDESC have been setup, we know
* the list of possible cpus. Therefore we can allocate the
* IRQ stacks.
*/
for_each_possible_cpu(i) {
node = cpu_to_node(i);
softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
THREAD_SIZE,
THREAD_SIZE, 0);
hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
THREAD_SIZE,
THREAD_SIZE, 0);
}
kernel_physical_mapping_init(); kernel_physical_mapping_init();
{ {
@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		return;

 	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-		if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-			mm->context.thp_pte_count++;
-		else
-			mm->context.thp_pte_count--;
+		/*
+		 * Note that this routine only sets pmds for THP pages.
+		 * Hugetlb pages are handled elsewhere. We need to check
+		 * for huge zero page. Huge zero pages are like hugetlb
+		 * pages in that there is no RSS, but there is the need
+		 * for TSB entries. So, huge zero page counts go into
+		 * hugetlb_pte_count.
+		 */
+		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+			if (is_huge_zero_page(pmd_page(pmd)))
+				mm->context.hugetlb_pte_count++;
+			else
+				mm->context.thp_pte_count++;
+		} else {
+			if (is_huge_zero_page(pmd_page(orig)))
+				mm->context.hugetlb_pte_count--;
+			else
+				mm->context.thp_pte_count--;
+		}

 		/* Do not try to allocate the TSB hash table if we
 		 * don't have one already. We have various locks held
@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	}
 }

+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
@@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	set_pmd_at(vma->vm_mm, address, pmdp, entry);
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/*
+	 * set_pmd_at() will not be called in a way to decrement
+	 * thp_pte_count when splitting a THP, so do it now.
+	 * Sanity check pmd before doing the actual decrement.
+	 */
+	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+	    !is_huge_zero_page(pmd_page(entry)))
+		(vma->vm_mm)->context.thp_pte_count--;
 }

 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
@@ -469,8 +469,10 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)

 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+	unsigned long mm_rss = get_mm_rss(mm);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	unsigned long total_huge_pte_count;
+	unsigned long saved_hugetlb_pte_count;
+	unsigned long saved_thp_pte_count;
 #endif
 	unsigned int i;

@@ -483,10 +485,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
 	 */
-	total_huge_pte_count = mm->context.hugetlb_pte_count +
-			 mm->context.thp_pte_count;
+	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+	saved_thp_pte_count = mm->context.thp_pte_count;
 	mm->context.hugetlb_pte_count = 0;
 	mm->context.thp_pte_count = 0;
+
+	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
 #endif

 	/* copy_mm() copies over the parent's mm_struct before calling
@@ -499,11 +503,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	/* If this is fork, inherit the parent's TSB size. We would
 	 * grow it to that size on the first page fault anyways.
 	 */
-	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+	tsb_grow(mm, MM_TSB_BASE, mm_rss);

 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (unlikely(total_huge_pte_count))
-		tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+		tsb_grow(mm, MM_TSB_HUGE,
+			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
+			 REAL_HPAGE_PER_HPAGE);
 #endif

 	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))