Commit e34eb39c authored by Linus Torvalds

Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, amd: Include linux/elf.h since we use stuff from asm/elf.h
  x86: cache_info: Update calculation of AMD L3 cache indices
  x86: cache_info: Kill the atomic allocation in amd_init_l3_cache()
  x86: cache_info: Kill the moronic shadow struct
  x86: cache_info: Remove bogus free of amd_l3_cache data
  x86, amd: Include elf.h explicitly, prepare the code for the module.h split
  x86-32, amd: Move va_align definition to unbreak 32-bit build
  x86, amd: Move BSP code to cpu_dev helper
  x86: Add a BSP cpu_dev helper
  x86, amd: Avoid cache aliasing penalties on AMD family 15h
parents 396e6e49 910b2c51
@@ -307,6 +307,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			behaviour to be specified. Bit 0 enables warnings,
			bit 1 enables fixups, and bit 2 sends a segfault.
	align_va_addr=	[X86-64]
			Align virtual addresses by clearing slice [14:12] when
			allocating a VMA at process creation time. This option
			gives you up to 3% performance improvement on AMD F15h
			machines (where it is enabled by default) for a
			CPU-intensive style benchmark, and it can vary highly in
			a microbenchmark depending on workload and compiler.

			32: only for 32-bit processes
			64: only for 64-bit processes
			on: enable for both 32- and 64-bit processes
			off: disable for both 32- and 64-bit processes
	amd_iommu=	[HW,X86-64]
			Pass parameters to the AMD IOMMU driver in the system.
			Possible values are:
......
@@ -19,9 +19,15 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, int);
struct amd_l3_cache {
	unsigned indices;
	u8	 subcaches[4];
};
struct amd_northbridge {
	struct pci_dev *misc;
	struct pci_dev *link;
	struct amd_l3_cache l3_cache;
};

struct amd_northbridge_info {
......
@@ -4,6 +4,7 @@
/*
 * ELF register definitions..
 */
#include <linux/thread_info.h>
#include <asm/ptrace.h>
#include <asm/user.h>

@@ -320,4 +321,34 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
/*
 * True on X86_32 or when emulating IA32 on X86_64
 */
static inline int mmap_is_ia32(void)
{
#ifdef CONFIG_X86_32
	return 1;
#endif
#ifdef CONFIG_IA32_EMULATION
	if (test_thread_flag(TIF_IA32))
		return 1;
#endif
	return 0;
}
/* The first two values are special, do not change. See align_addr() */
enum align_flags {
	ALIGN_VA_32	= BIT(0),
	ALIGN_VA_64	= BIT(1),
	ALIGN_VDSO	= BIT(2),
	ALIGN_TOPDOWN	= BIT(3),
};

struct va_alignment {
	int		flags;
	unsigned long	mask;
} ____cacheline_aligned;

extern struct va_alignment va_align;
extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
#endif /* _ASM_X86_ELF_H */
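The "do not change" warning above is load-bearing: align_addr(), added in the sys_x86_64.c hunk further down, selects the relevant flag with the expression va_align.flags & (2 - mmap_is_ia32()), which only resolves to ALIGN_VA_32 or ALIGN_VA_64 because those two flags sit in bits 0 and 1. A minimal userspace sketch of that selection (helper name hypothetical):

#include <stdio.h>

#define ALIGN_VA_32 (1 << 0)
#define ALIGN_VA_64 (1 << 1)

/* For an ia32 task, 2 - 1 == 1 == ALIGN_VA_32; for a 64-bit task,
 * 2 - 0 == 2 == ALIGN_VA_64 -- one conditional covers both cases. */
static int va_align_applies(int flags, int is_ia32)
{
	return flags >= 0 && (flags & (2 - is_ia32));
}

int main(void)
{
	int flags = ALIGN_VA_64;	/* e.g. after booting with align_va_addr=64 */

	/* Prints "64-bit task: 1, 32-bit task: 0" */
	printf("64-bit task: %d, 32-bit task: %d\n",
	       va_align_applies(flags, 0), va_align_applies(flags, 1));
	return 0;
}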
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/io.h>

@@ -410,6 +411,34 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
#endif
}
static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
{
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {

		if (c->x86 > 0x10 ||
		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
			u64 val;

			rdmsrl(MSR_K7_HWCR, val);
			if (!(val & BIT(24)))
				printk(KERN_WARNING FW_BUG "TSC doesn't count "
					"with P0 frequency!\n");
		}
	}

	if (c->x86 == 0x15) {
		unsigned long upperbit;
		u32 cpuid, assoc;

		cpuid	 = cpuid_edx(0x80000005);
		assoc	 = cpuid >> 16 & 0xff;
		upperbit = ((cpuid >> 24) << 10) / assoc;

		va_align.mask	= (upperbit - 1) & PAGE_MASK;
		va_align.flags	= ALIGN_VA_32 | ALIGN_VA_64;
	}
}
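To make the family 0x15 branch concrete: CPUID leaf 0x80000005 EDX describes the L1 instruction cache, with the size in KB in bits 31:24 and the associativity in bits 23:16. The standalone sketch below feeds in the 64 KB, 2-way geometry that F15h parts report (assumed values, not read from real hardware) and reproduces the resulting mask:

#include <stdio.h>

int main(void)
{
	/* Assumed F15h L1I geometry, encoded like CPUID 0x80000005 EDX:
	 * bits 31:24 = size in KB (64), bits 23:16 = associativity (2). */
	unsigned int cpuid = (64u << 24) | (2u << 16);

	unsigned int assoc     = cpuid >> 16 & 0xff;
	unsigned long upperbit = ((unsigned long)(cpuid >> 24) << 10) / assoc;
	unsigned long mask     = (upperbit - 1) & ~0xfffUL; /* PAGE_MASK for 4K pages */

	/* One cache way spans 32 KB, so bits [14:12] of a virtual address
	 * select the page slot within a way -- exactly the slice named in
	 * the align_va_addr documentation above. */
	printf("upperbit=%#lx mask=%#lx\n", upperbit, mask); /* 0x8000 0x7000 */
	return 0;
}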
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

@@ -441,23 +470,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
		set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
	}
#endif
	/* We need to do the following only once */
	if (c != &boot_cpu_data)
		return;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
		if (c->x86 > 0x10 ||
		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
			u64 val;

			rdmsrl(MSR_K7_HWCR, val);
			if (!(val & BIT(24)))
				printk(KERN_WARNING FW_BUG "TSC doesn't count "
					"with P0 frequency!\n");
		}
	}
}

static void __cpuinit init_amd(struct cpuinfo_x86 *c)

@@ -679,6 +691,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
	.c_size_cache	= amd_size_cache,
#endif
	.c_early_init	= early_init_amd,
	.c_bsp_init	= bsp_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
};
......
@@ -681,6 +681,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
	filter_cpuid_features(c, false);

	setup_smep(c);

	if (this_cpu->c_bsp_init)
		this_cpu->c_bsp_init(c);
}
void __init early_cpu_init(void)
......
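Note the placement: early_identify_cpu() runs exactly once, on the boot CPU, so routing vendor-specific one-time setup through the new c_bsp_init hook is what allows the "if (c != &boot_cpu_data) return;" test to be dropped from early_init_amd() above.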
@@ -18,6 +18,7 @@ struct cpu_dev {
	struct cpu_model_info	c_models[4];

	void		(*c_early_init)(struct cpuinfo_x86 *);
	void		(*c_bsp_init)(struct cpuinfo_x86 *);
	void		(*c_init)(struct cpuinfo_x86 *);
	void		(*c_identify)(struct cpuinfo_x86 *);
	unsigned int	(*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
......
@@ -14,10 +14,73 @@
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
/*
 * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
 *
 * @flags denotes the allocation direction - bottomup or topdown -
 * or vDSO; see call sites below.
 */
unsigned long align_addr(unsigned long addr, struct file *filp,
			 enum align_flags flags)
{
	unsigned long tmp_addr;

	/* handle 32- and 64-bit case with a single conditional */
	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
		return addr;

	if (!(current->flags & PF_RANDOMIZE))
		return addr;

	if (!((flags & ALIGN_VDSO) || filp))
		return addr;

	tmp_addr = addr;

	/*
	 * We need an address which is <= the original one only
	 * when in topdown direction.
	 */
	if (!(flags & ALIGN_TOPDOWN))
		tmp_addr += va_align.mask;

	tmp_addr &= ~va_align.mask;

	return tmp_addr;
}
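The last few lines are plain round-to-boundary arithmetic: round up for bottom-up allocation so the result stays >= the request, round down for top-down so it stays <=. A quick userspace illustration, using the 0x7000 mask computed for F15h and an arbitrary example address:

#include <stdio.h>

int main(void)
{
	unsigned long mask = 0x7000;		/* va_align.mask on F15h */
	unsigned long addr = 0x7f1234567000UL;	/* arbitrary example VA */

	unsigned long up   = (addr + mask) & ~mask; /* bottom-up: result >= addr */
	unsigned long down = addr & ~mask;	    /* top-down:  result <= addr */

	/* up=0x7f1234568000 down=0x7f1234560000 -- bits [14:12] cleared */
	printf("up=%#lx down=%#lx\n", up, down);
	return 0;
}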
static int __init control_va_addr_alignment(char *str)
{
	/* guard against enabling this on other CPU families */
	if (va_align.flags < 0)
		return 1;

	if (*str == 0)
		return 1;

	if (*str == '=')
		str++;

	if (!strcmp(str, "32"))
		va_align.flags = ALIGN_VA_32;
	else if (!strcmp(str, "64"))
		va_align.flags = ALIGN_VA_64;
	else if (!strcmp(str, "off"))
		va_align.flags = 0;
	else if (!strcmp(str, "on"))
		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
	else
		return 0;

	return 1;
}
__setup("align_va_addr", control_va_addr_alignment);
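With the __setup hook registered, the feature is driven entirely from the kernel command line: booting with, say, align_va_addr=32 narrows the alignment to 32-bit processes, and align_va_addr=off disables it even on F15h, matching the option table added to kernel-parameters.txt above.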
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, off)

@@ -92,6 +155,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
	start_addr = addr;

full_search:

	addr = align_addr(addr, filp, 0);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (end - len < addr) {
@@ -117,6 +183,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
			mm->cached_hole_size = vma->vm_start - addr;

		addr = vma->vm_end;
		addr = align_addr(addr, filp, 0);
	}
}
@@ -161,10 +228,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,

	/* make sure it can fit in the remaining address space */
	if (addr > len) {
		vma = find_vma(mm, addr-len);
		if (!vma || addr <= vma->vm_start)
		unsigned long tmp_addr = align_addr(addr - len, filp,
						    ALIGN_TOPDOWN);

		vma = find_vma(mm, tmp_addr);
		if (!vma || tmp_addr + len <= vma->vm_start)
			/* remember the address as a hint for next time */
			return mm->free_area_cache = addr-len;
			return mm->free_area_cache = tmp_addr;
	}
	if (mm->mmap_base < len)
@@ -173,6 +243,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
	addr = mm->mmap_base-len;

	do {
		addr = align_addr(addr, filp, ALIGN_TOPDOWN);

		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
......
@@ -31,6 +31,10 @@
#include <linux/sched.h>

#include <asm/elf.h>
struct __read_mostly va_alignment va_align = {
	.flags = -1,
};
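The -1 initializer here is what the "va_align.flags < 0" tests in sys_x86_64.c guard against: unless bsp_init_amd() flipped the flags on an F15h boot CPU, the alignment logic stays dormant on every other vendor and family.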
static unsigned int stack_maxrandom_size(void)
{
	unsigned int max = 0;

@@ -42,7 +46,6 @@ static unsigned int stack_maxrandom_size(void)
	return max;
}
/*
 * Top of mmap area (just below the process stack).
 *

@@ -51,21 +54,6 @@ static unsigned int stack_maxrandom_size(void)
#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
#define MAX_GAP (TASK_SIZE/6*5)
/*
 * True on X86_32 or when emulating IA32 on X86_64
 */
static int mmap_is_ia32(void)
{
#ifdef CONFIG_X86_32
	return 1;
#endif
#ifdef CONFIG_IA32_EMULATION
	if (test_thread_flag(TIF_IA32))
		return 1;
#endif
	return 0;
}
static int mmap_is_legacy(void)
{
	if (current->personality & ADDR_COMPAT_LAYOUT)
......
@@ -89,6 +89,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
	addr = start + (offset << PAGE_SHIFT);

	if (addr >= end)
		addr = end;
	/*
	 * page-align it here so that get_unmapped_area doesn't
	 * align it wrongfully again to the next page. addr can come in 4K
	 * unaligned here as a result of stack start randomization.
	 */
	addr = PAGE_ALIGN(addr);
	addr = align_addr(addr, NULL, ALIGN_VDSO);
	return addr;
}
......