Commit c1ff8431 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Peter Anvin:
 "Quite a varied little collection of fixes.  Most of them are
  relatively small or isolated; the biggest one is Mel Gorman's fixes
  for TLB range flushing.

  A couple of AMD-related fixes (including not crashing when given an
  invalid microcode image) and fix a crash when compiled with gcov"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, microcode, AMD: Unify valid container checks
  x86, hweight: Fix BUG when booting with CONFIG_GCOV_PROFILE_ALL=y
  x86/efi: Allow mapping BGRT on x86-32
  x86: Fix the initialization of physnode_map
  x86, cpu hotplug: Fix stack frame warning in check_irq_vectors_for_cpu_disable()
  x86/intel/mid: Fix X86_INTEL_MID dependencies
  arch/x86/mm/srat: Skip NUMA_NO_NODE while parsing SLIT
  mm, x86: Revisit tlb_flushall_shift tuning for page flushes except on IvyBridge
  x86: mm: change tlb_flushall_shift for IvyBridge
  x86/mm: Eliminate redundant page table walk during TLB range flushing
  x86/mm: Clean up inconsistencies when flushing TLB ranges
  mm, x86: Account for TLB flushes only when debugging
  x86/AMD/NB: Fix amd_set_subcaches() parameter type
  x86/quirks: Add workaround for AMD F16h Erratum792
  x86, doc, kconfig: Fix dud URL for Microcode data
parents ec2e6cb2 a3b072cd
......@@ -444,6 +444,7 @@ config X86_INTEL_MID
bool "Intel MID platform support"
depends on X86_32
depends on X86_EXTENDED_PLATFORM
depends on X86_PLATFORM_DEVICES
depends on PCI
depends on PCI_GOANY
depends on X86_IO_APIC
......@@ -1051,9 +1052,9 @@ config MICROCODE_INTEL
This options enables microcode patch loading support for Intel
processors.
For latest news and information on obtaining all the required
Intel ingredients for this driver, check:
<http://www.urbanmyth.org/microcode/>.
For the current Intel microcode data package go to
<https://downloadcenter.intel.com> and search for
'Linux Processor Microcode Data File'.
config MICROCODE_AMD
bool "AMD microcode loading support"
......
......@@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, int);
extern int amd_set_subcaches(int, unsigned long);
struct amd_l3_cache {
unsigned indices;
......
......@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void)
static inline void __flush_tlb_one(unsigned long addr)
{
count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
}
......@@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr)
*/
static inline void __flush_tlb_up(void)
{
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
}
static inline void flush_tlb_all(void)
{
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb_all();
}
......
......@@ -179,7 +179,7 @@ int amd_get_subcaches(int cpu)
return (mask >> (4 * cuid)) & 0xf;
}
int amd_set_subcaches(int cpu, int mask)
int amd_set_subcaches(int cpu, unsigned long mask)
{
static unsigned int reset, ban;
struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
......
......@@ -767,10 +767,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
tlb_flushall_shift = 5;
if (c->x86 <= 0x11)
tlb_flushall_shift = 4;
tlb_flushall_shift = 6;
}
static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
......
......@@ -640,21 +640,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
case 0x61d: /* six-core 45 nm xeon "Dunnington" */
tlb_flushall_shift = -1;
break;
case 0x63a: /* Ivybridge */
tlb_flushall_shift = 2;
break;
case 0x61a: /* 45 nm nehalem, "Bloomfield" */
case 0x61e: /* 45 nm nehalem, "Lynnfield" */
case 0x625: /* 32 nm nehalem, "Clarkdale" */
case 0x62c: /* 32 nm nehalem, "Gulftown" */
case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
case 0x62f: /* 32 nm Xeon E7 */
tlb_flushall_shift = 6;
break;
case 0x62a: /* SandyBridge */
case 0x62d: /* SandyBridge, "Romely-EP" */
tlb_flushall_shift = 5;
break;
case 0x63a: /* Ivybridge */
tlb_flushall_shift = 1;
break;
default:
tlb_flushall_shift = 6;
}
......
......@@ -285,6 +285,15 @@ static void __init collect_cpu_sig_on_bsp(void *arg)
uci->cpu_sig.sig = cpuid_eax(0x00000001);
}
static void __init get_bsp_sig(void)
{
unsigned int bsp = boot_cpu_data.cpu_index;
struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
if (!uci->cpu_sig.sig)
smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
}
#else
void load_ucode_amd_ap(void)
{
......@@ -337,31 +346,37 @@ void load_ucode_amd_ap(void)
int __init save_microcode_in_initrd_amd(void)
{
unsigned long cont;
enum ucode_state ret;
u32 eax;
#ifdef CONFIG_X86_32
unsigned int bsp = boot_cpu_data.cpu_index;
struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
if (!uci->cpu_sig.sig)
smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
if (!container)
return -EINVAL;
#ifdef CONFIG_X86_32
get_bsp_sig();
cont = (unsigned long)container;
#else
/*
* Take into account the fact that the ramdisk might get relocated
* and therefore we need to recompute the container's position in
* virtual memory space.
* We need the physical address of the container for both bitness since
* boot_params.hdr.ramdisk_image is a physical address.
*/
container = (u8 *)(__va((u32)relocated_ramdisk) +
((u32)container - boot_params.hdr.ramdisk_image));
cont = __pa(container);
#endif
/*
* Take into account the fact that the ramdisk might get relocated and
* therefore we need to recompute the container's position in virtual
* memory space.
*/
if (relocated_ramdisk)
container = (u8 *)(__va(relocated_ramdisk) +
(cont - boot_params.hdr.ramdisk_image));
if (ucode_new_rev)
pr_info("microcode: updated early to new patch_level=0x%08x\n",
ucode_new_rev);
if (!container)
return -EINVAL;
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
......
......@@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Save MTRR state */
......@@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Intel (P6) standard MTRRs */
......
......@@ -266,6 +266,14 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
/* These two declarations are only used in check_irq_vectors_for_cpu_disable()
* below, which is protected by stop_machine(). Putting them on the stack
* results in a stack frame overflow. Dynamically allocating could result in a
* failure so declare these two cpumasks as global.
*/
static struct cpumask affinity_new, online_new;
/*
* This cpu is going to be removed and its vectors migrated to the remaining
* online cpus. Check to see if there are enough vectors in the remaining cpus.
......@@ -277,7 +285,6 @@ int check_irq_vectors_for_cpu_disable(void)
unsigned int this_cpu, vector, this_count, count;
struct irq_desc *desc;
struct irq_data *data;
struct cpumask affinity_new, online_new;
this_cpu = smp_processor_id();
cpumask_copy(&online_new, cpu_online_mask);
......
......@@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
quirk_amd_nb_node);
#endif
#ifdef CONFIG_PCI
/*
* Processor does not ensure DRAM scrub read/write sequence
* is atomic wrt accesses to CC6 save state area. Therefore
* if a concurrent scrub read/write access is to same address
* the entry may appear as if it is not written. This quirk
* applies to Fam16h models 00h-0Fh
*
* See "Revision Guide" for AMD F16h models 00h-0fh,
* document 51810 rev. 3.04, Nov 2013
*/
static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
{
u32 val;
/*
* Suggested workaround:
* set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b
*/
pci_read_config_dword(dev, 0x58, &val);
if (val & 0x1F) {
val &= ~(0x1F);
pci_write_config_dword(dev, 0x58, val);
}
pci_read_config_dword(dev, 0x5C, &val);
if (val & BIT(0)) {
val &= ~BIT(0);
pci_write_config_dword(dev, 0x5c, val);
}
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
#endif
......@@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end)
nid, start, end);
printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
printk(KERN_DEBUG " ");
start = round_down(start, PAGES_PER_SECTION);
end = round_up(end, PAGES_PER_SECTION);
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
physnode_map[pfn / PAGES_PER_SECTION] = nid;
printk(KERN_CONT "%lx ", pfn);
......
......@@ -42,15 +42,25 @@ static __init inline int srat_disabled(void)
return acpi_numa < 0;
}
/* Callback for SLIT parsing */
/*
* Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
* I/O localities since SRAT does not list them. I/O localities are
* not supported at this point.
*/
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
int i, j;
for (i = 0; i < slit->locality_count; i++)
for (j = 0; j < slit->locality_count; j++)
for (i = 0; i < slit->locality_count; i++) {
if (pxm_to_node(i) == NUMA_NO_NODE)
continue;
for (j = 0; j < slit->locality_count; j++) {
if (pxm_to_node(j) == NUMA_NO_NODE)
continue;
numa_set_distance(pxm_to_node(i), pxm_to_node(j),
slit->entry[slit->locality_count * i + j]);
}
}
}
/* Callback for Proximity Domain -> x2APIC mapping */
......
......@@ -103,7 +103,7 @@ static void flush_tlb_func(void *info)
if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
return;
count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_end == TLB_FLUSH_ALL)
local_flush_tlb();
......@@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
info.flush_start = start;
info.flush_end = end;
count_vm_event(NR_TLB_REMOTE_FLUSH);
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
if (is_uv_system()) {
unsigned int cpu;
......@@ -151,44 +151,19 @@ void flush_tlb_current_task(void)
preempt_disable();
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
local_flush_tlb();
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
preempt_enable();
}
/*
* It can find out the THP large page, or
* HUGETLB page in tlb_flush when THP disabled
*/
static inline unsigned long has_large_page(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr = ALIGN(start, HPAGE_SIZE);
for (; addr < end; addr += HPAGE_SIZE) {
pgd = pgd_offset(mm, addr);
if (likely(!pgd_none(*pgd))) {
pud = pud_offset(pgd, addr);
if (likely(!pud_none(*pud))) {
pmd = pmd_offset(pud, addr);
if (likely(!pmd_none(*pmd)))
if (pmd_large(*pmd))
return addr;
}
}
}
return 0;
}
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag)
{
unsigned long addr;
unsigned act_entries, tlb_entries = 0;
unsigned long nr_base_pages;
preempt_disable();
if (current->active_mm != mm)
......@@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
tlb_entries = tlb_lli_4k[ENTRIES];
else
tlb_entries = tlb_lld_4k[ENTRIES];
/* Assume all of TLB entries was occupied by this task */
act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
act_entries = tlb_entries >> tlb_flushall_shift;
act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
nr_base_pages = (end - start) >> PAGE_SHIFT;
/* tlb_flushall_shift is on balance point, details in commit log */
if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
if (nr_base_pages > act_entries) {
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
local_flush_tlb();
} else {
if (has_large_page(mm, start, end)) {
local_flush_tlb();
goto flush_all;
}
/* flush range by one by one 'invlpg' */
for (addr = start; addr < end; addr += PAGE_SIZE) {
count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
}
......@@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
static void do_flush_tlb_all(void *info)
{
count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
__flush_tlb_all();
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(smp_processor_id());
......@@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
count_vm_event(NR_TLB_REMOTE_FLUSH);
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
......
......@@ -49,7 +49,8 @@ void __init efi_bgrt_init(void)
image = efi_lookup_mapped_addr(bgrt_tab->image_address);
if (!image) {
image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
image = early_memremap(bgrt_tab->image_address,
sizeof(bmp_header));
ioremapped = true;
if (!image)
return;
......@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void)
memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
if (ioremapped)
iounmap(image);
early_iounmap(image, sizeof(bmp_header));
bgrt_image_size = bmp_header.size;
bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
......@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void)
return;
if (ioremapped) {
image = ioremap(bgrt_tab->image_address, bmp_header.size);
image = early_memremap(bgrt_tab->image_address,
bmp_header.size);
if (!image) {
kfree(bgrt_image);
bgrt_image = NULL;
......@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void)
memcpy_fromio(bgrt_image, image, bgrt_image_size);
if (ioremapped)
iounmap(image);
early_iounmap(image, bmp_header.size);
}
......@@ -71,12 +71,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_ZERO_PAGE_ALLOC,
THP_ZERO_PAGE_ALLOC_FAILED,
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
#endif
#endif /* CONFIG_SMP */
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
NR_VM_EVENT_ITEMS
};
......
......@@ -83,6 +83,14 @@ static inline void vm_events_fold_cpu(int cpu)
#define count_vm_numa_events(x, y) do { (void)(y); } while (0)
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_DEBUG_TLBFLUSH
#define count_vm_tlb_event(x) count_vm_event(x)
#define count_vm_tlb_events(x, y) count_vm_events(x, y)
#else
#define count_vm_tlb_event(x) do {} while (0)
#define count_vm_tlb_events(x, y) do { (void)(y); } while (0)
#endif
#define __count_zone_vm_events(item, zone, delta) \
__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
zone_idx(zone), delta)
......
......@@ -45,6 +45,7 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
GCOV_PROFILE_hweight.o := n
CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
......
......@@ -851,12 +851,14 @@ const char * const vmstat_text[] = {
"thp_zero_page_alloc",
"thp_zero_page_alloc_failed",
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
"nr_tlb_remote_flush",
"nr_tlb_remote_flush_received",
#endif
#endif /* CONFIG_SMP */
"nr_tlb_local_flush_all",
"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */
#endif /* CONFIG_VM_EVENTS_COUNTERS */
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment