Commit f2197649 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  scripts/gdb: make lx-dmesg command work (reliably)
  mm: consider memblock reservations for deferred memory initialization sizing
  mm/hugetlb: report -EHWPOISON not -EFAULT when FOLL_HWPOISON is specified
  mlock: fix mlock count can not decrease in race condition
  mm/migrate: fix refcount handling when !hugepage_migration_supported()
  dax: fix race between colliding PMD & PTE entries
  mm: avoid spurious 'bad pmd' warning messages
  mm/page_alloc.c: make sure OOM victim can try allocations with no watermarks once
  pcmcia: remove left-over %Z format
  slub/memcg: cure the brainless abuse of sysfs attributes
  initramfs: fix disabling of initramfs (and its compression)
  mm: clarify why we want kmalloc before falling backto vmallock
  frv: declare jiffies to be located in the .data section
  include/linux/gfp.h: fix ___GFP_NOLOCKDEP value
  ksm: prevent crash after write_protect_page fails
parents e6e6d074 d6c97087
......@@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
#define vxtime_lock() do {} while (0)
#define vxtime_unlock() do {} while (0)
/* This attribute is used in include/linux/jiffies.h alongside with
* __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
* for frv does not contain another section specification.
*/
#define __jiffy_arch_data __attribute__((__section__(".data")))
#endif
......@@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
rc = write_sync_reg(SCR_HOST_TO_READER_START, dev);
if (rc <= 0) {
DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
return rc;
......@@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
for (i = 0; i < bytes_to_write; i++) {
rc = wait_for_bulk_out_ready(dev);
if (rc <= 0) {
DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n",
DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n",
rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
......@@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev);
if (rc <= 0) {
DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
return rc;
......
......@@ -1154,6 +1154,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
goto out;
}
/*
* It is possible, particularly with mixed reads & writes to private
* mappings, that we have raced with a PMD fault that overlaps with
* the PTE we need to set up. If so just return and the fault will be
* retried.
*/
if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
vmf_ret = VM_FAULT_NOPAGE;
goto unlock_entry;
}
/*
* Note that we don't bother to use iomap_apply here: DAX required
* the file system block size to be equal the page size, which means
......@@ -1397,6 +1408,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
if (IS_ERR(entry))
goto fallback;
/*
* It is possible, particularly with mixed reads & writes to private
* mappings, that we have raced with a PTE fault that overlaps with
* the PMD we need to set up. If so just return and the fault will be
* retried.
*/
if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
!pmd_devmap(*vmf->pmd)) {
result = 0;
goto unlock_entry;
}
/*
* Note that we don't use iomap_apply here. We aren't doing I/O, only
* setting up a mapping, so really we're using iomap_begin() as a way
......
......@@ -41,7 +41,7 @@ struct vm_area_struct;
#define ___GFP_WRITE 0x800000u
#define ___GFP_KSWAPD_RECLAIM 0x1000000u
#ifdef CONFIG_LOCKDEP
#define ___GFP_NOLOCKDEP 0x4000000u
#define ___GFP_NOLOCKDEP 0x2000000u
#else
#define ___GFP_NOLOCKDEP 0
#endif
......
......@@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate);
/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
#ifndef __jiffy_arch_data
#define __jiffy_arch_data
#endif
/*
* The 64-bit value is not atomic - you MUST NOT read it
* without sampling the sequence number in jiffies_lock.
* get_jiffies_64() will do this for you as appropriate.
*/
extern u64 __cacheline_aligned_in_smp jiffies_64;
extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void);
......
......@@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
}
#endif
extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
phys_addr_t end_addr);
#else
static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
{
return 0;
}
static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
phys_addr_t end_addr)
{
return 0;
}
#endif /* CONFIG_HAVE_MEMBLOCK */
#endif /* __KERNEL__ */
......
......@@ -2327,6 +2327,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
{
if (vm_fault & VM_FAULT_OOM)
return -ENOMEM;
if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
return 0;
}
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
......
......@@ -678,6 +678,7 @@ typedef struct pglist_data {
* is the first PFN that needs to be initialised.
*/
unsigned long first_deferred_pfn;
unsigned long static_init_size;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
......
......@@ -407,12 +407,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
ret = handle_mm_fault(vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
int err = vm_fault_to_errno(ret, *flags);
if (err)
return err;
BUG();
}
......@@ -723,12 +721,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
return -ENOMEM;
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
return -EHWPOISON;
if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
return -EFAULT;
int err = vm_fault_to_errno(ret, 0);
if (err)
return err;
BUG();
}
......
......@@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, flags);
if (err)
return err;
remainder = 0;
break;
}
......
......@@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
goto out;
if (PageTransCompound(page)) {
err = split_huge_page(page);
if (err)
if (split_huge_page(page))
goto out_unlock;
}
......
......@@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
}
}
extern unsigned long __init_memblock
memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
{
struct memblock_region *rgn;
unsigned long size = 0;
int idx;
for_each_memblock_type((&memblock.reserved), rgn) {
phys_addr_t start, end;
if (rgn->base + rgn->size < start_addr)
continue;
if (rgn->base > end_addr)
continue;
start = rgn->base;
end = start + rgn->size;
size += end - start;
}
return size;
}
void __init_memblock __memblock_dump_all(void)
{
pr_info("MEMBLOCK configuration:\n");
......
......@@ -1595,12 +1595,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
if (ret) {
pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
pfn, ret, page->flags, &page->flags);
/*
* We know that soft_offline_huge_page() tries to migrate
* only one hugepage pointed to by hpage, so we need not
* run through the pagelist here.
*/
putback_active_hugepage(hpage);
if (!list_empty(&pagelist))
putback_movable_pages(&pagelist);
if (ret > 0)
ret = -EIO;
} else {
......
......@@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf)
return ret;
}
/*
* The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
* If we check pmd_trans_unstable() first we will trip the bad_pmd() check
* inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
* returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
*/
static int pmd_devmap_trans_unstable(pmd_t *pmd)
{
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}
static int pte_alloc_one_map(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
......@@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
map_pte:
/*
* If a huge pmd materialized under us just retry later. Use
* pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
* didn't become pmd_trans_huge under us and then back to pmd_none, as
* a result of MADV_DONTNEED running immediately after a huge pmd fault
* in a different thread of this mm, in turn leading to a misleading
* pmd_trans_huge() retval. All we have to ensure is that it is a
* regular pmd that we can walk with pte_offset_map() and we can do that
* through an atomic read in C, which is what pmd_trans_unstable()
* provides.
* pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
* pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
* under us and then back to pmd_none, as a result of MADV_DONTNEED
* running immediately after a huge pmd fault in a different thread of
* this mm, in turn leading to a misleading pmd_trans_huge() retval.
* All we have to ensure is that it is a regular pmd that we can walk
* with pte_offset_map() and we can do that through an atomic read in
* C, which is what pmd_trans_unstable() provides.
*/
if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
if (pmd_devmap_trans_unstable(vmf->pmd))
return VM_FAULT_NOPAGE;
/*
* At this point we know that our vmf->pmd points to a page of ptes
* and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
* for the duration of the fault. If a racing MADV_DONTNEED runs and
* we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
* be valid and we will re-check to make sure the vmf->pte isn't
* pte_none() under vmf->ptl protection when we return to
* alloc_set_pte().
*/
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
return 0;
......@@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
vmf->pte = NULL;
} else {
/* See comment in pte_alloc_one_map() */
if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
/*
* A regular pmd is established and it can't morph into a huge
......
......@@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
int i;
int nr = pagevec_count(pvec);
int delta_munlocked;
int delta_munlocked = -nr;
struct pagevec pvec_putback;
int pgrescued = 0;
......@@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
continue;
else
__munlock_isolation_failed(page);
} else {
delta_munlocked++;
}
/*
......@@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
pagevec_add(&pvec_putback, pvec->pages[i]);
pvec->pages[i] = NULL;
}
delta_munlocked = -nr + pagevec_count(&pvec_putback);
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
spin_unlock_irq(zone_lru_lock(zone));
......
......@@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void reset_deferred_meminit(pg_data_t *pgdat)
{
unsigned long max_initialise;
unsigned long reserved_lowmem;
/*
* Initialise at least 2G of a node but also take into account that
* two large system hashes that can take up 1GB for 0.25TB/node.
*/
max_initialise = max(2UL << (30 - PAGE_SHIFT),
(pgdat->node_spanned_pages >> 8));
/*
* Compensate the all the memblock reservations (e.g. crash kernel)
* from the initial estimation to make sure we will initialize enough
* memory to boot.
*/
reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
pgdat->node_start_pfn + max_initialise);
max_initialise += reserved_lowmem;
pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
pgdat->first_deferred_pfn = ULONG_MAX;
}
......@@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat,
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
{
unsigned long max_initialise;
/* Always populate low zones for address-contrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
/*
* Initialise at least 2G of a node but also take into account that
* two large system hashes that can take up 1GB for 0.25TB/node.
*/
max_initialise = max(2UL << (30 - PAGE_SHIFT),
(pgdat->node_spanned_pages >> 8));
(*nr_initialised)++;
if ((*nr_initialised > max_initialise) &&
if ((*nr_initialised > pgdat->static_init_size) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
......@@ -3870,7 +3881,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto got_pg;
/* Avoid allocations with no watermarks from looping endlessly */
if (test_thread_flag(TIF_MEMDIE))
if (test_thread_flag(TIF_MEMDIE) &&
(alloc_flags == ALLOC_NO_WATERMARKS ||
(gfp_mask & __GFP_NOMEMALLOC)))
goto nopage;
/* Retry as long as the OOM killer is making progress */
......@@ -6136,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
/* pg_data_t should be reset to zero when it's allocated */
WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
reset_deferred_meminit(pgdat);
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
pgdat->per_cpu_nodestats = NULL;
......@@ -6158,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
(unsigned long)pgdat->node_mem_map);
#endif
reset_deferred_meminit(pgdat);
free_area_init_core(pgdat);
}
......
......@@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
char mbuf[64];
char *buf;
struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
ssize_t len;
if (!attr || !attr->store || !attr->show)
continue;
......@@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
buf = buffer;
}
attr->show(root_cache, buf);
attr->store(s, buf, strlen(buf));
len = attr->show(root_cache, buf);
if (len > 0)
attr->store(s, buf, len);
}
if (buffer)
......
......@@ -357,8 +357,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
/*
* Make sure that larger requests are not too disruptive - no OOM
* killer and no allocation failure warnings as we have a fallback
* We want to attempt a large physically contiguous block first because
* it is less likely to fragment multiple larger blocks and therefore
* contribute to a long term fragmentation less than vmalloc fallback.
* However make sure that larger requests are not too disruptive - no
* OOM killer and no allocation failure warnings as we have a fallback.
*/
if (size > PAGE_SIZE) {
kmalloc_flags |= __GFP_NOWARN;
......
......@@ -23,10 +23,11 @@ class LxDmesg(gdb.Command):
super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16)
log_first_idx = int(gdb.parse_and_eval("log_first_idx"))
log_next_idx = int(gdb.parse_and_eval("log_next_idx"))
log_buf_len = int(gdb.parse_and_eval("log_buf_len"))
log_buf_addr = int(str(gdb.parse_and_eval(
"'printk.c'::log_buf")).split()[0], 16)
log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
inf = gdb.inferiors()[0]
start = log_buf_addr + log_first_idx
......
......@@ -220,6 +220,7 @@ config INITRAMFS_COMPRESSION_LZ4
endchoice
config INITRAMFS_COMPRESSION
depends on INITRAMFS_SOURCE!=""
string
default "" if INITRAMFS_COMPRESSION_NONE
default ".gz" if INITRAMFS_COMPRESSION_GZIP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment