Commit c67a98c0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "16 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/memblock.c: fix a typo in __next_mem_pfn_range() comments
  mm, page_alloc: check for max order in hot path
  scripts/spdxcheck.py: make python3 compliant
  tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset
  lib/ubsan.c: don't mark __ubsan_handle_builtin_unreachable as noreturn
  mm/vmstat.c: fix NUMA statistics updates
  mm/gup.c: fix follow_page_mask() kerneldoc comment
  ocfs2: free up write context when direct IO failed
  scripts/faddr2line: fix location of start_kernel in comment
  mm: don't reclaim inodes with many attached pages
  mm, memory_hotplug: check zone_movable in has_unmovable_pages
  mm/swapfile.c: use kvzalloc for swap_info_struct allocation
  MAINTAINERS: update OMAP MMC entry
  hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444!
  kernel/sched/psi.c: simplify cgroup_move_task()
  z3fold: fix possible reclaim races
parents 03582f33 45e79815
......@@ -2138,6 +2138,10 @@ E: paul@laufernet.com
D: Soundblaster driver fixes, ISAPnP quirk
S: California, USA
N: Jarkko Lavinen
E: jarkko.lavinen@nokia.com
D: OMAP MMC support
N: Jonathan Layes
D: ARPD support
......
......@@ -10808,9 +10808,9 @@ F: drivers/media/platform/omap3isp/
F: drivers/staging/media/omap4iss/
OMAP MMC SUPPORT
M: Jarkko Lavinen <jarkko.lavinen@nokia.com>
M: Aaro Koskinen <aaro.koskinen@iki.fi>
L: linux-omap@vger.kernel.org
S: Maintained
S: Odd Fixes
F: drivers/mmc/host/omap.c
OMAP POWER MANAGEMENT SUPPORT
......
......@@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
return LRU_REMOVED;
}
/* recently referenced inodes get one more pass */
if (inode->i_state & I_REFERENCED) {
/*
* Recently referenced inodes and inodes with many attached pages
* get one more pass.
*/
if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
return LRU_ROTATE;
......
......@@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb,
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
if (bytes > 0 && private)
ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
if (bytes <= 0)
mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld",
(long long)bytes);
if (private) {
if (bytes > 0)
ret = ocfs2_dio_end_io_write(inode, private, offset,
bytes);
else
ocfs2_dio_free_write_ctx(inode, private);
}
ocfs2_iocb_clear_rw_locked(iocb);
......
......@@ -178,6 +178,15 @@ do { \
##__VA_ARGS__); \
} while (0)
#define mlog_ratelimited(mask, fmt, ...) \
do { \
static DEFINE_RATELIMIT_STATE(_rs, \
DEFAULT_RATELIMIT_INTERVAL, \
DEFAULT_RATELIMIT_BURST); \
if (__ratelimit(&_rs)) \
mlog(mask, fmt, ##__VA_ARGS__); \
} while (0)
#define mlog_errno(st) ({ \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
......
......@@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
*/
void cgroup_move_task(struct task_struct *task, struct css_set *to)
{
bool move_psi = !psi_disabled;
unsigned int task_flags = 0;
struct rq_flags rf;
struct rq *rq;
if (move_psi) {
rq = task_rq_lock(task, &rf);
if (psi_disabled) {
/*
* Lame to do this here, but the scheduler cannot be locked
* from the outside, so we move cgroups from inside sched/.
*/
rcu_assign_pointer(task->cgroups, to);
return;
}
if (task_on_rq_queued(task))
task_flags = TSK_RUNNING;
else if (task->in_iowait)
task_flags = TSK_IOWAIT;
rq = task_rq_lock(task, &rf);
if (task->flags & PF_MEMSTALL)
task_flags |= TSK_MEMSTALL;
if (task_on_rq_queued(task))
task_flags = TSK_RUNNING;
else if (task->in_iowait)
task_flags = TSK_IOWAIT;
if (task_flags)
psi_task_change(task, task_flags, 0);
}
if (task->flags & PF_MEMSTALL)
task_flags |= TSK_MEMSTALL;
/*
* Lame to do this here, but the scheduler cannot be locked
* from the outside, so we move cgroups from inside sched/.
*/
if (task_flags)
psi_task_change(task, task_flags, 0);
/* See comment above */
rcu_assign_pointer(task->cgroups, to);
if (move_psi) {
if (task_flags)
psi_task_change(task, 0, task_flags);
if (task_flags)
psi_task_change(task, 0, task_flags);
task_rq_unlock(rq, task, &rf);
}
task_rq_unlock(rq, task, &rf);
}
#endif /* CONFIG_CGROUPS */
......
......@@ -427,8 +427,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
void __noreturn
__ubsan_handle_builtin_unreachable(struct unreachable_data *data)
void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
{
unsigned long flags;
......
......@@ -385,11 +385,17 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
* @vma: vm_area_struct mapping @address
* @address: virtual address to look up
* @flags: flags modifying lookup behaviour
* @page_mask: on output, *page_mask is set according to the size of the page
* @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
* pointer to output page_mask
*
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
*
* Returns the mapped (struct page *), %NULL if no mapping exists, or
* When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
* the device's dev_pagemap metadata to avoid repeating expensive lookups.
*
* On output, the @ctx->page_mask is set according to the size of the page.
*
* Return: the mapped (struct page *), %NULL if no mapping exists, or
* an error pointer if there is a mapping to something not represented
* by a page descriptor (see also vm_normal_page()).
*/
......
......@@ -3233,7 +3233,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
pte_t *src_pte, *dst_pte, entry;
pte_t *src_pte, *dst_pte, entry, dst_entry;
struct page *ptepage;
unsigned long addr;
int cow;
......@@ -3261,15 +3261,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
break;
}
/* If the pagetables are shared don't copy or take references */
if (dst_pte == src_pte)
/*
* If the pagetables are shared don't copy or take references.
* dst_pte == src_pte is the common case of src/dest sharing.
*
* However, src could have 'unshared' and dst shares with
* another vma. If dst_pte !none, this implies sharing.
* Check here before taking page table lock, and once again
* after taking the lock below.
*/
dst_entry = huge_ptep_get(dst_pte);
if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
continue;
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
if (huge_pte_none(entry)) { /* skip none entry */
dst_entry = huge_ptep_get(dst_pte);
if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
/*
* Skip if src entry none. Also, skip in the
* unlikely case dst entry !none as this implies
* sharing with another vma.
*/
;
} else if (unlikely(is_hugetlb_entry_migration(entry) ||
is_hugetlb_entry_hwpoisoned(entry))) {
......
......@@ -1179,7 +1179,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
* Common iterator interface used to define for_each_mem_range().
* Common iterator interface used to define for_each_mem_pfn_range().
*/
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
unsigned long *out_start_pfn,
......
......@@ -4060,17 +4060,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int cpuset_mems_cookie;
int reserve_flags;
/*
* In the slowpath, we sanity check order to avoid ever trying to
* reclaim >= MAX_ORDER areas which will never succeed. Callers may
* be using allocators in order of preference for an area that is
* too large.
*/
if (order >= MAX_ORDER) {
WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
return NULL;
}
/*
* We also sanity check to catch abuse of atomic reserves being used by
* callers that are not in atomic context.
......@@ -4364,6 +4353,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
/*
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
*/
if (unlikely(order >= MAX_ORDER)) {
WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
return NULL;
}
gfp_mask &= gfp_allowed_mask;
alloc_mask = gfp_mask;
if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
......@@ -7788,6 +7786,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
if (PageReserved(page))
goto unmovable;
/*
* If the zone is movable and we have ruled out all reserved
* pages then it should be reasonably safe to assume the rest
* is movable.
*/
if (zone_idx(zone) == ZONE_MOVABLE)
continue;
/*
* Hugepages are not in LRU lists, but they're movable.
* We need not scan over tail pages bacause we don't
......
......@@ -2563,9 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
inode_lock(inode);
/* We're holding i_mutex so we can access i_size directly */
if (offset < 0)
offset = -EINVAL;
else if (offset >= inode->i_size)
if (offset < 0 || offset >= inode->i_size)
offset = -ENXIO;
else {
start = offset >> PAGE_SHIFT;
......
......@@ -2813,7 +2813,7 @@ static struct swap_info_struct *alloc_swap_info(void)
unsigned int type;
int i;
p = kzalloc(sizeof(*p), GFP_KERNEL);
p = kvzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
......@@ -2824,7 +2824,7 @@ static struct swap_info_struct *alloc_swap_info(void)
}
if (type >= MAX_SWAPFILES) {
spin_unlock(&swap_lock);
kfree(p);
kvfree(p);
return ERR_PTR(-EPERM);
}
if (type >= nr_swapfiles) {
......@@ -2838,7 +2838,7 @@ static struct swap_info_struct *alloc_swap_info(void)
smp_wmb();
nr_swapfiles++;
} else {
kfree(p);
kvfree(p);
p = swap_info[type];
/*
* Do not memset this entry: a racing procfs swap_next()
......
......@@ -1827,12 +1827,13 @@ static bool need_update(int cpu)
/*
* The fast way of checking if there are any vmstat diffs.
* This works because the diffs are byte sized items.
*/
if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
sizeof(p->vm_stat_diff[0])))
return true;
#ifdef CONFIG_NUMA
if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
sizeof(p->vm_numa_stat_diff[0])))
return true;
#endif
}
......
......@@ -99,6 +99,7 @@ struct z3fold_header {
#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
#define BUDDY_MASK (0x3)
#define BUDDY_SHIFT 2
/**
* struct z3fold_pool - stores metadata for each z3fold pool
......@@ -145,7 +146,7 @@ enum z3fold_page_flags {
MIDDLE_CHUNK_MAPPED,
NEEDS_COMPACTING,
PAGE_STALE,
UNDER_RECLAIM
PAGE_CLAIMED, /* by either reclaim or free */
};
/*****************
......@@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
clear_bit(NEEDS_COMPACTING, &page->private);
clear_bit(PAGE_STALE, &page->private);
clear_bit(UNDER_RECLAIM, &page->private);
clear_bit(PAGE_CLAIMED, &page->private);
spin_lock_init(&zhdr->page_lock);
kref_init(&zhdr->refcount);
......@@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
unsigned long handle;
handle = (unsigned long)zhdr;
if (bud != HEADLESS)
handle += (bud + zhdr->first_num) & BUDDY_MASK;
if (bud != HEADLESS) {
handle |= (bud + zhdr->first_num) & BUDDY_MASK;
if (bud == LAST)
handle |= (zhdr->last_chunks << BUDDY_SHIFT);
}
return handle;
}
......@@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
return (struct z3fold_header *)(handle & PAGE_MASK);
}
/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
}
/*
* (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
* but that doesn't matter. because the masking will result in the
......@@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
page = virt_to_page(zhdr);
if (test_bit(PAGE_HEADLESS, &page->private)) {
/* HEADLESS page stored */
bud = HEADLESS;
} else {
z3fold_page_lock(zhdr);
bud = handle_to_buddy(handle);
switch (bud) {
case FIRST:
zhdr->first_chunks = 0;
break;
case MIDDLE:
zhdr->middle_chunks = 0;
zhdr->start_middle = 0;
break;
case LAST:
zhdr->last_chunks = 0;
break;
default:
pr_err("%s: unknown bud %d\n", __func__, bud);
WARN_ON(1);
z3fold_page_unlock(zhdr);
return;
/* if a headless page is under reclaim, just leave.
* NB: we use test_and_set_bit for a reason: if the bit
* has not been set before, we release this page
* immediately so we don't care about its value any more.
*/
if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
spin_lock(&pool->lock);
list_del(&page->lru);
spin_unlock(&pool->lock);
free_z3fold_page(page);
atomic64_dec(&pool->pages_nr);
}
return;
}
if (bud == HEADLESS) {
spin_lock(&pool->lock);
list_del(&page->lru);
spin_unlock(&pool->lock);
free_z3fold_page(page);
atomic64_dec(&pool->pages_nr);
/* Non-headless case */
z3fold_page_lock(zhdr);
bud = handle_to_buddy(handle);
switch (bud) {
case FIRST:
zhdr->first_chunks = 0;
break;
case MIDDLE:
zhdr->middle_chunks = 0;
break;
case LAST:
zhdr->last_chunks = 0;
break;
default:
pr_err("%s: unknown bud %d\n", __func__, bud);
WARN_ON(1);
z3fold_page_unlock(zhdr);
return;
}
......@@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
if (test_bit(UNDER_RECLAIM, &page->private)) {
if (test_bit(PAGE_CLAIMED, &page->private)) {
z3fold_page_unlock(zhdr);
return;
}
......@@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
}
list_for_each_prev(pos, &pool->lru) {
page = list_entry(pos, struct page, lru);
/* this bit could have been set by free, in which case
* we pass over to the next page in the pool.
*/
if (test_and_set_bit(PAGE_CLAIMED, &page->private))
continue;
zhdr = page_address(page);
if (test_bit(PAGE_HEADLESS, &page->private))
/* candidate found */
break;
zhdr = page_address(page);
if (!z3fold_page_trylock(zhdr))
if (!z3fold_page_trylock(zhdr)) {
zhdr = NULL;
continue; /* can't evict at this point */
}
kref_get(&zhdr->refcount);
list_del_init(&zhdr->buddy);
zhdr->cpu = -1;
set_bit(UNDER_RECLAIM, &page->private);
break;
}
if (!zhdr)
break;
list_del_init(&page->lru);
spin_unlock(&pool->lock);
......@@ -898,6 +920,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
if (test_bit(PAGE_HEADLESS, &page->private)) {
if (ret == 0) {
free_z3fold_page(page);
atomic64_dec(&pool->pages_nr);
return 0;
}
spin_lock(&pool->lock);
......@@ -905,7 +928,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
spin_unlock(&pool->lock);
} else {
z3fold_page_lock(zhdr);
clear_bit(UNDER_RECLAIM, &page->private);
clear_bit(PAGE_CLAIMED, &page->private);
if (kref_put(&zhdr->refcount,
release_z3fold_page_locked)) {
atomic64_dec(&pool->pages_nr);
......@@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
break;
case LAST:
addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
break;
default:
pr_err("unknown buddy id %d\n", buddy);
......
......@@ -71,7 +71,7 @@ die() {
# Try to figure out the source directory prefix so we can remove it from the
# addr2line output. HACK ALERT: This assumes that start_kernel() is in
# kernel/init.c! This only works for vmlinux. Otherwise it falls back to
# init/main.c! This only works for vmlinux. Otherwise it falls back to
# printing the absolute path.
find_dir_prefix() {
local objfile=$1
......
......@@ -168,7 +168,6 @@ class id_parser(object):
self.curline = 0
try:
for line in fd:
line = line.decode(locale.getpreferredencoding(False), errors='ignore')
self.curline += 1
if self.curline > maxlines:
break
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment