Commit f0ab773f authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  rbtree: include rcu.h
  scripts/faddr2line: fix error when addr2line output contains discriminator
  ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
  mm, oom: fix concurrent munlock and oom reaper unmap, v3
  mm: migrate: fix double call of radix_tree_replace_slot()
  proc/kcore: don't bounds check against address 0
  mm: don't show nr_indirectly_reclaimable in /proc/vmstat
  mm: sections are not offlined during memory hotremove
  z3fold: fix reclaim lock-ups
  init: fix false positives in W+X checking
  lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
  KASAN: prohibit KASAN+STRUCTLEAK combination
  MAINTAINERS: update Shuah's email address
parents 4bc87198 2075b16e
@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
 CPU POWER MONITORING SUBSYSTEM
 M: Thomas Renninger <trenn@suse.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-pm@vger.kernel.org
 S: Maintained
@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
 F: include/uapi/linux/sunrpc/
 KERNEL SELFTEST FRAMEWORK
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-kselftest@vger.kernel.org
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c
 USB OVER IP DRIVER
 M: Valentina Manea <valentina.manea.m@gmail.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
 M: Shuah Khan <shuah@kernel.org>
 L: linux-usb@vger.kernel.org
 S: Maintained
......
@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
 config GCC_PLUGIN_STRUCTLEAK
         bool "Force initialization of variables containing userspace addresses"
         depends on GCC_PLUGINS
+        # Currently STRUCTLEAK inserts initialization out of live scope of
+        # variables from KASAN point of view. This leads to KASAN false
+        # positive reports. Prohibit this combination for now.
+        depends on !KASAN_EXTRA
         help
           This plugin zero-initializes any structures containing a
           __user attribute. This can prevent some classes of information
......
@@ -4250,10 +4250,11 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                          struct dentry *new_dentry, bool preserve)
 {
-        int error;
+        int error, had_lock;
         struct inode *inode = d_inode(old_dentry);
         struct buffer_head *old_bh = NULL;
         struct inode *new_orphan_inode = NULL;
+        struct ocfs2_lock_holder oh;
 
         if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
                 return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                 goto out;
         }
 
+        had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+                                            &oh);
+        if (had_lock < 0) {
+                error = had_lock;
+                mlog_errno(error);
+                goto out;
+        }
+
         /* If the security isn't preserved, we need to re-initialize them. */
         if (!preserve) {
                 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                 if (error)
                         mlog_errno(error);
         }
-out:
         if (!error) {
                 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
                                                        new_dentry);
                 if (error)
                         mlog_errno(error);
         }
+        ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
 
+out:
         if (new_orphan_inode) {
                 /*
                  * We need to open_unlock the inode no matter whether we
......
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
         struct list_head *head = (struct list_head *)arg;
         struct kcore_list *ent;
+        struct page *p;
+
+        if (!pfn_valid(pfn))
+                return 1;
+
+        p = pfn_to_page(pfn);
+        if (!memmap_valid_within(pfn, p, page_zone(p)))
+                return 1;
 
         ent = kmalloc(sizeof(*ent), GFP_KERNEL);
         if (!ent)
                 return -ENOMEM;
-        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+        ent->addr = (unsigned long)page_to_virt(p);
         ent->size = nr_pages << PAGE_SHIFT;
 
-        /* Sanity check: Can happen in 32bit arch...maybe */
-        if (ent->addr < (unsigned long) __va(0))
+        if (!virt_addr_valid(ent->addr))
                 goto free_out;
 
         /* cut not-mapped area. ....from ppc-32 code. */
         if (ULONG_MAX - ent->addr < ent->size)
                 ent->size = ULONG_MAX - ent->addr;
 
-        /* cut when vmalloc() area is higher than direct-map area */
-        if (VMALLOC_START > (unsigned long)__va(0)) {
-                if (ent->addr > VMALLOC_START)
-                        goto free_out;
+        /*
+         * We've already checked virt_addr_valid so we know this address
+         * is a valid pointer, therefore we can check against it to determine
+         * if we need to trim
+         */
+        if (VMALLOC_START > ent->addr) {
                 if (VMALLOC_START - ent->addr < ent->size)
                         ent->size = VMALLOC_START - ent->addr;
         }
......
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
         return 0;
 }
 
+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
                 struct mem_cgroup *memcg, const nodemask_t *nodemask,
                 unsigned long totalpages);
......
@@ -26,6 +26,7 @@
 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for
......
@@ -35,6 +35,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 struct latch_tree_node {
         struct rb_node node[2];
......
@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
 static void mark_readonly(void)
 {
         if (rodata_enabled) {
+                /*
+                 * load_module() results in W+X mappings, which are cleaned up
+                 * with call_rcu_sched(). Let's make sure that queued work is
+                 * flushed so that we don't hit false positives looking for
+                 * insecure pages which are W+X.
+                 */
+                rcu_barrier_sched();
                 mark_rodata_ro();
                 rodata_test();
         } else
......
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
          * walking this with preempt disabled. In all the failure paths, we
          * call synchronize_sched(), but we don't want to slow down the success
          * path, so use actual RCU here.
+         * Note that module_alloc() on most architectures creates W+X page
+         * mappings which won't be cleaned up until do_free_init() runs. Any
+         * code such as mark_rodata_ro() which depends on those mappings to
+         * be cleaned up needs to sync with the queued work - ie
+         * rcu_barrier_sched()
          */
         call_rcu_sched(&freeinit->rcu, do_free_init);
         mutex_unlock(&module_mutex);
......
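The two comments above describe the same contract from both ends: do_init_module() queues the freeing of init sections (and their W+X mappings) with call_rcu_sched(), and mark_readonly() now calls rcu_barrier_sched(), which waits for every sched-RCU callback queued so far to run, before it scans for W+X pages. Below is a minimal kernel-module sketch of that ordering guarantee; it is illustrative only (not part of the patch) and assumes the 4.17-era call_rcu_sched()/rcu_barrier_sched() API, which later kernels folded into call_rcu()/rcu_barrier().

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct payload {
        struct rcu_head rcu;
        void *buf;
};

static void payload_free(struct rcu_head *head)
{
        struct payload *p = container_of(head, struct payload, rcu);

        kfree(p->buf);          /* deferred cleanup, analogous to do_free_init() */
        kfree(p);
}

static int __init barrier_demo_init(void)
{
        struct payload *p = kmalloc(sizeof(*p), GFP_KERNEL);

        if (!p)
                return -ENOMEM;
        p->buf = kmalloc(128, GFP_KERNEL);
        if (!p->buf) {
                kfree(p);
                return -ENOMEM;
        }

        /* Queue deferred cleanup, as do_init_module() does for init sections. */
        call_rcu_sched(&p->rcu, payload_free);

        /*
         * Wait for every sched-RCU callback queued so far to finish, as
         * mark_readonly() now does before mark_rodata_ro(). After this
         * returns, payload_free() above is guaranteed to have run.
         */
        rcu_barrier_sched();
        return 0;
}

static void __exit barrier_demo_exit(void)
{
}

module_init(barrier_demo_init);
module_exit(barrier_demo_exit);
MODULE_LICENSE("GPL");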
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
         test_find_next_bit(bitmap, BITMAP_LEN);
         test_find_next_zero_bit(bitmap, BITMAP_LEN);
         test_find_last_bit(bitmap, BITMAP_LEN);
-        test_find_first_bit(bitmap, BITMAP_LEN);
+
+        /*
+         * test_find_first_bit() may take some time, so
+         * traverse only part of bitmap to avoid soft lockup.
+         */
+        test_find_first_bit(bitmap, BITMAP_LEN / 10);
+
         test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
         pr_err("\nStart testing find_bit() with sparse bitmap\n");
......
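For context on why only the first-bit test needed a smaller range: as I read the benchmark, test_find_first_bit() finds the first set bit and then clears it, so every iteration rescans the bitmap from bit 0 and the total work grows roughly quadratically with the bitmap length, unlike the other tests, which make a single linear pass. A small userspace sketch of that access pattern follows; it mimics the shape of the benchmark loop rather than reusing the kernel's find_first_bit(), so the helper name and sizes here are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define NBITS   (1UL << 18)             /* scale this up to see the blow-up */
#define NWORDS  (NBITS / 64)

static uint64_t bitmap[NWORDS];

/* Simplified stand-in for the kernel's find_first_bit(). */
static unsigned long find_first_bit64(const uint64_t *map, unsigned long nbits)
{
        for (unsigned long w = 0; w * 64 < nbits; w++)
                if (map[w])
                        return w * 64 + __builtin_ctzll(map[w]);
        return nbits;                   /* no bit set */
}

int main(void)
{
        struct timespec t0, t1;
        unsigned long cnt = 0;

        memset(bitmap, 0xff, sizeof(bitmap));   /* dense bitmap */

        clock_gettime(CLOCK_MONOTONIC, &t0);
        /*
         * Same shape as the first-bit test: find the first set bit, clear
         * it, repeat. Each pass restarts from bit 0, so the total number of
         * word reads grows quadratically with NBITS.
         */
        for (unsigned long i = 0; i < NBITS; i++) {
                unsigned long b = find_first_bit64(bitmap, NBITS);

                if (b >= NBITS)
                        break;
                bitmap[b / 64] &= ~(1ULL << (b % 64));
                cnt++;
        }
        clock_gettime(CLOCK_MONOTONIC, &t1);

        printf("%lu iterations, %.2f s\n", cnt,
               (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9);
        return 0;
}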
@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
                 int i;
                 int index = page_index(page);
 
-                for (i = 0; i < HPAGE_PMD_NR; i++) {
+                for (i = 1; i < HPAGE_PMD_NR; i++) {
                         pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                                        index + i);
                         radix_tree_replace_slot(&mapping->i_pages, pslot,
                                                 newpage + i);
                 }
-        } else {
-                radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
         }
 
         /*
......
@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
         /* mm's last user has gone, and its about to be pulled down */
         mmu_notifier_release(mm);
 
+        if (unlikely(mm_is_oom_victim(mm))) {
+                /*
+                 * Manually reap the mm to free as much memory as possible.
+                 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+                 * this mm from further consideration. Taking mm->mmap_sem for
+                 * write after setting MMF_OOM_SKIP will guarantee that the oom
+                 * reaper will not run on this mm again after mmap_sem is
+                 * dropped.
+                 *
+                 * Nothing can be holding mm->mmap_sem here and the above call
+                 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+                 * __oom_reap_task_mm() will not block.
+                 *
+                 * This needs to be done before calling munlock_vma_pages_all(),
+                 * which clears VM_LOCKED, otherwise the oom reaper cannot
+                 * reliably test it.
+                 */
+                mutex_lock(&oom_lock);
+                __oom_reap_task_mm(mm);
+                mutex_unlock(&oom_lock);
+
+                set_bit(MMF_OOM_SKIP, &mm->flags);
+                down_write(&mm->mmap_sem);
+                up_write(&mm->mmap_sem);
+        }
+
         if (mm->locked_vm) {
                 vma = mm->mmap;
                 while (vma) {
@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
         /* update_hiwater_rss(mm) here? but nobody should be looking */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
         unmap_vmas(&tlb, vma, 0, -1);
-
-        if (unlikely(mm_is_oom_victim(mm))) {
-                /*
-                 * Wait for oom_reap_task() to stop working on this
-                 * mm. Because MMF_OOM_SKIP is already set before
-                 * calling down_read(), oom_reap_task() will not run
-                 * on this "mm" post up_write().
-                 *
-                 * mm_is_oom_victim() cannot be set from under us
-                 * either because victim->mm is already set to NULL
-                 * under task_lock before calling mmput and oom_mm is
-                 * set not NULL by the OOM killer only if victim->mm
-                 * is found not NULL while holding the task_lock.
-                 */
-                set_bit(MMF_OOM_SKIP, &mm->flags);
-                down_write(&mm->mmap_sem);
-                up_write(&mm->mmap_sem);
-        }
         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
         tlb_finish_mmu(&tlb, 0, -1);
......
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
         return false;
 }
 
-
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
 {
-        struct mmu_gather tlb;
         struct vm_area_struct *vma;
+
+        /*
+         * Tell all users of get_user/copy_from_user etc... that the content
+         * is no longer stable. No barriers really needed because unmapping
+         * should imply barriers already and the reader would hit a page fault
+         * if it stumbled over a reaped memory.
+         */
+        set_bit(MMF_UNSTABLE, &mm->flags);
+
+        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+                if (!can_madv_dontneed_vma(vma))
+                        continue;
+
+                /*
+                 * Only anonymous pages have a good chance to be dropped
+                 * without additional steps which we cannot afford as we
+                 * are OOM already.
+                 *
+                 * We do not even care about fs backed pages because all
+                 * which are reclaimable have already been reclaimed and
+                 * we do not want to block exit_mmap by keeping mm ref
+                 * count elevated without a good reason.
+                 */
+                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+                        const unsigned long start = vma->vm_start;
+                        const unsigned long end = vma->vm_end;
+                        struct mmu_gather tlb;
+
+                        tlb_gather_mmu(&tlb, mm, start, end);
+                        mmu_notifier_invalidate_range_start(mm, start, end);
+                        unmap_page_range(&tlb, vma, start, end, NULL);
+                        mmu_notifier_invalidate_range_end(mm, start, end);
+                        tlb_finish_mmu(&tlb, start, end);
+                }
+        }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
         bool ret = true;
 
         /*
          * We have to make sure to not race with the victim exit path
          * and cause premature new oom victim selection:
-         * __oom_reap_task_mm           exit_mm
+         * oom_reap_task_mm             exit_mm
          *   mmget_not_zero
          *                                mmput
         *                                   atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
         trace_start_task_reaping(tsk->pid);
 
-        /*
-         * Tell all users of get_user/copy_from_user etc... that the content
-         * is no longer stable. No barriers really needed because unmapping
-         * should imply barriers already and the reader would hit a page fault
-         * if it stumbled over a reaped memory.
-         */
-        set_bit(MMF_UNSTABLE, &mm->flags);
-
-        for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-                if (!can_madv_dontneed_vma(vma))
-                        continue;
-
-                /*
-                 * Only anonymous pages have a good chance to be dropped
-                 * without additional steps which we cannot afford as we
-                 * are OOM already.
-                 *
-                 * We do not even care about fs backed pages because all
-                 * which are reclaimable have already been reclaimed and
-                 * we do not want to block exit_mmap by keeping mm ref
-                 * count elevated without a good reason.
-                 */
-                if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
-                        const unsigned long start = vma->vm_start;
-                        const unsigned long end = vma->vm_end;
-
-                        tlb_gather_mmu(&tlb, mm, start, end);
-                        mmu_notifier_invalidate_range_start(mm, start, end);
-                        unmap_page_range(&tlb, vma, start, end, NULL);
-                        mmu_notifier_invalidate_range_end(mm, start, end);
-                        tlb_finish_mmu(&tlb, start, end);
-                }
-        }
+        __oom_reap_task_mm(mm);
+
         pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                         task_pid_nr(tsk), tsk->comm,
                         K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
         struct mm_struct *mm = tsk->signal->oom_mm;
 
         /* Retry the down_read_trylock(mmap_sem) a few times */
-        while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+        while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
                 schedule_timeout_idle(HZ/10);
 
         if (attempts <= MAX_OOM_REAP_RETRIES ||
             test_bit(MMF_OOM_SKIP, &mm->flags))
                 goto done;
 
-
         pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                 task_pid_nr(tsk), tsk->comm);
         debug_show_all_locks();
......
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
         unsigned long pfn;
 
         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-                unsigned long section_nr = pfn_to_section_nr(start_pfn);
+                unsigned long section_nr = pfn_to_section_nr(pfn);
                 struct mem_section *ms;
 
                 /*
......
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
         "nr_vmscan_immediate_reclaim",
         "nr_dirtied",
         "nr_written",
-        "nr_indirectly_reclaimable",
+        "", /* nr_indirectly_reclaimable */
 
         /* enum writeback_stat_item counters */
         "nr_dirty_threshold",
@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
         unsigned long *l = arg;
         unsigned long off = l - (unsigned long *)m->private;
 
+        /* Skip hidden vmstat items. */
+        if (*vmstat_text[off] == '\0')
+                return 0;
+
         seq_puts(m, vmstat_text[off]);
         seq_put_decimal_ull(m, " ", *l);
         seq_putc(m, '\n');
......
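The vmstat change hides the counter without deleting its slot because vmstat_text[] has to stay index-aligned with the item enums that the values are read from; blanking the name and having vmstat_show() skip empty names keeps every other row's offset unchanged. A toy userspace illustration of that design choice follows (the names and values are made up, not the kernel's arrays):

#include <stdio.h>

/* Toy stand-ins for vmstat_text[] and the per-item counters. */
static const char * const item_text[] = {
        "nr_dirtied",
        "nr_written",
        "",                     /* hidden item: name blanked, slot kept */
        "nr_dirty_threshold",
};

static const unsigned long item_value[] = { 10, 20, 30, 40 };

int main(void)
{
        for (size_t off = 0; off < sizeof(item_value) / sizeof(item_value[0]); off++) {
                /* Same check the patch adds to vmstat_show(): skip hidden items. */
                if (item_text[off][0] == '\0')
                        continue;
                printf("%s %lu\n", item_text[off], item_value[off]);
        }
        return 0;
}

Index 3 still prints nr_dirty_threshold with its own value, which is exactly the alignment the empty-string trick preserves.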
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
         PAGE_HEADLESS = 0,
         MIDDLE_CHUNK_MAPPED,
         NEEDS_COMPACTING,
-        PAGE_STALE
+        PAGE_STALE,
+        UNDER_RECLAIM
 };
 
 /*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
         clear_bit(NEEDS_COMPACTING, &page->private);
         clear_bit(PAGE_STALE, &page->private);
+        clear_bit(UNDER_RECLAIM, &page->private);
 
         spin_lock_init(&zhdr->page_lock);
         kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
                 atomic64_dec(&pool->pages_nr);
                 return;
         }
+        if (test_bit(UNDER_RECLAIM, &page->private)) {
+                z3fold_page_unlock(zhdr);
+                return;
+        }
         if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
                 z3fold_page_unlock(zhdr);
                 return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                         kref_get(&zhdr->refcount);
                         list_del_init(&zhdr->buddy);
                         zhdr->cpu = -1;
+                        set_bit(UNDER_RECLAIM, &page->private);
+                        break;
                 }
 
                 list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                         goto next;
                 }
 next:
-                spin_lock(&pool->lock);
                 if (test_bit(PAGE_HEADLESS, &page->private)) {
                         if (ret == 0) {
-                                spin_unlock(&pool->lock);
                                 free_z3fold_page(page);
                                 return 0;
                         }
-                } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
-                        atomic64_dec(&pool->pages_nr);
+                        spin_lock(&pool->lock);
+                        list_add(&page->lru, &pool->lru);
                         spin_unlock(&pool->lock);
-                        return 0;
-                }
-
-                /*
-                 * Add to the beginning of LRU.
-                 * Pool lock has to be kept here to ensure the page has
-                 * not already been released
-                 */
-                list_add(&page->lru, &pool->lru);
+                } else {
+                        z3fold_page_lock(zhdr);
+                        clear_bit(UNDER_RECLAIM, &page->private);
+                        if (kref_put(&zhdr->refcount,
+                                        release_z3fold_page_locked)) {
+                                atomic64_dec(&pool->pages_nr);
+                                return 0;
+                        }
+                        /*
+                         * if we are here, the page is still not completely
+                         * free. Take the global pool lock then to be able
+                         * to add it back to the lru list
+                         */
+                        spin_lock(&pool->lock);
+                        list_add(&page->lru, &pool->lru);
+                        spin_unlock(&pool->lock);
+                        z3fold_page_unlock(zhdr);
+                }
+
+                /* We started off locked to we need to lock the pool back */
+                spin_lock(&pool->lock);
         }
         spin_unlock(&pool->lock);
         return -EAGAIN;
......
@@ -170,7 +170,10 @@ __faddr2line() {
                 echo "$file_lines" | while read -r line
                 do
                         echo $line
-                        eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
+                        n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
+                        n1=$[$n-5]
+                        n2=$[$n+5]
+                        f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
                         awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
                 done
......