Commit b9de6efe authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "24 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
  autofs: fix error return in autofs_fill_super()
  autofs: drop dentry reference only when it is never used
  fs/drop_caches.c: avoid softlockups in drop_pagecache_sb()
  mm: migrate: don't rely on __PageMovable() of newpage after unlocking it
  psi: clarify the Kconfig text for the default-disable option
  mm, memory_hotplug: __offline_pages fix wrong locking
  mm: hwpoison: use do_send_sig_info() instead of force_sig()
  kasan: mark file common so ftrace doesn't trace it
  init/Kconfig: fix grammar by moving a closing parenthesis
  lib/test_kmod.c: potential double free in error handling
  mm, oom: fix use-after-free in oom_kill_process
  mm/hotplug: invalid PFNs from pfn_to_online_page()
  mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages
  psi: fix aggregation idle shut-off
  mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone
  mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone
  oom, oom_reaper: do not enqueue same task twice
  mm: migrate: make buffer_migrate_page_norefs() actually succeed
  kernel/exit.c: release ptraced tasks before zap_pid_ns_processes
  x86_64: increase stack size for KASAN_EXTRA
  ...
parents cd984a5b f585b283
@@ -30,6 +30,7 @@ generic-y += pgalloc.h
generic-y += preempt.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += tlbflush.h
generic-y += topology.h
generic-y += trace_clock.h
...
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -40,6 +40,7 @@ generic-y += preempt.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += spinlock.h
generic-y += timex.h
...
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -30,6 +30,7 @@ generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += topology.h
generic-y += trace_clock.h
...
include include/uapi/asm-generic/Kbuild.asm
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
+generic-y += shmparam.h
generic-y += spinlock.h
generic-y += topology.h
generic-y += trace_clock.h
...
@@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm
generated-y += unistd_32.h
generic-y += kvm_para.h
-generic-y += shmparam.h
@@ -26,6 +26,7 @@ generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += syscalls.h
generic-y += topology.h
generic-y += trace_clock.h
...
@@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm
generated-y += unistd_32.h
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -34,6 +34,7 @@ generic-y += qrwlock_types.h
generic-y += qrwlock.h
generic-y += sections.h
generic-y += segment.h
+generic-y += shmparam.h
generic-y += string.h
generic-y += switch_to.h
generic-y += topology.h
...
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -28,6 +28,7 @@ generic-y += preempt.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += syscalls.h
generic-y += topology.h
...
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
@@ -7,7 +7,11 @@
#endif
#ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_STACK_ORDER 2
+#else
#define KASAN_STACK_ORDER 1
+#endif
#else
#define KASAN_STACK_ORDER 0
#endif
...
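For context, KASAN_STACK_ORDER feeds into the stack-size arithmetic defined a few lines below this hunk. A rough sketch of the effect, assuming the usual x86-64 definitions of THREAD_SIZE_ORDER and THREAD_SIZE (they are not part of the quoted hunk):

/* assumed surrounding definitions in arch/x86/include/asm/page_64_types.h */
#define THREAD_SIZE_ORDER       (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
/* with 4 KiB pages: 16 KiB stacks normally, 32 KiB with KASAN, 64 KiB with KASAN_EXTRA */

So the patch doubles the kernel stack only when CONFIG_KASAN_EXTRA is enabled, since that instrumentation produces noticeably larger stack frames.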
@@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
        pkt.len = dentry->d_name.len;
        memcpy(pkt.name, dentry->d_name.name, pkt.len);
        pkt.name[pkt.len] = '\0';
-       dput(dentry);
        if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
                ret = -EFAULT;
@@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
        complete_all(&ino->expire_complete);
        spin_unlock(&sbi->fs_lock);
+       dput(dentry);
        return ret;
}
...
@@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
        }
        root_inode = autofs_get_inode(s, S_IFDIR | 0755);
        root = d_make_root(root_inode);
-       if (!root)
+       if (!root) {
+               ret = -ENOMEM;
                goto fail_ino;
+       }
        pipe = NULL;
        root->d_fsdata = ino;
...
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
        spin_lock(&sb->s_inode_list_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                spin_lock(&inode->i_lock);
+               /*
+                * We must skip inodes in unusual state. We may also skip
+                * inodes without pages but we deliberately won't in case
+                * we need to reschedule to avoid softlockups.
+                */
                if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-                   (inode->i_mapping->nrpages == 0)) {
+                   (inode->i_mapping->nrpages == 0 && !need_resched())) {
                        spin_unlock(&inode->i_lock);
                        continue;
                }
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
                spin_unlock(&inode->i_lock);
                spin_unlock(&sb->s_inode_list_lock);
+               cond_resched();
                invalidate_mapping_pages(inode->i_mapping, 0, -1);
                iput(toput_inode);
                toput_inode = inode;
...
@@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
                inode = proc_get_inode(dir->i_sb, de);
                if (!inode)
                        return ERR_PTR(-ENOMEM);
-               d_set_d_op(dentry, &proc_misc_dentry_ops);
+               d_set_d_op(dentry, de->proc_dops);
                return d_splice_alias(inode, dentry);
        }
        read_unlock(&proc_subdir_lock);
@@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
        INIT_LIST_HEAD(&ent->pde_openers);
        proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+       ent->proc_dops = &proc_misc_dentry_ops;
out:
        return ent;
}
...
@@ -44,6 +44,7 @@ struct proc_dir_entry {
        struct completion *pde_unload_completion;
        const struct inode_operations *proc_iops;
        const struct file_operations *proc_fops;
+       const struct dentry_operations *proc_dops;
        union {
                const struct seq_operations *seq_ops;
                int (*single_show)(struct seq_file *, void *);
...
@@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
        return maybe_get_net(PDE_NET(PDE(inode)));
}

+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+       .d_revalidate   = proc_net_d_revalidate,
+       .d_delete       = always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+       /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+       pde->proc_dops = &proc_net_dentry_ops;
+}
+
static int seq_open_net(struct inode *inode, struct file *file)
{
        unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_seq_fops;
        p->seq_ops = ops;
        p->state_size = state_size;
@@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_seq_fops;
        p->seq_ops = ops;
        p->state_size = state_size;
@@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_single_fops;
        p->single_show = show;
        return proc_register(parent, p);
@@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
        p = proc_create_reg(name, mode, &parent, data);
        if (!p)
                return NULL;
+       pde_force_lookup(p);
        p->proc_fops = &proc_net_single_fops;
        p->single_show = show;
        p->write = write;
...
@@ -21,14 +21,16 @@ struct vmem_altmap;
 * walkers which rely on the fully initialized page->flags and others
 * should use this rather than pfn_valid && pfn_to_page
 */
#define pfn_to_online_page(pfn)                                    \
({                                                                 \
        struct page *___page = NULL;                               \
-       unsigned long ___nr = pfn_to_section_nr(pfn);              \
-                                                                  \
-       if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))   \
-               ___page = pfn_to_page(pfn);                        \
-       ___page;                                                   \
+       unsigned long ___pfn = pfn;                                \
+       unsigned long ___nr = pfn_to_section_nr(___pfn);           \
+                                                                  \
+       if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
+           pfn_valid_within(___pfn))                              \
+               ___page = pfn_to_page(___pfn);                     \
+       ___page;                                                   \
})

/*
...
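To illustrate the two changes to the macro (the pfn argument is now evaluated only once, via ___pfn, and pfn_valid_within() filters out holes inside an otherwise online section), here is a hypothetical caller sketch; the loop and variable names are illustrative, not part of the patch:

/* hypothetical scan loop over a pfn range */
unsigned long pfn;

for (pfn = start_pfn; pfn < end_pfn; pfn++) {
        struct page *page = pfn_to_online_page(pfn);

        if (!page)      /* offline section, or a hole within one */
                continue;
        /* page is safe to inspect here */
}

With the old body, the argument was expanded twice, and a pfn falling into a hole of an online section could still be handed to pfn_to_page().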
@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HUGE_ZERO_PAGE      23      /* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP         24      /* disable THP for all VMAs */
#define MMF_OOM_VICTIM          25      /* mm is the oom victim */
+#define MMF_OOM_REAP_QUEUED     26      /* mm was queued for oom_reaper */
#define MMF_DISABLE_THP_MASK    (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK           (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
...
@@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED
          per default but can be enabled through passing psi=1 on the
          kernel commandline during boot.

+         This feature adds some code to the task wakeup and sleep
+         paths of the scheduler. The overhead is too low to affect
+         common scheduling-intense workloads in practice (such as
+         webservers, memcache), but it does show up in artificial
+         scheduler stress tests, such as hackbench.
+
+         If you are paranoid and not sure what the kernel will be
+         used for, say Y.
+
+         Say N if unsure.
+
endmenu # "CPU/Task time and stats accounting"

config CPU_ISOLATION
@@ -825,7 +836,7 @@ config CGROUP_PIDS
          PIDs controller is designed to stop this from happening.

          It should be noted that organisational operations (such as attaching
-         to a cgroup hierarchy will *not* be blocked by the PIDs controller),
+         to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
          since the PIDs limit only affects a process's ability to fork, not to
          attach to a cgroup.
...
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
        return NULL;
}

-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+                                               struct list_head *dead)
        __releases(&tasklist_lock)
        __acquires(&tasklist_lock)
{
        struct pid_namespace *pid_ns = task_active_pid_ns(father);
        struct task_struct *reaper = pid_ns->child_reaper;
+       struct task_struct *p, *n;

        if (likely(reaper != father))
                return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
                panic("Attempted to kill init! exitcode=0x%08x\n",
                        father->signal->group_exit_code ?: father->exit_code);
        }
+
+       list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+               list_del_init(&p->ptrace_entry);
+               release_task(p);
+       }
+
        zap_pid_ns_processes(pid_ns);
        write_lock_irq(&tasklist_lock);
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
        exit_ptrace(father, dead);

        /* Can drop and reacquire tasklist_lock */
-       reaper = find_child_reaper(father);
+       reaper = find_child_reaper(father, dead);
        if (list_empty(&father->children))
                return;
...
@@ -124,6 +124,7 @@
 * sampling of the aggregate task states would be.
 */

+#include "../workqueue_internal.h"
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
                groupc->tasks[t]++;

        write_seqcount_end(&groupc->seq);
-
-       if (!delayed_work_pending(&group->clock_work))
-               schedule_delayed_work(&group->clock_work, PSI_FREQ);
}

static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
{
        int cpu = task_cpu(task);
        struct psi_group *group;
+       bool wake_clock = true;
        void *iter = NULL;

        if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
        task->psi_flags &= ~clear;
        task->psi_flags |= set;

-       while ((group = iterate_groups(task, &iter)))
+       /*
+        * Periodic aggregation shuts off if there is a period of no
+        * task changes, so we wake it back up if necessary. However,
+        * don't do this if the task change is the aggregation worker
+        * itself going to sleep, or we'll ping-pong forever.
+        */
+       if (unlikely((clear & TSK_RUNNING) &&
+                    (task->flags & PF_WQ_WORKER) &&
+                    wq_worker_last_func(task) == psi_update_work))
+               wake_clock = false;
+
+       while ((group = iterate_groups(task, &iter))) {
                psi_group_change(group, cpu, clear, set);
+               if (wake_clock && !delayed_work_pending(&group->clock_work))
+                       schedule_delayed_work(&group->clock_work, PSI_FREQ);
+       }
}

void psi_memstall_tick(struct task_struct *task, int cpu)
...
@@ -909,6 +909,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
        return to_wakeup ? to_wakeup->task : NULL;
}

+/**
+ * wq_worker_last_func - retrieve worker's last work function
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+       struct worker *worker = kthread_data(task);
+
+       return worker->last_func;
+}
+
/**
 * worker_set_flags - set worker flags and adjust nr_running accordingly
 * @worker: self
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
        if (unlikely(cpu_intensive))
                worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

+       /* tag the worker for identification in schedule() */
+       worker->last_func = worker->current_func;
+
        /* we're done with it, release */
        hash_del(&worker->hentry);
        worker->current_work = NULL;
...
@@ -53,6 +53,9 @@ struct worker {

        /* used only by rescuers to point to the target workqueue */
        struct workqueue_struct *rescue_wq;     /* I: the workqueue to rescue */
+
+       /* used by the scheduler to determine a worker's last known identity */
+       work_func_t             last_func;
};

/**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)

/*
 * Scheduler hooks for concurrency managed workqueue. Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
 */
void wq_worker_waking_up(struct task_struct *task, int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);

#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
@@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config)
        config->test_driver = NULL;

        kfree_const(config->test_fs);
-       config->test_driver = NULL;
+       config->test_fs = NULL;
}

static void kmod_config_free(struct kmod_test_device *test_dev)
...
@@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        break;
                }
                if (ret & VM_FAULT_RETRY) {
-                       if (nonblocking)
+                       if (nonblocking &&
+                           !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
                                *nonblocking = 0;
                        *nr_pages = 0;
                        /*
...
@@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n
UBSAN_SANITIZE_tags.o := n
KCOV_INSTRUMENT := n

+CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_generic.o = -pg

# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
...
@@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
                if (fail || tk->addr_valid == 0) {
                        pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
                               pfn, tk->tsk->comm, tk->tsk->pid);
-                       force_sig(SIGKILL, tk->tsk);
+                       do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
+                                        tk->tsk, PIDTYPE_PID);
                }

                /*
...
@@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page)
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
        struct page *page = pfn_to_page(start_pfn);
-       struct page *end_page = page + nr_pages;
+       unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
+       struct page *end_page = pfn_to_page(end_pfn);

        /* Check the starting page of each pageblock within the range */
        for (; page < end_page; page = next_active_pageblock(page)) {
@@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
                        i++;
                if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
                        continue;
+               /* Check if we got outside of the zone */
+               if (zone && !zone_spans_pfn(zone, pfn + i))
+                       return 0;
                page = pfn_to_page(pfn + i);
                if (zone && page_zone(page) != zone)
                        return 0;
@@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
        unsigned long pfn;
-       struct page *page;

        for (pfn = start; pfn < end; pfn++) {
-               if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (PageLRU(page))
-                               return pfn;
-                       if (__PageMovable(page))
-                               return pfn;
-                       if (PageHuge(page)) {
-                               if (hugepage_migration_supported(page_hstate(page)) &&
-                                   page_huge_active(page))
-                                       return pfn;
-                               else
-                                       pfn = round_up(pfn + 1,
-                                               1 << compound_order(page)) - 1;
-                       }
-               }
+               struct page *page, *head;
+               unsigned long skip;
+
+               if (!pfn_valid(pfn))
+                       continue;
+               page = pfn_to_page(pfn);
+               if (PageLRU(page))
+                       return pfn;
+               if (__PageMovable(page))
+                       return pfn;
+
+               if (!PageHuge(page))
+                       continue;
+               head = compound_head(page);
+               if (hugepage_migration_supported(page_hstate(head)) &&
+                   page_huge_active(head))
+                       return pfn;
+               skip = (1 << compound_order(head)) - (page - head);
+               pfn += skip - 1;
        }
        return 0;
}
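The skip arithmetic is what makes the gigantic-hugepage case behave when the scan enters the page somewhere past its head. A worked example, assuming x86-64 with 4 KiB base pages, where a 1 GiB gigantic page has compound_order(head) == 18, i.e. 262144 base pages:

/* illustrative numbers only, not part of the patch */
/*   page - head = 1000        scan entered 1000 pfns past the head page */
/*   skip = (1 << 18) - 1000   261144 pfns remain inside the huge page   */
/*   pfn += skip - 1           the loop's pfn++ then lands on the first  */
/*                             pfn after the gigantic page               */

The old code read compound_order() from whatever page the scan happened to land on, which is only meaningful for the head page; the rewrite always derives the distance from compound_head().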
@@ -1344,7 +1352,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long pfn;
        struct page *page;
-       int not_managed = 0;
        int ret = 0;
        LIST_HEAD(source);
@@ -1392,7 +1399,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                else
                        ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
                if (!ret) { /* Success */
-                       put_page(page);
                        list_add_tail(&page->lru, &source);
                        if (!__PageMovable(page))
                                inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1401,22 +1407,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                } else {
                        pr_warn("failed to isolate pfn %lx\n", pfn);
                        dump_page(page, "isolation failed");
-                       put_page(page);
-                       /* Because we don't have big zone->lock. we should
-                          check this again here. */
-                       if (page_count(page)) {
-                               not_managed++;
-                               ret = -EBUSY;
-                               break;
-                       }
                }
+               put_page(page);
        }
        if (!list_empty(&source)) {
-               if (not_managed) {
-                       putback_movable_pages(&source);
-                       goto out;
-               }
                /* Allocate a new page from the nearest neighbor node */
                ret = migrate_pages(&source, new_node_page, NULL, 0,
                                        MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1429,7 +1423,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                        putback_movable_pages(&source);
                }
        }
-out:
+
        return ret;
}
...
@@ -1576,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
           we assume this for now. .*/
        if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
                                  &valid_end)) {
-               mem_hotplug_done();
                ret = -EINVAL;
                reason = "multizone range";
                goto failed_removal;
@@ -1591,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
                                       MIGRATE_MOVABLE,
                                       SKIP_HWPOISON | REPORT_FAILURE);
        if (ret) {
-               mem_hotplug_done();
                reason = "failure to isolate range";
                goto failed_removal;
        }
...
@@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
        /* Simple case, sync compaction */
        if (mode != MIGRATE_ASYNC) {
                do {
-                       get_bh(bh);
                        lock_buffer(bh);
                        bh = bh->b_this_page;
@@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
        /* async case, we cannot block on lock_buffer so use trylock_buffer */
        do {
-               get_bh(bh);
                if (!trylock_buffer(bh)) {
                        /*
                         * We failed to lock the buffer and cannot stall in
                         * async migration. Release the taken locks
                         */
                        struct buffer_head *failed_bh = bh;
-                       put_bh(failed_bh);
                        bh = head;
                        while (bh != failed_bh) {
                                unlock_buffer(bh);
-                               put_bh(bh);
                                bh = bh->b_this_page;
                        }
                        return false;
@@ -818,7 +814,6 @@ static int __buffer_migrate_page(struct address_space *mapping,
        bh = head;
        do {
                unlock_buffer(bh);
-               put_bh(bh);
                bh = bh->b_this_page;
        } while (bh != head);
@@ -1135,10 +1130,13 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         * If migration is successful, decrease refcount of the newpage
         * which will not free the page because new page owner increased
         * refcounter. As well, if it is LRU page, add the page to LRU
-        * list in here.
+        * list in here. Use the old state of the isolated source page to
+        * determine if we migrated a LRU page. newpage was already unlocked
+        * and possibly modified by its owner - don't rely on the page
+        * state.
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
-               if (unlikely(__PageMovable(newpage)))
+               if (unlikely(!is_lru))
                        put_page(newpage);
                else
                        putback_lru_page(newpage);
...
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
static void wake_oom_reaper(struct task_struct *tsk)
{
-       /* tsk is already queued? */
-       if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+       /* mm is already queued? */
+       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
                return;

        get_task_struct(tsk);
@@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
         * still freeing memory.
         */
        read_lock(&tasklist_lock);
+
+       /*
+        * The task 'p' might have already exited before reaching here. The
+        * put_task_struct() will free task_struct 'p' while the loop still try
+        * to access the field of 'p', so, get an extra reference.
+        */
+       get_task_struct(p);
        for_each_thread(p, t) {
                list_for_each_entry(child, &t->children, sibling) {
                        unsigned int child_points;
@@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
                        }
                }
        }
+       put_task_struct(p);
        read_unlock(&tasklist_lock);

        /*
...
@@ -10,4 +10,5 @@
/proc-uptime-002
/read
/self
+/setns-dcache
/thread-self
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
TEST_GEN_PROGS += thread-self

include ../lib.mk
/*
 * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Test that setns(CLONE_NEWNET) points to new /proc/net content even
 * if old one is in dcache.
 *
 * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
 */
#undef NDEBUG
#include <assert.h>
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/socket.h>

static pid_t pid = -1;

static void f(void)
{
        if (pid > 0) {
                kill(pid, SIGTERM);
        }
}

int main(void)
{
        int fd[2];
        char _ = 0;
        int nsfd;

        atexit(f);

        /* Check for privileges and syscall availability straight away. */
        if (unshare(CLONE_NEWNET) == -1) {
                if (errno == ENOSYS || errno == EPERM) {
                        return 4;
                }
                return 1;
        }
        /* Distinguisher between two otherwise empty net namespaces. */
        if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
                return 1;
        }

        if (pipe(fd) == -1) {
                return 1;
        }

        pid = fork();
        if (pid == -1) {
                return 1;
        }

        if (pid == 0) {
                if (unshare(CLONE_NEWNET) == -1) {
                        return 1;
                }

                if (write(fd[1], &_, 1) != 1) {
                        return 1;
                }

                pause();

                return 0;
        }

        if (read(fd[0], &_, 1) != 1) {
                return 1;
        }

        {
                char buf[64];
                snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
                nsfd = open(buf, O_RDONLY);
                if (nsfd == -1) {
                        return 1;
                }
        }

        /* Reliably pin dentry into dcache. */
        (void)open("/proc/net/unix", O_RDONLY);

        if (setns(nsfd, CLONE_NEWNET) == -1) {
                return 1;
        }

        kill(pid, SIGTERM);
        pid = 0;

        {
                char buf[4096];
                ssize_t rv;
                int fd;

                fd = open("/proc/net/unix", O_RDONLY);
                if (fd == -1) {
                        return 1;
                }

#define S "Num RefCount Protocol Flags Type St Inode Path\n"
                rv = read(fd, buf, sizeof(buf));

                assert(rv == strlen(S));
                assert(memcmp(buf, S, strlen(S)) == 0);
        }

        return 0;
}
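As a usage note (not part of the patch): the new binary should be picked up by the proc kselftest target, e.g. make -C tools/testing/selftests TARGETS=proc run_tests, and the early return value of 4 corresponds to the kselftest skip code for the case where unshare(CLONE_NEWNET) is unavailable or not permitted.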