Commit 32cdba1e authored by Oleg Nesterov's avatar Oleg Nesterov

uprobes: Use percpu_rw_semaphore to fix register/unregister vs dup_mmap() race

This was always racy, but 26872090
"uprobes: Rework register_for_each_vma() to make it O(n)" should be
blamed anyway, it made everything worse and I didn't notice.

register/unregister call build_map_info() and then do install/remove
breakpoint for every mm which mmaps inode/offset. This can obviously
race with fork()->dup_mmap() in between and we can miss the child.

uprobe_register() could be easily fixed but unregister is much worse,
the new mm inherits "int3" from parent and there is no way to detect
this if uprobe goes away.

So this patch simply adds percpu_down_read/up_read around dup_mmap(),
and percpu_down_write/up_write into register_for_each_vma().

This adds 2 new hooks into dup_mmap() but we can kill uprobe_dup_mmap()
and fold it into uprobe_end_dup_mmap().
Reported-by: default avatarSrikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: default avatarSrikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: default avatarOleg Nesterov <oleg@redhat.com>
parent 65b6ecc0
...@@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con ...@@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con
extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern int uprobe_mmap(struct vm_area_struct *vma); extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void uprobe_start_dup_mmap(void);
extern void uprobe_end_dup_mmap(void);
extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t);
...@@ -127,6 +129,12 @@ static inline void ...@@ -127,6 +129,12 @@ static inline void
uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{ {
} }
static inline void uprobe_start_dup_mmap(void)
{
}
static inline void uprobe_end_dup_mmap(void)
{
}
static inline void static inline void
uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{ {
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */ #include <linux/kdebug.h> /* notifier mechanism */
#include "../../mm/internal.h" /* munlock_vma_page */ #include "../../mm/internal.h" /* munlock_vma_page */
#include <linux/percpu-rwsem.h>
#include <linux/uprobes.h> #include <linux/uprobes.h>
...@@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; ...@@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
static struct percpu_rw_semaphore dup_mmap_sem;
/* /*
* uprobe_events allows us to skip the uprobe_mmap if there are no uprobe * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
* events active at this time. Probably a fine grained per inode count is * events active at this time. Probably a fine grained per inode count is
...@@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) ...@@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
struct map_info *info; struct map_info *info;
int err = 0; int err = 0;
percpu_down_write(&dup_mmap_sem);
info = build_map_info(uprobe->inode->i_mapping, info = build_map_info(uprobe->inode->i_mapping,
uprobe->offset, is_register); uprobe->offset, is_register);
if (IS_ERR(info)) if (IS_ERR(info)) {
return PTR_ERR(info); err = PTR_ERR(info);
goto out;
}
while (info) { while (info) {
struct mm_struct *mm = info->mm; struct mm_struct *mm = info->mm;
...@@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) ...@@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
mmput(mm); mmput(mm);
info = free_map_info(info); info = free_map_info(info);
} }
out:
percpu_up_write(&dup_mmap_sem);
return err; return err;
} }
...@@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm) ...@@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm)
kfree(area); kfree(area);
} }
void uprobe_start_dup_mmap(void)
{
percpu_down_read(&dup_mmap_sem);
}
void uprobe_end_dup_mmap(void)
{
percpu_up_read(&dup_mmap_sem);
}
void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{ {
newmm->uprobes_state.xol_area = NULL; newmm->uprobes_state.xol_area = NULL;
...@@ -1597,6 +1614,9 @@ static int __init init_uprobes(void) ...@@ -1597,6 +1614,9 @@ static int __init init_uprobes(void)
mutex_init(&uprobes_mmap_mutex[i]); mutex_init(&uprobes_mmap_mutex[i]);
} }
if (percpu_init_rwsem(&dup_mmap_sem))
return -ENOMEM;
return register_die_notifier(&uprobe_exception_nb); return register_die_notifier(&uprobe_exception_nb);
} }
module_init(init_uprobes); module_init(init_uprobes);
......
...@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) ...@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
unsigned long charge; unsigned long charge;
struct mempolicy *pol; struct mempolicy *pol;
uprobe_start_dup_mmap();
down_write(&oldmm->mmap_sem); down_write(&oldmm->mmap_sem);
flush_cache_dup_mm(oldmm); flush_cache_dup_mm(oldmm);
uprobe_dup_mmap(oldmm, mm); uprobe_dup_mmap(oldmm, mm);
...@@ -469,6 +470,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) ...@@ -469,6 +470,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm); flush_tlb_mm(oldmm);
up_write(&oldmm->mmap_sem); up_write(&oldmm->mmap_sem);
uprobe_end_dup_mmap();
return retval; return retval;
fail_nomem_anon_vma_fork: fail_nomem_anon_vma_fork:
mpol_put(pol); mpol_put(pol);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment