Commit ac9721f3 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

perf_events: Fix races and clean up perf_event and perf_mmap_data interaction

In order to move toward separate buffer objects, rework the whole
perf_mmap_data construct to be a more self-sufficient entity, one
with its own lifetime rules.

This greatly sanitizes the whole output redirection code, which
was riddled with bugs and races.
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 67a3e12b
...@@ -585,6 +585,7 @@ enum perf_event_active_state { ...@@ -585,6 +585,7 @@ enum perf_event_active_state {
struct file; struct file;
struct perf_mmap_data { struct perf_mmap_data {
atomic_t refcount;
struct rcu_head rcu_head; struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC #ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work; struct work_struct work;
...@@ -592,7 +593,6 @@ struct perf_mmap_data { ...@@ -592,7 +593,6 @@ struct perf_mmap_data {
#endif #endif
int nr_pages; /* nr of data pages */ int nr_pages; /* nr of data pages */
int writable; /* are we writable */ int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */ atomic_t poll; /* POLL_ for wakeups */
...@@ -643,7 +643,6 @@ struct perf_event { ...@@ -643,7 +643,6 @@ struct perf_event {
int nr_siblings; int nr_siblings;
int group_flags; int group_flags;
struct perf_event *group_leader; struct perf_event *group_leader;
struct perf_event *output;
const struct pmu *pmu; const struct pmu *pmu;
enum perf_event_active_state state; enum perf_event_active_state state;
...@@ -704,6 +703,8 @@ struct perf_event { ...@@ -704,6 +703,8 @@ struct perf_event {
/* mmap bits */ /* mmap bits */
struct mutex mmap_mutex; struct mutex mmap_mutex;
atomic_t mmap_count; atomic_t mmap_count;
int mmap_locked;
struct user_struct *mmap_user;
struct perf_mmap_data *data; struct perf_mmap_data *data;
/* poll related */ /* poll related */
......
...@@ -1841,6 +1841,7 @@ static void free_event_rcu(struct rcu_head *head) ...@@ -1841,6 +1841,7 @@ static void free_event_rcu(struct rcu_head *head)
} }
static void perf_pending_sync(struct perf_event *event); static void perf_pending_sync(struct perf_event *event);
static void perf_mmap_data_put(struct perf_mmap_data *data);
static void free_event(struct perf_event *event) static void free_event(struct perf_event *event)
{ {
...@@ -1856,9 +1857,9 @@ static void free_event(struct perf_event *event) ...@@ -1856,9 +1857,9 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_task_events); atomic_dec(&nr_task_events);
} }
if (event->output) { if (event->data) {
fput(event->output->filp); perf_mmap_data_put(event->data);
event->output = NULL; event->data = NULL;
} }
if (event->destroy) if (event->destroy)
...@@ -2175,7 +2176,27 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) ...@@ -2175,7 +2176,27 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
return ret; return ret;
} }
static int perf_event_set_output(struct perf_event *event, int output_fd); static const struct file_operations perf_fops;
static struct perf_event *perf_fget_light(int fd, int *fput_needed)
{
struct file *file;
file = fget_light(fd, fput_needed);
if (!file)
return ERR_PTR(-EBADF);
if (file->f_op != &perf_fops) {
fput_light(file, *fput_needed);
*fput_needed = 0;
return ERR_PTR(-EBADF);
}
return file->private_data;
}
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
...@@ -2202,7 +2223,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -2202,7 +2223,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return perf_event_period(event, (u64 __user *)arg); return perf_event_period(event, (u64 __user *)arg);
case PERF_EVENT_IOC_SET_OUTPUT: case PERF_EVENT_IOC_SET_OUTPUT:
return perf_event_set_output(event, arg); {
struct perf_event *output_event = NULL;
int fput_needed = 0;
int ret;
if (arg != -1) {
output_event = perf_fget_light(arg, &fput_needed);
if (IS_ERR(output_event))
return PTR_ERR(output_event);
}
ret = perf_event_set_output(event, output_event);
if (output_event)
fput_light(output_event->filp, fput_needed);
return ret;
}
case PERF_EVENT_IOC_SET_FILTER: case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg); return perf_event_set_filter(event, (void __user *)arg);
...@@ -2335,8 +2372,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages) ...@@ -2335,8 +2372,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
unsigned long size; unsigned long size;
int i; int i;
WARN_ON(atomic_read(&event->mmap_count));
size = sizeof(struct perf_mmap_data); size = sizeof(struct perf_mmap_data);
size += nr_pages * sizeof(void *); size += nr_pages * sizeof(void *);
...@@ -2452,8 +2487,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages) ...@@ -2452,8 +2487,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
unsigned long size; unsigned long size;
void *all_buf; void *all_buf;
WARN_ON(atomic_read(&event->mmap_count));
size = sizeof(struct perf_mmap_data); size = sizeof(struct perf_mmap_data);
size += sizeof(void *); size += sizeof(void *);
...@@ -2536,7 +2569,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) ...@@ -2536,7 +2569,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
if (!data->watermark) if (!data->watermark)
data->watermark = max_size / 2; data->watermark = max_size / 2;
atomic_set(&data->refcount, 1);
rcu_assign_pointer(event->data, data); rcu_assign_pointer(event->data, data);
} }
...@@ -2548,13 +2581,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) ...@@ -2548,13 +2581,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
perf_mmap_data_free(data); perf_mmap_data_free(data);
} }
static void perf_mmap_data_release(struct perf_event *event) static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
{ {
struct perf_mmap_data *data = event->data; struct perf_mmap_data *data;
rcu_read_lock();
data = rcu_dereference(event->data);
if (data) {
if (!atomic_inc_not_zero(&data->refcount))
data = NULL;
}
rcu_read_unlock();
return data;
}
WARN_ON(atomic_read(&event->mmap_count)); static void perf_mmap_data_put(struct perf_mmap_data *data)
{
if (!atomic_dec_and_test(&data->refcount))
return;
rcu_assign_pointer(event->data, NULL);
call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
} }
...@@ -2569,15 +2615,18 @@ static void perf_mmap_close(struct vm_area_struct *vma) ...@@ -2569,15 +2615,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
{ {
struct perf_event *event = vma->vm_file->private_data; struct perf_event *event = vma->vm_file->private_data;
WARN_ON_ONCE(event->ctx->parent_ctx);
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
unsigned long size = perf_data_size(event->data); unsigned long size = perf_data_size(event->data);
struct user_struct *user = current_user(); struct user_struct *user = event->mmap_user;
struct perf_mmap_data *data = event->data;
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
vma->vm_mm->locked_vm -= event->data->nr_locked; vma->vm_mm->locked_vm -= event->mmap_locked;
perf_mmap_data_release(event); rcu_assign_pointer(event->data, NULL);
mutex_unlock(&event->mmap_mutex); mutex_unlock(&event->mmap_mutex);
perf_mmap_data_put(data);
free_uid(user);
} }
} }
...@@ -2629,13 +2678,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -2629,13 +2678,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx); WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->mmap_mutex); mutex_lock(&event->mmap_mutex);
if (event->output) { if (event->data) {
ret = -EINVAL; if (event->data->nr_pages == nr_pages)
goto unlock; atomic_inc(&event->data->refcount);
} else
if (atomic_inc_not_zero(&event->mmap_count)) {
if (nr_pages != event->data->nr_pages)
ret = -EINVAL; ret = -EINVAL;
goto unlock; goto unlock;
} }
...@@ -2667,21 +2713,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -2667,21 +2713,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON(event->data); WARN_ON(event->data);
data = perf_mmap_data_alloc(event, nr_pages); data = perf_mmap_data_alloc(event, nr_pages);
if (!data) {
ret = -ENOMEM; ret = -ENOMEM;
if (!data)
goto unlock; goto unlock;
}
ret = 0;
perf_mmap_data_init(event, data); perf_mmap_data_init(event, data);
atomic_set(&event->mmap_count, 1);
atomic_long_add(user_extra, &user->locked_vm);
vma->vm_mm->locked_vm += extra;
event->data->nr_locked = extra;
if (vma->vm_flags & VM_WRITE) if (vma->vm_flags & VM_WRITE)
event->data->writable = 1; event->data->writable = 1;
atomic_long_add(user_extra, &user->locked_vm);
event->mmap_locked = extra;
event->mmap_user = get_current_user();
vma->vm_mm->locked_vm += event->mmap_locked;
unlock: unlock:
if (!ret)
atomic_inc(&event->mmap_count);
mutex_unlock(&event->mmap_mutex); mutex_unlock(&event->mmap_mutex);
vma->vm_flags |= VM_RESERVED; vma->vm_flags |= VM_RESERVED;
...@@ -2993,7 +3041,6 @@ int perf_output_begin(struct perf_output_handle *handle, ...@@ -2993,7 +3041,6 @@ int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size, struct perf_event *event, unsigned int size,
int nmi, int sample) int nmi, int sample)
{ {
struct perf_event *output_event;
struct perf_mmap_data *data; struct perf_mmap_data *data;
unsigned long tail, offset, head; unsigned long tail, offset, head;
int have_lost; int have_lost;
...@@ -3010,10 +3057,6 @@ int perf_output_begin(struct perf_output_handle *handle, ...@@ -3010,10 +3057,6 @@ int perf_output_begin(struct perf_output_handle *handle,
if (event->parent) if (event->parent)
event = event->parent; event = event->parent;
output_event = rcu_dereference(event->output);
if (output_event)
event = output_event;
data = rcu_dereference(event->data); data = rcu_dereference(event->data);
if (!data) if (!data)
goto out; goto out;
...@@ -4912,39 +4955,17 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, ...@@ -4912,39 +4955,17 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
goto out; goto out;
} }
static int perf_event_set_output(struct perf_event *event, int output_fd) static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{ {
struct perf_event *output_event = NULL; struct perf_mmap_data *data = NULL, *old_data = NULL;
struct file *output_file = NULL;
struct perf_event *old_output;
int fput_needed = 0;
int ret = -EINVAL; int ret = -EINVAL;
/* if (!output_event)
* Don't allow output of inherited per-task events. This would
* create performance issues due to cross cpu access.
*/
if (event->cpu == -1 && event->attr.inherit)
return -EINVAL;
if (!output_fd)
goto set; goto set;
output_file = fget_light(output_fd, &fput_needed); /* don't allow circular references */
if (!output_file) if (event == output_event)
return -EBADF;
if (output_file->f_op != &perf_fops)
goto out;
output_event = output_file->private_data;
/* Don't chain output fds */
if (output_event->output)
goto out;
/* Don't set an output fd when we already have an output channel */
if (event->data)
goto out; goto out;
/* /*
...@@ -4959,26 +4980,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd) ...@@ -4959,26 +4980,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
if (output_event->cpu == -1 && output_event->ctx != event->ctx) if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out; goto out;
atomic_long_inc(&output_file->f_count);
set: set:
mutex_lock(&event->mmap_mutex); mutex_lock(&event->mmap_mutex);
old_output = event->output; /* Can't redirect output if we've got an active mmap() */
rcu_assign_pointer(event->output, output_event); if (atomic_read(&event->mmap_count))
mutex_unlock(&event->mmap_mutex); goto unlock;
if (old_output) { if (output_event) {
/* /* get the buffer we want to redirect to */
* we need to make sure no existing perf_output_*() data = perf_mmap_data_get(output_event);
* is still referencing this event. if (!data)
*/ goto unlock;
synchronize_rcu();
fput(old_output->filp);
} }
old_data = event->data;
rcu_assign_pointer(event->data, data);
ret = 0; ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
if (old_data)
perf_mmap_data_put(old_data);
out: out:
fput_light(output_file, fput_needed);
return ret; return ret;
} }
...@@ -4994,7 +5017,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -4994,7 +5017,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr, struct perf_event_attr __user *, attr_uptr,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{ {
struct perf_event *event, *group_leader; struct perf_event *event, *group_leader = NULL, *output_event = NULL;
struct perf_event_attr attr; struct perf_event_attr attr;
struct perf_event_context *ctx; struct perf_event_context *ctx;
struct file *event_file = NULL; struct file *event_file = NULL;
...@@ -5034,19 +5057,25 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -5034,19 +5057,25 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_fd; goto err_fd;
} }
if (group_fd != -1) {
group_leader = perf_fget_light(group_fd, &fput_needed);
if (IS_ERR(group_leader)) {
err = PTR_ERR(group_leader);
goto err_put_context;
}
group_file = group_leader->filp;
if (flags & PERF_FLAG_FD_OUTPUT)
output_event = group_leader;
if (flags & PERF_FLAG_FD_NO_GROUP)
group_leader = NULL;
}
/* /*
* Look up the group leader (we will attach this event to it): * Look up the group leader (we will attach this event to it):
*/ */
group_leader = NULL; if (group_leader) {
if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
err = -EINVAL; err = -EINVAL;
group_file = fget_light(group_fd, &fput_needed);
if (!group_file)
goto err_put_context;
if (group_file->f_op != &perf_fops)
goto err_put_context;
group_leader = group_file->private_data;
/* /*
* Do not allow a recursive hierarchy (this new sibling * Do not allow a recursive hierarchy (this new sibling
* becoming part of another group-sibling): * becoming part of another group-sibling):
...@@ -5068,9 +5097,16 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -5068,9 +5097,16 @@ SYSCALL_DEFINE5(perf_event_open,
event = perf_event_alloc(&attr, cpu, ctx, group_leader, event = perf_event_alloc(&attr, cpu, ctx, group_leader,
NULL, NULL, GFP_KERNEL); NULL, NULL, GFP_KERNEL);
if (IS_ERR(event)) {
err = PTR_ERR(event); err = PTR_ERR(event);
if (IS_ERR(event))
goto err_put_context; goto err_put_context;
}
if (output_event) {
err = perf_event_set_output(event, output_event);
if (err)
goto err_free_put_context;
}
event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
if (IS_ERR(event_file)) { if (IS_ERR(event_file)) {
...@@ -5078,12 +5114,6 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -5078,12 +5114,6 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_free_put_context; goto err_free_put_context;
} }
if (flags & PERF_FLAG_FD_OUTPUT) {
err = perf_event_set_output(event, group_fd);
if (err)
goto err_fput_free_put_context;
}
event->filp = event_file; event->filp = event_file;
WARN_ON_ONCE(ctx->parent_ctx); WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex); mutex_lock(&ctx->mutex);
...@@ -5101,8 +5131,6 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -5101,8 +5131,6 @@ SYSCALL_DEFINE5(perf_event_open,
fd_install(event_fd, event_file); fd_install(event_fd, event_file);
return event_fd; return event_fd;
err_fput_free_put_context:
fput(event_file);
err_free_put_context: err_free_put_context:
free_event(event); free_event(event);
err_put_context: err_put_context:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment