Commit f4d34a87 authored by Steven Rostedt's avatar Steven Rostedt

tracing: Use pid bitmap instead of a pid array for set_event_pid

In order to add the ability to let tasks that are filtered by the events
have their children also be traced on fork (and then not traced on exit),
convert the array into a pid bitmask. Most of the time the number of pids is
only 32768 pids or a 4k bitmask, which is the same size as the default list
currently is, and that list could grow if more pids are listed.

This also greatly simplifies the code.
Suggested-by: default avatar"H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: default avatarSteven Rostedt <rostedt@goodmis.org>
parent 9ebc57cf
...@@ -177,9 +177,8 @@ struct trace_options { ...@@ -177,9 +177,8 @@ struct trace_options {
}; };
struct trace_pid_list { struct trace_pid_list {
unsigned int nr_pids; int pid_max;
int order; unsigned long *pids;
pid_t *pids;
}; };
/* /*
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/tracefs.h> #include <linux/tracefs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/bsearch.h> #include <linux/vmalloc.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/sort.h> #include <linux/sort.h>
...@@ -471,23 +471,13 @@ static void ftrace_clear_events(struct trace_array *tr) ...@@ -471,23 +471,13 @@ static void ftrace_clear_events(struct trace_array *tr)
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
} }
static int cmp_pid(const void *key, const void *elt) /* Shouldn't this be in a header? */
{ extern int pid_max;
const pid_t *search_pid = key;
const pid_t *pid = elt;
if (*search_pid == *pid)
return 0;
if (*search_pid < *pid)
return -1;
return 1;
}
static bool static bool
ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task) ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
{ {
pid_t search_pid; pid_t pid;
pid_t *pid;
/* /*
* Return false, because if filtered_pids does not exist, * Return false, because if filtered_pids does not exist,
...@@ -496,15 +486,16 @@ ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task) ...@@ -496,15 +486,16 @@ ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
if (!filtered_pids) if (!filtered_pids)
return false; return false;
search_pid = task->pid; pid = task->pid;
pid = bsearch(&search_pid, filtered_pids->pids, /*
filtered_pids->nr_pids, sizeof(pid_t), * If pid_max changed after filtered_pids was created, we
cmp_pid); * by default ignore all pids greater than the previous pid_max.
if (!pid) */
if (task->pid >= filtered_pids->pid_max)
return true; return true;
return false; return !test_bit(task->pid, filtered_pids->pids);
} }
static void static void
...@@ -602,7 +593,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) ...@@ -602,7 +593,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
/* Wait till all users are no longer using pid filtering */ /* Wait till all users are no longer using pid filtering */
synchronize_sched(); synchronize_sched();
free_pages((unsigned long)pid_list->pids, pid_list->order); vfree(pid_list->pids);
kfree(pid_list); kfree(pid_list);
} }
...@@ -946,11 +937,32 @@ static void t_stop(struct seq_file *m, void *p) ...@@ -946,11 +937,32 @@ static void t_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
} }
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
struct trace_array *tr = m->private;
struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
unsigned long pid = (unsigned long)v;
(*pos)++;
/* pid already is +1 of the actual prevous bit */
pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
/* Return pid + 1 to allow zero to be represented */
if (pid < pid_list->pid_max)
return (void *)(pid + 1);
return NULL;
}
static void *p_start(struct seq_file *m, loff_t *pos) static void *p_start(struct seq_file *m, loff_t *pos)
__acquires(RCU) __acquires(RCU)
{ {
struct trace_pid_list *pid_list; struct trace_pid_list *pid_list;
struct trace_array *tr = m->private; struct trace_array *tr = m->private;
unsigned long pid;
loff_t l = 0;
/* /*
* Grab the mutex, to keep calls to p_next() having the same * Grab the mutex, to keep calls to p_next() having the same
...@@ -963,10 +975,18 @@ static void *p_start(struct seq_file *m, loff_t *pos) ...@@ -963,10 +975,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)
pid_list = rcu_dereference_sched(tr->filtered_pids); pid_list = rcu_dereference_sched(tr->filtered_pids);
if (!pid_list || *pos >= pid_list->nr_pids) if (!pid_list)
return NULL;
pid = find_first_bit(pid_list->pids, pid_list->pid_max);
if (pid >= pid_list->pid_max)
return NULL; return NULL;
return (void *)&pid_list->pids[*pos]; /* Return pid + 1 so that zero can be the exit value */
for (pid++; pid && l < *pos;
pid = (unsigned long)p_next(m, (void *)pid, &l))
;
return (void *)pid;
} }
static void p_stop(struct seq_file *m, void *p) static void p_stop(struct seq_file *m, void *p)
...@@ -976,25 +996,11 @@ static void p_stop(struct seq_file *m, void *p) ...@@ -976,25 +996,11 @@ static void p_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
} }
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
struct trace_array *tr = m->private;
struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
(*pos)++;
if (*pos >= pid_list->nr_pids)
return NULL;
return (void *)&pid_list->pids[*pos];
}
static int p_show(struct seq_file *m, void *v) static int p_show(struct seq_file *m, void *v)
{ {
pid_t *pid = v; unsigned long pid = (unsigned long)v - 1;
seq_printf(m, "%d\n", *pid); seq_printf(m, "%lu\n", pid);
return 0; return 0;
} }
...@@ -1543,11 +1549,6 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ...@@ -1543,11 +1549,6 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
return r; return r;
} }
static int max_pids(struct trace_pid_list *pid_list)
{
return (PAGE_SIZE << pid_list->order) / sizeof(pid_t);
}
static void ignore_task_cpu(void *data) static void ignore_task_cpu(void *data)
{ {
struct trace_array *tr = data; struct trace_array *tr = data;
...@@ -1571,7 +1572,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1571,7 +1572,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
struct seq_file *m = filp->private_data; struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private; struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL; struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *pid_list = NULL; struct trace_pid_list *pid_list;
struct trace_event_file *file; struct trace_event_file *file;
struct trace_parser parser; struct trace_parser parser;
unsigned long val; unsigned long val;
...@@ -1579,7 +1580,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1579,7 +1580,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
ssize_t read = 0; ssize_t read = 0;
ssize_t ret = 0; ssize_t ret = 0;
pid_t pid; pid_t pid;
int i; int nr_pids = 0;
if (!cnt) if (!cnt)
return 0; return 0;
...@@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
return -ENOMEM; return -ENOMEM;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
/* /*
* Load as many pids into the array before doing a * Always recreate a new array. The write is an all or nothing
* swap from the tr->filtered_pids to the new list. * operation. Always create a new array when adding new pids by
* the user. If the operation fails, then the current list is
* not modified.
*/ */
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list) {
read = -ENOMEM;
goto out;
}
pid_list->pid_max = READ_ONCE(pid_max);
/* Only truncating will shrink pid_max */
if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
pid_list->pid_max = filtered_pids->pid_max;
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
kfree(pid_list);
read = -ENOMEM;
goto out;
}
if (filtered_pids) {
/* copy the current bits to the new max */
pid = find_first_bit(filtered_pids->pids,
filtered_pids->pid_max);
while (pid < filtered_pids->pid_max) {
set_bit(pid, pid_list->pids);
pid = find_next_bit(filtered_pids->pids,
filtered_pids->pid_max,
pid + 1);
nr_pids++;
}
}
while (cnt > 0) { while (cnt > 0) {
this_pos = 0; this_pos = 0;
...@@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
ret = -EINVAL; ret = -EINVAL;
if (kstrtoul(parser.buffer, 0, &val)) if (kstrtoul(parser.buffer, 0, &val))
break; break;
if (val > INT_MAX) if (val >= pid_list->pid_max)
break; break;
pid = (pid_t)val; pid = (pid_t)val;
ret = -ENOMEM; set_bit(pid, pid_list->pids);
if (!pid_list) { nr_pids++;
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list)
break;
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
if (filtered_pids)
pid_list->order = filtered_pids->order;
else
pid_list->order = 0;
pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
pid_list->order);
if (!pid_list->pids)
break;
if (filtered_pids) {
pid_list->nr_pids = filtered_pids->nr_pids;
memcpy(pid_list->pids, filtered_pids->pids,
pid_list->nr_pids * sizeof(pid_t));
} else
pid_list->nr_pids = 0;
}
if (pid_list->nr_pids >= max_pids(pid_list)) {
pid_t *pid_page;
pid_page = (void *)__get_free_pages(GFP_KERNEL,
pid_list->order + 1);
if (!pid_page)
break;
memcpy(pid_page, pid_list->pids,
pid_list->nr_pids * sizeof(pid_t));
free_pages((unsigned long)pid_list->pids, pid_list->order);
pid_list->order++;
pid_list->pids = pid_page;
}
pid_list->pids[pid_list->nr_pids++] = pid;
trace_parser_clear(&parser); trace_parser_clear(&parser);
ret = 0; ret = 0;
} }
trace_parser_put(&parser); trace_parser_put(&parser);
if (ret < 0) { if (ret < 0) {
if (pid_list) vfree(pid_list->pids);
free_pages((unsigned long)pid_list->pids, pid_list->order);
kfree(pid_list); kfree(pid_list);
mutex_unlock(&event_mutex); read = ret;
return ret; goto out;
}
if (!pid_list) {
mutex_unlock(&event_mutex);
return ret;
} }
sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL); if (!nr_pids) {
/* Cleared the list of pids */
/* Remove duplicates */ vfree(pid_list->pids);
for (i = 1; i < pid_list->nr_pids; i++) { kfree(pid_list);
int start = i; read = ret;
if (!filtered_pids)
while (i < pid_list->nr_pids && goto out;
pid_list->pids[i - 1] == pid_list->pids[i]) pid_list = NULL;
i++;
if (start != i) {
if (i < pid_list->nr_pids) {
memmove(&pid_list->pids[start], &pid_list->pids[i],
(pid_list->nr_pids - i) * sizeof(pid_t));
pid_list->nr_pids -= i - start;
i = start;
} else
pid_list->nr_pids = start;
}
} }
rcu_assign_pointer(tr->filtered_pids, pid_list); rcu_assign_pointer(tr->filtered_pids, pid_list);
list_for_each_entry(file, &tr->events, list) { list_for_each_entry(file, &tr->events, list) {
...@@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
if (filtered_pids) { if (filtered_pids) {
synchronize_sched(); synchronize_sched();
free_pages((unsigned long)filtered_pids->pids, filtered_pids->order); vfree(filtered_pids->pids);
kfree(filtered_pids); kfree(filtered_pids);
} else { } else {
/* /*
...@@ -1745,9 +1722,11 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, ...@@ -1745,9 +1722,11 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
*/ */
on_each_cpu(ignore_task_cpu, tr, 1); on_each_cpu(ignore_task_cpu, tr, 1);
out:
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
ret = read; ret = read;
if (read > 0)
*ppos += read; *ppos += read;
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment