Commit 59a47fff authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing update from Steven Rostedt:
 "Mostly this is just clean ups and micro optimizations.

  The changes with more meat are:

   - Allowing the trace event filters to filter on CPU number and
     process ids

   - Two new markers for trace output latency were added (10 and 100
     msec latencies)

   - Have tracing_thresh filter function profiling time

  I also worked on modifying the ring buffer code for some future work,
  and moved the adding of the timestamp around.  One of my changes
  caused a regression, and since other changes were built on top of it
  and already tested, I had to operate a revert of that change.  Instead
  of rebasing, this change set has the code that caused a regression as
  well as the code to revert that change without touching the other
  changes that were made on top of it"

* tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  ring-buffer: Revert "ring-buffer: Get timestamp after event is allocated"
  tracing: Don't make assumptions about length of string on task rename
  tracing: Allow triggers to filter for CPU ids and process names
  ftrace: Format MCOUNT_ADDR address as type unsigned long
  tracing: Introduce two additional marks for delay
  ftrace: Fix function_graph duration spacing with 7-digits
  ftrace: add tracing_thresh to function profile
  tracing: Clean up stack tracing and fix fentry updates
  ring-buffer: Reorganize function locations
  ring-buffer: Make sure event has enough room for extend and padding
  ring-buffer: Get timestamp after event is allocated
  ring-buffer: Move the adding of the extended timestamp out of line
  ring-buffer: Add event descriptor to simplify passing data
  ftrace: correct the counter increment for trace_buffer data
  tracing: Fix for non-continuous cpu ids
  tracing: Prefer kcalloc over kzalloc with multiply
parents 425afcff b7dc42fd
......@@ -691,6 +691,8 @@ The above is mostly meaningful for kernel developers.
The marks are determined by the difference between this
current trace and the next trace.
'$' - greater than 1 second
'@' - greater than 100 milisecond
'*' - greater than 10 milisecond
'#' - greater than 1000 microsecond
'!' - greater than 100 microsecond
'+' - greater than 10 microsecond
......@@ -1944,26 +1946,49 @@ want, depending on your needs.
ie:
0) | up_write() {
0) 0.646 us | _spin_lock_irqsave();
0) 0.684 us | _spin_unlock_irqrestore();
0) 3.123 us | }
0) 0.548 us | fput();
0) + 58.628 us | }
3) # 1837.709 us | } /* __switch_to */
3) | finish_task_switch() {
3) 0.313 us | _raw_spin_unlock_irq();
3) 3.177 us | }
3) # 1889.063 us | } /* __schedule */
3) ! 140.417 us | } /* __schedule */
3) # 2034.948 us | } /* schedule */
3) * 33998.59 us | } /* schedule_preempt_disabled */
[...]
0) | putname() {
0) | kmem_cache_free() {
0) 0.518 us | __phys_addr();
0) 1.757 us | }
0) 2.861 us | }
0) ! 115.305 us | }
0) ! 116.402 us | }
1) 0.260 us | msecs_to_jiffies();
1) 0.313 us | __rcu_read_unlock();
1) + 61.770 us | }
1) + 64.479 us | }
1) 0.313 us | rcu_bh_qs();
1) 0.313 us | __local_bh_enable();
1) ! 217.240 us | }
1) 0.365 us | idle_cpu();
1) | rcu_irq_exit() {
1) 0.417 us | rcu_eqs_enter_common.isra.47();
1) 3.125 us | }
1) ! 227.812 us | }
1) ! 457.395 us | }
1) @ 119760.2 us | }
[...]
2) | handle_IPI() {
1) 6.979 us | }
2) 0.417 us | scheduler_ipi();
1) 9.791 us | }
1) + 12.917 us | }
2) 3.490 us | }
1) + 15.729 us | }
1) + 18.542 us | }
2) $ 3594274 us | }
+ means that the function exceeded 10 usecs.
! means that the function exceeded 100 usecs.
# means that the function exceeded 1000 usecs.
* means that the function exceeded 10 msecs.
@ means that the function exceeded 100 msecs.
$ means that the function exceeded 1 sec.
......
......@@ -6,7 +6,7 @@
#ifndef __ASSEMBLY__
extern void mcount_wrapper(void);
#define MCOUNT_ADDR ((long)(mcount_wrapper))
#define MCOUNT_ADDR ((unsigned long)(mcount_wrapper))
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
......
......@@ -3,7 +3,7 @@
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
#ifndef __ASSEMBLY__
......
......@@ -2,7 +2,7 @@
#define _ASM_POWERPC_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#ifdef __ASSEMBLY__
......
......@@ -9,7 +9,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_ADDR ((unsigned long)(mcount))
#ifdef CONFIG_DYNAMIC_FTRACE
#define CALL_ADDR ((long)(ftrace_call))
......
......@@ -2,7 +2,7 @@
#define _ASM_SPARC64_FTRACE
#ifdef CONFIG_MCOUNT
#define MCOUNT_ADDR ((long)(_mcount))
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
#ifndef __ASSEMBLY__
......
......@@ -3,9 +3,9 @@
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CC_USING_FENTRY
# define MCOUNT_ADDR ((long)(__fentry__))
# define MCOUNT_ADDR ((unsigned long)(__fentry__))
#else
# define MCOUNT_ADDR ((long)(mcount))
# define MCOUNT_ADDR ((unsigned long)(mcount))
#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
......
......@@ -46,7 +46,7 @@ TRACE_EVENT(task_rename,
TP_fast_assign(
__entry->pid = task->pid;
memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
memcpy(entry->newcomm, comm, TASK_COMM_LEN);
strlcpy(entry->newcomm, comm, TASK_COMM_LEN);
__entry->oom_score_adj = task->signal->oom_score_adj;
),
......
......@@ -630,13 +630,18 @@ static int function_stat_show(struct seq_file *m, void *v)
goto out;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
avg = rec->time;
do_div(avg, rec->counter);
if (tracing_thresh && (avg < tracing_thresh))
goto out;
#endif
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
seq_puts(m, " ");
avg = rec->time;
do_div(avg, rec->counter);
/* Sample standard deviation (s^2) */
if (rec->counter <= 1)
......
This diff is collapsed.
......@@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
if (!iter)
return ERR_PTR(-ENOMEM);
iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
GFP_KERNEL);
if (!iter->buffer_iter)
goto release;
......@@ -6990,7 +6990,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
trace_init_global_iter(&iter);
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
......
......@@ -30,6 +30,7 @@
DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);
#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
......@@ -94,6 +95,10 @@ trace_find_event_field(struct trace_event_call *call, char *name)
struct ftrace_event_field *field;
struct list_head *head;
field = __find_event_field(&ftrace_generic_fields, name);
if (field)
return field;
field = __find_event_field(&ftrace_common_fields, name);
if (field)
return field;
......@@ -144,6 +149,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
}
EXPORT_SYMBOL_GPL(trace_define_field);
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
filter_type); \
if (ret) \
return ret;
#define __common_field(type, item) \
ret = __trace_define_field(&ftrace_common_fields, #type, \
"common_" #item, \
......@@ -153,6 +165,16 @@ EXPORT_SYMBOL_GPL(trace_define_field);
if (ret) \
return ret;
static int trace_define_generic_fields(void)
{
int ret;
__generic_field(int, cpu, FILTER_OTHER);
__generic_field(char *, comm, FILTER_PTR_STRING);
return ret;
}
static int trace_define_common_fields(void)
{
int ret;
......@@ -2671,6 +2693,9 @@ static __init int event_trace_init(void)
if (!entry)
pr_warn("Could not create tracefs 'available_events' entry\n");
if (trace_define_generic_fields())
pr_warn("tracing: Failed to allocated generic fields");
if (trace_define_common_fields())
pr_warn("tracing: Failed to allocate common fields");
......
......@@ -252,6 +252,50 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
return match;
}
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
int cpu, cmp;
int match = 0;
cpu = raw_smp_processor_id();
cmp = pred->val;
switch (pred->op) {
case OP_EQ:
match = cpu == cmp;
break;
case OP_LT:
match = cpu < cmp;
break;
case OP_LE:
match = cpu <= cmp;
break;
case OP_GT:
match = cpu > cmp;
break;
case OP_GE:
match = cpu >= cmp;
break;
default:
break;
}
return !!match == !pred->not;
}
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp, match;
cmp = pred->regex.match(current->comm, &pred->regex,
pred->regex.field_len);
match = cmp ^ pred->not;
return match;
}
static int filter_pred_none(struct filter_pred *pred, void *event)
{
return 0;
......@@ -1002,7 +1046,10 @@ static int init_pred(struct filter_parse_state *ps,
if (is_string_field(field)) {
filter_build_regex(pred);
if (field->filter_type == FILTER_STATIC_STRING) {
if (!strcmp(field->name, "comm")) {
fn = filter_pred_comm;
pred->regex.field_len = TASK_COMM_LEN;
} else if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
......@@ -1025,7 +1072,10 @@ static int init_pred(struct filter_parse_state *ps,
}
pred->val = val;
fn = select_comparison_fn(pred->op, field->size,
if (!strcmp(field->name, "cpu"))
fn = filter_pred_cpu;
else
fn = select_comparison_fn(pred->op, field->size,
field->is_signed);
if (!fn) {
parse_error(ps, FILT_ERR_INVALID_OP, 0);
......
......@@ -715,13 +715,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
trace_seq_printf(s, ".%s", nsecs_str);
len += strlen(nsecs_str);
len += strlen(nsecs_str) + 1;
}
trace_seq_puts(s, " us ");
/* Print remaining spaces to fit the row's width */
for (i = len; i < 7; i++)
for (i = len; i < 8; i++)
trace_seq_putc(s, ' ');
}
......
......@@ -496,6 +496,8 @@ static const struct trace_mark {
char sym;
} mark[] = {
MARK(1000000000ULL , '$'), /* 1 sec */
MARK(100000000ULL , '@'), /* 100 msec */
MARK(10000000ULL , '*'), /* 10 msec */
MARK(1000000ULL , '#'), /* 1000 usecs */
MARK(100000ULL , '!'), /* 100 usecs */
MARK(10000ULL , '+'), /* 10 usecs */
......@@ -508,7 +510,7 @@ char trace_find_mark(unsigned long long d)
int size = ARRAY_SIZE(mark);
for (i = 0; i < size; i++) {
if (d >= mark[i].val)
if (d > mark[i].val)
break;
}
......
......@@ -18,12 +18,6 @@
#define STACK_TRACE_ENTRIES 500
#ifdef CC_USING_FENTRY
# define fentry 1
#else
# define fentry 0
#endif
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
......@@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
*/
static struct stack_trace max_stack_trace = {
.max_entries = STACK_TRACE_ENTRIES - 1,
.entries = &stack_dump_trace[1],
.entries = &stack_dump_trace[0],
};
static unsigned long max_stack_size;
......@@ -55,7 +49,7 @@ static inline void print_max_stack(void)
pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n",
max_stack_trace.nr_entries - 1);
max_stack_trace.nr_entries);
for (i = 0; i < max_stack_trace.nr_entries; i++) {
if (stack_dump_trace[i] == ULONG_MAX)
......@@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack)
unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
int frame_size = ACCESS_ONCE(tracer_frame);
int i;
int i, x;
this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
this_size = THREAD_SIZE - this_size;
......@@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack)
max_stack_size = this_size;
max_stack_trace.nr_entries = 0;
if (using_ftrace_ops_list_func())
max_stack_trace.skip = 4;
else
max_stack_trace.skip = 3;
max_stack_trace.skip = 3;
save_stack_trace(&max_stack_trace);
/*
* Add the passed in ip from the function tracer.
* Searching for this on the stack will skip over
* most of the overhead from the stack tracer itself.
*/
stack_dump_trace[0] = ip;
max_stack_trace.nr_entries++;
/* Skip over the overhead of the stack tracer itself */
for (i = 0; i < max_stack_trace.nr_entries; i++) {
if (stack_dump_trace[i] == ip)
break;
}
/*
* Now find where in the stack these are.
*/
i = 0;
x = 0;
start = stack;
top = (unsigned long *)
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
......@@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack)
while (i < max_stack_trace.nr_entries) {
int found = 0;
stack_dump_index[i] = this_size;
stack_dump_index[x] = this_size;
p = start;
for (; p < top && i < max_stack_trace.nr_entries; p++) {
if (stack_dump_trace[i] == ULONG_MAX)
break;
if (*p == stack_dump_trace[i]) {
this_size = stack_dump_index[i++] =
stack_dump_trace[x] = stack_dump_trace[i++];
this_size = stack_dump_index[x++] =
(top - p) * sizeof(unsigned long);
found = 1;
/* Start the search from here */
......@@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack)
* out what that is, then figure it out
* now.
*/
if (unlikely(!tracer_frame) && i == 1) {
if (unlikely(!tracer_frame)) {
tracer_frame = (p - stack) *
sizeof(unsigned long);
max_stack_size -= tracer_frame;
......@@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack)
i++;
}
max_stack_trace.nr_entries = x;
for (; x < i; x++)
stack_dump_trace[x] = ULONG_MAX;
if (task_stack_end_corrupted(current)) {
print_max_stack();
BUG();
......@@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (per_cpu(trace_active, cpu)++ != 0)
goto out;
/*
* When fentry is used, the traced function does not get
* its stack frame set up, and we lose the parent.
* The ip is pretty useless because the function tracer
* was called before that function set up its stack frame.
* In this case, we use the parent ip.
*
* By adding the return address of either the parent ip
* or the current ip we can disregard most of the stack usage
* caused by the stack tracer itself.
*
* The function tracer always reports the address of where the
* mcount call was, but the stack will hold the return address.
*/
if (fentry)
ip = parent_ip;
else
ip += MCOUNT_INSN_SIZE;
ip += MCOUNT_INSN_SIZE;
check_stack(ip, &stack);
......@@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos)
{
long n = *pos - 1;
if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
return NULL;
m->private = (void *)n;
......@@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v)
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
max_stack_trace.nr_entries - 1);
max_stack_trace.nr_entries);
if (!stack_tracer_enabled && !max_stack_size)
print_disabled(m);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment