perf trace: Allow selecting the use of the ordered_events code

I was trigger happy on this one, as using ordered_events as implemented
by Jiri for use with the --block code under discussion on lkml incurs
a delay: processing is held back to form batches that then get ordered
and then printed.

With 'perf trace' we want to process the events as they go, without that
delay, and doing it that way works well for the common case which is to
trace a thread or a workload started by 'perf trace'.

So revert back to not using ordered_events but add an option to select
that mode so that users can experiment with their particular use case to
see if it works better, i.e. if the added delay is not a problem and the
ordering helps.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-8ki7sld6rusnjhhtaly26i5o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 7ba61524
...@@ -205,6 +205,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. ...@@ -205,6 +205,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
because the file may be huge. A time out is needed in such cases. because the file may be huge. A time out is needed in such cases.
This option sets the time out limit. The default value is 500 ms. This option sets the time out limit. The default value is 500 ms.
--sort-events::
Do sorting on batches of events, use when noticing out of order events that
may happen, for instance, when a thread gets migrated to a different CPU
while processing a syscall.
PAGEFAULTS PAGEFAULTS
---------- ----------
......
...@@ -110,6 +110,7 @@ struct trace { ...@@ -110,6 +110,7 @@ struct trace {
} stats; } stats;
unsigned int max_stack; unsigned int max_stack;
unsigned int min_stack; unsigned int min_stack;
bool sort_events;
bool raw_augmented_syscalls; bool raw_augmented_syscalls;
bool not_ev_qualifier; bool not_ev_qualifier;
bool live; bool live;
...@@ -2656,7 +2657,7 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event) ...@@ -2656,7 +2657,7 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event)
return 0; return 0;
} }
static int trace__flush_events(struct trace *trace) static int __trace__flush_events(struct trace *trace)
{ {
u64 first = ordered_events__first_time(&trace->oe.data); u64 first = ordered_events__first_time(&trace->oe.data);
u64 flush = trace->oe.last - NSEC_PER_SEC; u64 flush = trace->oe.last - NSEC_PER_SEC;
...@@ -2668,12 +2669,19 @@ static int trace__flush_events(struct trace *trace) ...@@ -2668,12 +2669,19 @@ static int trace__flush_events(struct trace *trace)
return 0; return 0;
} }
static int trace__flush_events(struct trace *trace)
{
return !trace->sort_events ? 0 : __trace__flush_events(trace);
}
static int trace__deliver_event(struct trace *trace, union perf_event *event) static int trace__deliver_event(struct trace *trace, union perf_event *event)
{ {
struct perf_evlist *evlist = trace->evlist;
int err; int err;
err = perf_evlist__parse_sample_timestamp(evlist, event, &trace->oe.last); if (!trace->sort_events)
return __trace__deliver_event(trace, event);
err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
if (err && err != -1) if (err && err != -1)
return err; return err;
...@@ -2897,6 +2905,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) ...@@ -2897,6 +2905,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__disable(evlist); perf_evlist__disable(evlist);
if (trace->sort_events)
ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
if (!err) { if (!err) {
...@@ -3516,6 +3525,8 @@ int cmd_trace(int argc, const char **argv) ...@@ -3516,6 +3525,8 @@ int cmd_trace(int argc, const char **argv)
"Set the maximum stack depth when parsing the callchain, " "Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. " "anything beyond the specified depth will be ignored. "
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
"Sort batch of events before processing, use if getting out of order events"),
OPT_BOOLEAN(0, "print-sample", &trace.print_sample, OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
...@@ -3609,8 +3620,10 @@ int cmd_trace(int argc, const char **argv) ...@@ -3609,8 +3620,10 @@ int cmd_trace(int argc, const char **argv)
} }
} }
if (trace.sort_events) {
ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace); ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
ordered_events__set_copy_on_queue(&trace.oe.data, true); ordered_events__set_copy_on_queue(&trace.oe.data, true);
}
/* /*
* If we are augmenting syscalls, then combine what we put in the * If we are augmenting syscalls, then combine what we put in the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment