Commit ea57c4f5 authored by Ingo Molnar

perf tools: Implement counter output multiplexing

Finish the -M/--multiplex option implementation:

 - separate it out from group_fd

 - correctly set it via the ioctl and don't mmap counters that
   are multiplexed

 - modify the perf record event loop to deal with buffer-less
   counters.

 - remove the -g option from perf sched record

 - account for unordered events in perf sched latency

 - (add -f to perf sched record to ease measurements)

 - skip idle threads (pid==0) in latency output

The result is better latency output by 'perf sched latency':

 -----------------------------------------------------------------------------------
  Task              |  Runtime ms | Switches | Average delay ms | Maximum delay ms |
 -----------------------------------------------------------------------------------
  ksoftirqd/8       |    0.071 ms |        2 | avg:    0.458 ms | max:    0.913 ms |
  at-spi-registry   |    0.609 ms |       19 | avg:    0.013 ms | max:    0.023 ms |
  perf              |    3.316 ms |       16 | avg:    0.013 ms | max:    0.054 ms |
  Xorg              |    0.392 ms |       19 | avg:    0.011 ms | max:    0.018 ms |
  sleep             |    0.537 ms |        2 | avg:    0.009 ms | max:    0.009 ms |
 -----------------------------------------------------------------------------------
  TOTAL:            |    4.925 ms |       58 |
 ---------------------------------------------

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent aa1ab9d2
...@@ -49,6 +49,7 @@ static int inherit_stat = 0; ...@@ -49,6 +49,7 @@ static int inherit_stat = 0;
static int no_samples = 0; static int no_samples = 0;
static int sample_address = 0; static int sample_address = 0;
static int multiplex = 0; static int multiplex = 0;
static int multiplex_fd = -1;
static long samples; static long samples;
static struct timeval last_read; static struct timeval last_read;
...@@ -471,7 +472,15 @@ static void create_counter(int counter, int cpu, pid_t pid) ...@@ -471,7 +472,15 @@ static void create_counter(int counter, int cpu, pid_t pid)
*/ */
if (group && group_fd == -1) if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter]; group_fd = fd[nr_cpu][counter];
if (multiplex && multiplex_fd == -1)
multiplex_fd = fd[nr_cpu][counter];
if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
int ret;
ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd);
assert(ret != -1);
} else {
event_array[nr_poll].fd = fd[nr_cpu][counter]; event_array[nr_poll].fd = fd[nr_cpu][counter];
event_array[nr_poll].events = POLLIN; event_array[nr_poll].events = POLLIN;
nr_poll++; nr_poll++;
...@@ -485,9 +494,7 @@ static void create_counter(int counter, int cpu, pid_t pid) ...@@ -485,9 +494,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
error("failed to mmap with %d (%s)\n", errno, strerror(errno)); error("failed to mmap with %d (%s)\n", errno, strerror(errno));
exit(-1); exit(-1);
} }
}
if (multiplex && fd[nr_cpu][counter] != group_fd)
ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, group_fd);
ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
} }
...@@ -618,9 +625,11 @@ static int __cmd_record(int argc, const char **argv) ...@@ -618,9 +625,11 @@ static int __cmd_record(int argc, const char **argv)
int hits = samples; int hits = samples;
for (i = 0; i < nr_cpu; i++) { for (i = 0; i < nr_cpu; i++) {
for (counter = 0; counter < nr_counters; counter++) for (counter = 0; counter < nr_counters; counter++) {
if (mmap_array[i][counter].base)
mmap_read(&mmap_array[i][counter]); mmap_read(&mmap_array[i][counter]);
} }
}
if (hits == samples) { if (hits == samples) {
if (done) if (done)
......
...@@ -116,6 +116,8 @@ static u64 sum_fluct; ...@@ -116,6 +116,8 @@ static u64 sum_fluct;
static u64 run_avg; static u64 run_avg;
static unsigned long replay_repeat = 10; static unsigned long replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long unordered_timestamps;
#define TASK_STATE_TO_CHAR_STR "RSDTtZX" #define TASK_STATE_TO_CHAR_STR "RSDTtZX"
...@@ -1109,8 +1111,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, ...@@ -1109,8 +1111,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (atom->state != THREAD_SLEEPING) if (atom->state != THREAD_SLEEPING)
return; return;
if (atom->sched_out_time > timestamp) nr_timestamps++;
if (atom->sched_out_time > timestamp) {
unordered_timestamps++;
return; return;
}
atom->state = THREAD_WAIT_CPU; atom->state = THREAD_WAIT_CPU;
atom->wake_up_time = timestamp; atom->wake_up_time = timestamp;
...@@ -1130,6 +1135,11 @@ static void output_lat_thread(struct task_atoms *atom_list) ...@@ -1130,6 +1135,11 @@ static void output_lat_thread(struct task_atoms *atom_list)
if (!atom_list->nb_atoms) if (!atom_list->nb_atoms)
return; return;
/*
* Ignore idle threads:
*/
if (!atom_list->thread->pid)
return;
all_runtime += atom_list->total_runtime; all_runtime += atom_list->total_runtime;
all_count += atom_list->nb_atoms; all_count += atom_list->nb_atoms;
...@@ -1301,8 +1311,16 @@ static void __cmd_lat(void) ...@@ -1301,8 +1311,16 @@ static void __cmd_lat(void)
} }
printf("-----------------------------------------------------------------------------------\n"); printf("-----------------------------------------------------------------------------------\n");
printf(" TOTAL: |%9.3f ms |%9Ld |\n", printf(" TOTAL: |%9.3f ms |%9Ld |",
(double)all_runtime/1e6, all_count); (double)all_runtime/1e6, all_count);
if (unordered_timestamps && nr_timestamps) {
printf(" INFO: %.2f%% unordered events.\n",
(double)unordered_timestamps/(double)nr_timestamps*100.0);
} else {
printf("\n");
}
printf("---------------------------------------------\n"); printf("---------------------------------------------\n");
} }
...@@ -1667,12 +1685,13 @@ static const char *record_args[] = { ...@@ -1667,12 +1685,13 @@ static const char *record_args[] = {
"-a", "-a",
"-R", "-R",
"-M", "-M",
"-g", "-f",
"-c", "1", "-c", "1",
"-e", "sched:sched_switch:r", "-e", "sched:sched_switch:r",
"-e", "sched:sched_stat_wait:r", "-e", "sched:sched_stat_wait:r",
"-e", "sched:sched_stat_sleep:r", "-e", "sched:sched_stat_sleep:r",
"-e", "sched:sched_stat_iowait:r", "-e", "sched:sched_stat_iowait:r",
"-e", "sched:sched_stat_runtime:r",
"-e", "sched:sched_process_exit:r", "-e", "sched:sched_process_exit:r",
"-e", "sched:sched_process_fork:r", "-e", "sched:sched_process_fork:r",
"-e", "sched:sched_wakeup:r", "-e", "sched:sched_wakeup:r",
......
...@@ -2722,8 +2722,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, ...@@ -2722,8 +2722,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
type = trace_parse_common_type(data); type = trace_parse_common_type(data);
event = trace_find_event(type); event = trace_find_event(type);
if (!event) if (!event) {
die("ug! no event found for type %d", type); printf("ug! no event found for type %d\n", type);
return;
}
pid = parse_common_pid(data); pid = parse_common_pid(data);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment