Commit d7e7a451 authored by Stephane Eranian, committed by Arnaldo Carvalho de Melo

perf stat: Add per processor socket count aggregation

This patch adds per-processor socket count aggregation for system-wide
mode measurements. This is a useful mode to detect imbalance between
sockets.

To enable this mode, use --aggr-socket in addition
to -a (system-wide).

The output includes the socket number and the number of online
processors on that socket. This is useful to gauge the amount of
aggregation.

 # ./perf stat -I 1000 -a --aggr-socket -e cycles sleep 2
 #           time socket cpus             counts events
      1.000097680 S0        4          5,788,785 cycles
      2.000379943 S0        4         27,361,546 cycles
      2.001167808 S0        4            818,275 cycles
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360161962-9675-3-git-send-email-eranian@google.com
[ committer note: Added missing man page entry based on above comments ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 5ac59a8a
...@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m ...@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
-I msecs:: -I msecs::
--interval-print msecs:: --interval-print msecs::
print count deltas every N milliseconds (minimum: 100ms) Print count deltas every N milliseconds (minimum: 100ms)
example: perf stat -I 1000 -e cycles -a sleep 5 example: perf stat -I 1000 -e cycles -a sleep 5
--aggr-socket::
Aggregate counts per processor socket for system-wide mode measurements. This
is a useful mode to detect imbalance between sockets. To enable this mode,
use --aggr-socket in addition to -a (system-wide). The output includes the
socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation.
EXAMPLES EXAMPLES
-------- --------
......
...@@ -68,6 +68,7 @@ ...@@ -68,6 +68,7 @@
static void print_stat(int argc, const char **argv); static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix); static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix); static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix);
static struct perf_evlist *evsel_list; static struct perf_evlist *evsel_list;
...@@ -79,6 +80,7 @@ static int run_count = 1; ...@@ -79,6 +80,7 @@ static int run_count = 1;
static bool no_inherit = false; static bool no_inherit = false;
static bool scale = true; static bool scale = true;
static bool no_aggr = false; static bool no_aggr = false;
static bool aggr_socket = false;
static pid_t child_pid = -1; static pid_t child_pid = -1;
static bool null_run = false; static bool null_run = false;
static int detailed_run = 0; static int detailed_run = 0;
...@@ -93,6 +95,7 @@ static const char *post_cmd = NULL; ...@@ -93,6 +95,7 @@ static const char *post_cmd = NULL;
static bool sync_run = false; static bool sync_run = false;
static unsigned int interval = 0; static unsigned int interval = 0;
static struct timespec ref_time; static struct timespec ref_time;
static struct cpu_map *sock_map;
static volatile int done = 0; static volatile int done = 0;
...@@ -312,7 +315,9 @@ static void print_interval(void) ...@@ -312,7 +315,9 @@ static void print_interval(void)
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
if (num_print_interval == 0 && !csv_output) { if (num_print_interval == 0 && !csv_output) {
if (no_aggr) if (aggr_socket)
fprintf(output, "# time socket cpus counts events\n");
else if (no_aggr)
fprintf(output, "# time CPU counts events\n"); fprintf(output, "# time CPU counts events\n");
else else
fprintf(output, "# time counts events\n"); fprintf(output, "# time counts events\n");
...@@ -321,7 +326,9 @@ static void print_interval(void) ...@@ -321,7 +326,9 @@ static void print_interval(void)
if (++num_print_interval == 25) if (++num_print_interval == 25)
num_print_interval = 0; num_print_interval = 0;
if (no_aggr) { if (aggr_socket)
print_aggr_socket(prefix);
else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node) list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, prefix); print_counter(counter, prefix);
} else { } else {
...@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) ...@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
ts.tv_nsec = 0; ts.tv_nsec = 0;
} }
if (aggr_socket
&& cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
perror("cannot build socket map");
return -1;
}
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes"); perror("failed to create pipes");
return -1; return -1;
...@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg) ...@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
} }
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{ {
double msecs = avg / 1e6; double msecs = avg / 1e6;
char cpustr[16] = { '\0', }; char cpustr[16] = { '\0', };
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
if (no_aggr) if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s", sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4, csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep); perf_evsel__cpus(evsel)->map[cpu], csv_sep);
...@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu, ...@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
fprintf(output, " of all LL-cache hits "); fprintf(output, " of all LL-cache hits ");
} }
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{ {
double total, ratio = 0.0; double total, ratio = 0.0;
char cpustr[16] = { '\0', }; char cpustr[16] = { '\0', };
...@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ...@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
else else
fmt = "%s%18.0f%s%-25s"; fmt = "%s%18.0f%s%-25s";
if (no_aggr) if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s", sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4, csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep); perf_evsel__cpus(evsel)->map[cpu], csv_sep);
...@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ...@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
} }
} }
/*
 * Print the results of every counter aggregated per processor socket.
 *
 * For each socket in sock_map and each counter in evsel_list, the per-CPU
 * val/ena/run values of the CPUs that map to that socket are summed and one
 * output line is emitted containing the socket id, the number of CPUs that
 * contributed, and the aggregated count.
 *
 * @prefix: optional string written at the start of every line (the interval
 *          timestamp when called from print_interval()); NULL for none.
 *
 * Does nothing when sock_map has not been built.
 */
static void print_aggr_socket(char *prefix)
{
	struct perf_evsel *counter;
	u64 ena, run, val;
	int cpu, s, s2, sock, nr;

	if (!sock_map)
		return;

	for (s = 0; s < sock_map->nr; s++) {
		/* 's' is only an index into sock_map; 'sock' is the real socket id */
		sock = cpu_map__socket(sock_map, s);

		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			nr = 0;

			/*
			 * NOTE(review): the loop bound comes from the counter's
			 * own CPU count while the socket lookup uses
			 * evsel_list->cpus; confirm both maps are indexed
			 * identically for counters with a private cpu map.
			 */
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
				if (s2 != sock)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}

			if (prefix)
				fprintf(output, "%s", prefix);

			/* Counter never ran on this socket: print a placeholder */
			if (run == 0 || ena == 0) {
				/*
				 * Label the line with the socket id, not the
				 * map index, so it matches the label used by
				 * nsec_printout()/abs_printout() below.
				 */
				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
					csv_output ? 0 : -5,
					sock,
					csv_sep,
					csv_output ? 0 : 4,
					nr,
					csv_sep,
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(sock, nr, counter, val);
			else
				abs_printout(sock, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				/* Show the running/enabled ratio when they differ */
				if (run != ena)
					fprintf(output, " (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}
/* /*
* Print out the results of a single counter: * Print out the results of a single counter:
* aggregated counts in system-wide mode * aggregated counts in system-wide mode
...@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) ...@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
} }
if (nsec_counter(counter)) if (nsec_counter(counter))
nsec_printout(-1, counter, avg); nsec_printout(-1, 0, counter, avg);
else else
abs_printout(-1, counter, avg); abs_printout(-1, 0, counter, avg);
print_noise(counter, avg); print_noise(counter, avg);
...@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) ...@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
} }
if (nsec_counter(counter)) if (nsec_counter(counter))
nsec_printout(cpu, counter, val); nsec_printout(cpu, 0, counter, val);
else else
abs_printout(cpu, counter, val); abs_printout(cpu, 0, counter, val);
if (!csv_output) { if (!csv_output) {
print_noise(counter, 1.0); print_noise(counter, 1.0);
...@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv) ...@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv)
fprintf(output, ":\n\n"); fprintf(output, ":\n\n");
} }
if (no_aggr) { if (aggr_socket)
print_aggr_socket(NULL);
else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node) list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, NULL); print_counter(counter, NULL);
} else { } else {
...@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run after to the measured command"), "command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &interval, OPT_UINTEGER('I', "interval-print", &interval,
"print counts at regular interval in ms (>= 100)"), "print counts at regular interval in ms (>= 100)"),
OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
OPT_END() OPT_END()
}; };
const char * const stat_usage[] = { const char * const stat_usage[] = {
...@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) ...@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
usage_with_options(stat_usage, options); usage_with_options(stat_usage, options);
} }
if (aggr_socket) {
if (!perf_target__has_cpu(&target)) {
fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
usage_with_options(stat_usage, options);
}
no_aggr = true;
}
if (add_default_attributes()) if (add_default_attributes())
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment