Commit 25f69c69 authored by Changbin Du, committed by Arnaldo Carvalho de Melo

perf stat: Fix counting when initial delay configured

When counters are created with an initial delay configured, the
enable_on_exec field is not set, so the counters must be enabled manually
later. The problem is that when a workload is specified, target__none() is
true, so enable_counters() skips the manual enabling; it also needs to
check stat_config.initial_delay.
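
To see why the event ends up <not counted>, here is a condensed,
standalone sketch of the pre-fix logic. The two conditions are taken from
the lines this patch removes; the wrapper function and main() are
hypothetical scaffolding for the demo:

  #include <stdbool.h>
  #include <stdio.h>

  /* "none" stands for target__none(&target), true when perf forks the
   * workload itself; "delay" stands for stat_config.initial_delay. */
  static bool counter_gets_enabled(bool none, int delay)
  {
          /* create_perf_stat_counter(): set enable_on_exec? */
          bool enable_on_exec = none && !delay;
          /* enable_counters() (also reached after the delay expires) */
          bool enabled_manually = !none;

          return enable_on_exec || enabled_manually;
  }

  int main(void)
  {
          /* forked workload with -D 100: neither path fires */
          printf("%s\n", counter_gets_enabled(true, 100)
                         ? "counted" : "<not counted>");
          return 0;
  }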

This change adds a new field 'initial_delay' to struct target, which can
be shared by other subcommands, and defines target__enable_on_exec(),
which returns whether enable_on_exec should be set in the normal case.
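
As a quick check of the new helper's semantics, a minimal standalone demo
(struct target is reduced to just the fields the helper reads; the pid and
cpu_list names mirror the real struct, the rest is demo scaffolding):

  #include <stdbool.h>
  #include <stdio.h>

  struct target {
          const char *pid;        /* attach to an existing task */
          const char *cpu_list;   /* attach to cpus */
          int initial_delay;      /* -D <ms> */
  };

  static bool target__none(struct target *t)
  {
          /* no tracee given: perf forks the workload itself */
          return !t->pid && !t->cpu_list;
  }

  /* the helper introduced by this patch */
  static bool target__enable_on_exec(struct target *t)
  {
          return target__none(t) && !t->initial_delay;
  }

  int main(void)
  {
          struct target fork_no_delay = { 0 };
          struct target fork_delayed  = { .initial_delay = 100 };
          struct target attach_task   = { .pid = "1234" };

          printf("fork, no delay: %d\n", target__enable_on_exec(&fork_no_delay)); /* 1 */
          printf("fork, -D 100  : %d\n", target__enable_on_exec(&fork_delayed));  /* 0 */
          printf("attach to task: %d\n", target__enable_on_exec(&attach_task));   /* 0 */
          return 0;
  }

In the last two cases enable_on_exec stays clear and the events are
enabled manually instead: after usleep() in __run_perf_stat() for the
delay case, and via evlist__enable() in enable_counters() when attaching.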

Before this fix, the event is not counted:

  $ ./perf stat -e instructions -D 100 sleep 2
  Events disabled
  Events enabled

   Performance counter stats for 'sleep 2':

       <not counted>      instructions

         1.901661124 seconds time elapsed

         0.001602000 seconds user
         0.000000000 seconds sys

After the fix, it works:

  $ ./perf stat -e instructions -D 100 sleep 2
  Events disabled
  Events enabled

   Performance counter stats for 'sleep 2':

             404,214      instructions

         1.901743475 seconds time elapsed

         0.001617000 seconds user
         0.000000000 seconds sys

Fixes: c587e77e ("perf stat: Do not delay the workload with --delay")
Signed-off-by: Changbin Du <changbin.du@huawei.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hui Wang <hw.huiwang@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230302031146.2801588-2-changbin.du@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent a98c0710
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -539,12 +539,7 @@ static int enable_counters(void)
                         return err;
         }
 
-        /*
-         * We need to enable counters only if:
-         * - we don't have tracee (attaching to task or cpu)
-         * - we have initial delay configured
-         */
-        if (!target__none(&target)) {
+        if (!target__enable_on_exec(&target)) {
                 if (!all_counters_use_bpf)
                         evlist__enable(evsel_list);
         }
@@ -914,7 +909,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
                         return err;
         }
 
-        if (stat_config.initial_delay) {
+        if (target.initial_delay) {
                 pr_info(EVLIST_DISABLED_MSG);
         } else {
                 err = enable_counters();
@@ -926,8 +921,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
         if (forks)
                 evlist__start_workload(evsel_list);
 
-        if (stat_config.initial_delay > 0) {
-                usleep(stat_config.initial_delay * USEC_PER_MSEC);
+        if (target.initial_delay > 0) {
+                usleep(target.initial_delay * USEC_PER_MSEC);
                 err = enable_counters();
                 if (err)
                         return -1;
@@ -1248,7 +1243,7 @@ static struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
         OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
                     "aggregate counts per numa node", AGGR_NODE),
-        OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+        OPT_INTEGER('D', "delay", &target.initial_delay,
                     "ms to wait before starting measurement after program start (-1: start with events disabled)"),
         OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
                        "Only print computed metrics. No raw values", enable_metric_only),

--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -842,11 +842,7 @@ int create_perf_stat_counter(struct evsel *evsel,
         if (evsel__is_group_leader(evsel)) {
                 attr->disabled = 1;
 
-                /*
-                 * In case of initial_delay we enable tracee
-                 * events manually.
-                 */
-                if (target__none(target) && !config->initial_delay)
+                if (target__enable_on_exec(target))
                         attr->enable_on_exec = 1;
         }

--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -166,7 +166,6 @@ struct perf_stat_config {
         FILE                    *output;
         unsigned int            interval;
         unsigned int            timeout;
-        int                     initial_delay;
         unsigned int            unit_width;
         unsigned int            metric_only_len;
         int                     times;

--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -18,6 +18,7 @@ struct target {
         bool         per_thread;
         bool         use_bpf;
         bool         hybrid;
+        int          initial_delay;
         const char   *attr_map;
 };
@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target)
         return !target__has_task(target) && !target__has_cpu(target);
 }
 
+static inline bool target__enable_on_exec(struct target *target)
+{
+        /*
+         * Normally enable_on_exec should be set if:
+         * 1) The tracee process is forked (not attaching to existed task or cpu).
+         * 2) And initial_delay is not configured.
+         * Otherwise, we enable tracee events manually.
+         */
+        return target__none(target) && !target->initial_delay;
+}
+
 static inline bool target__has_per_thread(struct target *target)
 {
         return target->system_wide && target->per_thread;