Commit 7630b3e2, authored by Wang Nan, committed by Arnaldo Carvalho de Melo

perf tools: Enable passing event to BPF object

A new syntax is added to the parser so that the user can access
predefined perf events in BPF objects.

After this patch, BPF programs for perf are finally able to utilize
bpf_perf_event_read() introduced in commit 35578d79 ("bpf: Implement
function bpf_perf_event_read() that get the selected hardware PMU
counter").

Test result:

  # cat test_bpf_map_2.c
  /************************ BEGIN **************************/
  #include <uapi/linux/bpf.h>
  #define SEC(NAME) __attribute__((section(NAME), used))
  struct bpf_map_def {
      unsigned int type;
      unsigned int key_size;
      unsigned int value_size;
      unsigned int max_entries;
  };
  static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
      (void *)BPF_FUNC_trace_printk;
  static int (*get_smp_processor_id)(void) =
      (void *)BPF_FUNC_get_smp_processor_id;
  static int (*perf_event_read)(struct bpf_map_def *, int) =
      (void *)BPF_FUNC_perf_event_read;

  struct bpf_map_def SEC("maps") pmu_map = {
      .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
      .key_size = sizeof(int),
      .value_size = sizeof(int),
      .max_entries = __NR_CPUS__,
  };
  SEC("func_write=sys_write")
  int func_write(void *ctx)
  {
      unsigned long long val;
      char fmt[] = "sys_write:        pmu=%llu\n";
      val = perf_event_read(&pmu_map, get_smp_processor_id());
      trace_printk(fmt, sizeof(fmt), val);
      return 0;
  }

  SEC("func_write_return=sys_write%return")
  int func_write_return(void *ctx)
  {
      unsigned long long val = 0;
      char fmt[] = "sys_write_return: pmu=%llu\n";
      val = perf_event_read(&pmu_map, get_smp_processor_id());
      trace_printk(fmt, sizeof(fmt), val);
      return 0;
  }
  char _license[] SEC("license") = "GPL";
  int _version SEC("version") = LINUX_VERSION_CODE;
  /************************* END ***************************/

Normal case:

  # echo "" > /sys/kernel/debug/tracing/trace
  # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls /
  [SNIP]
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
  # cat /sys/kernel/debug/tracing/trace | grep ls
                ls-17066 [000] d... 938449.863301: : sys_write:        pmu=1157327
                ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218
                ls-17066 [000] d... 938449.863349: : sys_write:        pmu=1241922
                ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445

Normal case (system wide):

  # echo "" > /sys/kernel/debug/tracing/trace
  # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ]

  # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20
  [SNIP]
  #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
  #              | |       |   ||||       |         |
             gmain-30828 [002] d... 2740551.068992: : sys_write:        pmu=84373
             gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696
             gmain-30828 [002] d... 2740551.068996: : sys_write:        pmu=100658
             gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572

Error case 1:

  # perf record -e './test_bpf_map_2.c' ls /
  [SNIP]
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.014 MB perf.data ]
  # cat /sys/kernel/debug/tracing/trace | grep ls
                ls-17115 [007] d... 2724279.665625: : sys_write:        pmu=18446744073709551614
                ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614
                ls-17115 [007] d... 2724279.665658: : sys_write:        pmu=18446744073709551614
                ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614

  (18446744073709551614 is 0xfffffffffffffffe (-2))

Error case 2:

  # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a
  event syntax error: '..ps:pmu_map.event=evt/'
                                    \___ Event not found for map setting

  Hint:	Valid config terms:
       	map:[<arraymap>].value=[value]
       	map:[<eventmap>].event=[event]
  [SNIP]

Error case 3:
  # ls /proc/2348/task/
  2348  2505  2506  2507  2508
  # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348
  ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing

Error case 4:
  # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls /
  ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit)

Error case 5:
  # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls
  ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map

Error case 6:
  # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls /
  event syntax error: '.._map.event=123/'
                                    \___ Incorrect value type for map
  [SNIP]
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Cody P Schafer <dev@codyps.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Kirill Smelkov <kirr@nexedi.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com
Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 8690a2a7
...@@ -742,6 +742,7 @@ int bpf__foreach_tev(struct bpf_object *obj, ...@@ -742,6 +742,7 @@ int bpf__foreach_tev(struct bpf_object *obj,
enum bpf_map_op_type { enum bpf_map_op_type {
BPF_MAP_OP_SET_VALUE, BPF_MAP_OP_SET_VALUE,
BPF_MAP_OP_SET_EVSEL,
}; };
enum bpf_map_key_type { enum bpf_map_key_type {
...@@ -754,6 +755,7 @@ struct bpf_map_op { ...@@ -754,6 +755,7 @@ struct bpf_map_op {
enum bpf_map_key_type key_type; enum bpf_map_key_type key_type;
union { union {
u64 value; u64 value;
struct perf_evsel *evsel;
} v; } v;
}; };
...@@ -838,6 +840,24 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) ...@@ -838,6 +840,24 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
return 0; return 0;
} }
/*
 * Create a new map operation and attach it to @map in one step.
 *
 * Returns the new operation on success. An error pointer produced by
 * bpf_map_op__new() is passed through unchanged; if bpf_map__add_op()
 * fails, the freshly created operation is deleted and the failure code
 * is returned wrapped in ERR_PTR().
 */
static struct bpf_map_op *
bpf_map__add_newop(struct bpf_map *map)
{
	struct bpf_map_op *new_op = bpf_map_op__new();
	int ret;

	if (IS_ERR(new_op))
		return new_op;

	ret = bpf_map__add_op(map, new_op);
	if (!ret)
		return new_op;

	/* Attaching failed: the operation is not referenced by the map. */
	bpf_map_op__delete(new_op);
	return ERR_PTR(ret);
}
static int static int
__bpf_map__config_value(struct bpf_map *map, __bpf_map__config_value(struct bpf_map *map,
struct parse_events_term *term) struct parse_events_term *term)
...@@ -876,16 +896,12 @@ __bpf_map__config_value(struct bpf_map *map, ...@@ -876,16 +896,12 @@ __bpf_map__config_value(struct bpf_map *map,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
} }
op = bpf_map_op__new(); op = bpf_map__add_newop(map);
if (IS_ERR(op)) if (IS_ERR(op))
return PTR_ERR(op); return PTR_ERR(op);
op->op_type = BPF_MAP_OP_SET_VALUE; op->op_type = BPF_MAP_OP_SET_VALUE;
op->v.value = term->val.num; op->v.value = term->val.num;
return 0;
err = bpf_map__add_op(map, op);
if (err)
bpf_map_op__delete(op);
return err;
} }
static int static int
...@@ -899,13 +915,75 @@ bpf_map__config_value(struct bpf_map *map, ...@@ -899,13 +915,75 @@ bpf_map__config_value(struct bpf_map *map,
} }
if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
pr_debug("ERROR: wrong value type\n"); pr_debug("ERROR: wrong value type for 'value'\n");
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
} }
return __bpf_map__config_value(map, term); return __bpf_map__config_value(map, term);
} }
static int
__bpf_map__config_event(struct bpf_map *map,
struct parse_events_term *term,
struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
struct bpf_map_def def;
struct bpf_map_op *op;
const char *map_name;
int err;
map_name = bpf_map__get_name(map);
evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
if (!evsel) {
pr_debug("Event (for '%s') '%s' doesn't exist\n",
map_name, term->val.str);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
err = bpf_map__get_def(map, &def);
if (err) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
return err;
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
op = bpf_map__add_newop(map);
if (IS_ERR(op))
return PTR_ERR(op);
op->op_type = BPF_MAP_OP_SET_EVSEL;
op->v.evsel = evsel;
return 0;
}
/*
 * Validate an "event" config term and hand it to __bpf_map__config_event().
 *
 * Returns -BPF_LOADER_ERRNO__OBJCONF_CONF when term->err_val is zero
 * (reported as "Config value not set"), and
 * -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE when the term's value is not a
 * string. Otherwise returns the result of __bpf_map__config_event().
 */
static int
bpf_map__config_event(struct bpf_map *map,
		      struct parse_events_term *term,
		      struct perf_evlist *evlist)
{
	if (term->err_val == 0) {
		pr_debug("Config value not set\n");
		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
	}

	/* An event is referred to by name, so only string values qualify. */
	if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
		return __bpf_map__config_event(map, term, evlist);

	pr_debug("ERROR: wrong value type for 'event'\n");
	return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
}
struct bpf_obj_config__map_func { struct bpf_obj_config__map_func {
const char *config_opt; const char *config_opt;
int (*config_func)(struct bpf_map *, struct parse_events_term *, int (*config_func)(struct bpf_map *, struct parse_events_term *,
...@@ -914,6 +992,7 @@ struct bpf_obj_config__map_func { ...@@ -914,6 +992,7 @@ struct bpf_obj_config__map_func {
struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
{"value", bpf_map__config_value}, {"value", bpf_map__config_value},
{"event", bpf_map__config_event},
}; };
static int static int
...@@ -1057,6 +1136,7 @@ bpf_map_config_foreach_key(struct bpf_map *map, ...@@ -1057,6 +1136,7 @@ bpf_map_config_foreach_key(struct bpf_map *map,
list_for_each_entry(op, &priv->ops_list, list) { list_for_each_entry(op, &priv->ops_list, list) {
switch (def.type) { switch (def.type) {
case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) { switch (op->key_type) {
case BPF_MAP_KEY_ALL: case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name, err = foreach_key_array_all(func, arg, name,
...@@ -1114,6 +1194,60 @@ apply_config_value_for_key(int map_fd, void *pkey, ...@@ -1114,6 +1194,60 @@ apply_config_value_for_key(int map_fd, void *pkey,
return err; return err;
} }
/*
 * Put one file descriptor of @evsel into a BPF map slot.
 *
 * @name:   map name, used only in debug messages.
 * @map_fd: file descriptor of the BPF map to update.
 * @pkey:   points to the unsigned int key of the slot; the same value is
 *          used as the first index into evsel->fd.
 * @evsel:  event whose descriptor is stored.
 *
 * The event is rejected unless all of the following hold:
 *   - evsel->fd is set,
 *   - each row of evsel->fd holds exactly one entry,
 *   - attr.inherit is off,
 *   - the event is PERF_TYPE_RAW, PERF_TYPE_HARDWARE, or
 *     PERF_TYPE_SOFTWARE with config PERF_COUNT_SW_BPF_OUTPUT,
 *   - the key is smaller than the number of rows in evsel->fd.
 *
 * Returns 0 on success, a negative BPF_LOADER_ERRNO__* code for the
 * rejections above, or, when bpf_map_update_elem() fails, -errno (its
 * raw return value if errno is 0).
 */
static int
apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
			   struct perf_evsel *evsel)
{
	struct xyarray *xy = evsel->fd;
	struct perf_event_attr *attr;
	unsigned int key, events, row_entries;
	bool check_pass;
	int *evt_fd;
	int err;

	if (!xy) {
		pr_debug("ERROR: evsel not ready for map %s\n", name);
		return -BPF_LOADER_ERRNO__INTERNAL;
	}

	/* Number of entries in one row of the fd array. */
	row_entries = xy->row_size / xy->entry_size;
	if (row_entries != 1) {
		pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
			 name);
		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
	}

	attr = &evsel->attr;
	if (attr->inherit) {
		pr_debug("ERROR: Can't put inherit event into map %s\n", name);
		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
	}

	check_pass = attr->type == PERF_TYPE_RAW ||
		     attr->type == PERF_TYPE_HARDWARE ||
		     (attr->type == PERF_TYPE_SOFTWARE &&
		      attr->config == PERF_COUNT_SW_BPF_OUTPUT);
	if (!check_pass) {
		pr_debug("ERROR: Event type is wrong for map %s\n", name);
		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
	}

	/* Number of rows in the fd array; the key selects one of them. */
	events = xy->entries / row_entries;
	key = *((unsigned int *)pkey);
	if (key >= events) {
		/* key is unsigned: %u keeps large keys from printing negative. */
		pr_debug("ERROR: there is no event %u for map %s\n",
			 key, name);
		return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
	}

	evt_fd = xyarray__entry(xy, key, 0);
	err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
	if (err && errno)
		err = -errno;
	return err;
}
static int static int
apply_obj_config_map_for_key(const char *name, int map_fd, apply_obj_config_map_for_key(const char *name, int map_fd,
struct bpf_map_def *pdef __maybe_unused, struct bpf_map_def *pdef __maybe_unused,
...@@ -1128,6 +1262,10 @@ apply_obj_config_map_for_key(const char *name, int map_fd, ...@@ -1128,6 +1262,10 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
pdef->value_size, pdef->value_size,
op->v.value); op->v.value);
break; break;
case BPF_MAP_OP_SET_EVSEL:
err = apply_config_evsel_for_key(name, map_fd, pkey,
op->v.evsel);
break;
default: default:
pr_debug("ERROR: unknown value type for '%s'\n", name); pr_debug("ERROR: unknown value type for '%s'\n", name);
err = -BPF_LOADER_ERRNO__INTERNAL; err = -BPF_LOADER_ERRNO__INTERNAL;
...@@ -1193,6 +1331,11 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = { ...@@ -1193,6 +1331,11 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting",
[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
}; };
static int static int
...@@ -1329,6 +1472,12 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, ...@@ -1329,6 +1472,12 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
{ {
bpf__strerror_head(err, buf, size); bpf__strerror_head(err, buf, size);
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
"Cannot set event to BPF map in multi-thread tracing");
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
"%s (Hint: use -i to turn off inherit)", emsg);
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
"Can only put raw, hardware and BPF output event into a BPF map");
bpf__strerror_end(buf, size); bpf__strerror_end(buf, size);
return 0; return 0;
} }
...@@ -33,6 +33,11 @@ enum bpf_loader_errno { ...@@ -33,6 +33,11 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */
BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */
BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */
BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */
BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */
__BPF_LOADER_ERRNO__END, __BPF_LOADER_ERRNO__END,
}; };
......
...@@ -1741,3 +1741,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, ...@@ -1741,3 +1741,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
tracking_evsel->tracking = true; tracking_evsel->tracking = true;
} }
/*
 * Find an event in @evlist by its name.
 *
 * Walks the list with evlist__for_each() and returns the first evsel
 * whose name is set and equals @str according to strcmp(). Events
 * without a name are skipped. Returns NULL when nothing matches.
 */
struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *pos;

	evlist__for_each(evlist, pos) {
		if (pos->name != NULL && strcmp(pos->name, str) == 0)
			return pos;
	}

	return NULL;
}
...@@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, ...@@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel); struct perf_evsel *tracking_evsel);
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
#endif /* __PERF_EVLIST_H */ #endif /* __PERF_EVLIST_H */
...@@ -697,14 +697,16 @@ parse_events_config_bpf(struct parse_events_evlist *data, ...@@ -697,14 +697,16 @@ parse_events_config_bpf(struct parse_events_evlist *data,
return -EINVAL; return -EINVAL;
} }
err = bpf__config_obj(obj, term, NULL, &error_pos); err = bpf__config_obj(obj, term, data->evlist, &error_pos);
if (err) { if (err) {
bpf__strerror_config_obj(obj, term, NULL, bpf__strerror_config_obj(obj, term, data->evlist,
&error_pos, err, errbuf, &error_pos, err, errbuf,
sizeof(errbuf)); sizeof(errbuf));
data->error->help = strdup( data->error->help = strdup(
"Hint:\tValid config term:\n" "Hint:\tValid config terms:\n"
" \tmap:[<arraymap>].value=[value]\n" " \tmap:[<arraymap>].value=[value]\n"
" \tmap:[<eventmap>].event=[event]\n"
"\n"
" \t(add -v to see detail)"); " \t(add -v to see detail)");
data->error->str = strdup(errbuf); data->error->str = strdup(errbuf);
if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
...@@ -1530,9 +1532,10 @@ int parse_events(struct perf_evlist *evlist, const char *str, ...@@ -1530,9 +1532,10 @@ int parse_events(struct perf_evlist *evlist, const char *str,
struct parse_events_error *err) struct parse_events_error *err)
{ {
struct parse_events_evlist data = { struct parse_events_evlist data = {
.list = LIST_HEAD_INIT(data.list), .list = LIST_HEAD_INIT(data.list),
.idx = evlist->nr_entries, .idx = evlist->nr_entries,
.error = err, .error = err,
.evlist = evlist,
}; };
int ret; int ret;
......
...@@ -99,6 +99,7 @@ struct parse_events_evlist { ...@@ -99,6 +99,7 @@ struct parse_events_evlist {
int idx; int idx;
int nr_groups; int nr_groups;
struct parse_events_error *error; struct parse_events_error *error;
struct perf_evlist *evlist;
}; };
struct parse_events_terms { struct parse_events_terms {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment