Commit aa55dfd3 authored by Andrii Nakryiko

Merge branch 'Parameterize task iterators.'

Kui-Feng Lee says:

====================

Allow creating an iterator that loops through resources of one task/thread.

Previously, people could only create iterators that loop through the
files, VMAs, and tasks of every task in the system, even when they were
interested in only the resources of a specific task or process.  By passing
the additional parameters, people can now create an iterator that goes
through all resources or only the resources of one task.

Major Changes:

 - Add new parameters to bpf_iter_link_info to indicate whether to go
   through all tasks or only a specific task.

 - Change the implementations of BPF iterators of vma, files, and
   tasks to allow going through only the resources of a specific task.

 - Provide the arguments of parameterized task iterators in
   bpf_link_info.

Differences from v10:

 - Check pid_alive() to avoid potential errors.

Differences from v9:

  - Fix the boundary check of computing page_shift.

  - Reword the reason for checking and returning the same task.

Differences from v8:

 - Fix uninitialized variable.

 - Avoid redundant work of getting task from pid.

 - Change format string to use %u instead of %d.

 - Use the value of page_shift to compute correct offset in
   bpf_iter_vm_offset.c.

Differences from v7:

 - Traverse the tasks of a process through the task_group linked list
   instead of traversing the whole namespace.

Differences from v6:

 - Add part 5 to make bpftool show the value of parameters.

 - Change the wording of show_fdinfo() to show pid or tid instead of
   always pid.

 - Simplify error handling and naming of test cases.

Differences from v5:

 - Use user-space tid/pid terminologies in bpf_iter_link_info and
   bpf_link_info.

 - Fix reference count

 - Merge all variants to one 'u32 pid' in internal structs.
   (bpf_iter_aux_info and bpf_iter_seq_task_common)

 - Compare the result of get_uprobe_offset() with that of the
   implementation based on the vma iterators.

 - Implement show_fdinfo.

Differences from v4:

 - Remove 'type' from bpf_iter_link_info and bpf_link_info.

v10: https://lore.kernel.org/all/20220831181039.2680134-1-kuifeng@fb.com/
v9: https://lore.kernel.org/bpf/20220829192317.486946-1-kuifeng@fb.com/
v8: https://lore.kernel.org/bpf/20220829192317.486946-1-kuifeng@fb.com/
v7: https://lore.kernel.org/bpf/20220826003712.2810158-1-kuifeng@fb.com/
v6: https://lore.kernel.org/bpf/20220819220927.3409575-1-kuifeng@fb.com/
v5: https://lore.kernel.org/bpf/20220811001654.1316689-1-kuifeng@fb.com/
v4: https://lore.kernel.org/bpf/20220809195429.1043220-1-kuifeng@fb.com/
v3: https://lore.kernel.org/bpf/20220809063501.667610-1-kuifeng@fb.com/
v2: https://lore.kernel.org/bpf/20220801232649.2306614-1-kuifeng@fb.com/
v1: https://lore.kernel.org/bpf/20220726051713.840431-1-kuifeng@fb.com/
====================
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents 87dbdc23 6bdb6d6b
......@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; }
/*
 * Selects which tasks a BPF task iterator visits.
 *
 * A task iterator may be given parameters that narrow it down to a
 * subset of tasks; this enum records which narrowing is in effect.
 *
 * BPF_TASK_ITER_ALL  - the default: walk the resources of every task.
 * BPF_TASK_ITER_TID  - walk the resources of a single task (tid).
 * BPF_TASK_ITER_TGID - walk the resources of every task belonging to
 *                      one process / task group.
 */
enum bpf_iter_task_type {
	BPF_TASK_ITER_ALL  = 0,
	BPF_TASK_ITER_TID  = 1,
	BPF_TASK_ITER_TGID = 2,
};
struct bpf_iter_aux_info {
/* for map_elem iter */
struct bpf_map *map;
......@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
struct cgroup *start; /* starting cgroup */
enum bpf_cgroup_iter_order order;
} cgroup;
struct {
enum bpf_iter_task_type type;
u32 pid;
} task;
};
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
......
......@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd;
__u64 cgroup_id;
} cgroup;
/* Parameters of task iterators. */
struct {
__u32 tid;
__u32 pid;
__u32 pid_fd;
} task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
......@@ -6259,6 +6265,10 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 order;
} cgroup;
struct {
__u32 tid;
__u32 pid;
} task;
};
} iter;
struct {
......
This diff is collapsed.
......@@ -106,6 +106,13 @@ static const char *cgroup_order_string(__u32 order)
}
}
/*
 * Tell whether an iterator target name denotes one of the task-based
 * iterators.
 *
 * @target_name: NUL-terminated iterator target name; must not be NULL.
 *
 * Returns true when @target_name is exactly "task", "task_file" or
 * "task_vma", and false for anything else.
 */
static bool is_iter_task_target(const char *target_name)
{
	static const char * const task_targets[] = {
		"task",
		"task_file",
		"task_vma",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(task_targets) / sizeof(task_targets[0]); idx++) {
		if (!strcmp(target_name, task_targets[idx]))
			return true;
	}

	return false;
}
static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
{
const char *target_name = u64_to_ptr(info->iter.target_name);
......@@ -114,6 +121,12 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
if (is_iter_map_target(target_name))
jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
else if (is_iter_task_target(target_name)) {
if (info->iter.task.tid)
jsonw_uint_field(wtr, "tid", info->iter.task.tid);
else if (info->iter.task.pid)
jsonw_uint_field(wtr, "pid", info->iter.task.pid);
}
if (is_iter_cgroup_target(target_name)) {
jsonw_lluint_field(wtr, "cgroup_id", info->iter.cgroup.cgroup_id);
......@@ -237,6 +250,12 @@ static void show_iter_plain(struct bpf_link_info *info)
if (is_iter_map_target(target_name))
printf("map_id %u ", info->iter.map.map_id);
else if (is_iter_task_target(target_name)) {
if (info->iter.task.tid)
printf("tid %u ", info->iter.task.tid);
else if (info->iter.task.pid)
printf("pid %u ", info->iter.task.pid);
}
if (is_iter_cgroup_target(target_name)) {
printf("cgroup_id %llu ", info->iter.cgroup.cgroup_id);
......
......@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd;
__u64 cgroup_id;
} cgroup;
/* Parameters of task iterators. */
struct {
__u32 tid;
__u32 pid;
__u32 pid_fd;
} task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
......@@ -6259,6 +6265,10 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 order;
} cgroup;
struct {
__u32 tid;
__u32 pid;
} task;
};
} iter;
struct {
......
......@@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
/* union with nested struct */
TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
"(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}",
"(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}",
{ .cgroup = { .order = 1, .cgroup_fd = 1, }});
/* struct skb with nested structs/unions; because type output is so
......
......@@ -6,6 +6,10 @@
char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
......@@ -18,6 +22,11 @@ int dump_task(struct bpf_iter__task *ctx)
return 0;
}
if (task->pid != tid)
num_unknown_tid++;
else
num_known_tid++;
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid\n");
......
......@@ -7,14 +7,16 @@ char _license[] SEC("license") = "GPL";
int count = 0;
int tgid = 0;
int last_tgid = 0;
int unique_tgid_count = 0;
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
__u32 fd = ctx->fd;
struct file *file = ctx->file;
__u32 fd = ctx->fd;
if (task == (void *)0 || file == (void *)0)
return 0;
......@@ -27,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
if (tgid == task->tgid && task->tgid != task->pid)
count++;
if (last_tgid != task->tgid) {
last_tgid = task->tgid;
unique_tgid_count++;
}
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
(long)file->f_op);
return 0;
......
......@@ -20,6 +20,8 @@ char _license[] SEC("license") = "GPL";
#define D_PATH_BUF_SIZE 1024
char d_path_buf[D_PATH_BUF_SIZE] = {};
__u32 pid = 0;
__u32 one_task = 0;
__u32 one_task_error = 0;
SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
{
......@@ -33,8 +35,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
return 0;
file = vma->vm_file;
if (task->tgid != pid)
if (task->tgid != pid) {
if (one_task)
one_task_error = 1;
return 0;
}
perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
/* License string placed in the "license" section. */
char _license[] SEC("license") = "GPL";
/*
 * Incremented by get_vma_offset() each time the visited task's tgid
 * differs from last_tgid.
 */
__u32 unique_tgid_cnt = 0;
/*
 * Address that get_vma_offset() looks for inside each visited VMA.
 * NOTE(review): presumably filled in from user space before the
 * iterator runs - confirm against the test driver.
 */
uintptr_t address = 0;
/*
 * Result written by get_vma_offset() when a VMA containing 'address'
 * is found.
 */
uintptr_t offset = 0;
/* tgid of the task seen by the most recent get_vma_offset() call. */
__u32 last_tgid = 0;
/*
 * Only tasks whose tgid equals this value have their VMAs examined.
 * NOTE(review): presumably set from user space - confirm.
 */
__u32 pid = 0;
/*
 * Left-shift applied to vma->vm_pgoff when computing 'offset'.
 * NOTE(review): presumably log2 of the page size, set from user
 * space - confirm.
 */
__u32 page_shift = 0;
/*
 * task_vma iterator program: find the VMA that contains 'address' in a
 * task whose tgid equals 'pid' and record the corresponding offset.
 *
 * For every call with a non-NULL task and VMA, unique_tgid_cnt is bumped
 * when the task's tgid differs from last_tgid, and last_tgid is then
 * updated.  When the task's tgid matches 'pid' and 'address' lies in
 * [vm_start, vm_end), 'offset' is set to
 *   address - vm_start + (vm_pgoff << page_shift)
 * and "OK\n" is emitted to the iterator's seq_file.
 *
 * Always returns 0.
 */
SEC("iter/task_vma")
int get_vma_offset(struct bpf_iter__task_vma *ctx)
{
	struct seq_file *out = ctx->meta->seq;
	struct task_struct *tsk = ctx->task;
	struct vm_area_struct *area = ctx->vma;

	/* Nothing to inspect without both a task and a VMA. */
	if (!tsk || !area)
		return 0;

	/* Track tgid transitions across consecutive invocations. */
	if (last_tgid != tsk->tgid)
		unique_tgid_cnt++;
	last_tgid = tsk->tgid;

	/* Only the requested process is of interest. */
	if (tsk->tgid != pid)
		return 0;

	/* Skip VMAs that do not cover the requested address. */
	if (address < area->vm_start || address >= area->vm_end)
		return 0;

	offset = address - area->vm_start + (area->vm_pgoff << page_shift);
	BPF_SEQ_PRINTF(out, "OK\n");

	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment