Commit b08d4d3b, authored by Yonghong Song, committed by Alexei Starovoitov

net: bpf: Add bpf_seq_afinfo in tcp_iter_state

A new field bpf_seq_afinfo is added to tcp_iter_state
to provide the afinfo for the bpf tcp iterator. There are two
reasons for doing this.

First, the current way to get afinfo from PDE_DATA
does not work for bpf iterator as its seq_file
inode does not conform to /proc/net/{tcp,tcp6}
inode structures. More specifically, anonymous
bpf iterator will use an anonymous inode which
is shared in the system and we cannot change inode
private data structure at all.

Second, the bpf iterator for tcp/tcp6 wants to
traverse all tcp and tcp6 sockets in one pass,
and the bpf program can control whether it wants
to skip one sk_family or not. Having a different
afinfo with family AF_UNSPEC makes this easier
to understand in the code.

This patch does not change /proc/net/{tcp,tcp6} behavior
as the bpf_seq_afinfo will be NULL for these two proc files.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230804.3987829-1-yhs@fb.com
parent f9bcf968
...@@ -1935,6 +1935,7 @@ struct tcp_iter_state { ...@@ -1935,6 +1935,7 @@ struct tcp_iter_state {
struct seq_net_private p; struct seq_net_private p;
enum tcp_seq_states state; enum tcp_seq_states state;
struct sock *syn_wait_sk; struct sock *syn_wait_sk;
struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num; int bucket, offset, sbucket, num;
loff_t last_pos; loff_t last_pos;
}; };
......
...@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); ...@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
*/ */
static void *listening_get_next(struct seq_file *seq, void *cur) static void *listening_get_next(struct seq_file *seq, void *cur)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb; struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
struct sock *sk = cur; struct sock *sk = cur;
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
if (!sk) { if (!sk) {
get_head: get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket]; ilb = &tcp_hashinfo.listening_hash[st->bucket];
...@@ -2235,7 +2240,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur) ...@@ -2235,7 +2240,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
sk_nulls_for_each_from(sk, node) { sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
continue; continue;
if (sk->sk_family == afinfo->family) if (afinfo->family == AF_UNSPEC ||
sk->sk_family == afinfo->family)
return sk; return sk;
} }
spin_unlock(&ilb->lock); spin_unlock(&ilb->lock);
...@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) ...@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/ */
static void *established_get_first(struct seq_file *seq) static void *established_get_first(struct seq_file *seq)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
void *rc = NULL; void *rc = NULL;
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
st->offset = 0; st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk; struct sock *sk;
...@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock); spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != afinfo->family || if ((afinfo->family != AF_UNSPEC &&
sk->sk_family != afinfo->family) ||
!net_eq(sock_net(sk), net)) { !net_eq(sock_net(sk), net)) {
continue; continue;
} }
...@@ -2304,19 +2316,25 @@ static void *established_get_first(struct seq_file *seq) ...@@ -2304,19 +2316,25 @@ static void *established_get_first(struct seq_file *seq)
static void *established_get_next(struct seq_file *seq, void *cur) static void *established_get_next(struct seq_file *seq, void *cur)
{ {
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur; struct sock *sk = cur;
struct hlist_nulls_node *node; struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private; struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq); struct net *net = seq_file_net(seq);
if (st->bpf_seq_afinfo)
afinfo = st->bpf_seq_afinfo;
else
afinfo = PDE_DATA(file_inode(seq->file));
++st->num; ++st->num;
++st->offset; ++st->offset;
sk = sk_nulls_next(sk); sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) { sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == afinfo->family && if ((afinfo->family == AF_UNSPEC ||
sk->sk_family == afinfo->family) &&
net_eq(sock_net(sk), net)) net_eq(sock_net(sk), net))
return sk; return sk;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment