Commit 1cd62c21 authored by Kuniyuki Iwashima, committed by Daniel Borkmann

tcp: Add reuseport_migrate_sock() to select a new listener.

reuseport_migrate_sock() does the same check done in
reuseport_listen_stop_sock(). If the reuseport group is capable of
migration, reuseport_migrate_sock() selects a new listener by the child
socket hash and increments the listener's sk_refcnt beforehand. Thus, if we
fail in the migration, we have to decrement it later.

We will support migration by eBPF in later commits.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-5-kuniyu@amazon.co.jp
parent 333bb73f
...@@ -37,6 +37,9 @@ extern struct sock *reuseport_select_sock(struct sock *sk, ...@@ -37,6 +37,9 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash, u32 hash,
struct sk_buff *skb, struct sk_buff *skb,
int hdr_len); int hdr_len);
struct sock *reuseport_migrate_sock(struct sock *sk,
struct sock *migrating_sk,
struct sk_buff *skb);
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
extern int reuseport_detach_prog(struct sock *sk); extern int reuseport_detach_prog(struct sock *sk);
......
...@@ -44,7 +44,7 @@ static void __reuseport_add_sock(struct sock *sk, ...@@ -44,7 +44,7 @@ static void __reuseport_add_sock(struct sock *sk,
struct sock_reuseport *reuse) struct sock_reuseport *reuse)
{ {
reuse->socks[reuse->num_socks] = sk; reuse->socks[reuse->num_socks] = sk;
/* paired with smp_rmb() in reuseport_select_sock() */ /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
smp_wmb(); smp_wmb();
reuse->num_socks++; reuse->num_socks++;
} }
...@@ -434,6 +434,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks, ...@@ -434,6 +434,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
return reuse->socks[index]; return reuse->socks[index];
} }
/* Pick a non-ESTABLISHED socket from the group by scanning circularly,
 * starting at the hash-derived slot.  Returns NULL if every socket in
 * the group is ESTABLISHED (i.e. no listener is available).
 */
static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
						  u32 hash, u16 num_socks)
{
	int start, pos;

	start = reciprocal_scale(hash, num_socks);
	pos = start;

	for (;;) {
		if (reuse->socks[pos]->sk_state != TCP_ESTABLISHED)
			return reuse->socks[pos];

		/* advance with wrap-around; give up after one full lap */
		if (++pos >= num_socks)
			pos = 0;
		if (pos == start)
			return NULL;
	}
}
/** /**
* reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
* @sk: First socket in the group. * @sk: First socket in the group.
...@@ -477,19 +494,8 @@ struct sock *reuseport_select_sock(struct sock *sk, ...@@ -477,19 +494,8 @@ struct sock *reuseport_select_sock(struct sock *sk,
select_by_hash: select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */ /* no bpf or invalid bpf result: fall back to hash usage */
if (!sk2) { if (!sk2)
int i, j; sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
i = j = reciprocal_scale(hash, socks);
while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
i++;
if (i >= socks)
i = 0;
if (i == j)
goto out;
}
sk2 = reuse->socks[i];
}
} }
out: out:
...@@ -498,6 +504,50 @@ struct sock *reuseport_select_sock(struct sock *sk, ...@@ -498,6 +504,50 @@ struct sock *reuseport_select_sock(struct sock *sk,
} }
EXPORT_SYMBOL(reuseport_select_sock); EXPORT_SYMBOL(reuseport_select_sock);
/**
 * reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
 * @sk: close()ed or shutdown()ed socket in the group.
 * @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
 *   NEW_SYN_RECV request socket during 3WHS.
 * @skb: skb to run through BPF filter.
 * Returns a socket (with sk_refcnt +1) that should accept the child socket
 * (or NULL on error).
 */
struct sock *reuseport_migrate_sock(struct sock *sk,
				    struct sock *migrating_sk,
				    struct sk_buff *skb)
{
	struct sock_reuseport *reuse;
	struct sock *nsk = NULL;
	u16 socks;
	u32 hash;

	rcu_read_lock();

	reuse = rcu_dereference(sk->sk_reuseport_cb);
	if (!reuse)
		goto out;

	socks = READ_ONCE(reuse->num_socks);
	if (unlikely(!socks))
		goto out;

	/* paired with smp_wmb() in __reuseport_add_sock(): make sure the
	 * socks[] entries published before num_socks are visible here.
	 */
	smp_rmb();

	hash = migrating_sk->sk_hash;

	/* READ_ONCE(): the sysctl can be toggled concurrently from
	 * userspace; annotate the lockless read to avoid a data race.
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
		nsk = reuseport_select_sock_by_hash(reuse, hash, socks);

	/* The selected listener may be concurrently close()d; only hand it
	 * back if we can still take a reference on it.
	 */
	if (nsk && unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
		nsk = NULL;

out:
	rcu_read_unlock();

	return nsk;
}
EXPORT_SYMBOL(reuseport_migrate_sock);
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{ {
struct sock_reuseport *reuse; struct sock_reuseport *reuse;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment