Commit f71c6143 authored by Joe Stringer, committed by Alexei Starovoitov

bpf: Support sk lookup in netns with id 0

David Ahern and Nicolas Dichtel report that the handling of the netns id
0 is incorrect for the BPF socket lookup helpers: rather than finding
the netns with id 0, it is resolving to the current netns. This renders
the netns_id 0 inaccessible.

To fix this, adjust the API for the netns to treat all negative s32
values as a lookup in the current netns (including u64 values which when
truncated to s32 become negative), while any values with a non-negative
value in the signed 32-bit integer space would result in a lookup for a
socket in the netns corresponding to that id. As before, if the netns
with that ID does not exist, no socket will be found. Any netns outside
of these ranges will fail to find a corresponding socket, as those
values are reserved for future usage.
Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Joey Pabalinas <joeypabalinas@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent b7df9ada
...@@ -2170,7 +2170,7 @@ union bpf_attr { ...@@ -2170,7 +2170,7 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description * Description
* Look for TCP socket matching *tuple*, optionally in a child * Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked, * network namespace *netns*. The return value must be checked,
...@@ -2187,12 +2187,14 @@ union bpf_attr { ...@@ -2187,12 +2187,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**) * **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket. * Look for an IPv6 socket.
* *
* If the *netns* is zero, then the socket lookup table in the * If the *netns* is a negative signed 32-bit integer, then the
* netns associated with the *ctx* will be used. For the TC hooks, * socket lookup table in the netns associated with the *ctx* will
* this in the netns of the device in the skb. For socket hooks, * be used. For the TC hooks, this is the netns of the device
* this in the netns of the socket. If *netns* is non-zero, then * in the skb. For socket hooks, this is the netns of the socket.
* it specifies the ID of the netns relative to the netns * If *netns* is any other signed 32-bit value greater than or
* associated with the *ctx*. * equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
* *
* All values for *flags* are reserved for future usage, and must * All values for *flags* are reserved for future usage, and must
* be left at zero. * be left at zero.
...@@ -2202,7 +2204,7 @@ union bpf_attr { ...@@ -2202,7 +2204,7 @@ union bpf_attr {
* Return * Return
* Pointer to *struct bpf_sock*, or NULL in case of failure. * Pointer to *struct bpf_sock*, or NULL in case of failure.
* *
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description * Description
* Look for UDP socket matching *tuple*, optionally in a child * Look for UDP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked, * network namespace *netns*. The return value must be checked,
...@@ -2219,12 +2221,14 @@ union bpf_attr { ...@@ -2219,12 +2221,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**) * **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket. * Look for an IPv6 socket.
* *
* If the *netns* is zero, then the socket lookup table in the * If the *netns* is a negative signed 32-bit integer, then the
* netns associated with the *ctx* will be used. For the TC hooks, * socket lookup table in the netns associated with the *ctx* will
* this in the netns of the device in the skb. For socket hooks, * be used. For the TC hooks, this is the netns of the device
* this in the netns of the socket. If *netns* is non-zero, then * in the skb. For socket hooks, this is the netns of the socket.
* it specifies the ID of the netns relative to the netns * If *netns* is any other signed 32-bit value greater than or
* associated with the *ctx*. * equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
* *
* All values for *flags* are reserved for future usage, and must * All values for *flags* are reserved for future usage, and must
* be left at zero. * be left at zero.
...@@ -2405,6 +2409,9 @@ enum bpf_func_id { ...@@ -2405,6 +2409,9 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */ /* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) #define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
/* Mode for BPF_FUNC_skb_adjust_room helper. */ /* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode { enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET, BPF_ADJ_ROOM_NET,
......
...@@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ...@@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *net; struct net *net;
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) if (unlikely(family == AF_UNSPEC || flags ||
!((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out; goto out;
if (skb->dev) if (skb->dev)
caller_net = dev_net(skb->dev); caller_net = dev_net(skb->dev);
else else
caller_net = sock_net(skb->sk); caller_net = sock_net(skb->sk);
if (netns_id) { if ((s32)netns_id < 0) {
net = caller_net;
sk = sk_lookup(net, tuple, skb, family, proto);
} else {
net = get_net_ns_by_id(caller_net, netns_id); net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net)) if (unlikely(!net))
goto out; goto out;
sk = sk_lookup(net, tuple, skb, family, proto); sk = sk_lookup(net, tuple, skb, family, proto);
put_net(net); put_net(net);
} else {
net = caller_net;
sk = sk_lookup(net, tuple, skb, family, proto);
} }
if (sk) if (sk)
......
...@@ -2170,7 +2170,7 @@ union bpf_attr { ...@@ -2170,7 +2170,7 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description * Description
* Look for TCP socket matching *tuple*, optionally in a child * Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked, * network namespace *netns*. The return value must be checked,
...@@ -2187,12 +2187,14 @@ union bpf_attr { ...@@ -2187,12 +2187,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**) * **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket. * Look for an IPv6 socket.
* *
* If the *netns* is zero, then the socket lookup table in the * If the *netns* is a negative signed 32-bit integer, then the
* netns associated with the *ctx* will be used. For the TC hooks, * socket lookup table in the netns associated with the *ctx* will
* this in the netns of the device in the skb. For socket hooks, * be used. For the TC hooks, this is the netns of the device
* this in the netns of the socket. If *netns* is non-zero, then * in the skb. For socket hooks, this is the netns of the socket.
* it specifies the ID of the netns relative to the netns * If *netns* is any other signed 32-bit value greater than or
* associated with the *ctx*. * equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
* *
* All values for *flags* are reserved for future usage, and must * All values for *flags* are reserved for future usage, and must
* be left at zero. * be left at zero.
...@@ -2201,8 +2203,10 @@ union bpf_attr { ...@@ -2201,8 +2203,10 @@ union bpf_attr {
* **CONFIG_NET** configuration option. * **CONFIG_NET** configuration option.
* Return * Return
* Pointer to *struct bpf_sock*, or NULL in case of failure. * Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
* *
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description * Description
* Look for UDP socket matching *tuple*, optionally in a child * Look for UDP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked, * network namespace *netns*. The return value must be checked,
...@@ -2219,12 +2223,14 @@ union bpf_attr { ...@@ -2219,12 +2223,14 @@ union bpf_attr {
* **sizeof**\ (*tuple*\ **->ipv6**) * **sizeof**\ (*tuple*\ **->ipv6**)
* Look for an IPv6 socket. * Look for an IPv6 socket.
* *
* If the *netns* is zero, then the socket lookup table in the * If the *netns* is a negative signed 32-bit integer, then the
* netns associated with the *ctx* will be used. For the TC hooks, * socket lookup table in the netns associated with the *ctx* will
* this in the netns of the device in the skb. For socket hooks, * be used. For the TC hooks, this is the netns of the device
* this in the netns of the socket. If *netns* is non-zero, then * in the skb. For socket hooks, this is the netns of the socket.
* it specifies the ID of the netns relative to the netns * If *netns* is any other signed 32-bit value greater than or
* associated with the *ctx*. * equal to zero then it specifies the ID of the netns relative to
* the netns associated with the *ctx*. *netns* values beyond the
* range of 32-bit integers are reserved for future use.
* *
* All values for *flags* are reserved for future usage, and must * All values for *flags* are reserved for future usage, and must
* be left at zero. * be left at zero.
...@@ -2233,6 +2239,8 @@ union bpf_attr { ...@@ -2233,6 +2239,8 @@ union bpf_attr {
* **CONFIG_NET** configuration option. * **CONFIG_NET** configuration option.
* Return * Return
* Pointer to *struct bpf_sock*, or NULL in case of failure. * Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, *struct bpf_sock*
* return is from reuse->socks[] using hash of the packet.
* *
* int bpf_sk_release(struct bpf_sock *sk) * int bpf_sk_release(struct bpf_sock *sk)
* Description * Description
...@@ -2405,6 +2413,9 @@ enum bpf_func_id { ...@@ -2405,6 +2413,9 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */ /* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) #define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
/* Mode for BPF_FUNC_skb_adjust_room helper. */ /* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode { enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET, BPF_ADJ_ROOM_NET,
......
...@@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = ...@@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
(void *) BPF_FUNC_skb_ancestor_cgroup_id; (void *) BPF_FUNC_skb_ancestor_cgroup_id;
static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
struct bpf_sock_tuple *tuple, struct bpf_sock_tuple *tuple,
int size, unsigned int netns_id, int size, unsigned long long netns_id,
unsigned long long flags) = unsigned long long flags) =
(void *) BPF_FUNC_sk_lookup_tcp; (void *) BPF_FUNC_sk_lookup_tcp;
static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
struct bpf_sock_tuple *tuple, struct bpf_sock_tuple *tuple,
int size, unsigned int netns_id, int size, unsigned long long netns_id,
unsigned long long flags) = unsigned long long flags) =
(void *) BPF_FUNC_sk_lookup_udp; (void *) BPF_FUNC_sk_lookup_udp;
static int (*bpf_sk_release)(struct bpf_sock *sk) = static int (*bpf_sk_release)(struct bpf_sock *sk) =
......
...@@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) ...@@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
return TC_ACT_SHOT; return TC_ACT_SHOT;
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
if (sk) if (sk)
bpf_sk_release(sk); bpf_sk_release(sk);
return sk ? TC_ACT_OK : TC_ACT_UNSPEC; return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
...@@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) ...@@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {}; struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk; struct bpf_sock *sk;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) if (sk)
bpf_sk_release(sk); bpf_sk_release(sk);
return 0; return 0;
...@@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) ...@@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
struct bpf_sock *sk; struct bpf_sock *sk;
__u32 family = 0; __u32 family = 0;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) { if (sk) {
bpf_sk_release(sk); bpf_sk_release(sk);
family = sk->family; family = sk->family;
...@@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) ...@@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
struct bpf_sock *sk; struct bpf_sock *sk;
__u32 family; __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) { if (sk) {
sk += 1; sk += 1;
bpf_sk_release(sk); bpf_sk_release(sk);
...@@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) ...@@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
struct bpf_sock *sk; struct bpf_sock *sk;
__u32 family; __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
sk += 1; sk += 1;
if (sk) if (sk)
bpf_sk_release(sk); bpf_sk_release(sk);
...@@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) ...@@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
{ {
struct bpf_sock_tuple tuple = {}; struct bpf_sock_tuple tuple = {};
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
return 0; return 0;
} }
...@@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) ...@@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {}; struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk; struct bpf_sock *sk;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
bpf_sk_release(sk); bpf_sk_release(sk);
bpf_sk_release(sk); bpf_sk_release(sk);
return 0; return 0;
...@@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) ...@@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb)
struct bpf_sock_tuple tuple = {}; struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk; struct bpf_sock *sk;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
bpf_sk_release(sk); bpf_sk_release(sk);
return 0; return 0;
} }
...@@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) ...@@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb)
void lookup_no_release(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb)
{ {
struct bpf_sock_tuple tuple = {}; struct bpf_sock_tuple tuple = {};
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
} }
SEC("fail_no_release_subcall") SEC("fail_no_release_subcall")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment