Commit b3f8adee authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-sockmap-fixes'

John Fastabend says:

====================
While testing sockmap with more programs (besides our test programs)
I found a couple issues.

The attached series fixes an issue where pinned maps were not
working correctly, blocking sockets returned zero, and an error
path that when the sock hit an out of memory case resulted in a
double page_put() while doing ingress redirects.

See individual patches for more details.

v2: Incorporated Daniel's feedback to use map ops for uref put op
    which also fixed the build error discovered in v1.
v3: rename map_put_uref to map_release_uref
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 4dfe1bb9 4fcfdfb8
...@@ -31,6 +31,7 @@ struct bpf_map_ops { ...@@ -31,6 +31,7 @@ struct bpf_map_ops {
void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map); void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
/* funcs callable from userspace and from eBPF programs */ /* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key); void *(*map_lookup_elem)(struct bpf_map *map, void *key);
...@@ -436,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); ...@@ -436,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags); void *key, void *value, u64 map_flags);
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
void bpf_fd_array_map_clear(struct bpf_map *map);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags); void *key, void *value, u64 map_flags);
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
......
...@@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr) ...@@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
} }
/* decrement refcnt of all bpf_progs that are stored in this map */ /* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map) static void bpf_fd_array_map_clear(struct bpf_map *map)
{ {
struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_array *array = container_of(map, struct bpf_array, map);
int i; int i;
...@@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = { ...@@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = bpf_fd_array_map_clear,
}; };
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <linux/ptr_ring.h> #include <linux/ptr_ring.h>
#include <net/inet_common.h> #include <net/inet_common.h>
#include <linux/sched/signal.h>
#define SOCK_CREATE_FLAG_MASK \ #define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
...@@ -523,8 +524,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, ...@@ -523,8 +524,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
i = md->sg_start; i = md->sg_start;
do { do {
r->sg_data[i] = md->sg_data[i];
size = (apply && apply_bytes < md->sg_data[i].length) ? size = (apply && apply_bytes < md->sg_data[i].length) ?
apply_bytes : md->sg_data[i].length; apply_bytes : md->sg_data[i].length;
...@@ -535,6 +534,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, ...@@ -535,6 +534,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
} }
sk_mem_charge(sk, size); sk_mem_charge(sk, size);
r->sg_data[i] = md->sg_data[i];
r->sg_data[i].length = size; r->sg_data[i].length = size;
md->sg_data[i].length -= size; md->sg_data[i].length -= size;
md->sg_data[i].offset += size; md->sg_data[i].offset += size;
...@@ -732,6 +732,26 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock, ...@@ -732,6 +732,26 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
return err; return err;
} }
static int bpf_wait_data(struct sock *sk,
struct smap_psock *psk, int flags,
long timeo, int *err)
{
int rc;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
rc = sk_wait_event(sk, &timeo,
!list_empty(&psk->ingress) ||
!skb_queue_empty(&sk->sk_receive_queue),
&wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len) int nonblock, int flags, int *addr_len)
{ {
...@@ -755,6 +775,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -755,6 +775,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk); lock_sock(sk);
bytes_ready:
while (copied != len) { while (copied != len) {
struct scatterlist *sg; struct scatterlist *sg;
struct sk_msg_buff *md; struct sk_msg_buff *md;
...@@ -809,6 +830,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -809,6 +830,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
} }
} }
if (!copied) {
long timeo;
int data;
int err = 0;
timeo = sock_rcvtimeo(sk, nonblock);
data = bpf_wait_data(sk, psock, flags, timeo, &err);
if (data) {
if (!skb_queue_empty(&sk->sk_receive_queue)) {
release_sock(sk);
smap_release_sock(psock, sk);
copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
return copied;
}
goto bytes_ready;
}
if (err)
copied = err;
}
release_sock(sk); release_sock(sk);
smap_release_sock(psock, sk); smap_release_sock(psock, sk);
return copied; return copied;
...@@ -1831,7 +1874,7 @@ static int sock_map_update_elem(struct bpf_map *map, ...@@ -1831,7 +1874,7 @@ static int sock_map_update_elem(struct bpf_map *map,
return err; return err;
} }
static void sock_map_release(struct bpf_map *map, struct file *map_file) static void sock_map_release(struct bpf_map *map)
{ {
struct bpf_stab *stab = container_of(map, struct bpf_stab, map); struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig; struct bpf_prog *orig;
...@@ -1855,7 +1898,7 @@ const struct bpf_map_ops sock_map_ops = { ...@@ -1855,7 +1898,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_get_next_key = sock_map_get_next_key, .map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem, .map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem, .map_delete_elem = sock_map_delete_elem,
.map_release = sock_map_release, .map_release_uref = sock_map_release,
}; };
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
......
...@@ -257,8 +257,8 @@ static void bpf_map_free_deferred(struct work_struct *work) ...@@ -257,8 +257,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map) static void bpf_map_put_uref(struct bpf_map *map)
{ {
if (atomic_dec_and_test(&map->usercnt)) { if (atomic_dec_and_test(&map->usercnt)) {
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) if (map->ops->map_release_uref)
bpf_fd_array_map_clear(map); map->ops->map_release_uref(map);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment