Commit ec1c8fa0 authored by Alexei Starovoitov

Merge branch 'af_xdp-fixes'

Björn Töpel says:

====================
William found two bugs when doing socket teardown within the same
process.

The first issue was an invalid munmap call, and the second one was an
invalid XSKMAP cleanup. Both caused the process to keep references to
the socket, which was therefore not correctly cleaned up. When a new
socket was created, the bind() call would fail, since the old socket
was still lingering and refusing to give up the queue on the
netdev.

More details can be found in the individual commits.

Thanks,
Björn
====================
Reviewed-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6cea3370 5750902a
...@@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, ...@@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
return 0; return 0;
out_mmap: out_mmap:
munmap(umem->fill, munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket: out_socket:
close(umem->fd); close(umem->fd);
out_umem_alloc: out_umem_alloc:
...@@ -388,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk) ...@@ -388,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{ {
close(xsk->qidconf_map_fd); close(xsk->qidconf_map_fd);
close(xsk->xsks_map_fd); close(xsk->xsks_map_fd);
xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;
} }
static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
int xsks_value)
{ {
bool qidconf_map_updated = false, xsks_map_updated = false; __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
__u32 map_len = sizeof(struct bpf_map_info);
struct bpf_prog_info prog_info = {}; struct bpf_prog_info prog_info = {};
__u32 prog_len = sizeof(prog_info);
struct bpf_map_info map_info; struct bpf_map_info map_info;
__u32 map_len = sizeof(map_info); int fd, err;
__u32 *map_ids;
int reset_value = 0;
__u32 num_maps;
unsigned int i;
int err;
err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
if (err) if (err)
...@@ -423,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, ...@@ -423,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
goto out_map_ids; goto out_map_ids;
for (i = 0; i < prog_info.nr_map_ids; i++) { for (i = 0; i < prog_info.nr_map_ids; i++) {
int fd; if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
break;
fd = bpf_map_get_fd_by_id(map_ids[i]); fd = bpf_map_get_fd_by_id(map_ids[i]);
if (fd < 0) { if (fd < 0)
err = -errno; continue;
goto out_maps;
}
err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
if (err) if (err) {
goto out_maps; close(fd);
continue;
}
if (!strcmp(map_info.name, "qidconf_map")) { if (!strcmp(map_info.name, "qidconf_map")) {
err = bpf_map_update_elem(fd, &xsk->queue_id,
&qidconf_value, 0);
if (err)
goto out_maps;
qidconf_map_updated = true;
xsk->qidconf_map_fd = fd; xsk->qidconf_map_fd = fd;
} else if (!strcmp(map_info.name, "xsks_map")) { continue;
err = bpf_map_update_elem(fd, &xsk->queue_id, }
&xsks_value, 0);
if (err) if (!strcmp(map_info.name, "xsks_map")) {
goto out_maps;
xsks_map_updated = true;
xsk->xsks_map_fd = fd; xsk->xsks_map_fd = fd;
continue;
} }
if (qidconf_map_updated && xsks_map_updated) close(fd);
break;
} }
if (!(qidconf_map_updated && xsks_map_updated)) { err = 0;
if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
err = -ENOENT; err = -ENOENT;
goto out_maps; xsk_delete_bpf_maps(xsk);
} }
err = 0;
goto out_success;
out_maps:
if (qidconf_map_updated)
(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
&reset_value, 0);
if (xsks_map_updated)
(void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
&reset_value, 0);
out_success:
if (qidconf_map_updated)
close(xsk->qidconf_map_fd);
if (xsks_map_updated)
close(xsk->xsks_map_fd);
out_map_ids: out_map_ids:
free(map_ids); free(map_ids);
return err; return err;
} }
static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
{
int qid = false;
(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
(void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
}
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
int qid = true, fd = xsk->fd, err;
err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
if (err)
goto out;
err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
if (err)
goto out;
return 0;
out:
xsk_clear_bpf_maps(xsk);
return err;
}
static int xsk_setup_xdp_prog(struct xsk_socket *xsk) static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{ {
bool prog_attached = false;
__u32 prog_id = 0; __u32 prog_id = 0;
int err; int err;
...@@ -492,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) ...@@ -492,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
return err; return err;
if (!prog_id) { if (!prog_id) {
prog_attached = true;
err = xsk_create_bpf_maps(xsk); err = xsk_create_bpf_maps(xsk);
if (err) if (err)
return err; return err;
...@@ -502,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) ...@@ -502,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
goto out_maps; goto out_maps;
} else { } else {
xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
err = xsk_lookup_bpf_maps(xsk);
if (err)
goto out_load;
} }
err = xsk_update_bpf_maps(xsk, true, xsk->fd); err = xsk_set_bpf_maps(xsk);
if (err) if (err)
goto out_load; goto out_load;
return 0; return 0;
out_load: out_load:
if (prog_attached) close(xsk->prog_fd);
close(xsk->prog_fd);
out_maps: out_maps:
if (prog_attached) xsk_delete_bpf_maps(xsk);
xsk_delete_bpf_maps(xsk);
return err; return err;
} }
...@@ -524,11 +524,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, ...@@ -524,11 +524,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config) const struct xsk_socket_config *usr_config)
{ {
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {}; struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off; struct xdp_mmap_offsets off;
struct xsk_socket *xsk; struct xsk_socket *xsk;
socklen_t optlen; socklen_t optlen;
void *map;
int err; int err;
if (!umem || !xsk_ptr || !rx || !tx) if (!umem || !xsk_ptr || !rx || !tx)
...@@ -594,40 +594,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, ...@@ -594,40 +594,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
} }
if (rx) { if (rx) {
map = xsk_mmap(NULL, off.rx.desc + rx_map = xsk_mmap(NULL, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc), xsk->config.rx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_RX_RING); xsk->fd, XDP_PGOFF_RX_RING);
if (map == MAP_FAILED) { if (rx_map == MAP_FAILED) {
err = -errno; err = -errno;
goto out_socket; goto out_socket;
} }
rx->mask = xsk->config.rx_size - 1; rx->mask = xsk->config.rx_size - 1;
rx->size = xsk->config.rx_size; rx->size = xsk->config.rx_size;
rx->producer = map + off.rx.producer; rx->producer = rx_map + off.rx.producer;
rx->consumer = map + off.rx.consumer; rx->consumer = rx_map + off.rx.consumer;
rx->ring = map + off.rx.desc; rx->ring = rx_map + off.rx.desc;
} }
xsk->rx = rx; xsk->rx = rx;
if (tx) { if (tx) {
map = xsk_mmap(NULL, off.tx.desc + tx_map = xsk_mmap(NULL, off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc), xsk->config.tx_size * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, MAP_SHARED | MAP_POPULATE,
xsk->fd, XDP_PGOFF_TX_RING); xsk->fd, XDP_PGOFF_TX_RING);
if (map == MAP_FAILED) { if (tx_map == MAP_FAILED) {
err = -errno; err = -errno;
goto out_mmap_rx; goto out_mmap_rx;
} }
tx->mask = xsk->config.tx_size - 1; tx->mask = xsk->config.tx_size - 1;
tx->size = xsk->config.tx_size; tx->size = xsk->config.tx_size;
tx->producer = map + off.tx.producer; tx->producer = tx_map + off.tx.producer;
tx->consumer = map + off.tx.consumer; tx->consumer = tx_map + off.tx.consumer;
tx->ring = map + off.tx.desc; tx->ring = tx_map + off.tx.desc;
tx->cached_cons = xsk->config.tx_size; tx->cached_cons = xsk->config.tx_size;
} }
xsk->tx = tx; xsk->tx = tx;
...@@ -643,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, ...@@ -643,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
goto out_mmap_tx; goto out_mmap_tx;
} }
xsk->qidconf_map_fd = -1;
xsk->xsks_map_fd = -1;
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk); err = xsk_setup_xdp_prog(xsk);
if (err) if (err)
...@@ -654,13 +657,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, ...@@ -654,13 +657,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
out_mmap_tx: out_mmap_tx:
if (tx) if (tx)
munmap(xsk->tx, munmap(tx_map, off.tx.desc +
off.tx.desc +
xsk->config.tx_size * sizeof(struct xdp_desc)); xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx: out_mmap_rx:
if (rx) if (rx)
munmap(xsk->rx, munmap(rx_map, off.rx.desc +
off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc)); xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket: out_socket:
if (--umem->refcount) if (--umem->refcount)
...@@ -685,10 +686,12 @@ int xsk_umem__delete(struct xsk_umem *umem) ...@@ -685,10 +686,12 @@ int xsk_umem__delete(struct xsk_umem *umem)
optlen = sizeof(off); optlen = sizeof(off);
err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) { if (!err) {
munmap(umem->fill->ring, (void)munmap(umem->fill->ring - off.fr.desc,
off.fr.desc + umem->config.fill_size * sizeof(__u64)); off.fr.desc +
munmap(umem->comp->ring, umem->config.fill_size * sizeof(__u64));
off.cr.desc + umem->config.comp_size * sizeof(__u64)); (void)munmap(umem->comp->ring - off.cr.desc,
off.cr.desc +
umem->config.comp_size * sizeof(__u64));
} }
close(umem->fd); close(umem->fd);
...@@ -699,6 +702,7 @@ int xsk_umem__delete(struct xsk_umem *umem) ...@@ -699,6 +702,7 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk) void xsk_socket__delete(struct xsk_socket *xsk)
{ {
size_t desc_sz = sizeof(struct xdp_desc);
struct xdp_mmap_offsets off; struct xdp_mmap_offsets off;
socklen_t optlen; socklen_t optlen;
int err; int err;
...@@ -706,19 +710,23 @@ void xsk_socket__delete(struct xsk_socket *xsk) ...@@ -706,19 +710,23 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (!xsk) if (!xsk)
return; return;
(void)xsk_update_bpf_maps(xsk, 0, 0); xsk_clear_bpf_maps(xsk);
xsk_delete_bpf_maps(xsk);
optlen = sizeof(off); optlen = sizeof(off);
err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
if (!err) { if (!err) {
if (xsk->rx) if (xsk->rx) {
munmap(xsk->rx->ring, (void)munmap(xsk->rx->ring - off.rx.desc,
off.rx.desc + off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc)); xsk->config.rx_size * desc_sz);
if (xsk->tx) }
munmap(xsk->tx->ring, if (xsk->tx) {
off.tx.desc + (void)munmap(xsk->tx->ring - off.tx.desc,
xsk->config.tx_size * sizeof(struct xdp_desc)); off.tx.desc +
xsk->config.tx_size * desc_sz);
}
} }
xsk->umem->refcount--; xsk->umem->refcount--;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment