Commit 7f93d129 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
pull-request: bpf 2018-07-07

The following pull-request contains BPF updates for your *net* tree.

Plenty of fixes for different components:

1) A set of critical fixes for sockmap and sockhash, from John Fastabend.

2) Fixes for several race conditions in AF_XDP, from Magnus Karlsson.

3) Hash map refcount fix, from Mauricio Vasquez.

4) samples/bpf fixes, from Taeung Song.

5) ifup and MTU checks for xdp_redirect, from Toshiaki Makita.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f6f2a4a2 d8d7218a
@@ -765,8 +765,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                        const struct bpf_insn *patch, u32 len);

-static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
-                                           struct net_device *fwd)
+static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
+                                 unsigned int pktlen)
 {
         unsigned int len;

@@ -774,7 +774,7 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
                 return -ENETDOWN;

         len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-        if (skb->len > len)
+        if (pktlen > len)
                 return -EMSGSIZE;

         return 0;
...
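The reworked helper takes the egress netdev and a packet length instead of an skb, so the same ifup/MTU check can be shared by the native XDP redirect path (which passes xdp->data_end - xdp->data) and the generic skb path (which passes skb->len), as the devmap and filter.c hunks below show. A minimal userspace sketch of the length check it performs; the harness and sample values are illustrative, only VLAN_HLEN (4) and the error codes mirror the kernel:

#include <errno.h>
#include <stdio.h>

#define VLAN_HLEN 4	/* room for one 802.1Q tag, as in the kernel check */

/* Userspace analog of the length part of xdp_ok_fwd_dev(): reject frames
 * that cannot fit on the egress device. The real helper also verifies the
 * device is up (IFF_UP) and returns -ENETDOWN; that part is omitted here.
 */
static int fwd_len_ok(unsigned int mtu, unsigned int hard_header_len,
                      unsigned int pktlen)
{
	unsigned int len = mtu + hard_header_len + VLAN_HLEN;

	if (pktlen > len)
		return -EMSGSIZE;
	return 0;
}

int main(void)
{
	/* A 1514-byte Ethernet frame fits a 1500-MTU device (1500 + 14 + 4)... */
	printf("%d\n", fwd_len_ok(1500, 14, 1514));	/* 0 */
	/* ...but a 1519-byte frame does not. */
	printf("%d\n", fwd_len_ok(1500, 14, 1519));	/* -EMSGSIZE (-90) */
	return 0;
}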
@@ -828,6 +828,10 @@ struct tcp_skb_cb {

 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
...
@@ -60,6 +60,10 @@ struct xdp_sock {
 	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
+	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
+	 * in the SKB destructor callback.
+	 */
+	spinlock_t tx_completion_lock;
 	u64 rx_dropped;
 };
...
@@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 	struct xdp_frame *xdpf;
+	int err;

 	if (!dev->netdev_ops->ndo_xdp_xmit)
 		return -EOPNOTSUPP;

+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 {
 	int err;

-	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	err = xdp_ok_fwd_dev(dst->dev, skb->len);
 	if (unlikely(err))
 		return err;

 	skb->dev = dst->dev;
...
@@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			 * old element will be freed immediately.
 			 * Otherwise return an error
 			 */
-			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			l_new = ERR_PTR(-E2BIG);
+			goto dec_count;
 		}
 		l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
 				     htab->map.numa_node);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+		if (!l_new) {
+			l_new = ERR_PTR(-ENOMEM);
+			goto dec_count;
+		}
 	}

 	memcpy(l_new->key, key, key_size);
@@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					    GFP_ATOMIC | __GFP_NOWARN);
 		if (!pptr) {
 			kfree(l_new);
-			return ERR_PTR(-ENOMEM);
+			l_new = ERR_PTR(-ENOMEM);
+			goto dec_count;
 		}
 	}

@@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,

 	l_new->hash = hash;
 	return l_new;
+dec_count:
+	atomic_dec(&htab->count);
+	return l_new;
 }

 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
...
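alloc_htab_elem() bumps htab->count before it allocates, so every failure after that point must undo the increment; the hunk above funnels all of those paths through a single dec_count label instead of open-coding atomic_dec() in each branch (the refcount leak came from branches that returned without it). A self-contained sketch of the same cleanup pattern; the struct, limit, and error handling are illustrative, not the kernel's:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct elem { char key[16]; };

static unsigned int count;			/* stands in for htab->count */
static const unsigned int max_entries = 128;

/* Charge the slot up front, then funnel every later failure through one
 * dec_count label so the counter can never leak.
 */
static struct elem *alloc_elem(const char *key, int *err)
{
	struct elem *e = NULL;

	count++;
	if (count > max_entries) {
		*err = -E2BIG;
		goto dec_count;
	}

	e = malloc(sizeof(*e));
	if (!e) {
		*err = -ENOMEM;
		goto dec_count;
	}

	strncpy(e->key, key, sizeof(e->key) - 1);
	e->key[sizeof(e->key) - 1] = '\0';
	*err = 0;
	return e;			/* success: count stays charged */

dec_count:
	count--;			/* undo the charge exactly once */
	return NULL;
}

int main(void)
{
	int err = 0;
	struct elem *e = alloc_elem("hello", &err);

	printf("err=%d count=%u\n", err, count);
	free(e);
	return 0;
}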
@@ -312,10 +312,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	struct smap_psock *psock;
 	struct sock *osk;

+	lock_sock(sk);
 	rcu_read_lock();
 	psock = smap_psock_sk(sk);
 	if (unlikely(!psock)) {
 		rcu_read_unlock();
+		release_sock(sk);
 		return sk->sk_prot->close(sk, timeout);
 	}

@@ -371,6 +373,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		e = psock_map_pop(sk, psock);
 	}
 	rcu_read_unlock();
+	release_sock(sk);
 	close_fun(sk, timeout);
 }

@@ -568,7 +571,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
 	while (sg[i].length) {
 		free += sg[i].length;
 		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
+		if (!md->skb)
+			put_page(sg_page(&sg[i]));
 		sg[i].length = 0;
 		sg[i].page_link = 0;
 		sg[i].offset = 0;
@@ -577,6 +581,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
 	}
+	if (md->skb)
+		consume_skb(md->skb);

 	return free;
 }
@@ -1230,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
 	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	preempt_enable();
@@ -1485,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
 	 * any socket yet.
 	 */
 	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	skb->sk = NULL;
 	rcu_read_unlock();
@@ -1896,7 +1902,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 		e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
 		if (!e) {
 			err = -ENOMEM;
-			goto out_progs;
+			goto out_free;
 		}
 	}

@@ -2069,7 +2075,13 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EOPNOTSUPP;
 	}

+	lock_sock(skops.sk);
+	preempt_disable();
+	rcu_read_lock();
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2342,7 +2354,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 	if (err)
 		goto err;

-	/* bpf_map_update_elem() can be called in_irq() */
+	/* psock is valid here because otherwise above *ctx_update_elem would
+	 * have thrown an error. It is safe to skip error check.
+	 */
+	psock = smap_psock_sk(sock);
 	raw_spin_lock_bh(&b->lock);
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 	if (l_old && map_flags == BPF_NOEXIST) {
@@ -2360,12 +2375,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		goto bucket_err;
 	}

-	psock = smap_psock_sk(sock);
-	if (unlikely(!psock)) {
-		err = -EINVAL;
-		goto bucket_err;
-	}
-
 	rcu_assign_pointer(e->hash_link, l_new);
 	rcu_assign_pointer(e->htab,
 			   container_of(map, struct bpf_htab, map));
@@ -2388,12 +2397,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 	raw_spin_unlock_bh(&b->lock);
 	return 0;
 bucket_err:
+	smap_release_sock(psock, sock);
 	raw_spin_unlock_bh(&b->lock);
 err:
 	kfree(e);
-	psock = smap_psock_sk(sock);
-	if (psock)
-		smap_release_sock(psock, sock);
 	return err;
 }

@@ -2415,7 +2422,13 @@ static int sock_hash_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}

+	lock_sock(skops.sk);
+	preempt_disable();
+	rcu_read_lock();
 	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(skops.sk);
 	fput(socket->file);
 	return err;
 }
@@ -2472,10 +2485,8 @@ struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;

-	raw_spin_lock_bh(&b->lock);
 	l = lookup_elem_raw(head, hash, key, key_size);
 	sk = l ? l->sk : NULL;
-	raw_spin_unlock_bh(&b->lock);

 	return sk;
 }
...
@@ -735,7 +735,9 @@ static int map_update_elem(union bpf_attr *attr)
 	if (bpf_map_is_dev_bound(map)) {
 		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
 		goto out;
-	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
+		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
 		goto out;
 	}
...
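Listing BPF_MAP_TYPE_SOCKMAP and BPF_MAP_TYPE_SOCKHASH here routes syscall-side updates straight to the map's own map_update_elem callback, outside the generic section that runs with preemption and RCU read-side held, so the sockmap/sockhash update code above is free to take the socket lock. From userspace the update then looks like any other map update; a hedged sketch using libbpf's bpf_map_update_elem() wrapper (the header path, map creation, and socket setup are assumed):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <linux/bpf.h>	/* BPF_ANY */
#include <bpf/bpf.h>	/* libbpf syscall wrappers; include path may differ per tree */

/* Insert a TCP socket into slot 0 of an existing BPF_MAP_TYPE_SOCKMAP.
 * map_fd is assumed to come from bpf_create_map() or a pinned map, and
 * sock_fd from socket()/connect(); the value for sockmap and sockhash
 * updates is the socket's file descriptor.
 */
static int add_sock_to_map(int map_fd, int sock_fd)
{
	int key = 0;

	if (bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY)) {
		fprintf(stderr, "sockmap update failed: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}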
@@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };

+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+					   unsigned int write_len)
+{
+	int err = __bpf_try_make_writable(skb, write_len);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+	.func		= sk_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
 	   u64, from, u64, to, u64, flags)
 {
@@ -2779,7 +2810,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)

 static u32 __bpf_skb_max_len(const struct sk_buff *skb)
 {
-	return skb->dev->mtu + skb->dev->hard_header_len;
+	return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+			  SKB_MAX_ALLOC;
 }

 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
 	return __skb_trim_rcsum(skb, new_len);
 }

-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
-	   u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+					u64 flags)
 {
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 		if (!ret && skb_is_gso(skb))
 			skb_gso_reset(skb);
 	}
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);

 	bpf_compute_data_pointers(skb);
 	return ret;
@@ -2914,8 +2953,26 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };

-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
-	   u64, flags)
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+	.func		= sk_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+					u64 flags)
 {
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 new_len = skb->len + head_room;
@@ -2941,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
 		skb_reset_mac_header(skb);
 	}

+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
 	bpf_compute_data_pointers(skb);
-	return 0;
+	return ret;
 }

 static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };

+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+	.func		= sk_skb_change_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
 	return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
 		      u32 index)
 {
 	struct xdp_frame *xdpf;
-	int sent;
+	int err, sent;

 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
 	}

+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -3285,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 		goto err;
 	}

-	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+	err = xdp_ok_fwd_dev(fwd, skb->len);
+	if (unlikely(err))
 		goto err;

 	skb->dev = fwd;
@@ -4617,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
+	    func == sk_skb_change_head ||
 	    func == bpf_skb_change_tail ||
+	    func == sk_skb_change_tail ||
 	    func == bpf_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
+	    func == sk_skb_pull_data ||
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
@@ -4871,11 +4961,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
 	case BPF_FUNC_skb_pull_data:
-		return &bpf_skb_pull_data_proto;
+		return &sk_skb_pull_data_proto;
 	case BPF_FUNC_skb_change_tail:
-		return &bpf_skb_change_tail_proto;
+		return &sk_skb_change_tail_proto;
 	case BPF_FUNC_skb_change_head:
-		return &bpf_skb_change_head_proto;
+		return &sk_skb_change_head_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
...
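The sk_skb_* helper variants added and wired up in the hunks above differ from the generic skb helpers only in calling bpf_compute_data_end_sk_skb() (from the tcp.h hunk earlier) rather than bpf_compute_data_pointers(), because SK_SKB programs read data_end out of TCP_SKB_CB(skb)->bpf instead of the qdisc cb layout. From the BPF program's point of view nothing changes: after pulling data, the usual bounds check against data_end works. A hedged sketch of an SK_SKB program exercising this; the section name and build command follow the samples/selftests conventions and are assumptions:

/* Build sketch (assumed): clang -O2 -target bpf -c sk_skb_pull.c -o sk_skb_pull.o */
#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Helper wrapper in the usual samples/selftests style. */
static int (*bpf_skb_pull_data)(void *skb, int len) =
	(void *) BPF_FUNC_skb_pull_data;

SEC("sk_skb/verdict")	/* section name is loader-specific; an assumption */
int pull_and_check(struct __sk_buff *skb)
{
	void *data, *data_end;

	/* Make the first 12 bytes directly readable; with this series the
	 * helper also refreshes data_end for SK_SKB programs.
	 */
	bpf_skb_pull_data(skb, 12);

	data = (void *)(long)skb->data;
	data_end = (void *)(long)skb->data_end;
	if (data + 12 > data_end)
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";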
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
 	struct xdp_sock *xs = xdp_sk(skb->sk);
+	unsigned long flags;

+	spin_lock_irqsave(&xs->tx_completion_lock, flags);
 	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);

 	sock_wfree(skb);
 }
@@ -268,15 +271,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		skb->destructor = xsk_destruct_skb;

 		err = dev_direct_xmit(skb, xs->queue_id);
+		xskq_discard_desc(xs->tx);
 		/* Ignore NET_XMIT_CN as packet might have been sent */
 		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
-			err = -EAGAIN;
-			/* SKB consumed by dev_direct_xmit() */
+			/* SKB completed but not sent */
+			err = -EBUSY;
 			goto out;
 		}

 		sent_frame = true;
-		xskq_discard_desc(xs->tx);
 	}

 out:
@@ -755,6 +758,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,

 	xs = xdp_sk(sk);
 	mutex_init(&xs->mutex);
+	spin_lock_init(&xs->tx_completion_lock);

 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
...
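The completion queue is a single-producer ring, but the SKB destructor that produces onto it can run both from the NAPI TX completion path and from the sendmsg error path, so the new per-socket tx_completion_lock serializes the two (matching the comment added in the xdp_sock hunk earlier). A small userspace analog of that arrangement using a pthread spinlock; the ring and its callers are illustrative, not the kernel's:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 64	/* power of two, like the xsk rings */

/* Toy single-producer completion ring: "prod_tail++ & mask" is only safe
 * when exactly one thread produces at a time, which is what the spinlock
 * guarantees (it plays the role of tx_completion_lock).
 */
struct toy_cq {
	uint64_t desc[RING_SIZE];
	uint32_t prod_tail;
	pthread_spinlock_t lock;
};

static void cq_produce(struct toy_cq *q, uint64_t addr)
{
	pthread_spin_lock(&q->lock);
	q->desc[q->prod_tail++ & (RING_SIZE - 1)] = addr;
	pthread_spin_unlock(&q->lock);
}

int main(void)
{
	struct toy_cq q = { .prod_tail = 0 };

	pthread_spin_init(&q.lock, PTHREAD_PROCESS_PRIVATE);
	/* In the kernel one producer is the skb destructor running from TX
	 * completion and the other is the sendmsg error path freeing the
	 * skb; here both are plain calls.
	 */
	cq_produce(&q, 0x1000);
	cq_produce(&q, 0x2000);
	printf("produced %u completions\n", q.prod_tail);
	pthread_spin_destroy(&q.lock);
	return 0;
}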
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
 	return (entries > dcnt) ? dcnt : entries;
 }

-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
-	return q->nentries - (producer - q->cons_tail);
-}
-
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-	u32 free_entries = xskq_nb_free_lazy(q, producer);
+	u32 free_entries = q->nentries - (producer - q->cons_tail);

 	if (free_entries >= dcnt)
 		return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

-	if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+	if (xskq_nb_free(q, q->prod_tail, 1) == 0)
 		return -ENOSPC;

 	ring->desc[q->prod_tail++ & q->ring_mask] = addr;
...
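With xskq_nb_free_lazy() gone, xskq_produce_addr() asks xskq_nb_free() for exactly the one free slot it needs rather than LAZY_UPDATE_THRESHOLD of them: the producer check should not demand more space than it will actually consume. The free-slot arithmetic itself relies on free-running u32 indices and unsigned wraparound; a small standalone check of that property (names and the ring size are illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NENTRIES 64u	/* ring size, power of two as in the xsk rings */

/* Free-slot computation in the style of xskq_nb_free(): producer and
 * consumer are free-running u32 counters, and the unsigned subtraction
 * stays correct even after they wrap around UINT32_MAX.
 */
static uint32_t nb_free(uint32_t producer, uint32_t cons_tail)
{
	return NENTRIES - (producer - cons_tail);
}

int main(void)
{
	/* Empty ring: producer == consumer, all slots free. */
	assert(nb_free(100, 100) == NENTRIES);

	/* 10 outstanding entries. */
	assert(nb_free(110, 100) == NENTRIES - 10);

	/* Same situation straddling the u32 wrap point. */
	assert(nb_free(5, UINT32_MAX - 4) == NENTRIES - 10);

	printf("wraparound arithmetic holds\n");
	return 0;
}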
cpustat
fds_example
lathist
load_sock_ops
lwt_len_hist
map_perf_test
offwaketime
per_socket_stats_example
sampleip
sock_example
sockex1
sockex2
sockex3
spintest
syscall_nrs.h
syscall_tp
task_fd_query
tc_l2_redirect
test_cgrp2_array_pin
test_cgrp2_attach
test_cgrp2_attach2
test_cgrp2_sock
test_cgrp2_sock2
test_current_task_under_cgroup
test_lru_dist
test_map_in_map
test_overhead
test_probe_write_user
trace_event
trace_output
tracex1
tracex2
tracex3
tracex4
tracex5
tracex6
tracex7
xdp1
xdp2
xdp_adjust_tail
xdp_fwd
xdp_monitor
xdp_redirect
xdp_redirect_cpu
xdp_redirect_map
xdp_router_ipv4
xdp_rxq_info
xdp_tx_iptunnel
xdpsock
@@ -6,6 +6,7 @@
  */
 #define KBUILD_MODNAME "foo"
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
@@ -108,11 +109,6 @@ static int parse_ipv6(void *data, uint64_t nh_off, void *data_end)
 	return 0;
 }

-struct vlan_hdr {
-	uint16_t h_vlan_TCI;
-	uint16_t h_vlan_encapsulated_proto;
-};
-
 SEC("varlen")
 int handle_ingress(struct __sk_buff *skb)
 {
...
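The sample now pulls struct vlan_hdr from <linux/if_vlan.h> instead of carrying a private copy of the definition. A hedged sketch of how such a shared definition is used when stepping over an 802.1Q tag, written in the style of the samples/bpf parsers (the function, its callers, and the kernel-header include path provided by the samples build are assumptions):

#include <stdint.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>	/* struct vlan_hdr, as now used by the sample */

/* Step over one 802.1Q tag: bounds-check the header, report the inner
 * protocol and advance the offset. Illustrative only; the real sample
 * handles VLAN tags inside its ingress handler.
 */
static int skip_vlan(void *data, void *data_end, uint16_t *proto,
		     uint64_t *nh_off)
{
	struct vlan_hdr *vhdr = data + *nh_off;

	if ((void *)(vhdr + 1) > data_end)
		return -1;			/* truncated header: drop */

	*proto = vhdr->h_vlan_encapsulated_proto;
	*nh_off += sizeof(*vhdr);		/* inner header follows the tag */
	return 0;
}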
@@ -6,6 +6,7 @@
  */
 #define _GNU_SOURCE
 #include <sched.h>
+#include <errno.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <asm/unistd.h>
@@ -44,8 +45,13 @@ static void test_task_rename(int cpu)
 		exit(1);
 	}
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		write(fd, buf, sizeof(buf));
+	for (i = 0; i < MAX_CNT; i++) {
+		if (write(fd, buf, sizeof(buf)) < 0) {
+			printf("task rename failed: %s\n", strerror(errno));
+			close(fd);
+			return;
+		}
+	}
 	printf("task_rename:%d: %lld events per sec\n",
 	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 	close(fd);
@@ -63,8 +69,13 @@ static void test_urandom_read(int cpu)
 		exit(1);
 	}
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		read(fd, buf, sizeof(buf));
+	for (i = 0; i < MAX_CNT; i++) {
+		if (read(fd, buf, sizeof(buf)) < 0) {
+			printf("failed to read from /dev/urandom: %s\n", strerror(errno));
+			close(fd);
+			return;
+		}
+	}
 	printf("urandom_read:%d: %lld events per sec\n",
 	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 	close(fd);
...
@@ -122,6 +122,16 @@ static void print_stacks(void)
 	}
 }

+static inline int generate_load(void)
+{
+	if (system("dd if=/dev/zero of=/dev/null count=5000k status=none") < 0) {
+		printf("failed to generate some load with dd: %s\n", strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
 static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 {
 	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
@@ -142,7 +152,11 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
 		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE) == 0);
 	}
-	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+
+	if (generate_load() < 0) {
+		error = 1;
+		goto all_cpu_err;
+	}
 	print_stacks();
 all_cpu_err:
 	for (i--; i >= 0; i--) {
@@ -156,7 +170,7 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)

 static void test_perf_event_task(struct perf_event_attr *attr)
 {
-	int pmu_fd;
+	int pmu_fd, error = 0;

 	/* per task perf event, enable inherit so the "dd ..." command can be traced properly.
 	 * Enabling inherit will cause bpf_perf_prog_read_time helper failure.
@@ -171,10 +185,17 @@ static void test_perf_event_task(struct perf_event_attr *attr)
 	}
 	assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
 	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE) == 0);
-	system("dd if=/dev/zero of=/dev/null count=5000k status=none");
+
+	if (generate_load() < 0) {
+		error = 1;
+		goto err;
+	}
 	print_stacks();
+err:
 	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
 	close(pmu_fd);
+	if (error)
+		int_exit(0);
 }

 static void test_bpf_perf_event(void)
...
@@ -729,7 +729,7 @@ static void kick_tx(int fd)
 	int ret;

 	ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
+	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
 		return;
 	lassert(0);
 }
...
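Because the SKB transmit path in xsk_generic_xmit() now reports a completed-but-unsent frame as -EBUSY instead of -EAGAIN, the sample's TX kick must treat EBUSY as just another transient condition and retry on the next loop iteration. A hedged userspace sketch of that behaviour; the function mirrors kick_tx but the surrounding socket setup is assumed:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>

/* Nudge the kernel to process the AF_XDP TX ring. Transient conditions
 * (ENOBUFS, EAGAIN, EBUSY) are not fatal: the caller simply kicks again
 * on the next loop iteration.
 */
static void kick_tx(int fd)
{
	ssize_t ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);

	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY)
		return;

	perror("sendto");	/* anything else is a real error */
	exit(EXIT_FAILURE);
}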