Commit d24ad3fc authored by David S. Miller's avatar David S. Miller

Merge branch 'bpf-next'

Daniel Borkmann says:

====================
BPF updates

Couple of misc updates to BPF, besides others this series adds
bpf_csum_diff() to be used with L3 csums, allows for managing
tunnel options for collect meta data mode, and enabling ipv6
traffic class for collect meta data in vxlan specifically (geneve
already supports it). For more details, please see individual
patches.

The series requires net to be merged into net-next first to
avoid any further pending merge conflicts.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 810813c4 1400615d
......@@ -775,10 +775,10 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct flowi4 *fl4,
struct ip_tunnel_info *info)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL;
bool use_cache = true;
__u8 tos;
memset(fl4, 0, sizeof(*fl4));
......@@ -804,7 +804,6 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
......@@ -832,11 +831,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct flowi6 *fl6,
struct ip_tunnel_info *info)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
bool use_cache = true;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
......@@ -862,7 +861,6 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
dst_cache = &geneve->dst_cache;
}
use_cache = use_cache && !skb->mark;
if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst)
......@@ -940,7 +938,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
if (key->tun_flags & TUNNEL_GENEVE_OPT)
if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
......@@ -1027,7 +1025,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
if (key->tun_flags & TUNNEL_GENEVE_OPT)
if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
......
......@@ -1756,17 +1756,15 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
struct ip_tunnel_info *info)
const struct ip_tunnel_info *info)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL;
bool use_cache = false;
struct flowi4 fl4;
/* when the ip_tunnel_info is availble, the tos used for lookup is
* packet independent, so we can use the cache
*/
if (!skb->mark && (!tos || info)) {
use_cache = true;
if (tos && !info)
use_cache = false;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
......@@ -1791,16 +1789,20 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif,
struct sk_buff *skb, int oif, u8 tos,
const struct in6_addr *daddr,
struct in6_addr *saddr,
struct dst_cache *dst_cache)
struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
if (!skb->mark) {
if (tos && !info)
use_cache = false;
if (use_cache) {
ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst)
return ndst;
......@@ -1808,6 +1810,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.flowi6_tos = RT_TOS(tos);
fl6.daddr = *daddr;
fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
fl6.flowi6_mark = skb->mark;
......@@ -1820,7 +1823,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
if (!skb->mark)
if (use_cache)
dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
......@@ -2016,9 +2019,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
sk = vxlan->vn6_sock->sock->sk;
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0,
rdst ? rdst->remote_ifindex : 0, tos,
&dst->sin6.sin6_addr, &saddr,
dst_cache);
dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
......@@ -2053,6 +2056,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (!info)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
......@@ -2062,8 +2066,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
&saddr, &dst->sin6.sin6_addr,
0, ttl, src_port, dst_port, !udp_sum);
&saddr, &dst->sin6.sin6_addr, tos, ttl,
src_port, dst_port, !udp_sum);
#endif
}
......@@ -2385,9 +2389,9 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (!vxlan->vn6_sock)
return -EINVAL;
ndst = vxlan6_get_route(vxlan, skb, 0,
ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
&info->key.u.ipv6.dst,
&info->key.u.ipv6.src, NULL);
&info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
......
......@@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
}
static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
......
......@@ -140,6 +140,7 @@ struct ip_tunnel {
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
......@@ -206,6 +207,20 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
}
static inline bool
ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
if (skb->mark)
return false;
if (!info)
return true;
if (info->key.tun_flags & TUNNEL_NOCACHE)
return false;
return true;
}
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
*tun_info)
{
......
......@@ -298,6 +298,17 @@ enum bpf_func_id {
* Return: csum result
*/
BPF_FUNC_csum_diff,
/**
* bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
* retrieve or populate tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: 0 on success for set, option size for get
*/
BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt,
__BPF_FUNC_MAX_ID,
};
......@@ -305,6 +316,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
#define BPF_F_INVALIDATE_HASH (1ULL << 1)
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
......@@ -329,6 +341,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
......
......@@ -1353,7 +1353,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
unsigned int len = (unsigned int) r4;
void *ptr;
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
/* bpf verifier guarantees that:
......@@ -1384,11 +1384,13 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (flags & BPF_F_RECOMPUTE_CSUM)
skb_postpush_rcsum(skb, ptr, len);
if (flags & BPF_F_INVALIDATE_HASH)
skb_clear_hash(skb);
return 0;
}
const struct bpf_func_proto bpf_skb_store_bytes_proto = {
static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.func = bpf_skb_store_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1419,7 +1421,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return 0;
}
const struct bpf_func_proto bpf_skb_load_bytes_proto = {
static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.func = bpf_skb_load_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1447,6 +1449,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EFAULT;
switch (flags & BPF_F_HDR_FIELD_MASK) {
case 0:
if (unlikely(from != 0))
return -EINVAL;
csum_replace_by_diff(ptr, to);
break;
case 2:
csum_replace2(ptr, from, to);
break;
......@@ -1464,7 +1472,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.func = bpf_l3_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1523,7 +1531,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
const struct bpf_func_proto bpf_l4_csum_replace_proto = {
static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.func = bpf_l4_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1562,7 +1570,7 @@ static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
return csum_partial(sp->diff, diff_size, seed);
}
const struct bpf_func_proto bpf_csum_diff_proto = {
static const struct bpf_func_proto bpf_csum_diff_proto = {
.func = bpf_csum_diff,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1600,7 +1608,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
return dev_queue_xmit(skb2);
}
const struct bpf_func_proto bpf_clone_redirect_proto = {
static const struct bpf_func_proto bpf_clone_redirect_proto = {
.func = bpf_clone_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1652,7 +1660,7 @@ int skb_do_redirect(struct sk_buff *skb)
return dev_queue_xmit(skb);
}
const struct bpf_func_proto bpf_redirect_proto = {
static const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1791,7 +1799,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return 0;
}
const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.func = bpf_skb_get_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1801,6 +1809,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
u8 *to = (u8 *) (long) r2;
const struct ip_tunnel_info *info = skb_tunnel_info(skb);
if (unlikely(!info ||
!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
return -ENOENT;
if (unlikely(size < info->options_len))
return -ENOMEM;
ip_tunnel_info_opts_get(to, info);
return info->options_len;
}
static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
.func = bpf_skb_get_tunnel_opt,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
};
static struct metadata_dst __percpu *md_dst;
static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
......@@ -1811,7 +1845,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX)))
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
BPF_F_DONT_FRAGMENT)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
......@@ -1835,7 +1870,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
if (flags & BPF_F_DONT_FRAGMENT)
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
info->key.ttl = from->tunnel_ttl;
......@@ -1853,7 +1891,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return 0;
}
const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.func = bpf_skb_set_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
......@@ -1863,17 +1901,58 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
#define BPF_TUNLEN_MAX 255
static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
u8 *from = (u8 *) (long) r2;
struct ip_tunnel_info *info = skb_tunnel_info(skb);
const struct metadata_dst *md = this_cpu_ptr(md_dst);
if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
return -EINVAL;
if (unlikely(size > BPF_TUNLEN_MAX))
return -ENOMEM;
ip_tunnel_info_opts_set(info, from, size);
return 0;
}
static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
.func = bpf_skb_set_tunnel_opt,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
};
static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
/* race is not possible, since it's called from
* verifier that is holding verifier mutex
BUILD_BUG_ON(FIELD_SIZEOF(struct ip_tunnel_info,
options_len) != 1);
/* Race is not possible, since it's called from verifier
* that is holding verifier mutex.
*/
md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
md_dst = metadata_dst_alloc_percpu(BPF_TUNLEN_MAX,
GFP_KERNEL);
if (!md_dst)
return NULL;
}
return &bpf_skb_set_tunnel_key_proto;
switch (which) {
case BPF_FUNC_skb_set_tunnel_key:
return &bpf_skb_set_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_opt:
return &bpf_skb_set_tunnel_opt_proto;
default:
return NULL;
}
}
static const struct bpf_func_proto *
......@@ -1927,7 +2006,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto();
return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_skb_get_tunnel_opt:
return &bpf_skb_get_tunnel_opt_proto;
case BPF_FUNC_skb_set_tunnel_opt:
return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
......
......@@ -527,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl;
struct rtable *rt;
int min_headroom;
int tunnel_hlen;
__be16 df, flags;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
......@@ -540,13 +541,14 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
NULL;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
if (use_cache)
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
if (!skb->mark)
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment