Commit ff1f56d9 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-fib-lookup-helper'

David Ahern says:

====================
Provide a helper for doing a FIB and neighbor lookup in the kernel
tables from an XDP program. The helper provides a fastpath for forwarding
packets. If the packet is a local delivery or for any reason is not a
simple lookup and forward, the packet is expected to continue up the stack
for full processing.

The response from a FIB and neighbor lookup is either the egress index
with the bpf_fib_lookup struct filled in with dmac and gateway or
0 meaning the packet should continue up the stack. In time we can
revisit this to return the FIB lookup result errno if it is one of the
special RTN_'s such as RTN_BLACKHOLE (-EINVAL) so that the XDP
programs can do an early drop if desired.

Patches 1-6 do some more refactoring to IPv6 with the end goal of
extracting a FIB lookup function that aligns with fib_lookup for IPv4,
basically returning a fib6_info without creating a dst based entry.

Patch 7 adds lookup functions to the ipv6 stub. These are needed since
bpf is built into the kernel and ipv6 may not be built or loaded.

Patch 8 adds the bpf helper and 9 adds a sample program.

v3
- remove ETH_ALEN and in6_addr from uapi header

v2
- removed pkt_access from bpf_func_proto as noticed by Daniel
- added check in that IPv6 forwarding is enabled
- added DaveM's ack on patches 1-7 and 9 based on v1 response and
  fact that no changes were made to them in v2

v1
- updated commit messages and cover letter
- added comment to sample program noting lack of verification on
  egress device supporting XDP

RFC v2
- fixed use of foward helper from cls_act as noted by Daniel
- in patch 1 rename fib6_lookup_1 as well for consistency
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 68625b76 fe616055
......@@ -223,6 +223,20 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6);
struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
struct flowi6 *fl6, int flags);
struct fib6_info *(*fib6_table_lookup)(struct net *net,
struct fib6_table *table,
int oif, struct flowi6 *fl6,
int flags);
struct fib6_info *(*fib6_multipath_select)(const struct net *net,
struct fib6_info *f6i,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
......
......@@ -376,9 +376,24 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root,
const struct in6_addr *daddr,
const struct in6_addr *saddr);
/* called with rcu lock held; can return error pointer
* caller needs to select path
*/
struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
int flags);
/* called with rcu lock held; caller needs to select path */
struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int strict);
struct fib6_info *fib6_multipath_select(const struct net *net,
struct fib6_info *match,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb, int strict);
struct fib6_node *fib6_node_lookup(struct fib6_node *root,
const struct in6_addr *daddr,
const struct in6_addr *saddr);
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
......
......@@ -12,10 +12,10 @@
TRACE_EVENT(fib6_table_lookup,
TP_PROTO(const struct net *net, const struct rt6_info *rt,
TP_PROTO(const struct net *net, const struct fib6_info *f6i,
struct fib6_table *table, const struct flowi6 *flp),
TP_ARGS(net, rt, table, flp),
TP_ARGS(net, f6i, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
......@@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup,
in6 = (struct in6_addr *)__entry->dst;
*in6 = flp->daddr;
if (rt->rt6i_idev) {
__assign_str(name, rt->rt6i_idev->dev->name);
if (f6i->fib6_nh.nh_dev) {
__assign_str(name, f6i->fib6_nh.nh_dev);
} else {
__assign_str(name, "");
}
if (rt == net->ipv6.ip6_null_entry) {
if (f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
} else if (rt) {
} else if (f6i) {
in6 = (struct in6_addr *)__entry->gw;
*in6 = rt->rt6i_gateway;
*in6 = f6i->fib6_nh.nh_gw;
}
),
......
......@@ -1828,6 +1828,33 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
*
* int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
* Description
* Do FIB lookup in kernel tables using parameters in *params*.
* If lookup is successful and result shows packet is to be
* forwarded, the neighbor tables are searched for the nexthop.
* If successful (ie., FIB lookup shows forwarding and nexthop
* is resolved), the nexthop address is returned in ipv4_dst,
* ipv6_dst or mpls_out based on family, smac is set to mac
* address of egress device, dmac is set to nexthop mac address,
* rt_metric is set to metric from route.
*
* *plen* argument is the size of the passed in struct.
* *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
*
* **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
* full lookup using FIB rules
* **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
* perspective (default is ingress)
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
*
* Return
* Egress device index on success, 0 if packet needs to continue
* up the stack for further processing or a negative error in case
* of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -1898,7 +1925,8 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
FN(skb_load_bytes_relative),
FN(skb_load_bytes_relative), \
FN(fib_lookup),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -2321,4 +2349,55 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
#define BPF_FIB_LOOKUP_DIRECT BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
struct bpf_fib_lookup {
/* input */
__u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
/* set if lookup is to consider L4 data - e.g., FIB rules */
__u8 l4_protocol;
__be16 sport;
__be16 dport;
/* total length of packet from network header - used for MTU check */
__u16 tot_len;
__u32 ifindex; /* L3 device index for lookup */
union {
/* inputs to lookup */
__u8 tos; /* AF_INET */
__be32 flowlabel; /* AF_INET6 */
/* output: metric of fib result */
__u32 rt_metric;
};
union {
__be32 mpls_in;
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
};
/* input to bpf_fib_lookup, *dst is destination address.
* output: bpf_fib_lookup sets to gateway address
*/
union {
/* return for MPLS lookups */
__be32 mpls_out[4]; /* support up to 4 labels */
__be32 ipv4_dst;
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
/* output */
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -60,6 +60,10 @@
#include <net/xfrm.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
......@@ -4032,6 +4036,265 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
};
#endif
#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
const struct neighbour *neigh,
const struct net_device *dev)
{
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
return dev->ifindex;
}
#endif
#if IS_ENABLED(CONFIG_INET)
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 flags)
{
struct in_device *in_dev;
struct neighbour *neigh;
struct net_device *dev;
struct fib_result res;
struct fib_nh *nh;
struct flowi4 fl4;
int err;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
return -ENODEV;
/* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
return 0;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1;
fl4.flowi4_oif = params->ifindex;
} else {
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.flowi4_proto = params->l4_protocol;
fl4.daddr = params->ipv4_dst;
fl4.saddr = params->ipv4_src;
fl4.fl4_sport = params->sport;
fl4.fl4_dport = params->dport;
if (flags & BPF_FIB_LOOKUP_DIRECT) {
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib_table *tb;
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
return 0;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
fl4.flowi4_mark = 0;
fl4.flowi4_secid = 0;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_uid = sock_net_uid(net, NULL);
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
}
if (err || res.type != RTN_UNICAST)
return 0;
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
if (nh->nh_lwtstate)
return 0;
dev = nh->nh_dev;
if (unlikely(!dev))
return 0;
if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw;
params->rt_metric = res.fi->fib_priority;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
if (neigh)
return bpf_fib_set_fwd_params(params, neigh, dev);
return 0;
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 flags)
{
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
struct neighbour *neigh;
struct net_device *dev;
struct inet6_dev *idev;
struct fib6_info *f6i;
struct flowi6 fl6;
int strict = 0;
int oif;
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
return 0;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
return -ENODEV;
idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
return 0;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1;
oif = fl6.flowi6_oif = params->ifindex;
} else {
oif = fl6.flowi6_iif = params->ifindex;
fl6.flowi6_oif = 0;
strict = RT6_LOOKUP_F_HAS_SADDR;
}
fl6.flowlabel = params->flowlabel;
fl6.flowi6_scope = 0;
fl6.flowi6_flags = 0;
fl6.mp_hash = 0;
fl6.flowi6_proto = params->l4_protocol;
fl6.daddr = *dst;
fl6.saddr = *src;
fl6.fl6_sport = params->sport;
fl6.fl6_dport = params->dport;
if (flags & BPF_FIB_LOOKUP_DIRECT) {
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib6_table *tb;
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
return 0;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else {
fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
}
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
return 0;
if (unlikely(f6i->fib6_flags & RTF_REJECT ||
f6i->fib6_type != RTN_UNICAST))
return 0;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
fl6.flowi6_oif, NULL,
strict);
if (f6i->fib6_nh.nh_lwtstate)
return 0;
if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw;
dev = f6i->fib6_nh.nh_dev;
params->rt_metric = f6i->fib6_metric;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here. Can not use __ipv6_neigh_lookup_noref here
* because we need to get nd_tbl via the stub
*/
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev);
if (neigh)
return bpf_fib_set_fwd_params(params, neigh, dev);
return 0;
}
#endif
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
if (plen < sizeof(*params))
return -EINVAL;
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
flags);
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
flags);
#endif
}
return 0;
}
static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
.func = bpf_xdp_fib_lookup,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
if (plen < sizeof(*params))
return -EINVAL;
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
#endif
}
return -ENOTSUPP;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.func = bpf_skb_fib_lookup,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
......@@ -4181,6 +4444,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
default:
return bpf_base_func_proto(func_id);
}
......@@ -4206,6 +4471,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
default:
return bpf_base_func_proto(func_id);
}
......
......@@ -134,8 +134,39 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
return -EAFNOSUPPORT;
}
static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
{
return NULL;
}
static struct fib6_info *
eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
return NULL;
}
static struct fib6_info *
eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
int flags)
{
return NULL;
}
static struct fib6_info *
eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb, int strict)
{
return f6i;
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
......
......@@ -889,7 +889,11 @@ static struct pernet_operations inet6_net_ops = {
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
.ipv6_dst_lookup = ip6_dst_lookup,
.ipv6_dst_lookup = ip6_dst_lookup,
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
......
......@@ -60,6 +60,39 @@ unsigned int fib6_rules_seq_read(struct net *net)
return fib_rules_seq_read(net, AF_INET6);
}
/* called with rcu lock held; no reference taken on fib6_info */
struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
int flags)
{
struct fib6_info *f6i;
int err;
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = fib6_table_lookup,
.lookup_data = &oif,
.flags = FIB_LOOKUP_NOREF,
};
l3mdev_update_flow(net, flowi6_to_flowi(fl6));
err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
if (err)
return ERR_PTR(err);
f6i = arg.result ? : net->ipv6.fib6_null_entry;
} else {
f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
oif, fl6, flags);
if (!f6i || f6i == net->ipv6.fib6_null_entry)
f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
oif, fl6, flags);
}
return f6i;
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
......@@ -96,8 +129,73 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
return &net->ipv6.ip6_null_entry->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
struct flowi6 *flp6, const struct net_device *dev)
{
struct fib6_rule *r = (struct fib6_rule *)rule;
/* If we need to find a source address for this traffic,
* we check the result if it meets requirement of the rule.
*/
if ((rule->flags & FIB_RULE_FIND_SADDR) &&
r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
struct in6_addr saddr;
if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
rt6_flags2srcprefs(flags), &saddr))
return -EAGAIN;
if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
return -EAGAIN;
flp6->saddr = saddr;
}
return 0;
}
static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct flowi6 *flp6 = &flp->u.ip6;
struct net *net = rule->fr_net;
struct fib6_table *table;
struct fib6_info *f6i;
int err = -EAGAIN, *oif;
u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
return -ENETUNREACH;
case FR_ACT_PROHIBIT:
return -EACCES;
case FR_ACT_BLACKHOLE:
default:
return -EINVAL;
}
tb_id = fib_rule_get_table(rule, arg);
table = fib6_get_table(net, tb_id);
if (!table)
return -EAGAIN;
oif = (int *)arg->lookup_data;
f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
if (f6i != net->ipv6.fib6_null_entry) {
err = fib6_rule_saddr(net, rule, flags, flp6,
fib6_info_nh_dev(f6i));
if (likely(!err))
arg->result = f6i;
}
return err;
}
static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
......@@ -134,27 +232,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
/*
* If we need to find a source address for this traffic,
* we check the result if it meets requirement of the rule.
*/
if ((rule->flags & FIB_RULE_FIND_SADDR) &&
r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
struct in6_addr saddr;
if (ipv6_dev_get_saddr(net,
ip6_dst_idev(&rt->dst)->dev,
&flp6->daddr,
rt6_flags2srcprefs(flags),
&saddr))
goto again;
if (!ipv6_prefix_equal(&saddr, &r->src.addr,
r->src.plen))
goto again;
flp6->saddr = saddr;
}
err = fib6_rule_saddr(net, rule, flags, flp6,
ip6_dst_idev(&rt->dst)->dev);
if (err == -EAGAIN)
goto again;
err = rt->dst.error;
if (err != -EAGAIN)
goto out;
......@@ -172,6 +255,15 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
return err;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
if (arg->lookup_ptr == fib6_table_lookup)
return fib6_rule_action_alt(rule, flp, flags, arg);
return __fib6_rule_action(rule, flp, flags, arg);
}
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
struct rt6_info *rt = (struct rt6_info *) arg->result;
......
......@@ -354,6 +354,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
return &rt->dst;
}
/* called with rcu lock held; no reference taken on fib6_info */
struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
int flags)
{
return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
}
static void __net_init fib6_tables_init(struct net *net)
{
fib6_link_table(net, net->ipv6.fib6_main_tbl);
......@@ -1354,8 +1361,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
struct lookup_args *args)
static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
......@@ -1400,7 +1407,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
#ifdef CONFIG_IPV6_SUBTREES
if (subtree) {
struct fib6_node *sfn;
sfn = fib6_lookup_1(subtree, args + 1);
sfn = fib6_node_lookup_1(subtree,
args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
......@@ -1422,8 +1430,9 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
/* called with rcu_read_lock() held
*/
struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
const struct in6_addr *saddr)
struct fib6_node *fib6_node_lookup(struct fib6_node *root,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
......@@ -1442,7 +1451,7 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
}
};
fn = fib6_lookup_1(root, daddr ? args : args + 1);
fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
if (!fn || fn->fn_flags & RTN_TL_ROOT)
fn = root;
......
......@@ -419,11 +419,11 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
static struct fib6_info *rt6_multipath_select(const struct net *net,
struct fib6_info *match,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict)
struct fib6_info *fib6_multipath_select(const struct net *net,
struct fib6_info *match,
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict)
{
struct fib6_info *sibling, *next_sibling;
......@@ -1006,7 +1006,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
pn = rcu_dereference(fn->parent);
sn = FIB6_SUBTREE(pn);
if (sn && sn != fn)
fn = fib6_lookup(sn, NULL, saddr);
fn = fib6_node_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
......@@ -1059,7 +1059,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
flags &= ~RT6_LOOKUP_F_IFACE;
rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
f6i = rcu_dereference(fn->leaf);
if (!f6i) {
......@@ -1068,8 +1068,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags);
if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
f6i = rt6_multipath_select(net, f6i, fl6,
fl6->flowi6_oif, skb, flags);
f6i = fib6_multipath_select(net, f6i, fl6,
fl6->flowi6_oif, skb,
flags);
}
if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
......@@ -1077,6 +1078,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
goto restart;
}
trace_fib6_table_lookup(net, f6i, table, fl6);
/* Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) {
......@@ -1095,8 +1098,6 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
}
......@@ -1799,23 +1800,14 @@ void rt6_age_exceptions(struct fib6_info *rt,
rcu_read_unlock_bh();
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
/* must be called with rcu lock held */
struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int strict)
{
struct fib6_node *fn, *saved_fn;
struct fib6_info *f6i;
struct rt6_info *rt;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
......@@ -1823,8 +1815,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
f6i = rt6_select(net, fn, oif, strict);
if (f6i->fib6_nsiblings)
f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
......@@ -1837,11 +1827,34 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
}
}
trace_fib6_table_lookup(net, f6i, table, fl6);
return f6i;
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6,
const struct sk_buff *skb, int flags)
{
struct fib6_info *f6i;
struct rt6_info *rt;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
f6i = fib6_table_lookup(net, table, oif, fl6, strict);
if (f6i->fib6_nsiblings)
f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
if (f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
}
......@@ -1852,7 +1865,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(f6i->fib6_flags & RTF_GATEWAY))) {
......@@ -1878,9 +1890,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
dst_hold(&uncached_rt->dst);
}
trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
} else {
/* Get a percpu copy */
......@@ -1894,7 +1904,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
local_bh_enable();
rcu_read_unlock();
trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
return pcpu_rt;
}
}
......@@ -2425,7 +2435,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
*/
rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
......@@ -2479,7 +2489,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
rcu_read_unlock();
trace_fib6_table_lookup(net, ret, table, fl6);
trace_fib6_table_lookup(net, rt, table, fl6);
return ret;
};
......
......@@ -46,6 +46,7 @@ hostprogs-y += syscall_tp
hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
......@@ -100,6 +101,7 @@ syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o
xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
......@@ -154,6 +156,7 @@ always += syscall_tp_kern.o
always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
......@@ -201,6 +204,7 @@ HOSTLOADLIBES_syscall_tp += -lelf
HOSTLOADLIBES_cpustat += -lelf
HOSTLOADLIBES_xdp_adjust_tail += -lelf
HOSTLOADLIBES_xdpsock += -lelf -pthread
HOSTLOADLIBES_xdp_fwd += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "bpf_helpers.h"
#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
struct bpf_map_def SEC("maps") tx_port = {
.type = BPF_MAP_TYPE_DEVMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 64,
};
static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct bpf_fib_lookup fib_params;
struct ethhdr *eth = data;
int out_index;
u16 h_proto;
u64 nh_off;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
__builtin_memset(&fib_params, 0, sizeof(fib_params));
h_proto = eth->h_proto;
if (h_proto == htons(ETH_P_IP)) {
struct iphdr *iph = data + nh_off;
if (iph + 1 > data_end)
return XDP_DROP;
fib_params.family = AF_INET;
fib_params.tos = iph->tos;
fib_params.l4_protocol = iph->protocol;
fib_params.sport = 0;
fib_params.dport = 0;
fib_params.tot_len = ntohs(iph->tot_len);
fib_params.ipv4_src = iph->saddr;
fib_params.ipv4_dst = iph->daddr;
} else if (h_proto == htons(ETH_P_IPV6)) {
struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
struct ipv6hdr *iph = data + nh_off;
if (iph + 1 > data_end)
return XDP_DROP;
fib_params.family = AF_INET6;
fib_params.flowlabel = *(__be32 *)iph & IPV6_FLOWINFO_MASK;
fib_params.l4_protocol = iph->nexthdr;
fib_params.sport = 0;
fib_params.dport = 0;
fib_params.tot_len = ntohs(iph->payload_len);
*src = iph->saddr;
*dst = iph->daddr;
} else {
return XDP_PASS;
}
fib_params.ifindex = ctx->ingress_ifindex;
out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
/* verify egress index has xdp support
* TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
* cannot pass map_type 14 into func bpf_map_lookup_elem#1:
* NOTE: without verification that egress index supports XDP
* forwarding packets are dropped.
*/
if (out_index > 0) {
memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
return bpf_redirect_map(&tx_port, out_index, 0);
}
return XDP_PASS;
}
SEC("xdp_fwd")
int xdp_fwd_prog(struct xdp_md *ctx)
{
return xdp_fwd_flags(ctx, 0);
}
SEC("xdp_fwd_direct")
int xdp_fwd_direct_prog(struct xdp_md *ctx)
{
return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <linux/limits.h>
#include <net/if.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <libgen.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "libbpf.h"
static int do_attach(int idx, int fd, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, fd, 0);
if (err < 0)
printf("ERROR: failed to attach program to %s\n", name);
return err;
}
static int do_detach(int idx, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, -1, 0);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
return err;
}
static void usage(const char *prog)
{
fprintf(stderr,
"usage: %s [OPTS] interface-list\n"
"\nOPTS:\n"
" -d detach program\n"
" -D direct table lookups (skip fib rules)\n",
prog);
}
int main(int argc, char **argv)
{
char filename[PATH_MAX];
int opt, i, idx, err;
int prog_id = 0;
int attach = 1;
int ret = 0;
while ((opt = getopt(argc, argv, ":dD")) != -1) {
switch (opt) {
case 'd':
attach = 0;
break;
case 'D':
prog_id = 1;
break;
default:
usage(basename(argv[0]));
return 1;
}
}
if (optind == argc) {
usage(basename(argv[0]));
return 1;
}
if (attach) {
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (access(filename, O_RDONLY) < 0) {
printf("error accessing file %s: %s\n",
filename, strerror(errno));
return 1;
}
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
if (!prog_fd[prog_id]) {
printf("load_bpf_file: %s\n", strerror(errno));
return 1;
}
}
if (attach) {
for (i = 1; i < 64; ++i)
bpf_map_update_elem(map_fd[0], &i, &i, 0);
}
for (i = optind; i < argc; ++i) {
idx = if_nametoindex(argv[i]);
if (!idx)
idx = strtoul(argv[i], NULL, 0);
if (!idx) {
fprintf(stderr, "Invalid arg\n");
return 1;
}
if (!attach) {
err = do_detach(idx, argv[i]);
if (err)
ret = err;
} else {
err = do_attach(idx, prog_fd[prog_id], argv[i]);
if (err)
ret = err;
}
}
return ret;
}
......@@ -103,6 +103,9 @@ static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
(void *) BPF_FUNC_skb_get_xfrm_state;
static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
(void *) BPF_FUNC_get_stack;
static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
int plen, __u32 flags) =
(void *) BPF_FUNC_fib_lookup;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment