Commit 27b29f63, authored by Alexei Starovoitov, committed by David S. Miller

bpf: add bpf_redirect() helper

Existing bpf_clone_redirect() helper clones skb before redirecting
it to RX or TX of destination netdev.
Introduce bpf_redirect() helper that does that without cloning.

Benchmarked with two hosts using 10G ixgbe NICs.
One host is doing line rate pktgen.
Another host is configured as:
$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop
so it receives the packet on $dev and immediately xmits it on $dev + 1
The section 'clone_redirect_xmit' in tcbpf1_kern.o file has the program
that does bpf_clone_redirect() and performance is 2.0 Mpps

$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section redirect_xmit drop
which is using bpf_redirect() - 2.4 Mpps

and using cls_bpf with integrated actions as:
$ tc filter add dev $dev root pref 10 \
  bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1
performance is 2.5 Mpps

To summarize:
u32+act_bpf using clone_redirect - 2.0 Mpps
u32+act_bpf using redirect - 2.4 Mpps
cls_bpf using redirect - 2.5 Mpps

For comparison linux bridge in this setup is doing 2.1 Mpps
and ixgbe rx + drop in ip_rcv - 7.8 Mpps
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 045efa82
...@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb, ...@@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab); const struct qdisc_size_table *stab);
bool tcf_destroy(struct tcf_proto *tp, bool force); bool tcf_destroy(struct tcf_proto *tp, bool force);
void tcf_destroy_chain(struct tcf_proto __rcu **fl); void tcf_destroy_chain(struct tcf_proto __rcu **fl);
int skb_do_redirect(struct sk_buff *);
/* Reset all TX qdiscs greater then index of a device. */ /* Reset all TX qdiscs greater then index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
......
...@@ -272,6 +272,14 @@ enum bpf_func_id { ...@@ -272,6 +272,14 @@ enum bpf_func_id {
BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_get_tunnel_key,
BPF_FUNC_skb_set_tunnel_key, BPF_FUNC_skb_set_tunnel_key,
BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */
/**
* bpf_redirect(ifindex, flags) - redirect to another netdev
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: TC_ACT_REDIRECT
*/
BPF_FUNC_redirect,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
......
...@@ -87,6 +87,7 @@ enum { ...@@ -87,6 +87,7 @@ enum {
#define TC_ACT_STOLEN 4 #define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5 #define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6 #define TC_ACT_REPEAT 6
#define TC_ACT_REDIRECT 7
#define TC_ACT_JUMP 0x10000000 #define TC_ACT_JUMP 0x10000000
/* Action type identifiers*/ /* Action type identifiers*/
......
...@@ -3670,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, ...@@ -3670,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
case TC_ACT_QUEUED: case TC_ACT_QUEUED:
kfree_skb(skb); kfree_skb(skb);
return NULL; return NULL;
case TC_ACT_REDIRECT:
/* skb_mac_header check was done by cls/act_bpf, so
* we can safely push the L2 header back before
* redirecting to another netdev
*/
__skb_push(skb, skb->mac_len);
skb_do_redirect(skb);
return NULL;
default: default:
break; break;
} }
......
...@@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { ...@@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
/*
 * Pending redirect request. bpf_redirect() fills it in and
 * skb_do_redirect() reads it back when the redirect is carried out.
 */
struct redirect_info {
/* ifindex of the target device; skb_do_redirect() resets it to 0 after the lookup */
u32 ifindex;
/* redirect flags; skb_do_redirect() tests them with BPF_IS_REDIRECT_INGRESS() */
u32 flags;
};
/* One instance per CPU; both users access it through this_cpu_ptr(). */
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
/*
 * bpf_redirect - record a redirect request for the current packet
 * @ifindex: ifindex of the destination net device
 * @flags: redirect flags, later tested with BPF_IS_REDIRECT_INGRESS()
 * @r3: unused
 * @r4: unused
 * @r5: unused
 *
 * Only stores the request in this CPU's redirect_info; the packet itself is
 * not touched here. skb_do_redirect() consumes the stored values.
 *
 * Both arguments arrive as u64 and are stored into u32 fields, so the upper
 * 32 bits are dropped. No validation of @flags is performed.
 *
 * Return: TC_ACT_REDIRECT, always.
 */
static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
{
	struct redirect_info *pending = this_cpu_ptr(&redirect_info);

	pending->flags = flags;
	pending->ifindex = ifindex;

	return TC_ACT_REDIRECT;
}
/*
 * skb_do_redirect - carry out the redirect recorded by bpf_redirect()
 * @skb: packet to redirect
 *
 * Resolves the ifindex stored in this CPU's redirect_info within the network
 * namespace of skb->dev, then clears the stored ifindex. If no device
 * matches, the skb is freed. Otherwise the skb is passed to
 * dev_forward_skb() when BPF_IS_REDIRECT_INGRESS() is true for the stored
 * flags, or has skb->dev switched to the target device and is passed to
 * dev_queue_xmit().
 *
 * NOTE(review): the lookup uses dev_get_by_index_rcu(), so the caller is
 * presumably expected to be inside an RCU read-side section - confirm.
 *
 * Return: -EINVAL if the target device was not found, otherwise the return
 * value of dev_forward_skb() or dev_queue_xmit().
 */
int skb_do_redirect(struct sk_buff *skb)
{
	struct redirect_info *info = this_cpu_ptr(&redirect_info);
	struct net_device *target;

	target = dev_get_by_index_rcu(dev_net(skb->dev), info->ifindex);
	/* The request is consumed whether or not the lookup succeeded. */
	info->ifindex = 0;

	if (unlikely(!target)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (!BPF_IS_REDIRECT_INGRESS(info->flags)) {
		/* Egress: retarget the skb and queue it for transmit. */
		skb->dev = target;
		return dev_queue_xmit(skb);
	}

	/* Ingress: hand the skb to the target device via dev_forward_skb(). */
	return dev_forward_skb(target, skb);
}
/*
 * Prototype descriptor for the bpf_redirect() helper: two arguments, both
 * declared ARG_ANYTHING, and an integer return value.
 * tc_cls_act_func_proto() returns this descriptor for BPF_FUNC_redirect.
 */
const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
/* arg1: ifindex */
.arg1_type = ARG_ANYTHING,
/* arg2: flags */
.arg2_type = ARG_ANYTHING,
};
static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{ {
return task_get_classid((struct sk_buff *) (unsigned long) r1); return task_get_classid((struct sk_buff *) (unsigned long) r1);
...@@ -1607,6 +1649,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) ...@@ -1607,6 +1649,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_get_tunnel_key_proto; return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key: case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto(); return bpf_get_skb_set_tunnel_key_proto();
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
default: default:
return sk_filter_func_proto(func_id); return sk_filter_func_proto(func_id);
} }
......
...@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, ...@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
case TC_ACT_PIPE: case TC_ACT_PIPE:
case TC_ACT_RECLASSIFY: case TC_ACT_RECLASSIFY:
case TC_ACT_OK: case TC_ACT_OK:
case TC_ACT_REDIRECT:
action = filter_res; action = filter_res;
break; break;
case TC_ACT_SHOT: case TC_ACT_SHOT:
......
...@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code) ...@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code)
case TC_ACT_PIPE: case TC_ACT_PIPE:
case TC_ACT_STOLEN: case TC_ACT_STOLEN:
case TC_ACT_QUEUED: case TC_ACT_QUEUED:
case TC_ACT_REDIRECT:
case TC_ACT_UNSPEC: case TC_ACT_UNSPEC:
return code; return code;
default: default:
......
...@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) = ...@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
(void *) BPF_FUNC_get_current_comm; (void *) BPF_FUNC_get_current_comm;
static int (*bpf_perf_event_read)(void *map, int index) = static int (*bpf_perf_event_read)(void *map, int index) =
(void *) BPF_FUNC_perf_event_read; (void *) BPF_FUNC_perf_event_read;
static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
(void *) BPF_FUNC_clone_redirect;
static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <uapi/linux/in.h> #include <uapi/linux/in.h>
#include <uapi/linux/tcp.h> #include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h> #include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h" #include "bpf_helpers.h"
/* compiler workaround */ /* compiler workaround */
...@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb) ...@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
return 0; return 0;
} }
/*
 * Redirect the packet, without cloning, to the device whose ifindex is one
 * above the current one. flags == 0 leaves bit 0 clear, i.e. egress.
 */
SEC("redirect_xmit")
int _redirect_xmit(struct __sk_buff *skb)
{
	int next_ifindex = skb->ifindex + 1;

	return bpf_redirect(next_ifindex, 0);
}
/*
 * Redirect the packet, without cloning, to the device whose ifindex is one
 * above the current one. flags == 1 sets bit 0, i.e. ingress.
 */
SEC("redirect_recv")
int _redirect_recv(struct __sk_buff *skb)
{
	int next_ifindex = skb->ifindex + 1;

	return bpf_redirect(next_ifindex, 1);
}
/*
 * Call bpf_clone_redirect() towards the device whose ifindex is one above
 * the current one (flags == 0), then return TC_ACT_SHOT for the original
 * packet. The helper's return value is ignored.
 */
SEC("clone_redirect_xmit")
int _clone_redirect_xmit(struct __sk_buff *skb)
{
	int next_ifindex = skb->ifindex + 1;

	bpf_clone_redirect(skb, next_ifindex, 0);

	return TC_ACT_SHOT;
}
/*
 * Call bpf_clone_redirect() towards the device whose ifindex is one above
 * the current one (flags == 1), then return TC_ACT_SHOT for the original
 * packet. The helper's return value is ignored.
 */
SEC("clone_redirect_recv")
int _clone_redirect_recv(struct __sk_buff *skb)
{
	int next_ifindex = skb->ifindex + 1;

	bpf_clone_redirect(skb, next_ifindex, 1);

	return TC_ACT_SHOT;
}
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment