Commit a29ae44c authored by David S. Miller

Merge branch 'tun-allow-to-attach-eBPF-filter'

Jason Wang says:

====================
tun: allow to attach eBPF filter

This series tries to implement an eBPF socket filter for tun. This could
be used to implement an efficient virtio-net receive filter for
vhost-net.

Changes from V2:
- fix typo
- remove unnecessary double check
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ca46abd6 aff3d70a
...@@ -196,7 +196,7 @@ struct tun_flow_entry { ...@@ -196,7 +196,7 @@ struct tun_flow_entry {
#define TUN_NUM_FLOW_ENTRIES 1024 #define TUN_NUM_FLOW_ENTRIES 1024
struct tun_steering_prog { struct tun_prog {
struct rcu_head rcu; struct rcu_head rcu;
struct bpf_prog *prog; struct bpf_prog *prog;
}; };
...@@ -238,7 +238,13 @@ struct tun_struct { ...@@ -238,7 +238,13 @@ struct tun_struct {
u32 rx_batched; u32 rx_batched;
struct tun_pcpu_stats __percpu *pcpu_stats; struct tun_pcpu_stats __percpu *pcpu_stats;
struct bpf_prog __rcu *xdp_prog; struct bpf_prog __rcu *xdp_prog;
struct tun_steering_prog __rcu *steering_prog; struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
};
struct veth {
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
}; };
bool tun_is_xdp_buff(void *ptr) bool tun_is_xdp_buff(void *ptr)
...@@ -590,7 +596,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) ...@@ -590,7 +596,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{ {
struct tun_steering_prog *prog; struct tun_prog *prog;
u16 ret = 0; u16 ret = 0;
prog = rcu_dereference(tun->steering_prog); prog = rcu_dereference(tun->steering_prog);
...@@ -1036,12 +1042,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) ...@@ -1036,12 +1042,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
#endif #endif
} }
/* Run the eBPF filter program attached to @tun, if any, over @skb.
 *
 * @tun: device whose filter_prog pointer is consulted
 * @skb: packet handed to the filter program
 * @len: value returned unchanged when no filter program is attached
 *
 * Returns the result of bpf_prog_run_clear_cb() when a filter program is
 * attached, otherwise @len.
 *
 * NOTE(review): filter_prog is read with rcu_dereference(), so this
 * presumably has to be called inside an RCU read-side critical section;
 * confirm against the callers.
 */
static unsigned int run_ebpf_filter(struct tun_struct *tun,
				    struct sk_buff *skb,
				    int len)
{
	struct tun_prog *filter = rcu_dereference(tun->filter_prog);

	/* No filter attached: hand the caller's length straight back. */
	if (!filter)
		return len;

	return bpf_prog_run_clear_cb(filter->prog, skb);
}
/* Net device start xmit */ /* Net device start xmit */
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct tun_struct *tun = netdev_priv(dev); struct tun_struct *tun = netdev_priv(dev);
int txq = skb->queue_mapping; int txq = skb->queue_mapping;
struct tun_file *tfile; struct tun_file *tfile;
int len = skb->len;
rcu_read_lock(); rcu_read_lock();
tfile = rcu_dereference(tun->tfiles[txq]); tfile = rcu_dereference(tun->tfiles[txq]);
...@@ -1067,6 +1086,15 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1067,6 +1086,15 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
sk_filter(tfile->socket.sk, skb)) sk_filter(tfile->socket.sk, skb))
goto drop; goto drop;
len = run_ebpf_filter(tun, skb, len);
/* Trim extra bytes since we may insert vlan proto & TCI
* in tun_put_user().
*/
len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
if (len <= 0 || pskb_trim(skb, len))
goto drop;
if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop; goto drop;
...@@ -2054,10 +2082,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, ...@@ -2054,10 +2082,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vlan_hlen) { if (vlan_hlen) {
int ret; int ret;
struct { struct veth veth;
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
} veth;
veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_proto = skb->vlan_proto;
veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
...@@ -2184,19 +2209,18 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -2184,19 +2209,18 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret; return ret;
} }
static void tun_steering_prog_free(struct rcu_head *rcu) static void tun_prog_free(struct rcu_head *rcu)
{ {
struct tun_steering_prog *prog = container_of(rcu, struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
struct tun_steering_prog, rcu);
bpf_prog_destroy(prog->prog); bpf_prog_destroy(prog->prog);
kfree(prog); kfree(prog);
} }
static int __tun_set_steering_ebpf(struct tun_struct *tun, static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
struct bpf_prog *prog) struct bpf_prog *prog)
{ {
struct tun_steering_prog *old, *new = NULL; struct tun_prog *old, *new = NULL;
if (prog) { if (prog) {
new = kmalloc(sizeof(*new), GFP_KERNEL); new = kmalloc(sizeof(*new), GFP_KERNEL);
...@@ -2206,13 +2230,13 @@ static int __tun_set_steering_ebpf(struct tun_struct *tun, ...@@ -2206,13 +2230,13 @@ static int __tun_set_steering_ebpf(struct tun_struct *tun,
} }
spin_lock_bh(&tun->lock); spin_lock_bh(&tun->lock);
old = rcu_dereference_protected(tun->steering_prog, old = rcu_dereference_protected(*prog_p,
lockdep_is_held(&tun->lock)); lockdep_is_held(&tun->lock));
rcu_assign_pointer(tun->steering_prog, new); rcu_assign_pointer(*prog_p, new);
spin_unlock_bh(&tun->lock); spin_unlock_bh(&tun->lock);
if (old) if (old)
call_rcu(&old->rcu, tun_steering_prog_free); call_rcu(&old->rcu, tun_prog_free);
return 0; return 0;
} }
...@@ -2225,7 +2249,8 @@ static void tun_free_netdev(struct net_device *dev) ...@@ -2225,7 +2249,8 @@ static void tun_free_netdev(struct net_device *dev)
free_percpu(tun->pcpu_stats); free_percpu(tun->pcpu_stats);
tun_flow_uninit(tun); tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security); security_tun_dev_free_security(tun->security);
__tun_set_steering_ebpf(tun, NULL); __tun_set_ebpf(tun, &tun->steering_prog, NULL);
__tun_set_ebpf(tun, &tun->filter_prog, NULL);
} }
static void tun_setup(struct net_device *dev) static void tun_setup(struct net_device *dev)
...@@ -2720,7 +2745,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ...@@ -2720,7 +2745,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
return ret; return ret;
} }
static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data) static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
void __user *data)
{ {
struct bpf_prog *prog; struct bpf_prog *prog;
int fd; int fd;
...@@ -2736,7 +2762,7 @@ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data) ...@@ -2736,7 +2762,7 @@ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
return PTR_ERR(prog); return PTR_ERR(prog);
} }
return __tun_set_steering_ebpf(tun, prog); return __tun_set_ebpf(tun, prog_p, prog);
} }
static long __tun_chr_ioctl(struct file *file, unsigned int cmd, static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
...@@ -3016,7 +3042,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ...@@ -3016,7 +3042,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break; break;
case TUNSETSTEERINGEBPF: case TUNSETSTEERINGEBPF:
ret = tun_set_steering_ebpf(tun, argp); ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
break;
case TUNSETFILTEREBPF:
ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
break; break;
default: default:
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#define TUNSETVNETBE _IOW('T', 222, int) #define TUNSETVNETBE _IOW('T', 222, int)
#define TUNGETVNETBE _IOR('T', 223, int) #define TUNGETVNETBE _IOR('T', 223, int)
#define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int)
#define TUNSETFILTEREBPF _IOR('T', 225, int)
/* TUNSETIFF ifr flags */ /* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001 #define IFF_TUN 0x0001
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment