Commit dfcb7a14 authored by David S. Miller's avatar David S. Miller

Merge branch 'ipvtap'

Sainath Grandhi says:

====================
Refactor macvtap to re-use tap functionality by other virtual intefaces

Tap character devices can be implemented on other virtual interfaces like
ipvlan, similar to macvtap. Source code for tap functionality in macvtap
can be re-used for this purpose.

This patch series splits macvtap source into two modules, macvtap and tap.
This patch series also includes a patch for implementing tap character
device driver based on the IP-VLAN network interface, called ipvtap.

These patches are tested on x86 platform.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 35eeacf1 235a9d89
......@@ -135,6 +135,7 @@ config MACVTAP
tristate "MAC-VLAN based tap driver"
depends on MACVLAN
depends on INET
select TAP
help
This adds a specialized tap character device driver that is based
on the MAC-VLAN network interface, called macvtap. A macvtap device
......@@ -165,6 +166,19 @@ config IPVLAN
To compile this driver as a module, choose M here: the module
will be called ipvlan.
config IPVTAP
tristate "IP-VLAN based tap driver"
depends on IPVLAN
depends on INET
select TAP
---help---
This adds a specialized tap character device driver that is based
on the IP-VLAN network interface, called ipvtap. An ipvtap device
can be added in the same way as a ipvlan device, using 'type
ipvtap', and then be accessed through the tap user space interface.
To compile this driver as a module, choose M here: the module
will be called ipvtap.
config VXLAN
tristate "Virtual eXtensible Local Area Network (VXLAN)"
......@@ -287,6 +301,12 @@ config TUN
If you don't know what to use this for, you don't need it.
config TAP
tristate
---help---
This option is selected by any driver implementing tap user space
interface for a virtual interface to re-use core tap functionality.
config TUN_VNET_CROSS_LE
bool "Support for cross-endian vnet headers on little-endian kernels"
default n
......
......@@ -7,6 +7,7 @@
#
obj-$(CONFIG_BONDING) += bonding/
obj-$(CONFIG_IPVLAN) += ipvlan/
obj-$(CONFIG_IPVTAP) += ipvlan/
obj-$(CONFIG_DUMMY) += dummy.o
obj-$(CONFIG_EQUALIZER) += eql.o
obj-$(CONFIG_IFB) += ifb.o
......@@ -21,6 +22,7 @@ obj-$(CONFIG_PHYLIB) += phy/
obj-$(CONFIG_RIONET) += rionet.o
obj-$(CONFIG_NET_TEAM) += team/
obj-$(CONFIG_TUN) += tun.o
obj-$(CONFIG_TAP) += tap.o
obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
obj-$(CONFIG_VXLAN) += vxlan.o
......
......@@ -3,5 +3,6 @@
#
obj-$(CONFIG_IPVLAN) += ipvlan.o
obj-$(CONFIG_IPVTAP) += ipvtap.o
ipvlan-objs := ipvlan_core.o ipvlan_main.o
......@@ -135,4 +135,11 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
u16 proto);
unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
unsigned int len, bool success, bool mcast);
int ipvlan_link_new(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[]);
void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
void ipvlan_link_setup(struct net_device *dev);
int ipvlan_link_register(struct rtnl_link_ops *ops);
#endif /* __IPVLAN_H */
......@@ -16,7 +16,7 @@ void ipvlan_init_secret(void)
net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}
static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
unsigned int len, bool success, bool mcast)
{
if (likely(success)) {
......@@ -33,6 +33,7 @@ static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
......
......@@ -496,7 +496,7 @@ static int ipvlan_nl_fillinfo(struct sk_buff *skb,
return ret;
}
static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
int ipvlan_link_new(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
......@@ -594,8 +594,9 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan_port_destroy(phy_dev);
return err;
}
EXPORT_SYMBOL_GPL(ipvlan_link_new);
static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
......@@ -611,8 +612,9 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
unregister_netdevice_queue(dev, head);
netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
}
EXPORT_SYMBOL_GPL(ipvlan_link_delete);
static void ipvlan_link_setup(struct net_device *dev)
void ipvlan_link_setup(struct net_device *dev)
{
ether_setup(dev);
......@@ -623,6 +625,7 @@ static void ipvlan_link_setup(struct net_device *dev)
dev->header_ops = &ipvlan_header_ops;
dev->ethtool_ops = &ipvlan_ethtool_ops;
}
EXPORT_SYMBOL_GPL(ipvlan_link_setup);
static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
{
......@@ -633,22 +636,22 @@ static struct rtnl_link_ops ipvlan_link_ops = {
.kind = "ipvlan",
.priv_size = sizeof(struct ipvl_dev),
.get_size = ipvlan_nl_getsize,
.policy = ipvlan_nl_policy,
.validate = ipvlan_nl_validate,
.fill_info = ipvlan_nl_fillinfo,
.changelink = ipvlan_nl_changelink,
.maxtype = IFLA_IPVLAN_MAX,
.setup = ipvlan_link_setup,
.newlink = ipvlan_link_new,
.dellink = ipvlan_link_delete,
};
static int ipvlan_link_register(struct rtnl_link_ops *ops)
int ipvlan_link_register(struct rtnl_link_ops *ops)
{
ops->get_size = ipvlan_nl_getsize;
ops->policy = ipvlan_nl_policy;
ops->validate = ipvlan_nl_validate;
ops->fill_info = ipvlan_nl_fillinfo;
ops->changelink = ipvlan_nl_changelink;
ops->maxtype = IFLA_IPVLAN_MAX;
return rtnl_link_register(ops);
}
EXPORT_SYMBOL_GPL(ipvlan_link_register);
static int ipvlan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
......
#include <linux/etherdevice.h>
#include "ipvlan.h"
#include <linux/if_vlan.h>
#include <linux/if_tap.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/fs.h>
#include <linux/uio.h>
#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
NETIF_F_TSO6 | NETIF_F_UFO)
static dev_t ipvtap_major;
static struct cdev ipvtap_cdev;
static const void *ipvtap_net_namespace(struct device *d)
{
struct net_device *dev = to_net_dev(d->parent);
return dev_net(dev);
}
static struct class ipvtap_class = {
.name = "ipvtap",
.owner = THIS_MODULE,
.ns_type = &net_ns_type_operations,
.namespace = ipvtap_net_namespace,
};
struct ipvtap_dev {
struct ipvl_dev vlan;
struct tap_dev tap;
};
static void ipvtap_count_tx_dropped(struct tap_dev *tap)
{
struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
struct ipvl_dev *vlan = &vlantap->vlan;
this_cpu_inc(vlan->pcpu_stats->tx_drps);
}
static void ipvtap_count_rx_dropped(struct tap_dev *tap)
{
struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
struct ipvl_dev *vlan = &vlantap->vlan;
ipvlan_count_rx(vlan, 0, 0, 0);
}
static void ipvtap_update_features(struct tap_dev *tap,
netdev_features_t features)
{
struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap);
struct ipvl_dev *vlan = &vlantap->vlan;
vlan->sfeatures = features;
netdev_update_features(vlan->dev);
}
static int ipvtap_newlink(struct net *src_net,
struct net_device *dev,
struct nlattr *tb[],
struct nlattr *data[])
{
struct ipvtap_dev *vlantap = netdev_priv(dev);
int err;
INIT_LIST_HEAD(&vlantap->tap.queue_list);
/* Since macvlan supports all offloads by default, make
* tap support all offloads also.
*/
vlantap->tap.tap_features = TUN_OFFLOADS;
vlantap->tap.count_tx_dropped = ipvtap_count_tx_dropped;
vlantap->tap.update_features = ipvtap_update_features;
vlantap->tap.count_rx_dropped = ipvtap_count_rx_dropped;
err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap);
if (err)
return err;
/* Don't put anything that may fail after macvlan_common_newlink
* because we can't undo what it does.
*/
err = ipvlan_link_new(src_net, dev, tb, data);
if (err) {
netdev_rx_handler_unregister(dev);
return err;
}
vlantap->tap.dev = vlantap->vlan.dev;
return err;
}
static void ipvtap_dellink(struct net_device *dev,
struct list_head *head)
{
struct ipvtap_dev *vlan = netdev_priv(dev);
netdev_rx_handler_unregister(dev);
tap_del_queues(&vlan->tap);
ipvlan_link_delete(dev, head);
}
static void ipvtap_setup(struct net_device *dev)
{
ipvlan_link_setup(dev);
dev->tx_queue_len = TUN_READQ_SIZE;
dev->priv_flags &= ~IFF_NO_QUEUE;
}
static struct rtnl_link_ops ipvtap_link_ops __read_mostly = {
.kind = "ipvtap",
.setup = ipvtap_setup,
.newlink = ipvtap_newlink,
.dellink = ipvtap_dellink,
.priv_size = sizeof(struct ipvtap_dev),
};
static int ipvtap_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ipvtap_dev *vlantap;
struct device *classdev;
dev_t devt;
int err;
char tap_name[IFNAMSIZ];
if (dev->rtnl_link_ops != &ipvtap_link_ops)
return NOTIFY_DONE;
snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex);
vlantap = netdev_priv(dev);
switch (event) {
case NETDEV_REGISTER:
/* Create the device node here after the network device has
* been registered but before register_netdevice has
* finished running.
*/
err = tap_get_minor(ipvtap_major, &vlantap->tap);
if (err)
return notifier_from_errno(err);
devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
classdev = device_create(&ipvtap_class, &dev->dev, devt,
dev, tap_name);
if (IS_ERR(classdev)) {
tap_free_minor(ipvtap_major, &vlantap->tap);
return notifier_from_errno(PTR_ERR(classdev));
}
err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj,
tap_name);
if (err)
return notifier_from_errno(err);
break;
case NETDEV_UNREGISTER:
/* vlan->minor == 0 if NETDEV_REGISTER above failed */
if (vlantap->tap.minor == 0)
break;
sysfs_remove_link(&dev->dev.kobj, tap_name);
devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor);
device_destroy(&ipvtap_class, devt);
tap_free_minor(ipvtap_major, &vlantap->tap);
break;
case NETDEV_CHANGE_TX_QUEUE_LEN:
if (tap_queue_resize(&vlantap->tap))
return NOTIFY_BAD;
break;
}
return NOTIFY_DONE;
}
static struct notifier_block ipvtap_notifier_block __read_mostly = {
.notifier_call = ipvtap_device_event,
};
static int ipvtap_init(void)
{
int err;
err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap");
if (err)
goto out1;
err = class_register(&ipvtap_class);
if (err)
goto out2;
err = register_netdevice_notifier(&ipvtap_notifier_block);
if (err)
goto out3;
err = ipvlan_link_register(&ipvtap_link_ops);
if (err)
goto out4;
return 0;
out4:
unregister_netdevice_notifier(&ipvtap_notifier_block);
out3:
class_unregister(&ipvtap_class);
out2:
tap_destroy_cdev(ipvtap_major, &ipvtap_cdev);
out1:
return err;
}
module_init(ipvtap_init);
static void ipvtap_exit(void)
{
rtnl_link_unregister(&ipvtap_link_ops);
unregister_netdevice_notifier(&ipvtap_notifier_block);
class_unregister(&ipvtap_class);
tap_destroy_cdev(ipvtap_major, &ipvtap_cdev);
}
module_exit(ipvtap_exit);
MODULE_ALIAS_RTNL_LINK("ipvtap");
MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>");
MODULE_LICENSE("GPL");
......@@ -1525,7 +1525,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
int macvlan_link_register(struct rtnl_link_ops *ops)
{
/* common fields */
ops->priv_size = sizeof(struct macvlan_dev);
ops->validate = macvlan_validate;
ops->maxtype = IFLA_MACVLAN_MAX;
ops->policy = macvlan_policy;
......@@ -1548,6 +1547,7 @@ static struct rtnl_link_ops macvlan_link_ops = {
.newlink = macvlan_newlink,
.dellink = macvlan_dellink,
.get_link_net = macvlan_get_link_net,
.priv_size = sizeof(struct macvlan_dev),
};
static int macvlan_device_event(struct notifier_block *unused,
......
This diff is collapsed.
This diff is collapsed.
config VHOST_NET
tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP)
select VHOST
---help---
This kernel module can be loaded in host kernel to accelerate
......
......@@ -24,6 +24,7 @@
#include <linux/if_arp.h>
#include <linux/if_tun.h>
#include <linux/if_macvlan.h>
#include <linux/if_tap.h>
#include <linux/if_vlan.h>
#include <net/sock.h>
......@@ -960,7 +961,7 @@ static struct socket *get_tap_socket(int fd)
sock = tun_get_socket(file);
if (!IS_ERR(sock))
return sock;
sock = macvtap_get_socket(file);
sock = tap_get_socket(file);
if (IS_ERR(sock))
fput(file);
return sock;
......
......@@ -9,19 +9,6 @@
#include <net/netlink.h>
#include <linux/u64_stats_sync.h>
#if IS_ENABLED(CONFIG_MACVTAP)
struct socket *macvtap_get_socket(struct file *);
#else
#include <linux/err.h>
#include <linux/errno.h>
struct file;
struct socket;
static inline struct socket *macvtap_get_socket(struct file *f)
{
return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_MACVTAP */
struct macvlan_port;
struct macvtap_queue;
......@@ -29,7 +16,7 @@ struct macvtap_queue;
* Maximum times a macvtap device can be opened. This can be used to
* configure the number of receive queue, e.g. for multiqueue virtio.
*/
#define MAX_MACVTAP_QUEUES 256
#define MAX_TAP_QUEUES 256
#define MACVLAN_MC_FILTER_BITS 8
#define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS)
......@@ -49,7 +36,7 @@ struct macvlan_dev {
enum macvlan_mode mode;
u16 flags;
/* This array tracks active taps. */
struct macvtap_queue __rcu *taps[MAX_MACVTAP_QUEUES];
struct tap_queue __rcu *taps[MAX_TAP_QUEUES];
/* This list tracks all taps (both enabled and disabled) */
struct list_head queue_list;
int numvtaps;
......
#ifndef _LINUX_IF_TAP_H_
#define _LINUX_IF_TAP_H_
#if IS_ENABLED(CONFIG_TAP)
struct socket *tap_get_socket(struct file *);
#else
#include <linux/err.h>
#include <linux/errno.h>
struct file;
struct socket;
static inline struct socket *tap_get_socket(struct file *f)
{
return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_TAP */
#include <net/sock.h>
#include <linux/skb_array.h>
#define MAX_TAP_QUEUES 256
struct tap_queue;
struct tap_dev {
struct net_device *dev;
u16 flags;
/* This array tracks active taps. */
struct tap_queue __rcu *taps[MAX_TAP_QUEUES];
/* This list tracks all taps (both enabled and disabled) */
struct list_head queue_list;
int numvtaps;
int numqueues;
netdev_features_t tap_features;
int minor;
void (*update_features)(struct tap_dev *tap, netdev_features_t features);
void (*count_tx_dropped)(struct tap_dev *tap);
void (*count_rx_dropped)(struct tap_dev *tap);
};
/*
* A tap queue is the central object of tap module, it connects
* an open character device to virtual interface. There can be
* multiple queues on one interface, which map back to queues
* implemented in hardware on the underlying device.
*
* tap_proto is used to allocate queues through the sock allocation
* mechanism.
*
*/
struct tap_queue {
struct sock sk;
struct socket sock;
struct socket_wq wq;
int vnet_hdr_sz;
struct tap_dev __rcu *tap;
struct file *file;
unsigned int flags;
u16 queue_index;
bool enabled;
struct list_head next;
struct skb_array skb_array;
};
rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
void tap_del_queues(struct tap_dev *tap);
int tap_get_minor(dev_t major, struct tap_dev *tap);
void tap_free_minor(dev_t major, struct tap_dev *tap);
int tap_queue_resize(struct tap_dev *tap);
int tap_create_cdev(struct cdev *tap_cdev,
dev_t *tap_major, const char *device_name);
void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
#endif /*_LINUX_IF_TAP_H_*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment