Commit 93c60b59 authored by David S. Miller's avatar David S. Miller

Merge branch 'tun-vnet-uso'

Andrew Melnychenko says:

====================
TUN/VirtioNet USO features support.

Added new offloads for TUN devices TUN_F_USO4 and TUN_F_USO6.
Technically they enable NETIF_F_GSO_UDP_L4
(and only if USO4 & USO6 are set simultaneously).
It allows the transmission of large UDP packets.

UDP Segmentation Offload (USO/GSO_UDP_L4) - ability to split UDP packets
into several segments. It's similar to UFO, except it doesn't use IP
fragmentation. The drivers may push big packets and the NIC will split
them(or assemble them in case of receive), but in the case of VirtioNet
we just pass big UDP to the host. So we are freeing the driver from doing
the unnecessary job of splitting. The same thing for several guests
on one host, we can pass big packets between guests.

Different features USO4 and USO6 are required for qemu where Windows
guests can enable disable USO receives for IPv4 and IPv6 separately.
On the other side, Linux can't really differentiate USO4 and USO6, for now.
For now, to enable USO for TUN it requires enabling USO4 and USO6 together.
In the future, there would be a mechanism to control UDP_L4 GSO separately.

New types for virtio-net already in virtio-net specification:
https://github.com/oasis-tcs/virtio-spec/issues/120

Test it WIP Qemu https://github.com/daynix/qemu/tree/USOv3

Changes since v4 & RFC:
 * Fixed typo and refactored.
 * Tun USO offload refactored.
 * Add support for guest-to-guest segmentation offload (thx Jason).
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents dd8b3a80 418044e1
...@@ -957,6 +957,10 @@ static int set_offload(struct tap_queue *q, unsigned long arg) ...@@ -957,6 +957,10 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
if (arg & TUN_F_TSO6) if (arg & TUN_F_TSO6)
feature_mask |= NETIF_F_TSO6; feature_mask |= NETIF_F_TSO6;
} }
/* TODO: for now USO4 and USO6 should work simultaneously */
if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6))
features |= NETIF_F_GSO_UDP_L4;
} }
/* tun/tap driver inverts the usage for TSO offloads, where /* tun/tap driver inverts the usage for TSO offloads, where
...@@ -967,7 +971,8 @@ static int set_offload(struct tap_queue *q, unsigned long arg) ...@@ -967,7 +971,8 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
* When user space turns off TSO, we turn off GSO/LRO so that * When user space turns off TSO, we turn off GSO/LRO so that
* user-space will not receive TSO frames. * user-space will not receive TSO frames.
*/ */
if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) ||
(feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6))
features |= RX_OFFLOADS; features |= RX_OFFLOADS;
else else
features &= ~RX_OFFLOADS; features &= ~RX_OFFLOADS;
...@@ -1091,7 +1096,8 @@ static long tap_ioctl(struct file *file, unsigned int cmd, ...@@ -1091,7 +1096,8 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
case TUNSETOFFLOAD: case TUNSETOFFLOAD:
/* let the user check for future flags */ /* let the user check for future flags */
if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
TUN_F_TSO_ECN | TUN_F_UFO)) TUN_F_TSO_ECN | TUN_F_UFO |
TUN_F_USO4 | TUN_F_USO6))
return -EINVAL; return -EINVAL;
rtnl_lock(); rtnl_lock();
......
...@@ -185,7 +185,7 @@ struct tun_struct { ...@@ -185,7 +185,7 @@ struct tun_struct {
struct net_device *dev; struct net_device *dev;
netdev_features_t set_features; netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
NETIF_F_TSO6) NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)
int align; int align;
int vnet_hdr_sz; int vnet_hdr_sz;
...@@ -2878,6 +2878,12 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) ...@@ -2878,6 +2878,12 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
} }
arg &= ~TUN_F_UFO; arg &= ~TUN_F_UFO;
/* TODO: for now USO4 and USO6 should work simultaneously */
if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
features |= NETIF_F_GSO_UDP_L4;
arg &= ~(TUN_F_USO4 | TUN_F_USO6);
}
} }
/* This gives the user a way to test for new features in future by /* This gives the user a way to test for new features in future by
......
...@@ -60,13 +60,17 @@ static const unsigned long guest_offloads[] = { ...@@ -60,13 +60,17 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_ECN,
VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM VIRTIO_NET_F_GUEST_CSUM,
VIRTIO_NET_F_GUEST_USO4,
VIRTIO_NET_F_GUEST_USO6
}; };
#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_NET_F_GUEST_UFO)) (1ULL << VIRTIO_NET_F_GUEST_UFO) | \
(1ULL << VIRTIO_NET_F_GUEST_USO4) | \
(1ULL << VIRTIO_NET_F_GUEST_USO6))
struct virtnet_stat_desc { struct virtnet_stat_desc {
char desc[ETH_GSTRING_LEN]; char desc[ETH_GSTRING_LEN];
...@@ -3085,7 +3089,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, ...@@ -3085,7 +3089,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
...@@ -3690,7 +3696,9 @@ static bool virtnet_check_guest_gso(const struct virtnet_info *vi) ...@@ -3690,7 +3696,9 @@ static bool virtnet_check_guest_gso(const struct virtnet_info *vi)
return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO); virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
(virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) &&
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6));
} }
static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu)
...@@ -3759,6 +3767,8 @@ static int virtnet_probe(struct virtio_device *vdev) ...@@ -3759,6 +3767,8 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->hw_features |= NETIF_F_TSO6; dev->hw_features |= NETIF_F_TSO6;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
dev->hw_features |= NETIF_F_TSO_ECN; dev->hw_features |= NETIF_F_TSO_ECN;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
dev->hw_features |= NETIF_F_GSO_UDP_L4;
dev->features |= NETIF_F_GSO_ROBUST; dev->features |= NETIF_F_GSO_ROBUST;
...@@ -4036,6 +4046,7 @@ static struct virtio_device_id id_table[] = { ...@@ -4036,6 +4046,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
......
...@@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) ...@@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
case VIRTIO_NET_HDR_GSO_TCPV6: case VIRTIO_NET_HDR_GSO_TCPV6:
return protocol == cpu_to_be16(ETH_P_IPV6); return protocol == cpu_to_be16(ETH_P_IPV6);
case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP:
case VIRTIO_NET_HDR_GSO_UDP_L4:
return protocol == cpu_to_be16(ETH_P_IP) || return protocol == cpu_to_be16(ETH_P_IP) ||
protocol == cpu_to_be16(ETH_P_IPV6); protocol == cpu_to_be16(ETH_P_IPV6);
default: default:
...@@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, ...@@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP:
case VIRTIO_NET_HDR_GSO_UDP_L4:
skb->protocol = cpu_to_be16(ETH_P_IP); skb->protocol = cpu_to_be16(ETH_P_IP);
break; break;
case VIRTIO_NET_HDR_GSO_TCPV6: case VIRTIO_NET_HDR_GSO_TCPV6:
...@@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, ...@@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
ip_proto = IPPROTO_UDP; ip_proto = IPPROTO_UDP;
thlen = sizeof(struct udphdr); thlen = sizeof(struct udphdr);
break; break;
case VIRTIO_NET_HDR_GSO_UDP_L4:
gso_type = SKB_GSO_UDP_L4;
ip_proto = IPPROTO_UDP;
thlen = sizeof(struct udphdr);
break;
default: default:
return -EINVAL; return -EINVAL;
} }
...@@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, ...@@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6) else if (sinfo->gso_type & SKB_GSO_TCPV6)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
else if (sinfo->gso_type & SKB_GSO_UDP_L4)
hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
else else
return -EINVAL; return -EINVAL;
if (sinfo->gso_type & SKB_GSO_TCP_ECN) if (sinfo->gso_type & SKB_GSO_TCP_ECN)
......
...@@ -90,6 +90,8 @@ ...@@ -90,6 +90,8 @@
#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ #define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */
#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
#define TUN_F_UFO 0x10 /* I can handle UFO packets */ #define TUN_F_UFO 0x10 /* I can handle UFO packets */
#define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */
#define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */
/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
#define TUN_PKT_STRIP 0x0001 #define TUN_PKT_STRIP 0x0001
......
...@@ -57,6 +57,9 @@ ...@@ -57,6 +57,9 @@
* Steering */ * Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */
#define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */
#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */
#define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
...@@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 { ...@@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 {
#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */
#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
__u8 gso_type; __u8 gso_type;
__virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
......
...@@ -387,7 +387,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, ...@@ -387,7 +387,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr))) if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out; goto out;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
!skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
return __udp_gso_segment(skb, features, false); return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size; mss = skb_shinfo(skb)->gso_size;
......
...@@ -42,7 +42,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, ...@@ -42,7 +42,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr))) if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out; goto out;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
!skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
return __udp_gso_segment(skb, features, true); return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size; mss = skb_shinfo(skb)->gso_size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment