Commit 1728369e authored by David S. Miller

Merge branch 'gro_tunnels'

Tom Herbert says:

====================
gro: Fixes for tunnels and GRO

This patch set addresses some issues related to tunneling and GRO:

- Fix remote checksum offload to properly deal with frag0 in GRO.
- Add support for GRO at VXLAN tunnel (call gro_cells)

Testing: Ran one netperf TCP_STREAM to highlight impact of different
configurations:

GUE
  Zero UDP checksum
    4628.42 MBps
  UDP checksums enabled
    6800.51 MBps
  UDP checksums and remote checksum offload
    7663.82 MBps
  UDP checksums and remote checksum offload using no-partial
    7287.25 MBps

VXLAN
  Zero UDP checksum
    4112.02 MBps
  UDP checksums enabled
    6785.80 MBps
  UDP checksums and remote checksum offload
    7075.56 MBps

v2:
  - Drop "gro: Pull headers into skb head for 1st skb in gro list"
    from patch set
  - In vxlan_remcsum and gue_remcsum return immediately if remcsum
    processing was already done
  - Add gro callbacks for sit offload
  - Use WARN_ON_ONCE if we get a GUE protocol that does not have
    GRO offload support

v3:
  - Don't restore gro callbacks for sit offload
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9a873c71 27013661
...@@ -519,10 +519,10 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, ...@@ -519,10 +519,10 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
u32 data, struct gro_remcsum *grc, u32 data, struct gro_remcsum *grc,
bool nopartial) bool nopartial)
{ {
size_t start, offset, plen; size_t start, offset;
if (skb->remcsum_offload) if (skb->remcsum_offload)
return NULL; return vh;
if (!NAPI_GRO_CB(skb)->csum_valid) if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL; return NULL;
...@@ -532,17 +532,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, ...@@ -532,17 +532,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
offsetof(struct udphdr, check) : offsetof(struct udphdr, check) :
offsetof(struct tcphdr, check)); offsetof(struct tcphdr, check));
plen = hdrlen + offset + sizeof(u16); vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
start, offset, grc, nopartial);
/* Pull checksum that will be written */
if (skb_gro_header_hard(skb, off + plen)) {
vh = skb_gro_header_slow(skb, off + plen, off);
if (!vh)
return NULL;
}
skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
start, offset, grc, nopartial);
skb->remcsum_offload = 1; skb->remcsum_offload = 1;
...@@ -573,7 +564,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, ...@@ -573,7 +564,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
goto out; goto out;
} }
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
flags = ntohl(vh->vx_flags); flags = ntohl(vh->vx_flags);
...@@ -588,6 +578,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, ...@@ -588,6 +578,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
goto out; goto out;
} }
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
flush = 0; flush = 0;
for (p = *head; p; p = p->next) { for (p = *head; p; p = p->next) {
...@@ -1110,6 +1102,9 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, ...@@ -1110,6 +1102,9 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
{ {
size_t start, offset, plen; size_t start, offset, plen;
if (skb->remcsum_offload)
return vh;
start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
offset = start + ((data & VXLAN_RCO_UDP) ? offset = start + ((data & VXLAN_RCO_UDP) ?
offsetof(struct udphdr, check) : offsetof(struct udphdr, check) :
...@@ -1213,7 +1208,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, ...@@ -1213,7 +1208,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
stats->rx_bytes += skb->len; stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp); u64_stats_update_end(&stats->syncp);
netif_rx(skb); gro_cells_receive(&vxlan->gro_cells, skb);
return; return;
drop: drop:
...@@ -2451,6 +2446,8 @@ static void vxlan_setup(struct net_device *dev) ...@@ -2451,6 +2446,8 @@ static void vxlan_setup(struct net_device *dev)
vxlan->dev = dev; vxlan->dev = dev;
gro_cells_init(&vxlan->gro_cells, dev);
for (h = 0; h < FDB_HASH_SIZE; ++h) for (h = 0; h < FDB_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]); INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
} }
...@@ -2890,6 +2887,7 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head) ...@@ -2890,6 +2887,7 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
hlist_del_rcu(&vxlan->hlist); hlist_del_rcu(&vxlan->hlist);
spin_unlock(&vn->sock_lock); spin_unlock(&vn->sock_lock);
gro_cells_destroy(&vxlan->gro_cells);
list_del(&vxlan->next); list_del(&vxlan->next);
unregister_netdevice_queue(dev, head); unregister_netdevice_queue(dev, head);
} }
...@@ -3098,8 +3096,10 @@ static void __net_exit vxlan_exit_net(struct net *net) ...@@ -3098,8 +3096,10 @@ static void __net_exit vxlan_exit_net(struct net *net)
/* If vxlan->dev is in the same netns, it has already been added /* If vxlan->dev is in the same netns, it has already been added
* to the list by the previous loop. * to the list by the previous loop.
*/ */
if (!net_eq(dev_net(vxlan->dev), net)) if (!net_eq(dev_net(vxlan->dev), net)) {
gro_cells_destroy(&vxlan->gro_cells);
unregister_netdevice_queue(vxlan->dev, &list); unregister_netdevice_queue(vxlan->dev, &list);
}
} }
unregister_netdevice_many(&list); unregister_netdevice_many(&list);
......
...@@ -2311,8 +2311,7 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); ...@@ -2311,8 +2311,7 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
{ {
return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) == return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb));
skb_gro_offset(skb));
} }
static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
...@@ -2408,37 +2407,58 @@ static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) ...@@ -2408,37 +2407,58 @@ static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
grc->delta = 0; grc->delta = 0;
} }
static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
int start, int offset, unsigned int off, size_t hdrlen,
struct gro_remcsum *grc, int start, int offset,
bool nopartial) struct gro_remcsum *grc,
bool nopartial)
{ {
__wsum delta; __wsum delta;
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
if (!nopartial) { if (!nopartial) {
NAPI_GRO_CB(skb)->gro_remcsum_start = NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start;
((unsigned char *)ptr + start) - skb->head; return ptr;
return; }
ptr = skb_gro_header_fast(skb, off);
if (skb_gro_header_hard(skb, off + plen)) {
ptr = skb_gro_header_slow(skb, off + plen, off);
if (!ptr)
return NULL;
} }
delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum,
start, offset);
/* Adjust skb->csum since we changed the packet */ /* Adjust skb->csum since we changed the packet */
NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
grc->offset = (ptr + offset) - (void *)skb->head; grc->offset = off + hdrlen + offset;
grc->delta = delta; grc->delta = delta;
return ptr;
} }
static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
struct gro_remcsum *grc) struct gro_remcsum *grc)
{ {
void *ptr;
size_t plen = grc->offset + sizeof(u16);
if (!grc->delta) if (!grc->delta)
return; return;
remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta); ptr = skb_gro_header_fast(skb, grc->offset);
if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) {
ptr = skb_gro_header_slow(skb, plen, grc->offset);
if (!ptr)
return;
}
remcsum_unadjust((__sum16 *)ptr, grc->delta);
} }
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
......
...@@ -161,6 +161,7 @@ struct vxlan_dev { ...@@ -161,6 +161,7 @@ struct vxlan_dev {
struct timer_list age_timer; struct timer_list age_timer;
spinlock_t hash_lock; spinlock_t hash_lock;
unsigned int addrcnt; unsigned int addrcnt;
struct gro_cells gro_cells;
struct vxlan_config cfg; struct vxlan_config cfg;
......
...@@ -79,7 +79,11 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, ...@@ -79,7 +79,11 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
__be16 *pd = data; __be16 *pd = data;
size_t start = ntohs(pd[0]); size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]); size_t offset = ntohs(pd[1]);
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); size_t plen = sizeof(struct udphdr) + hdrlen +
max_t(size_t, offset + sizeof(u16), start);
if (skb->remcsum_offload)
return guehdr;
if (!pskb_may_pull(skb, plen)) if (!pskb_may_pull(skb, plen))
return NULL; return NULL;
...@@ -221,29 +225,21 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, ...@@ -221,29 +225,21 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff,
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
struct guehdr *guehdr, void *data, struct guehdr *guehdr, void *data,
size_t hdrlen, u8 ipproto, size_t hdrlen, struct gro_remcsum *grc,
struct gro_remcsum *grc, bool nopartial) bool nopartial)
{ {
__be16 *pd = data; __be16 *pd = data;
size_t start = ntohs(pd[0]); size_t start = ntohs(pd[0]);
size_t offset = ntohs(pd[1]); size_t offset = ntohs(pd[1]);
size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
if (skb->remcsum_offload) if (skb->remcsum_offload)
return NULL; return guehdr;
if (!NAPI_GRO_CB(skb)->csum_valid) if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL; return NULL;
/* Pull checksum that will be written */ guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
if (skb_gro_header_hard(skb, off + plen)) { start, offset, grc, nopartial);
guehdr = skb_gro_header_slow(skb, off + plen, off);
if (!guehdr)
return NULL;
}
skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
start, offset, grc, nopartial);
skb->remcsum_offload = 1; skb->remcsum_offload = 1;
...@@ -307,10 +303,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, ...@@ -307,10 +303,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
if (flags & GUE_PFLAG_REMCSUM) { if (flags & GUE_PFLAG_REMCSUM) {
guehdr = gue_gro_remcsum(skb, off, guehdr, guehdr = gue_gro_remcsum(skb, off, guehdr,
data + doffset, hdrlen, data + doffset, hdrlen, &grc,
guehdr->proto_ctype, &grc,
!!(fou->flags & !!(fou->flags &
FOU_F_REMCSUM_NOPARTIAL)); FOU_F_REMCSUM_NOPARTIAL));
if (!guehdr) if (!guehdr)
goto out; goto out;
...@@ -351,7 +347,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, ...@@ -351,7 +347,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
rcu_read_lock(); rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[guehdr->proto_ctype]); ops = rcu_dereference(offloads[guehdr->proto_ctype]);
if (WARN_ON(!ops || !ops->callbacks.gro_receive)) if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock; goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb); pp = ops->callbacks.gro_receive(head, skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment