Commit 8cb79af5 authored by David S. Miller

Merge branch 'skb-gro-optimize'

Paolo Abeni says:

====================
sk_buff: optimize GRO for the common case

This is a trimmed-down revision of "sk_buff: optimize layout for GRO",
specifically dropping the changes to the sk_buff layout [1].

This series tries to accomplish two goals:
- optimize the GRO stage for the most common scenario, avoiding a
  bunch of conditionals and some more code
- let owned skbs enter the GRO engine, allowing backpressure in the
  veth GRO forward path.

A new sk_buff flag (!!!) is introduced and maintained for GRO's sake.
The new field uses an existing hole, so there is no change to the
sk_buff size.

[1] for two main reasons:
- moving the skb->inner_ fields requires some extra care, as some
  in-kernel users access them regardless of skb->encapsulation.
- extending the secmark size clashes with the ct and nft uAPIs.

Addressing all of the above is possible, I think, but certainly not
in a single series.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 883d71a5 d504fff0
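
Before the diffs, here is the core idea in miniature (an illustrative
sketch with invented names, not the kernel code): every path that
attaches per-skb state which is expensive to handle at GRO time sets a
single bit, and the flow-matching code probes that one bit instead of
unconditionally testing each piece of state:

	#include <stdbool.h>

	struct pkt {
		bool slow_gro;	/* set whenever dst/nfct/extensions/sk attach */
		const void *dst, *nfct, *extensions, *sk;
	};

	/* hypothetical flow comparison mirroring gro_list_prepare() below */
	static bool slow_state_matches(const struct pkt *p, const struct pkt *q)
	{
		/* common case: neither packet carries slow state, skip it all */
		if (!(p->slow_gro | q->slow_gro))
			return true;

		/* rare case: compare each piece of state individually */
		return p->dst == q->dst && p->nfct == q->nfct &&
		       p->extensions == q->extensions && p->sk == q->sk;
	}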
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -713,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	int mac_len, delta, off;
 	struct xdp_buff xdp;
 
-	skb_orphan_partial(skb);
+	skb_prepare_for_gro(skb);
 
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
  *		CHECKSUM_UNNECESSARY (max 3)
  *	@dst_pending_confirm: need to confirm neighbour
  *	@decrypted: Decrypted SKB
+ *	@slow_gro: state present at GRO time, slower prepare step required
  *	@napi_id: id of the NAPI struct this skb came from
  *	@sender_cpu: (aka @napi_id) source CPU in XPS
  *	@secmark: security marking
@@ -870,6 +871,7 @@ struct sk_buff {
 #ifdef CONFIG_TLS_DEVICE
 	__u8			decrypted:1;
 #endif
+	__u8			slow_gro:1;
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
  */
 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 {
+	skb->slow_gro |= !!dst;
 	skb->_skb_refdst = (unsigned long)dst;
 }
 
@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
 {
 	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	skb->slow_gro = !!dst;
 	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
 }
 
@@ -4216,6 +4220,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
 static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	skb->slow_gro |= !!nfct;
 	skb->_nfct = nfct;
 #endif
 }
 
@@ -4375,6 +4380,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb_nfct(dst));
 #endif
+	dst->slow_gro = src->slow_gro;
 	__nf_copy(dst, src, true);
 }
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
 
 static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
 {
+	nskb->slow_gro |= !!refdst;
 	nskb->_skb_refdst = refdst;
 	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
 		dst_clone(skb_dst(nskb));
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
 			dst = NULL;
 
 		skb->_skb_refdst = (unsigned long)dst;
+		skb->slow_gro |= !!dst;
 	}
 
 	return skb->_skb_refdst != 0UL;
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2249,6 +2249,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
 	return false;
 }
 
+static inline void skb_prepare_for_gro(struct sk_buff *skb)
+{
+	if (skb->destructor != sock_wfree) {
+		skb_orphan(skb);
+		return;
+	}
+	skb->slow_gro = 1;
+}
+
 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 		    unsigned long expires);
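
For context, a forwarding driver whose skbs may still be owned by a
local socket calls the new helper right before handing packets to GRO,
as veth does in the first hunk. A minimal sketch (not part of the
patch; the surrounding NAPI setup is elided):

	/* Hand a possibly sock-owned skb to the GRO engine. If the
	 * destructor is sock_wfree, the skb keeps its socket reference,
	 * so the sender's wmem accounting (and thus backpressure) stays
	 * in place until the aggregated packet is consumed; any other
	 * owned skb is orphaned as before.
	 */
	static void rx_to_gro(struct napi_struct *napi, struct sk_buff *skb)
	{
		skb_prepare_for_gro(skb);	/* was: skb_orphan_partial(skb) */
		napi_gro_receive(napi, skb);
	}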
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6014,7 +6014,6 @@ static void gro_list_prepare(const struct list_head *head,
 		diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
 		if (skb_vlan_tag_present(p))
 			diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
-		diffs |= skb_metadata_dst_cmp(p, skb);
 		diffs |= skb_metadata_differs(p, skb);
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
@@ -6024,17 +6023,30 @@ static void gro_list_prepare(const struct list_head *head,
 						       skb_mac_header(skb),
 						       maclen);
 
-		diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+		/* in the most common scenarios 'slow_gro' is 0
+		 * otherwise we are already on some slower paths
+		 * either skip all the infrequent tests altogether or
+		 * avoid trying too hard to skip each of them individually
+		 */
+		if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
 #if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-		if (!diffs) {
-			struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
-			struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+			struct tc_skb_ext *skb_ext;
+			struct tc_skb_ext *p_ext;
+#endif
+
+			diffs |= p->sk != skb->sk;
+			diffs |= skb_metadata_dst_cmp(p, skb);
+			diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+			skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+			p_ext = skb_ext_find(p, TC_SKB_EXT);
 
 			diffs |= (!!p_ext) ^ (!!skb_ext);
 			if (!diffs && unlikely(skb_ext))
 				diffs |= p_ext->chain ^ skb_ext->chain;
-		}
 #endif
+		}
 
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
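
Two details above are easy to miss. Every comparison folds its result
into 'diffs' with a bitwise OR, so the whole flow match is decided by
one final test; and 'skb->slow_gro | p->slow_gro' deliberately uses the
bitwise operator, evaluating both flags without a short-circuit branch.
A standalone sketch of the idiom (invented inputs, not kernel code):

	#include <stdbool.h>

	/* each test contributes a nonzero value iff the packets differ;
	 * the flows match only when every contribution was zero
	 */
	static bool same_flow(unsigned long tag_a, unsigned long tag_b,
			      const void *sk_a, const void *sk_b)
	{
		unsigned long diffs = 0;

		diffs |= tag_a ^ tag_b;			/* headers differ? */
		diffs |= (unsigned long)(sk_a != sk_b);	/* owners differ?  */
		return !diffs;
	}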
@@ -6299,8 +6311,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-	skb_ext_reset(skb);
-	nf_reset_ct(skb);
+	if (unlikely(skb->slow_gro)) {
+		skb_orphan(skb);
+		skb_ext_reset(skb);
+		nf_reset_ct(skb);
+		skb->slow_gro = 0;
+	}
 
 	napi->skb = skb;
 }
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
 
 void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
-	nf_reset_ct(skb);
-	skb_dst_drop(skb);
-	skb_ext_put(skb);
+	if (unlikely(skb->slow_gro)) {
+		nf_reset_ct(skb);
+		skb_dst_drop(skb);
+		skb_ext_put(skb);
+		skb_orphan(skb);
+		skb->slow_gro = 0;
+	}
 	napi_skb_cache_put(skb);
 }
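
For context, this runs on the GRO_MERGED_FREE path: when
skb_gro_receive() below steals a donor skb's head it sets
NAPI_GRO_FREE_STOLEN_HEAD, and napi_skb_finish() routes such skbs here
instead of through the regular free; roughly (recalled from the
contemporary net/core/dev.c, not part of this patch):

	case GRO_MERGED_FREE:
		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
			napi_skb_free_stolen_head(skb);
		else
			__kfree_skb_defer(skb);
		break;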
@@ -3889,6 +3893,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
 	NAPI_GRO_CB(p)->last = skb;
 	NAPI_GRO_CB(p)->count++;
 	p->data_len += skb->len;
+
+	/* sk ownership - if any - completely transferred to the aggregated packet */
+	skb->destructor = NULL;
 	p->truesize += skb->truesize;
 	p->len += skb->len;
@@ -4256,6 +4263,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	unsigned int headlen = skb_headlen(skb);
 	unsigned int len = skb_gro_len(skb);
 	unsigned int delta_truesize;
+	unsigned int new_truesize;
 	struct sk_buff *lp;
 
 	if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
@@ -4287,10 +4295,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 		skb_frag_size_sub(frag, offset);
 
 		/* all fragments truesize : remove (head size + sk_buff) */
-		delta_truesize = skb->truesize -
-				 SKB_TRUESIZE(skb_end_offset(skb));
+		new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
+		delta_truesize = skb->truesize - new_truesize;
 
-		skb->truesize -= skb->data_len;
+		skb->truesize = new_truesize;
 		skb->len -= skb->data_len;
 		skb->data_len = 0;
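
The rewritten accounting reads more easily with the macro expanded:
SKB_TRUESIZE(X) is, per include/linux/skbuff.h, X plus the aligned
sizes of struct sk_buff and struct skb_shared_info, i.e. the full
memory cost of a head-only skb. A commented restatement of the three
new lines:

	/* the donor skb keeps only its (now payload-less) head, so its
	 * truesize is set to exactly the head-only cost instead of being
	 * decremented by data_len; the exact difference is what later
	 * gets credited to the aggregate packet via p->truesize
	 */
	new_truesize   = SKB_TRUESIZE(skb_end_offset(skb));
	delta_truesize = skb->truesize - new_truesize;
	skb->truesize  = new_truesize;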
@@ -4319,12 +4327,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
 		/* We dont need to clear skbinfo->nr_frags here */
 
-		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+		new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
+		delta_truesize = skb->truesize - new_truesize;
+		skb->truesize = new_truesize;
 		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
 		goto done;
 	}
 
 merge:
+	/* sk ownership - if any - completely transferred to the aggregated packet */
+	skb->destructor = NULL;
 	delta_truesize = skb->truesize;
 	if (offset > headlen) {
 		unsigned int eat = offset - headlen;
@@ -6449,6 +6461,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
 		new->chunks = newlen;
 		new->offset[id] = newoff;
 set_active:
+	skb->slow_gro = 1;
 	skb->extensions = new;
 	skb->active_extensions |= 1 << id;
 	return skb_ext_get_ptr(new, id);