Commit d0bf4a9e authored by Eric Dumazet, committed by David S. Miller

net: cleanup and document skb fclone layout

Let's use a proper structure to clearly document and implement
skb fast clones.

Then, we can more easily experiment with alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking implementation details.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b248230c
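
As a side note (not part of the patch): a minimal userspace sketch of the new layout and of the check that skb_fclone_busy() performs. The sk_buff, atomic_t and container_of() definitions below are toy stand-ins for the kernel ones, kept just small enough to compile and run on their own.

/*
 * Illustrative sketch only -- NOT part of the commit.
 * Toy stand-ins for kernel types; the real definitions live in the
 * files touched by the diff below.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum skb_fclone_state {
        SKB_FCLONE_UNAVAILABLE,
        SKB_FCLONE_ORIG,
        SKB_FCLONE_CLONE,
};

struct sk_buff {
        enum skb_fclone_state fclone;   /* toy: the real sk_buff is far bigger */
};

/* New, named layout: [skb1][skb2][fclone_ref] in a single allocation. */
struct sk_buff_fclones {
        struct sk_buff  skb1;
        struct sk_buff  skb2;
        int             fclone_ref;     /* atomic_t in the kernel */
};

/* Local stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Same test the patch wraps in skb_fclone_busy(). */
static bool skb_fclone_busy(struct sk_buff *skb)
{
        struct sk_buff_fclones *fclones =
                container_of(skb, struct sk_buff_fclones, skb1);

        return skb->fclone == SKB_FCLONE_ORIG &&
               fclones->skb2.fclone == SKB_FCLONE_CLONE;
}

int main(void)
{
        struct sk_buff_fclones f = {
                .skb1.fclone = SKB_FCLONE_ORIG,   /* the "original" skb */
                .skb2.fclone = SKB_FCLONE_CLONE,  /* its fast clone, in use */
                .fclone_ref  = 2,
        };

        printf("busy while cloned:  %d\n", skb_fclone_busy(&f.skb1)); /* 1 */

        f.skb2.fclone = SKB_FCLONE_UNAVAILABLE;   /* clone released again */
        printf("busy after release: %d\n", skb_fclone_busy(&f.skb1)); /* 0 */
        return 0;
}

The point of the struct is exactly what the sketch shows: callers reach the shared refcount and the companion skb through named members instead of pointer arithmetic on adjacent allocations.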
@@ -781,6 +781,31 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 				     int *errcode,
 				     gfp_t gfp_mask);
 
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+	struct sk_buff	skb1;
+
+	struct sk_buff	skb2;
+
+	atomic_t	fclone_ref;
+};
+
+/**
+ *	skb_fclone_busy - check if fclone is busy
+ *	@skb: buffer
+ *
+ * Returns true if skb is a fast clone, and its clone is not freed.
+ */
+static inline bool skb_fclone_busy(const struct sk_buff *skb)
+{
+	const struct sk_buff_fclones *fclones;
+
+	fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+	return skb->fclone == SKB_FCLONE_ORIG &&
+	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
+}
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
...
@@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+		struct sk_buff_fclones *fclones;
 
-		kmemcheck_annotate_bitfield(child, flags1);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
+		atomic_set(&fclones->fclone_ref, 1);
 
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-		child->pfmemalloc = pfmemalloc;
+		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+		fclones->skb2.pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb)
  */
 static void kfree_skbmem(struct sk_buff *skb)
 {
-	struct sk_buff *other;
-	atomic_t *fclone_ref;
+	struct sk_buff_fclones *fclones;
 
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +533,21 @@ static void kfree_skbmem(struct sk_buff *skb)
 		break;
 
 	case SKB_FCLONE_ORIG:
-		fclone_ref = (atomic_t *) (skb + 2);
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 
 	case SKB_FCLONE_CLONE:
-		fclone_ref = (atomic_t *) (skb + 1);
-		other = skb - 1;
+		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
 		/* The clone portion is available for
 		 * fast-cloning again.
 		 */
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 	}
 }
@@ -859,17 +858,18 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	struct sk_buff *n;
+	struct sk_buff_fclones *fclones = container_of(skb,
+						       struct sk_buff_fclones,
+						       skb1);
+	struct sk_buff *n = &fclones->skb2;
 
 	if (skb_orphan_frags(skb, gfp_mask))
 		return NULL;
 
-	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
 	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
-		atomic_t *fclone_ref = (atomic_t *) (n + 1);
 		n->fclone = SKB_FCLONE_CLONE;
-		atomic_inc(fclone_ref);
+		atomic_inc(&fclones->fclone_ref);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
@@ -3240,8 +3240,7 @@ void __init skb_init(void)
 					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					      NULL);
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
-						(2*sizeof(struct sk_buff)) +
-						sizeof(atomic_t),
+						sizeof(struct sk_buff_fclones),
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
...
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 static bool skb_still_in_host_queue(const struct sock *sk,
 				    const struct sk_buff *skb)
 {
-	const struct sk_buff *fclone = skb + 1;
-
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 		return true;
...
@@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
 	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
 	struct xfrm_policy *pol = xdst->pols[0];
 	struct xfrm_policy_queue *pq = &pol->polq;
-	const struct sk_buff *fclone = skb + 1;
 
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		kfree_skb(skb);
 		return 0;
 	}