Commit ef4ba011 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'minor-cleanups-to-skb-frag-ref-unref'

Mina Almasry says:

====================
Minor cleanups to skb frag ref/unref

This series is largely motivated by a recent discussion where there was
some confusion on how to properly ref/unref pp pages vs non pp pages:

https://lore.kernel.org/netdev/CAHS8izOoO-EovwMwAm9tLYetwikNPxC0FKyVGu1TPJWSz4bGoA@mail.gmail.com/T/#t

There is some subtely there because pp uses page->pp_ref_count for
refcounting, while non-pp uses get_page()/put_page() for ref counting.
Getting the refcounting pairs wrong can lead to kernel crash.

Additionally currently it may not be obvious to skb users unaware of
page pool internals how to properly acquire a ref on a pp frag. It
requires checking of skb->pp_recycle & is_pp_page() to make the correct
calls and may require some handling at the call site aware of arguable pp
internals.

This series is a minor refactor with a couple of goals:

1. skb users should be able to ref/unref a frag using
   [__]skb_frag_[un]ref() functions without needing to understand pp
   concepts and pp_ref_count vs get/put_page() differences.

2. reference counting functions should have a mirror opposite. I.e. there
   should be a foo_unref() to every foo_ref() with a mirror opposite
   implementation (as much as possible).

This is RFC to collect feedback if this change is desirable, but also so
that I don't race with the fix for the issue Dragos is seeing for his
crash.

https://lore.kernel.org/lkml/CAHS8izN436pn3SndrzsCyhmqvJHLyxgCeDpWXA4r1ANt3RCDLQ@mail.gmail.com/T/
====================

Link: https://lore.kernel.org/r/20240410190505.1225848-1-almasrymina@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 94426ed2 a580ea99
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <crypto/aes.h> #include <crypto/aes.h>
#include <linux/skbuff_ref.h>
#include "chcr_ktls.h" #include "chcr_ktls.h"
static LIST_HEAD(uld_ctx_list); static LIST_HEAD(uld_ctx_list);
...@@ -1658,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, ...@@ -1658,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
for (i = 0; i < record->num_frags; i++) { for (i = 0; i < record->num_frags; i++) {
skb_shinfo(nskb)->frags[i] = record->frags[i]; skb_shinfo(nskb)->frags[i] = record->frags[i];
/* increase the frag ref count */ /* increase the frag ref count */
__skb_frag_ref(&skb_shinfo(nskb)->frags[i]); __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle);
} }
skb_shinfo(nskb)->nr_frags = record->num_frags; skb_shinfo(nskb)->nr_frags = record->num_frags;
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include <linux/mii.h> #include <linux/mii.h>
#include <linux/of_net.h> #include <linux/of_net.h>
#include <linux/dmi.h> #include <linux/dmi.h>
#include <linux/skbuff_ref.h>
#include <asm/irq.h> #include <asm/irq.h>
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/skbuff_ref.h>
#include <net/ip.h> #include <net/ip.h>
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
......
...@@ -73,6 +73,7 @@ ...@@ -73,6 +73,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/skbuff_ref.h>
#include <linux/ethtool.h> #include <linux/ethtool.h>
#include <linux/crc32.h> #include <linux/crc32.h>
#include <linux/random.h> #include <linux/random.h>
...@@ -1999,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, ...@@ -1999,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
skb->len += hlen - swivel; skb->len += hlen - swivel;
skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
__skb_frag_ref(frag); __skb_frag_ref(frag, skb->pp_recycle);
/* any more data? */ /* any more data? */
if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
...@@ -2023,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, ...@@ -2023,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
frag++; frag++;
skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
__skb_frag_ref(frag); __skb_frag_ref(frag, skb->pp_recycle);
RX_USED_ADD(page, hlen + cp->crc_size); RX_USED_ADD(page, hlen + cp->crc_size);
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/ptr_ring.h> #include <linux/ptr_ring.h>
#include <linux/bpf_trace.h> #include <linux/bpf_trace.h>
#include <linux/net_tstamp.h> #include <linux/net_tstamp.h>
#include <linux/skbuff_ref.h>
#include <net/page_pool/helpers.h> #include <net/page_pool/helpers.h>
#define DRV_NAME "veth" #define DRV_NAME "veth"
...@@ -716,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp) ...@@ -716,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp)
return; return;
for (i = 0; i < sinfo->nr_frags; i++) for (i = 0; i < sinfo->nr_frags; i++)
__skb_frag_ref(&sinfo->frags[i]); __skb_frag_ref(&sinfo->frags[i], false);
} }
static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/skbuff_ref.h>
#include <net/tcp.h> #include <net/tcp.h>
......
...@@ -3492,73 +3492,10 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag) ...@@ -3492,73 +3492,10 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
return netmem_to_page(frag->netmem); return netmem_to_page(frag->netmem);
} }
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
*
* Takes an additional reference on the paged fragment @frag.
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
get_page(skb_frag_page(frag));
}
/**
* skb_frag_ref - take an addition reference on a paged fragment of an skb.
* @skb: the buffer
* @f: the fragment offset.
*
* Takes an additional reference on the @f'th paged fragment of @skb.
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom); unsigned int headroom);
int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
struct bpf_prog *prog); struct bpf_prog *prog);
bool napi_pp_put_page(struct page *page);
static inline void
skb_page_unref(struct page *page, bool recycle)
{
#ifdef CONFIG_PAGE_POOL
if (recycle && napi_pp_put_page(page))
return;
#endif
put_page(page);
}
/**
* __skb_frag_unref - release a reference on a paged fragment.
* @frag: the paged fragment
* @recycle: recycle the page if allocated via page_pool
*
* Releases a reference on the paged fragment @frag
* or recycles the page via the page_pool API.
*/
static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
{
skb_page_unref(skb_frag_page(frag), recycle);
}
/**
* skb_frag_unref - release a reference on a paged fragment of an skb.
* @skb: the buffer
* @f: the fragment offset
*
* Releases a reference on the @f'th paged fragment of @skb.
*/
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
if (!skb_zcopy_managed(skb))
__skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
}
/** /**
* skb_frag_address - gets the address of the data contained in a paged fragment * skb_frag_address - gets the address of the data contained in a paged fragment
* @frag: the paged fragment buffer * @frag: the paged fragment buffer
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Skb ref helpers.
*
*/
#ifndef _LINUX_SKBUFF_REF_H
#define _LINUX_SKBUFF_REF_H
#include <linux/skbuff.h>
#include <net/page_pool/helpers.h>
#ifdef CONFIG_PAGE_POOL
static inline bool is_pp_page(struct page *page)
{
return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}
static inline bool napi_pp_get_page(struct page *page)
{
page = compound_head(page);
if (!is_pp_page(page))
return false;
page_pool_ref_page(page);
return true;
}
#endif
static inline void skb_page_ref(struct page *page, bool recycle)
{
#ifdef CONFIG_PAGE_POOL
if (recycle && napi_pp_get_page(page))
return;
#endif
get_page(page);
}
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
* @recycle: skb->pp_recycle param of the parent skb. False if no parent skb.
*
* Takes an additional reference on the paged fragment @frag. Obtains the
* correct reference count depending on whether skb->pp_recycle is set and
* whether the frag is a page pool frag.
*/
static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
{
skb_page_ref(skb_frag_page(frag), recycle);
}
/**
* skb_frag_ref - take an addition reference on a paged fragment of an skb.
* @skb: the buffer
* @f: the fragment offset.
*
* Takes an additional reference on the @f'th paged fragment of @skb.
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
__skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}
bool napi_pp_put_page(struct page *page);
static inline void
skb_page_unref(struct page *page, bool recycle)
{
#ifdef CONFIG_PAGE_POOL
if (recycle && napi_pp_put_page(page))
return;
#endif
put_page(page);
}
/**
* __skb_frag_unref - release a reference on a paged fragment.
* @frag: the paged fragment
* @recycle: recycle the page if allocated via page_pool
*
* Releases a reference on the paged fragment @frag
* or recycles the page via the page_pool API.
*/
static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
{
skb_page_unref(skb_frag_page(frag), recycle);
}
/**
* skb_frag_unref - release a reference on a paged fragment of an skb.
* @skb: the buffer
* @f: the fragment offset
*
* Releases a reference on the @f'th paged fragment of @skb.
*/
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
if (!skb_zcopy_managed(skb))
__skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
}
#endif /* _LINUX_SKBUFF_REF_H */
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <trace/events/net.h> #include <trace/events/net.h>
#include <linux/skbuff_ref.h>
#define MAX_GRO_SKBS 8 #define MAX_GRO_SKBS 8
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#endif #endif
#include <linux/string.h> #include <linux/string.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/skbuff_ref.h>
#include <linux/splice.h> #include <linux/splice.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
...@@ -906,11 +907,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) ...@@ -906,11 +907,6 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list); skb_get(list);
} }
static bool is_pp_page(struct page *page)
{
return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom) unsigned int headroom)
{ {
...@@ -1032,37 +1028,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) ...@@ -1032,37 +1028,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
return napi_pp_put_page(virt_to_page(data)); return napi_pp_put_page(virt_to_page(data));
} }
/**
* skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
* @skb: page pool aware skb
*
* Increase the fragment reference count (pp_ref_count) of a skb. This is
* intended to gain fragment references only for page pool aware skbs,
* i.e. when skb->pp_recycle is true, and not for fragments in a
* non-pp-recycling skb. It has a fallback to increase references on normal
* pages, as page pool aware skbs may also have normal page fragments.
*/
static int skb_pp_frag_ref(struct sk_buff *skb)
{
struct skb_shared_info *shinfo;
struct page *head_page;
int i;
if (!skb->pp_recycle)
return -EINVAL;
shinfo = skb_shinfo(skb);
for (i = 0; i < shinfo->nr_frags; i++) {
head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
if (likely(is_pp_page(head_page)))
page_pool_ref_page(head_page);
else
page_ref_inc(head_page);
}
return 0;
}
static void skb_kfree_head(void *head, unsigned int end_offset) static void skb_kfree_head(void *head, unsigned int end_offset)
{ {
if (end_offset == SKB_SMALL_HEAD_HEADROOM) if (end_offset == SKB_SMALL_HEAD_HEADROOM)
...@@ -4175,7 +4140,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) ...@@ -4175,7 +4140,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++; to++;
} else { } else {
__skb_frag_ref(fragfrom); __skb_frag_ref(fragfrom, skb->pp_recycle);
skb_frag_page_copy(fragto, fragfrom); skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo); skb_frag_size_set(fragto, todo);
...@@ -4825,7 +4790,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, ...@@ -4825,7 +4790,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
} }
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag); __skb_frag_ref(nskb_frag, nskb->pp_recycle);
size = skb_frag_size(nskb_frag); size = skb_frag_size(nskb_frag);
if (pos < offset) { if (pos < offset) {
...@@ -5956,10 +5921,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, ...@@ -5956,10 +5921,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing /* if the skb is not cloned this does nothing
* since we set nr_frags to 0. * since we set nr_frags to 0.
*/ */
if (skb_pp_frag_ref(from)) {
for (i = 0; i < from_shinfo->nr_frags; i++) for (i = 0; i < from_shinfo->nr_frags; i++)
__skb_frag_ref(&from_shinfo->frags[i]); __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
}
to->truesize += delta; to->truesize += delta;
to->len += len; to->len += len;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <net/udp.h> #include <net/udp.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/espintcp.h> #include <net/espintcp.h>
#include <linux/skbuff_ref.h>
#include <linux/highmem.h> #include <linux/highmem.h>
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/static_key.h> #include <linux/static_key.h>
#include <linux/skbuff_ref.h>
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/espintcp.h> #include <net/espintcp.h>
#include <net/inet6_hashtables.h> #include <net/inet6_hashtables.h>
#include <linux/skbuff_ref.h>
#include <linux/highmem.h> #include <linux/highmem.h>
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <net/inet_connection_sock.h> #include <net/inet_connection_sock.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/tls.h> #include <net/tls.h>
#include <linux/skbuff_ref.h>
#include "tls.h" #include "tls.h"
#include "trace.h" #include "trace.h"
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <crypto/aead.h> #include <crypto/aead.h>
#include <crypto/scatterwalk.h> #include <crypto/scatterwalk.h>
#include <net/ip6_checksum.h> #include <net/ip6_checksum.h>
#include <linux/skbuff_ref.h>
#include "tls.h" #include "tls.h"
...@@ -277,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in, ...@@ -277,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
for (i = 0; remaining > 0; i++) { for (i = 0; remaining > 0; i++) {
skb_frag_t *frag = &record->frags[i]; skb_frag_t *frag = &record->frags[i];
__skb_frag_ref(frag); __skb_frag_ref(frag, false);
sg_set_page(sg_in + i, skb_frag_page(frag), sg_set_page(sg_in + i, skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag)); skb_frag_size(frag), skb_frag_off(frag));
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
/* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ /* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/skbuff_ref.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <net/strparser.h> #include <net/strparser.h>
#include <net/tcp.h> #include <net/tcp.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment