Commit 9451980a, authored by Alexander Duyck and committed by David S. Miller

net: Use cached copy of pfmemalloc to avoid accessing page

While testing I found that the check for pfmemalloc in build_skb was
rather expensive.  I found the issue to be two-fold.  First, we have to get
from the virtual address to the head page, and that comes at the cost of
something like 11 cycles.  Then there is the cost of reading pfmemalloc out
of the head page, which can be cache cold because put_page_testzero is
likely invalidating the cache line on one or more CPUs, as the fragments
can be shared.

To avoid this extra expense I have added a pfmemalloc member to the
netdev_alloc_cache.  I then pushed pieces of __alloc_rx_skb into
__napi_alloc_skb and __netdev_alloc_skb so that I could rewrite them to
make use of the cached pfmemalloc value.  The result is that my perf traces
show a reduction from 9.28% overhead to 3.7% for the code covered by
build_skb, __alloc_rx_skb, and __napi_alloc_skb when performing a test with
the packet being dropped instead of being handed to napi_gro_receive.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b396cca6
...@@ -353,6 +353,7 @@ struct netdev_alloc_cache { ...@@ -353,6 +353,7 @@ struct netdev_alloc_cache {
* containing page->_count every time we allocate a fragment. * containing page->_count every time we allocate a fragment.
*/ */
unsigned int pagecnt_bias; unsigned int pagecnt_bias;
bool pfmemalloc;
}; };
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
...@@ -379,10 +380,9 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, ...@@ -379,10 +380,9 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
return page; return page;
} }
static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache, static void *__alloc_page_frag(struct netdev_alloc_cache *nc,
unsigned int fragsz, gfp_t gfp_mask) unsigned int fragsz, gfp_t gfp_mask)
{ {
struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
struct page *page = nc->frag.page; struct page *page = nc->frag.page;
unsigned int size; unsigned int size;
int offset; int offset;
...@@ -402,6 +402,7 @@ static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache, ...@@ -402,6 +402,7 @@ static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
atomic_add(size - 1, &page->_count); atomic_add(size - 1, &page->_count);
/* reset page count bias and offset to start of new frag */ /* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page->pfmemalloc;
nc->pagecnt_bias = size; nc->pagecnt_bias = size;
nc->frag.offset = size; nc->frag.offset = size;
} }
...@@ -430,11 +431,13 @@ static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache, ...@@ -430,11 +431,13 @@ static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{ {
struct netdev_alloc_cache *nc;
unsigned long flags; unsigned long flags;
void *data; void *data;
local_irq_save(flags); local_irq_save(flags);
data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask); nc = this_cpu_ptr(&netdev_alloc_cache);
data = __alloc_page_frag(nc, fragsz, gfp_mask);
local_irq_restore(flags); local_irq_restore(flags);
return data; return data;
} }
...@@ -454,7 +457,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); ...@@ -454,7 +457,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{ {
return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
return __alloc_page_frag(nc, fragsz, gfp_mask);
} }
void *napi_alloc_frag(unsigned int fragsz) void *napi_alloc_frag(unsigned int fragsz)
...@@ -464,75 +469,63 @@ void *napi_alloc_frag(unsigned int fragsz) ...@@ -464,75 +469,63 @@ void *napi_alloc_frag(unsigned int fragsz)
EXPORT_SYMBOL(napi_alloc_frag); EXPORT_SYMBOL(napi_alloc_frag);
/** /**
* __alloc_rx_skb - allocate an skbuff for rx * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
* @length: length to allocate * @length: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb * @gfp_mask: get_free_pages mask, passed to alloc_skb
* @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
* allocations in case we have to fallback to __alloc_skb()
* If SKB_ALLOC_NAPI is set, page fragment will be allocated
* from napi_cache instead of netdev_cache.
* *
* Allocate a new &sk_buff and assign it a usage count of one. The * Allocate a new &sk_buff and assign it a usage count of one. The
* buffer has unspecified headroom built in. Users should allocate * buffer has NET_SKB_PAD headroom built in. Users should allocate
* the headroom they think they need without accounting for the * the headroom they think they need without accounting for the
* built in space. The built in space is used for optimisations. * built in space. The built in space is used for optimisations.
* *
* %NULL is returned if there is no free memory. * %NULL is returned if there is no free memory.
*/ */
static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask, struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
int flags) gfp_t gfp_mask)
{ {
struct sk_buff *skb = NULL; struct netdev_alloc_cache *nc;
unsigned int fragsz = SKB_DATA_ALIGN(length) + unsigned long flags;
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct sk_buff *skb;
bool pfmemalloc;
if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
void *data; void *data;
len += NET_SKB_PAD;
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
(gfp_mask & (__GFP_WAIT | GFP_DMA)))
return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
len = SKB_DATA_ALIGN(len);
if (sk_memalloc_socks()) if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC; gfp_mask |= __GFP_MEMALLOC;
data = (flags & SKB_ALLOC_NAPI) ? local_irq_save(flags);
__napi_alloc_frag(fragsz, gfp_mask) :
__netdev_alloc_frag(fragsz, gfp_mask); nc = this_cpu_ptr(&netdev_alloc_cache);
data = __alloc_page_frag(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
if (likely(data)) { local_irq_restore(flags);
skb = build_skb(data, fragsz);
if (unlikely(!skb)) if (unlikely(!data))
return NULL;
skb = __build_skb(data, len);
if (unlikely(!skb)) {
put_page(virt_to_head_page(data)); put_page(virt_to_head_page(data));
return NULL;
} }
} else {
skb = __alloc_skb(length, gfp_mask,
SKB_ALLOC_RX, NUMA_NO_NODE);
}
return skb;
}
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
* @length: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
*
* Allocate a new &sk_buff and assign it a usage count of one. The
* buffer has NET_SKB_PAD headroom built in. Users should allocate
* the headroom they think they need without accounting for the
* built in space. The built in space is used for optimisations.
*
* %NULL is returned if there is no free memory.
*/
struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
unsigned int length, gfp_t gfp_mask)
{
struct sk_buff *skb;
length += NET_SKB_PAD; /* use OR instead of assignment to avoid clearing of bits in mask */
skb = __alloc_rx_skb(length, gfp_mask, 0); if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD); skb_reserve(skb, NET_SKB_PAD);
skb->dev = dev; skb->dev = dev;
}
return skb; return skb;
} }
...@@ -551,18 +544,42 @@ EXPORT_SYMBOL(__netdev_alloc_skb); ...@@ -551,18 +544,42 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
* *
* %NULL is returned if there is no free memory. * %NULL is returned if there is no free memory.
*/ */
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
unsigned int length, gfp_t gfp_mask) gfp_t gfp_mask)
{ {
struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb; struct sk_buff *skb;
void *data;
length += NET_SKB_PAD + NET_IP_ALIGN; len += NET_SKB_PAD + NET_IP_ALIGN;
skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
(gfp_mask & (__GFP_WAIT | GFP_DMA)))
return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
len = SKB_DATA_ALIGN(len);
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
data = __alloc_page_frag(nc, len, gfp_mask);
if (unlikely(!data))
return NULL;
skb = __build_skb(data, len);
if (unlikely(!skb)) {
put_page(virt_to_head_page(data));
return NULL;
}
/* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
skb->dev = napi->dev; skb->dev = napi->dev;
}
return skb; return skb;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment