Commit 69b08f62 authored by Eric Dumazet, committed by David S. Miller

net: use bigger pages in __netdev_alloc_frag

We currently use per-cpu order-0 pages in __netdev_alloc_frag()
to deliver the fragments used by __netdev_alloc_skb().

Depending on the NIC driver and on the arch being 32 or 64 bit, this allows a page
to be split into several fragments (between 1 and 8), assuming PAGE_SIZE=4096.

Switching to bigger pages (32768 bytes for the PAGE_SIZE=4096 case) allows:

- Better filling of space (the unused hole at the end of the page is less of an
  issue; see the quick arithmetic check below)

- Fewer calls to the page allocator and fewer accesses to page->_count

- Could allow future changes to struct skb_shared_info without a major
  performance impact.

This patch implements a transparent fallback to smaller
pages in case of memory pressure.
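[Editor's note: a back-of-envelope check of the first two points, assuming PAGE_SIZE=4096 and the ~1920-byte fragment from the sketch above; neither number comes from the patch itself.]

#include <stdio.h>

/* Assumed example numbers (not from the patch): ~1920-byte fragments carved
 * out of either an order-0 page (4096 bytes) or an order-3 refill (32768 bytes).
 */
int main(void)
{
        unsigned int fragsz = 1920;
        unsigned int sizes[] = { 4096, 32768 };

        for (unsigned int i = 0; i < 2; i++)
                printf("refill of %5u bytes: %2u frags, %3u bytes unused at the tail\n",
                       sizes[i], sizes[i] / fragsz, sizes[i] % fragsz);
        return 0;
}

[With these assumptions: 2 fragments and 256 wasted bytes per order-0 refill versus 17 fragments and 128 wasted bytes per 32 KB refill, i.e. far fewer refills and page->_count updates per delivered fragment.]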

It also uses a standard "struct page_frag" instead of a custom one.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5dff747b
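
[Editor's note: for reference, the generic struct page_frag mentioned in the changelog (declared in include/linux/types.h) looks roughly like this at the time of the patch; it bundles the page pointer, the current offset and the size of the backing allocation that the old private page/offset pair tracked separately.]

struct page_frag {
        struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
        __u32 offset;
        __u32 size;
#else
        __u16 offset;
        __u16 size;
#endif
};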
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 EXPORT_SYMBOL(build_skb);
 
 struct netdev_alloc_cache {
-        struct page *page;
-        unsigned int offset;
-        unsigned int pagecnt_bias;
+        struct page_frag        frag;
+        /* we maintain a pagecount bias, so that we dont dirty cache line
+         * containing page->_count every time we allocate a fragment.
+         */
+        unsigned int            pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS    NETDEV_FRAG_PAGE_MAX_SIZE
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
         struct netdev_alloc_cache *nc;
         void *data = NULL;
+        int order;
         unsigned long flags;
 
         local_irq_save(flags);
         nc = &__get_cpu_var(netdev_alloc_cache);
-        if (unlikely(!nc->page)) {
+        if (unlikely(!nc->frag.page)) {
 refill:
-                nc->page = alloc_page(gfp_mask);
-                if (unlikely(!nc->page))
-                        goto end;
+                for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+                        gfp_t gfp = gfp_mask;
+
+                        if (order)
+                                gfp |= __GFP_COMP | __GFP_NOWARN;
+                        nc->frag.page = alloc_pages(gfp, order);
+                        if (likely(nc->frag.page))
+                                break;
+                        if (--order < 0)
+                                goto end;
+                }
+                nc->frag.size = PAGE_SIZE << order;
 recycle:
-                atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
-                nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
-                nc->offset = 0;
+                atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+                nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+                nc->frag.offset = 0;
         }
 
-        if (nc->offset + fragsz > PAGE_SIZE) {
+        if (nc->frag.offset + fragsz > nc->frag.size) {
                 /* avoid unnecessary locked operations if possible */
-                if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
-                    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+                if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+                    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
                         goto recycle;
                 goto refill;
         }
 
-        data = page_address(nc->page) + nc->offset;
-        nc->offset += fragsz;
+        data = page_address(nc->frag.page) + nc->frag.offset;
+        nc->frag.offset += fragsz;
         nc->pagecnt_bias--;
 end:
         local_irq_restore(flags);
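
[Editor's note: to see how the fragments are consumed, here is a hedged, illustrative sketch (not part of this patch) of the usual pattern on top of the public wrapper netdev_alloc_frag(): carve out a fragment sized for the payload plus skb_shared_info, wrap it with build_skb(), and drop the page reference if skb construction fails. The helper name example_rx_alloc() is hypothetical; the called APIs are the real ones of that era.]

#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical helper, for illustration only: carve a receive buffer out of
 * the per-cpu fragment cache and turn it into an skb, mirroring what
 * __netdev_alloc_skb() does for small allocations.
 */
static struct sk_buff *example_rx_alloc(unsigned int len)
{
        unsigned int fragsz = SKB_DATA_ALIGN(len + NET_SKB_PAD) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        struct sk_buff *skb;
        void *data;

        data = netdev_alloc_frag(fragsz);       /* serviced by __netdev_alloc_frag() */
        if (!data)
                return NULL;

        skb = build_skb(data, fragsz);          /* skb_shared_info lands at the tail */
        if (!skb) {
                put_page(virt_to_head_page(data));  /* release the fragment's page ref */
                return NULL;
        }
        skb_reserve(skb, NET_SKB_PAD);          /* keep the usual headroom */
        return skb;
}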