Commit 2768935a authored by Daniel Pieczko, committed by Ben Hutchings

sfc: reuse pages to avoid DMA mapping/unmapping costs

On POWER systems, DMA mapping/unmapping operations are very expensive.
These changes reduce these costs by trying to reuse DMA mapped pages.

After all the buffers associated with a page have been processed and
passed up, the page is placed into a ring (if there is room).  For
each page that is required for a refill operation, a page in the ring
is examined to determine whether its page count has fallen to 1, i.e.
the kernel has released all of its references to the packets in that
page.  If so, the page can be added straight back into the RX
descriptor ring without being re-mapped for DMA.

If the kernel is still holding a reference to this page, it is removed
from the ring and unmapped for DMA.  Then a new page, which can
immediately be used by RX buffers in the descriptor ring, is allocated
and DMA mapped.
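
Since the rx.c diff is collapsed below, a minimal sketch of this reuse
check may help.  It builds on the struct efx_rx_queue fields added in
this patch; the helper name, the use of struct efx_rx_page_state to
hold the DMA address, and the unmap size are assumptions for
illustration, not the driver's exact code:

```c
/* Sketch only: take the next page from the recycle ring, reusing its
 * DMA mapping if the kernel no longer references it.  Returns NULL if
 * the caller must allocate and map a fresh page instead.
 * (Assumes <linux/dma-mapping.h> and <linux/mm.h>.)
 */
static struct page *efx_try_reuse_page(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int index = rx_queue->page_remove & rx_queue->page_ptr_mask;
	struct page *page = rx_queue->page_ring[index];
	struct efx_rx_page_state *state;

	if (page == NULL)
		return NULL;			/* empty slot */

	rx_queue->page_ring[index] = NULL;
	/* page_remove trails page_add; only advance if there is room */
	if (rx_queue->page_remove != rx_queue->page_add)
		++rx_queue->page_remove;

	/* A page count of 1 means only we hold a reference, so the page
	 * and its existing DMA mapping can be reused immediately.
	 */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;
	}

	/* Kernel still holds references: unmap and release the page.
	 * (Assumption: the DMA address is stashed in the page's
	 * struct efx_rx_page_state.)
	 */
	state = page_address(page);
	dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
		       PAGE_SIZE << efx->rx_buffer_order, DMA_FROM_DEVICE);
	put_page(page);
	++rx_queue->page_recycle_failed;
	return NULL;
}
```

Because page_remove only advances while it trails page_add, the read
pointer can never overtake the write pointer.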

The time a page needs to spend in the recycle ring before the kernel
releases its references to it depends on the number of buffers that
share the page.  Since large pages hold more RX buffers, the recycle
ring can be correspondingly shorter.  This reduces memory usage on
POWER systems while preserving the performance gain from recycling
pages, following the driver change to pack more than two RX buffers
into large pages.

When an IOMMU is not present, the recycle ring can be small to reduce
memory usage, since DMA mapping operations are inexpensive.
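
A minimal sketch of that sizing policy, assuming an iommu_present()
test and illustrative buffer counts (the driver's actual constants are
in the collapsed rx.c diff):

```c
/* Sketch of the recycle-ring sizing described above.  The two buffer
 * counts are illustrative, not the driver's actual constants.  The
 * ring is sized in buffers and divided by rx_bufs_per_page, so pages
 * that hold more buffers yield a proportionally shorter page ring.
 * (Assumes <linux/iommu.h>, <linux/log2.h>, <linux/pci.h>,
 * <linux/slab.h>.)
 */
static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int bufs_in_recycle_ring, page_ring_size;

	if (iommu_present(&pci_bus_type))
		bufs_in_recycle_ring = 4096;	/* mappings are expensive */
	else
		bufs_in_recycle_ring = 16;	/* mappings are cheap */

	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
					    efx->rx_bufs_per_page);
	rx_queue->page_ptr_mask = page_ring_size - 1;
	rx_queue->page_ring = kcalloc(page_ring_size,
				      sizeof(*rx_queue->page_ring),
				      GFP_KERNEL);
}
```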

With a small recycle ring, a single refill of the descriptor queue can
involve more buffers than the recycle ring has entries.  If occupied
page entries in the ring were then overwritten, the overwritten pages
would never be unmapped or freed, leaking memory.  To prevent this,
the check for a full recycle ring is changed to test whether the next
entry to be written is NULL.
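
The corresponding write path can be sketched as follows; the helper
name is illustrative, and the real version would also DMA-unmap the
page on the full path:

```c
/* Sketch only: try to place a page back on the recycle ring.  Testing
 * the destination slot for NULL, rather than comparing page_add with
 * page_remove, guarantees an occupied entry is never overwritten and
 * therefore never leaked.
 */
static void efx_recycle_rx_page(struct efx_rx_queue *rx_queue,
				struct page *page)
{
	unsigned int index = rx_queue->page_add & rx_queue->page_ptr_mask;

	if (rx_queue->page_ring[index] == NULL) {
		rx_queue->page_ring[index] = page;
		++rx_queue->page_add;
		return;
	}

	/* Ring full: give the page up rather than overwrite the entry.
	 * (The full driver would also DMA-unmap the page here.)
	 */
	++rx_queue->page_recycle_full;
	put_page(page);
}
```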

[bwh: Combine and rebase several commits so this is complete
 before the following buffer-packing changes.  Remove module
 parameter.]
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
parent 85740cdf
@@ -661,6 +661,8 @@ static void efx_start_datapath(struct efx_nic *efx)
 		efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
 	}
 
+	efx->rx_bufs_per_page = (rx_buf_len <= PAGE_SIZE / 2) ? 2 : 1;
+
 	/* RX filters also have scatter-enabled flags */
 	if (efx->rx_scatter != old_rx_scatter)
 		efx_filter_update_rx_scatter(efx);
...
@@ -264,12 +264,22 @@ struct efx_rx_page_state {
  * @notified_count: Number of buffers given to NIC (<= @added_count).
  * @removed_count: Number of buffers removed from the receive queue.
  * @scatter_n: Number of buffers used by current packet
+ * @page_ring: The ring to store DMA mapped pages for reuse.
+ * @page_add: Counter to calculate the write pointer for the recycle ring.
+ * @page_remove: Counter to calculate the read pointer for the recycle ring.
+ * @page_recycle_count: The number of pages that have been recycled.
+ * @page_recycle_failed: The number of pages that couldn't be recycled because
+ *	the kernel still held a reference to them.
+ * @page_recycle_full: The number of pages that were released because the
+ *	recycle ring was full.
+ * @page_ptr_mask: The number of pages in the RX recycle ring minus 1.
  * @max_fill: RX descriptor maximum fill level (<= ring size)
  * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
  *	(<= @max_fill)
  * @min_fill: RX descriptor minimum non-zero fill level.
  *	This records the minimum fill level observed when a ring
  *	refill was triggered.
+ * @recycle_count: RX buffer recycle counter.
  * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
  */
 struct efx_rx_queue {
@@ -285,10 +295,18 @@ struct efx_rx_queue {
 	unsigned int notified_count;
 	unsigned int removed_count;
 	unsigned int scatter_n;
+	struct page **page_ring;
+	unsigned int page_add;
+	unsigned int page_remove;
+	unsigned int page_recycle_count;
+	unsigned int page_recycle_failed;
+	unsigned int page_recycle_full;
+	unsigned int page_ptr_mask;
 	unsigned int max_fill;
 	unsigned int fast_fill_trigger;
 	unsigned int min_fill;
 	unsigned int min_overfill;
+	unsigned int recycle_count;
 	struct timer_list slow_fill;
 	unsigned int slow_fill_count;
 };
@@ -806,6 +824,7 @@ struct efx_nic {
 	unsigned int rx_dma_len;
 	unsigned int rx_buffer_order;
 	unsigned int rx_buffer_truesize;
+	unsigned int rx_bufs_per_page;
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
 	bool rx_scatter;
...
This diff is collapsed.