Commit 9061cb02 authored by David S. Miller's avatar David S. Miller

Merge branch 'gianfar_rx_sg'

Claudiu Manoil says:

====================
gianfar: Add Rx S/G

This patch-set introduces scatter/gather support
on the Rx side, addressing Rx path performance
issues in the driver.
Thanks.

As an example, two boards connected back-to-back
were used to measure the throughput, running the
same kernel 4.1, before and after applying these
patches.
The netperf UDP_STREAM results below show that the
bottleneck lies on the Rx side BEFORE applying the
patches, and that the Rx throughput is even lower
with a larger MTU.  AFTER applying the patches the
Rx bottleneck is gone (Rx throughput matches the
Tx one) and the RX throughput is not influenced by
MTU size any longer (as expected).

BEFORE:

1) MTU 1500 (default)

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 512
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840     512   150.00    20119124      0      549.4     100.00   14.911
163840           150.00    14057349             383.9     100.00   14.911

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 64
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840      64   150.00    23654013      0       80.7     100.00   101.463
163840           150.00    15875288              54.2     100.00   101.463

2) MTU 8000

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 512
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840     512   150.00    20067232      0      548.0     100.00   14.950
163840           150.00    6113498             166.9     99.95    14.942

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 64
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840      64   150.00    23621279      0       80.6     100.00   101.604
163840           150.00    5868602              20.0     99.96    101.563

AFTER:
(both MTU 1500 and MTU 8000)

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 512
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840     512   150.00    19914969      0      543.8     100.00   15.064
163840           150.00    19914969             543.8     99.35    14.966

root@p1010rdb-pb:~# netperf -l 150 -cC -H 192.85.1.1 -p 12867 -t UDP_STREAM -- -m 64
MIGRATED UDP STREAM TEST from 0.0.0.0 () port 0 AF_INET to 192.85.1.1 () port 0 AF_INET
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

163840      64   150.00    23433989      0       80.0     100.00   102.416
163840           150.00    23433989              80.0     99.62    102.023
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents aad0d51e 75354148
This diff is collapsed.
......@@ -71,11 +71,6 @@ struct ethtool_rx_list {
/* Number of bytes to align the rx bufs to */
#define RXBUF_ALIGNMENT 64
/* The number of bytes which composes a unit for the purpose of
* allocating data buffers. ie-for any given MTU, the data buffer
* will be the next highest multiple of 512 bytes. */
#define INCREMENTAL_BUFFER_SIZE 512
#define PHY_INIT_TIMEOUT 100000
#define DRV_NAME "gfar-enet"
......@@ -92,6 +87,8 @@ extern const char gfar_driver_version[];
#define DEFAULT_TX_RING_SIZE 256
#define DEFAULT_RX_RING_SIZE 256
#define GFAR_RX_BUFF_ALLOC 16
#define GFAR_RX_MAX_RING_SIZE 256
#define GFAR_TX_MAX_RING_SIZE 256
......@@ -103,11 +100,14 @@ extern const char gfar_driver_version[];
#define DEFAULT_RX_LFC_THR 16
#define DEFAULT_LFC_PTVVAL 4
#define DEFAULT_RX_BUFFER_SIZE 1536
#define GFAR_RXB_SIZE 1536
#define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define GFAR_RXB_TRUESIZE 2048
#define TX_RING_MOD_MASK(size) (size-1)
#define RX_RING_MOD_MASK(size) (size-1)
#define JUMBO_BUFFER_SIZE 9728
#define JUMBO_FRAME_SIZE 9600
#define GFAR_JUMBO_FRAME_SIZE 9600
#define DEFAULT_FIFO_TX_THR 0x100
#define DEFAULT_FIFO_TX_STARVE 0x40
......@@ -640,6 +640,7 @@ struct rmon_mib
};
struct gfar_extra_stats {
atomic64_t rx_alloc_err;
atomic64_t rx_large;
atomic64_t rx_short;
atomic64_t rx_nonoctet;
......@@ -651,7 +652,6 @@ struct gfar_extra_stats {
atomic64_t eberr;
atomic64_t tx_babt;
atomic64_t tx_underrun;
atomic64_t rx_skbmissing;
atomic64_t tx_timeout;
};
......@@ -1012,34 +1012,42 @@ struct rx_q_stats {
unsigned long rx_dropped;
};
struct gfar_rx_buff {
dma_addr_t dma;
struct page *page;
unsigned int page_offset;
};
/**
* struct gfar_priv_rx_q - per rx queue structure
* @rx_skbuff: skb pointers
* @skb_currx: currently use skb pointer
* @rx_buff: Array of buffer info metadata structs
* @rx_bd_base: First rx buffer descriptor
* @cur_rx: Next free rx ring entry
* @next_to_use: index of the next buffer to be alloc'd
* @next_to_clean: index of the next buffer to be cleaned
* @qindex: index of this queue
* @dev: back pointer to the dev structure
* @ndev: back pointer to net_device
* @rx_ring_size: Rx ring size
* @rxcoalescing: enable/disable rx-coalescing
* @rxic: receive interrupt coalescing vlaue
*/
struct gfar_priv_rx_q {
struct sk_buff **rx_skbuff __aligned(SMP_CACHE_BYTES);
dma_addr_t rx_bd_dma_base;
struct gfar_rx_buff *rx_buff __aligned(SMP_CACHE_BYTES);
struct rxbd8 *rx_bd_base;
struct rxbd8 *cur_rx;
struct net_device *dev;
struct gfar_priv_grp *grp;
struct net_device *ndev;
struct device *dev;
u16 rx_ring_size;
u16 qindex;
struct gfar_priv_grp *grp;
u16 next_to_clean;
u16 next_to_use;
u16 next_to_alloc;
struct sk_buff *skb;
struct rx_q_stats stats;
u16 skb_currx;
u16 qindex;
unsigned int rx_ring_size;
/* RX Coalescing values */
u32 __iomem *rfbptr;
unsigned char rxcoalescing;
unsigned long rxic;
u32 __iomem *rfbptr;
dma_addr_t rx_bd_dma_base;
};
enum gfar_irqinfo_id {
......@@ -1109,7 +1117,6 @@ struct gfar_private {
struct device *dev;
struct net_device *ndev;
enum gfar_errata errata;
unsigned int rx_buffer_size;
u16 uses_rxfcb;
u16 padding;
......@@ -1295,6 +1302,23 @@ static inline void gfar_clear_txbd_status(struct txbd8 *bdp)
bdp->lstatus = cpu_to_be32(lstatus);
}
static inline int gfar_rxbd_unused(struct gfar_priv_rx_q *rxq)
{
if (rxq->next_to_clean > rxq->next_to_use)
return rxq->next_to_clean - rxq->next_to_use - 1;
return rxq->rx_ring_size + rxq->next_to_clean - rxq->next_to_use - 1;
}
static inline struct rxbd8 *gfar_rxbd_lastfree(struct gfar_priv_rx_q *rxq)
{
int i;
i = rxq->next_to_use ? rxq->next_to_use - 1 : rxq->rx_ring_size - 1;
return &rxq->rx_bd_base[i];
}
irqreturn_t gfar_receive(int irq, void *dev_id);
int startup_gfar(struct net_device *dev);
void stop_gfar(struct net_device *dev);
......
......@@ -61,6 +61,8 @@ static void gfar_gdrvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo);
static const char stat_gstrings[][ETH_GSTRING_LEN] = {
/* extra stats */
"rx-allocation-errors",
"rx-large-frame-errors",
"rx-short-frame-errors",
"rx-non-octet-errors",
......@@ -72,8 +74,8 @@ static const char stat_gstrings[][ETH_GSTRING_LEN] = {
"ethernet-bus-error",
"tx-babbling-errors",
"tx-underrun-errors",
"rx-skb-missing-errors",
"tx-timeout-errors",
/* rmon stats */
"tx-rx-64-frames",
"tx-rx-65-127-frames",
"tx-rx-128-255-frames",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment