Commit 19433646 authored by David S. Miller

Merge branch 'gianfar-next'

Claudiu Manoil says:

====================
gianfar: Tx timeout issue

There's an older Tx timeout issue showing up on etsec2 devices
with 2 CPUs.  I pinned this issue down to processing overhead
incurred by supporting multiple Tx/Rx rings, as explained in
the 2nd patch below.  But before this, there's also a concurrency
issue leading to Rx/Tx spurious interrupts, addressed by the
'Tx NAPI' patch below.
The Tx timeout can be triggered with multiple Tx flows,
'iperf -c -P 8' commands, on a 2 CPUs etsec2 based (P1020) board.

Before the patches:
"""
root@p1020rdb-pc:~# iperf -c 172.16.1.3 -n 1000M -P 8 &
[...]
root@p1020rdb-pc:~# NETDEV WATCHDOG: eth1 (fsl-gianfar): transmit queue 1 timed out
WARNING: at net/sched/sch_generic.c:279
Modules linked in:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.13.0-rc3-03386-g89ea59c #23
task: ed84ef40 ti: ed868000 task.ti: ed868000
NIP: c04627a8 LR: c04627a8 CTR: c02fb270
REGS: ed869d00 TRAP: 0700   Not tainted  (3.13.0-rc3-03386-g89ea59c)
MSR: 00029000 <CE,EE,ME>  CR: 44000022  XER: 20000000
[...]

root@p1020rdb-pc:~# [ ID] Interval       Transfer     Bandwidth
[  5]  0.0-19.3 sec  1000 MBytes    434 Mbits/sec
[  8]  0.0-39.7 sec  1000 MBytes    211 Mbits/sec
[  9]  0.0-40.1 sec  1000 MBytes    209 Mbits/sec
[  3]  0.0-40.2 sec  1000 MBytes    209 Mbits/sec
[ 10]  0.0-59.0 sec  1000 MBytes    142 Mbits/sec
[  7]  0.0-74.6 sec  1000 MBytes    112 Mbits/sec
[  6]  0.0-74.7 sec  1000 MBytes    112 Mbits/sec
[  4]  0.0-74.7 sec  1000 MBytes    112 Mbits/sec
[SUM]  0.0-74.7 sec  7.81 GBytes    898 Mbits/sec

root@p1020rdb-pc:~# ifconfig eth1
eth1      Link encap:Ethernet  HWaddr 00:04:9f:00:13:01
          inet addr:172.16.1.1  Bcast:172.16.255.255  Mask:255.255.0.0
          inet6 addr: fe80::204:9fff:fe00:1301/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:708722 errors:0 dropped:0 overruns:0 frame:0
          TX packets:8717849 errors:6 dropped:0 overruns:1470 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:58118018 (55.4 MiB)  TX bytes:274069482 (261.3 MiB)
          Base address:0xa000

"""

After applying the patches:
"""
root@p1020rdb-pc:~# iperf -c 172.16.1.3 -n 1000M -P 8 &
[...]
root@p1020rdb-pc:~# [ ID] Interval       Transfer     Bandwidth
[  9]  0.0-70.5 sec  1000 MBytes    119 Mbits/sec
[  5]  0.0-70.5 sec  1000 MBytes    119 Mbits/sec
[  6]  0.0-70.7 sec  1000 MBytes    119 Mbits/sec
[  4]  0.0-71.0 sec  1000 MBytes    118 Mbits/sec
[  8]  0.0-71.1 sec  1000 MBytes    118 Mbits/sec
[  3]  0.0-71.2 sec  1000 MBytes    118 Mbits/sec
[ 10]  0.0-71.3 sec  1000 MBytes    118 Mbits/sec
[  7]  0.0-71.3 sec  1000 MBytes    118 Mbits/sec
[SUM]  0.0-71.3 sec  7.81 GBytes    942 Mbits/sec

root@p1020rdb-pc:~# ifconfig eth1
eth1      Link encap:Ethernet  HWaddr 00:04:9f:00:13:01
          inet addr:172.16.1.1  Bcast:172.16.255.255  Mask:255.255.0.0
          inet6 addr: fe80::204:9fff:fe00:1301/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:728446 errors:0 dropped:0 overruns:0 frame:0
          TX packets:8690057 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:59732650 (56.9 MiB)  TX bytes:271554306 (258.9 MiB)
          Base address:0xa000
"""
v2: PATCH 2:
    Replaced CPP check with run-time condition to
    limit the number of queues. Updated comments.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents be14cc98 71ff9e3d
This diff is collapsed.
...@@ -377,8 +377,11 @@ extern const char gfar_driver_version[]; ...@@ -377,8 +377,11 @@ extern const char gfar_driver_version[];
IMASK_RXFEN0 | IMASK_BSY | IMASK_EBERR | IMASK_BABR | \ IMASK_RXFEN0 | IMASK_BSY | IMASK_EBERR | IMASK_BABR | \
IMASK_XFUN | IMASK_RXC | IMASK_BABT | IMASK_DPE \ IMASK_XFUN | IMASK_RXC | IMASK_BABT | IMASK_DPE \
| IMASK_PERR) | IMASK_PERR)
#define IMASK_RTX_DISABLED ((~(IMASK_RXFEN0 | IMASK_TXFEN | IMASK_BSY)) \ #define IMASK_RX_DEFAULT (IMASK_RXFEN0 | IMASK_BSY)
& IMASK_DEFAULT) #define IMASK_TX_DEFAULT (IMASK_TXFEN | IMASK_TXBEN)
#define IMASK_RX_DISABLED ((~(IMASK_RX_DEFAULT)) & IMASK_DEFAULT)
#define IMASK_TX_DISABLED ((~(IMASK_TX_DEFAULT)) & IMASK_DEFAULT)
/* Fifo management */ /* Fifo management */
#define FIFO_TX_THR_MASK 0x01ff #define FIFO_TX_THR_MASK 0x01ff
...@@ -409,7 +412,9 @@ extern const char gfar_driver_version[]; ...@@ -409,7 +412,9 @@ extern const char gfar_driver_version[];
/* This default RIR value directly corresponds /* This default RIR value directly corresponds
* to the 3-bit hash value generated */ * to the 3-bit hash value generated */
#define DEFAULT_RIR0 0x05397700 #define DEFAULT_8RXQ_RIR0 0x05397700
/* Map even hash values to Q0, and odd ones to Q1 */
#define DEFAULT_2RXQ_RIR0 0x04104100
/* RQFCR register bits */ /* RQFCR register bits */
#define RQFCR_GPI 0x80000000 #define RQFCR_GPI 0x80000000
...@@ -904,6 +909,22 @@ enum { ...@@ -904,6 +909,22 @@ enum {
MQ_MG_MODE MQ_MG_MODE
}; };
/* GFAR_SQ_POLLING: Single Queue NAPI polling mode
* The driver supports a single pair of RX/Tx queues
* per interrupt group (Rx/Tx int line). MQ_MG mode
* devices have 2 interrupt groups, so the device will
* have a total of 2 Tx and 2 Rx queues in this case.
* GFAR_MQ_POLLING: Multi Queue NAPI polling mode
* The driver supports all the 8 Rx and Tx HW queues
* each queue mapped by the Device Tree to one of
* the 2 interrupt groups. This mode implies significant
* processing overhead (CPU and controller level).
*/
enum gfar_poll_mode {
GFAR_SQ_POLLING = 0,
GFAR_MQ_POLLING
};
/* /*
* Per TX queue stats * Per TX queue stats
*/ */
...@@ -1013,17 +1034,20 @@ struct gfar_irqinfo { ...@@ -1013,17 +1034,20 @@ struct gfar_irqinfo {
*/ */
struct gfar_priv_grp { struct gfar_priv_grp {
spinlock_t grplock __attribute__ ((aligned (SMP_CACHE_BYTES))); spinlock_t grplock __aligned(SMP_CACHE_BYTES);
struct napi_struct napi; struct napi_struct napi_rx;
struct gfar_private *priv; struct napi_struct napi_tx;
struct gfar __iomem *regs; struct gfar __iomem *regs;
unsigned int rstat; struct gfar_priv_tx_q *tx_queue;
unsigned long num_rx_queues; struct gfar_priv_rx_q *rx_queue;
unsigned long rx_bit_map;
/* cacheline 3 */
unsigned int tstat; unsigned int tstat;
unsigned int rstat;
struct gfar_private *priv;
unsigned long num_tx_queues; unsigned long num_tx_queues;
unsigned long tx_bit_map; unsigned long tx_bit_map;
unsigned long num_rx_queues;
unsigned long rx_bit_map;
struct gfar_irqinfo *irqinfo[GFAR_NUM_IRQS]; struct gfar_irqinfo *irqinfo[GFAR_NUM_IRQS];
}; };
...@@ -1053,8 +1077,6 @@ enum gfar_dev_state { ...@@ -1053,8 +1077,6 @@ enum gfar_dev_state {
* the buffer descriptor determines the actual condition. * the buffer descriptor determines the actual condition.
*/ */
struct gfar_private { struct gfar_private {
unsigned int num_rx_queues;
struct device *dev; struct device *dev;
struct net_device *ndev; struct net_device *ndev;
enum gfar_errata errata; enum gfar_errata errata;
...@@ -1062,6 +1084,7 @@ struct gfar_private { ...@@ -1062,6 +1084,7 @@ struct gfar_private {
u16 uses_rxfcb; u16 uses_rxfcb;
u16 padding; u16 padding;
u32 device_flags;
/* HW time stamping enabled flag */ /* HW time stamping enabled flag */
int hwts_rx_en; int hwts_rx_en;
...@@ -1072,10 +1095,11 @@ struct gfar_private { ...@@ -1072,10 +1095,11 @@ struct gfar_private {
struct gfar_priv_grp gfargrp[MAXGROUPS]; struct gfar_priv_grp gfargrp[MAXGROUPS];
unsigned long state; unsigned long state;
u32 device_flags;
unsigned int mode; unsigned short mode;
unsigned short poll_mode;
unsigned int num_tx_queues; unsigned int num_tx_queues;
unsigned int num_rx_queues;
unsigned int num_grps; unsigned int num_grps;
/* Network Statistics */ /* Network Statistics */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment