net: mvpp2: Use relaxed I/O in data path

Use relaxed I/O on the hot path. This achieves significant performance improvements. On a 10G link, this makes a basic iperf TCP test go from an average of 4.5 Gbits/sec to about 9.40 Gbits/sec. Signed-off-by: Yan Markman <ymarkman@marvell.com> [Maxime: Commit message, cosmetic changes] Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com> Signed-off-by: David S. Miller <davem@davemloft.net>

net: mvpp2: Use relaxed I/O in data path
Use relaxed I/O on the hot path. This achieves significant performance improvements. On a 10G link, this makes a basic iperf TCP test go from an average of 4.5 Gbits/sec to about 9.40 Gbits/sec. Signed-off-by: Yan Markman <ymarkman@marvell.com> [Maxime: Commit message, cosmetic changes] Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com> Signed-off-by: David S. Miller <davem@davemloft.net>
cdcfeb0f · Yan Markman · David S. Miller · 5f75a186 · cdcfeb0f
Commit cdcfeb0f authored Mar 27, 2018 by Yan Markman Committed by David S. Miller Mar 27, 2018
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 13 deletions

drivers/net/ethernet/marvell/mvpp2.c drivers/net/ethernet/marvell/mvpp2.c +30 -13

No files found.
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1359,6 +1359,10 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 	return readl(priv->swth_base[0] + offset);
 }

+static u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
+{
+	return readl_relaxed(priv->swth_base[0] + offset);
+}
 /* These accessors should be used to access:
 *
 * - per-CPU registers, where each CPU has its own copy of the
@@ -1407,6 +1411,18 @@ static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
 	return readl(priv->swth_base[cpu] + offset);
 }

+static void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu,
+				       u32 offset, u32 data)
+{
+	writel_relaxed(data, priv->swth_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read_relaxed(struct mvpp2 *priv, int cpu,
+				     u32 offset)
+{
+	return readl_relaxed(priv->swth_base[cpu] + offset);
+}
+
 static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
 					    struct mvpp2_tx_desc *tx_desc)
 {
@@ -4442,7 +4458,7 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 				<< MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
 				MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;

-		mvpp2_percpu_write(port->priv, cpu,
+		mvpp2_percpu_write_relaxed(port->priv, cpu,
 					   MVPP22_BM_ADDR_HIGH_RLS_REG, val);
 	}

@@ -4451,9 +4467,9 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 	 * descriptor. Instead of storing the virtual address, we
 	 * store the physical address
 	 */
-	mvpp2_percpu_write(port->priv, cpu,
+	mvpp2_percpu_write_relaxed(port->priv, cpu,
 				   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
-	mvpp2_percpu_write(port->priv, cpu,
+	mvpp2_percpu_write_relaxed(port->priv, cpu,
 				   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);

 	put_cpu();
@@ -5546,7 +5562,8 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
 	if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
 		/* Update number of occupied aggregated Tx descriptors */
 		int cpu = smp_processor_id();
-		u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+		u32 val = mvpp2_read_relaxed(priv,
+					     MVPP2_AGGR_TXQ_STATUS_REG(cpu));

 		aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
 	}
@@ -5570,9 +5587,9 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
 	int cpu = smp_processor_id();

 	val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-	mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
+	mvpp2_percpu_write_relaxed(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);

-	val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
+	val = mvpp2_percpu_read_relaxed(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);

 	return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -5677,7 +5694,7 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
 	u32 val;

 	/* Reading status reg resets transmitted descriptor counter */
-	val = mvpp2_percpu_read(port->priv, smp_processor_id(),
+	val = mvpp2_percpu_read_relaxed(port->priv, smp_processor_id(),
 					MVPP2_TXQ_SENT_REG(txq->id));

 	return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
@@ -7044,7 +7061,7 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 	 *
 	 * Each CPU has its own Rx/Tx cause register
 	 */
-	cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
+	cause_rx_tx = mvpp2_percpu_read_relaxed(port->priv, qv->sw_thread_id,
 						MVPP2_ISR_RX_TX_CAUSE_REG(port->id));

 	cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;