Commit d7722973 authored by Jakub Kicinski

Merge branch 'macb-napi-improvements'

Robert Hancock says:

====================
MACB NAPI improvements

Simplify the logic in the Cadence MACB/GEM driver for determining
when to reschedule NAPI processing, and update it to use NAPI for the
TX path as well as the RX path.

Changes since v1: Changed to use separate TX and RX NAPI instances and
poll functions to avoid unnecessary checks of the other ring (TX/RX)
states during polling and to use budget handling for both RX and TX.
Fixed locking to protect against concurrent access to TX ring on
TX transmit and TX poll paths.
====================
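
For readers unfamiliar with the split-NAPI pattern the series adopts, the following is a minimal sketch of the idea: one napi_struct per direction, each with its own poll function and its own budget handling, registered with netif_napi_add(). This is an illustration only, not the macb code; my_queue, my_rx_clean(), my_tx_clean() and the IRQ re-arm helpers are hypothetical names. The driver's real implementation is in the diff below.

#include <linux/netdevice.h>

/* Hypothetical per-queue state and helpers -- illustrative only. */
struct my_queue {
	struct napi_struct napi_rx;
	struct napi_struct napi_tx;
	/* ring pointers, register offsets, ... */
};

int my_rx_clean(struct my_queue *q, int budget);	/* process received frames */
int my_tx_clean(struct my_queue *q, int budget);	/* reclaim completed TX descriptors */
void my_enable_rx_irq(struct my_queue *q);
void my_enable_tx_irq(struct my_queue *q);

static int my_rx_poll(struct napi_struct *napi, int budget)
{
	struct my_queue *q = container_of(napi, struct my_queue, napi_rx);
	int work_done = my_rx_clean(q, budget);

	/* Re-arm the RX interrupt only once the pending work fits in budget. */
	if (work_done < budget && napi_complete_done(napi, work_done))
		my_enable_rx_irq(q);
	return work_done;
}

static int my_tx_poll(struct napi_struct *napi, int budget)
{
	struct my_queue *q = container_of(napi, struct my_queue, napi_tx);
	int work_done = my_tx_clean(q, budget);

	if (work_done < budget && napi_complete_done(napi, work_done))
		my_enable_tx_irq(q);
	return work_done;
}

static void my_queue_napi_init(struct net_device *dev, struct my_queue *q)
{
	/* One NAPI context per direction, each with its own poll function. */
	netif_napi_add(dev, &q->napi_rx, my_rx_poll, NAPI_POLL_WEIGHT);
	netif_napi_add(dev, &q->napi_tx, my_tx_poll, NAPI_POLL_WEIGHT);
}
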

Link: https://lore.kernel.org/r/20220509194635.3094080-1-robert.hancock@calian.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 11ecf341 138badbc
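
The locking fix called out in the changelog follows a standard kernel pattern: the TX ring head/tail indices are guarded by a per-queue spinlock that the transmit path takes with spin_lock_bh() (process context, so bottom halves must be blocked), while the TX NAPI poll, which already runs in softirq context, takes it with a plain spin_lock(). The sketch below illustrates that pattern only; my_queue, my_map_and_queue_skb() and my_reclaim_one() are hypothetical placeholders. The driver's actual changes appear in the diff that follows.

#include <linux/netdevice.h>
#include <linux/spinlock.h>

struct my_queue {
	spinlock_t tx_ptr_lock;		/* protects tx_head and tx_tail */
	unsigned int tx_head, tx_tail;
};

bool my_map_and_queue_skb(struct my_queue *q, struct sk_buff *skb);	/* advances tx_head */
bool my_reclaim_one(struct my_queue *q);				/* advances tx_tail */

/* Transmit side: process context, so block bottom halves while holding
 * the lock to keep the TX NAPI poll from racing with us on this CPU.
 */
static netdev_tx_t my_xmit(struct my_queue *q, struct sk_buff *skb)
{
	netdev_tx_t ret = NETDEV_TX_OK;

	spin_lock_bh(&q->tx_ptr_lock);
	if (!my_map_and_queue_skb(q, skb))
		ret = NETDEV_TX_BUSY;
	spin_unlock_bh(&q->tx_ptr_lock);
	return ret;
}

/* Completion side: called from the TX NAPI poll (softirq context), so a
 * plain spin_lock() pairs correctly with the _bh variant above.
 */
static int my_tx_complete(struct my_queue *q, int budget)
{
	int packets = 0;

	spin_lock(&q->tx_ptr_lock);
	while (q->tx_tail != q->tx_head && packets < budget) {
		if (!my_reclaim_one(q))
			break;
		packets++;
	}
	spin_unlock(&q->tx_ptr_lock);
	return packets;
}
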
@@ -1204,11 +1204,15 @@ struct macb_queue {
unsigned int RBQP;
unsigned int RBQPH;
/* Lock to protect tx_head and tx_tail */
spinlock_t tx_ptr_lock;
unsigned int tx_head, tx_tail;
struct macb_dma_desc *tx_ring;
struct macb_tx_skb *tx_skb;
dma_addr_t tx_ring_dma;
struct work_struct tx_error_task;
bool txubr_pending;
struct napi_struct napi_tx;
dma_addr_t rx_ring_dma;
dma_addr_t rx_buffers_dma;
@@ -1217,7 +1221,7 @@ struct macb_queue {
struct macb_dma_desc *rx_ring;
struct sk_buff **rx_skbuff;
void *rx_buffers;
struct napi_struct napi;
struct napi_struct napi_rx;
struct queue_stats stats;
#ifdef CONFIG_MACB_USE_HWSTAMP
@@ -959,7 +959,7 @@ static int macb_halt_tx(struct macb *bp)
return -ETIMEDOUT;
}
static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb)
static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget)
{
if (tx_skb->mapping) {
if (tx_skb->mapped_as_page)
@@ -972,7 +972,7 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb)
}
if (tx_skb->skb) {
dev_kfree_skb_any(tx_skb->skb);
napi_consume_skb(tx_skb->skb, budget);
tx_skb->skb = NULL;
}
}
@@ -1025,12 +1025,13 @@ static void macb_tx_error_task(struct work_struct *work)
(unsigned int)(queue - bp->queues),
queue->tx_tail, queue->tx_head);
/* Prevent the queue IRQ handlers from running: each of them may call
* macb_tx_interrupt(), which in turn may call netif_wake_subqueue().
/* Prevent the queue NAPI TX poll from running, as it calls
* macb_tx_complete(), which in turn may call netif_wake_subqueue().
* As explained below, we have to halt the transmission before updating
* TBQP registers so we call netif_tx_stop_all_queues() to notify the
* network engine about the macb/gem being halted.
*/
napi_disable(&queue->napi_tx);
spin_lock_irqsave(&bp->lock, flags);
/* Make sure nobody is trying to queue up new packets */
@@ -1058,7 +1059,7 @@ static void macb_tx_error_task(struct work_struct *work)
if (ctrl & MACB_BIT(TX_USED)) {
/* skb is set for the last buffer of the frame */
while (!skb) {
macb_tx_unmap(bp, tx_skb);
macb_tx_unmap(bp, tx_skb, 0);
tail++;
tx_skb = macb_tx_skb(queue, tail);
skb = tx_skb->skb;
@@ -1088,7 +1089,7 @@ static void macb_tx_error_task(struct work_struct *work)
desc->ctrl = ctrl | MACB_BIT(TX_USED);
}
macb_tx_unmap(bp, tx_skb);
macb_tx_unmap(bp, tx_skb, 0);
}
/* Set end of TX queue */
@@ -1118,27 +1119,20 @@ static void macb_tx_error_task(struct work_struct *work)
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
spin_unlock_irqrestore(&bp->lock, flags);
napi_enable(&queue->napi_tx);
}
static void macb_tx_interrupt(struct macb_queue *queue)
static int macb_tx_complete(struct macb_queue *queue, int budget)
{
unsigned int tail;
unsigned int head;
u32 status;
struct macb *bp = queue->bp;
u16 queue_index = queue - bp->queues;
unsigned int tail;
unsigned int head;
int packets = 0;
status = macb_readl(bp, TSR);
macb_writel(bp, TSR, status);
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(TCOMP));
netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n",
(unsigned long)status);
spin_lock(&queue->tx_ptr_lock);
head = queue->tx_head;
for (tail = queue->tx_tail; tail != head; tail++) {
for (tail = queue->tx_tail; tail != head && packets < budget; tail++) {
struct macb_tx_skb *tx_skb;
struct sk_buff *skb;
struct macb_dma_desc *desc;
@@ -1179,10 +1173,11 @@ static void macb_tx_interrupt(struct macb_queue *queue)
queue->stats.tx_packets++;
bp->dev->stats.tx_bytes += skb->len;
queue->stats.tx_bytes += skb->len;
packets++;
}
/* Now we can safely release resources */
macb_tx_unmap(bp, tx_skb);
macb_tx_unmap(bp, tx_skb, budget);
/* skb is set only for the last buffer of the frame.
* WARNING: at this point skb has been freed by
@@ -1198,6 +1193,9 @@ static void macb_tx_interrupt(struct macb_queue *queue)
CIRC_CNT(queue->tx_head, queue->tx_tail,
bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
netif_wake_subqueue(bp->dev, queue_index);
spin_unlock(&queue->tx_ptr_lock);
return packets;
}
static void gem_rx_refill(struct macb_queue *queue)
@@ -1554,54 +1552,51 @@ static int macb_rx(struct macb_queue *queue, struct napi_struct *napi,
return received;
}
static int macb_poll(struct napi_struct *napi, int budget)
static bool macb_rx_pending(struct macb_queue *queue)
{
struct macb_queue *queue = container_of(napi, struct macb_queue, napi);
struct macb *bp = queue->bp;
int work_done;
u32 status;
unsigned int entry;
struct macb_dma_desc *desc;
entry = macb_rx_ring_wrap(bp, queue->rx_tail);
desc = macb_rx_desc(queue, entry);
status = macb_readl(bp, RSR);
macb_writel(bp, RSR, status);
/* Make hw descriptor updates visible to CPU */
rmb();
netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
(unsigned long)status, budget);
return (desc->addr & MACB_BIT(RX_USED)) != 0;
}
static int macb_rx_poll(struct napi_struct *napi, int budget)
{
struct macb_queue *queue = container_of(napi, struct macb_queue, napi_rx);
struct macb *bp = queue->bp;
int work_done;
work_done = bp->macbgem_ops.mog_rx(queue, napi, budget);
if (work_done < budget) {
napi_complete_done(napi, work_done);
/* RSR bits only seem to propagate to raise interrupts when
* interrupts are enabled at the time, so if bits are already
* set due to packets received while interrupts were disabled,
netdev_vdbg(bp->dev, "RX poll: queue = %u, work_done = %d, budget = %d\n",
(unsigned int)(queue - bp->queues), work_done, budget);
if (work_done < budget && napi_complete_done(napi, work_done)) {
queue_writel(queue, IER, bp->rx_intr_mask);
/* Packet completions only seem to propagate to raise
* interrupts when interrupts are enabled at the time, so if
* packets were received while interrupts were disabled,
* they will not cause another interrupt to be generated when
* interrupts are re-enabled.
* Check for this case here. This has been seen to happen
* around 30% of the time under heavy network load.
* Check for this case here to avoid losing a wakeup. This can
* potentially race with the interrupt handler doing the same
* actions if an interrupt is raised just after enabling them,
* but this should be harmless.
*/
status = macb_readl(bp, RSR);
if (status) {
if (macb_rx_pending(queue)) {
queue_writel(queue, IDR, bp->rx_intr_mask);
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(RCOMP));
napi_reschedule(napi);
} else {
queue_writel(queue, IER, bp->rx_intr_mask);
/* In rare cases, packets could have been received in
* the window between the check above and re-enabling
* interrupts. Therefore, a double-check is required
* to avoid losing a wakeup. This can potentially race
* with the interrupt handler doing the same actions
* if an interrupt is raised just after enabling them,
* but this should be harmless.
*/
status = macb_readl(bp, RSR);
if (unlikely(status)) {
queue_writel(queue, IDR, bp->rx_intr_mask);
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(RCOMP));
napi_schedule(napi);
}
netdev_vdbg(bp->dev, "poll: packets pending, reschedule\n");
napi_schedule(napi);
}
}
@@ -1610,6 +1605,90 @@ static int macb_poll(struct napi_struct *napi, int budget)
return work_done;
}
static void macb_tx_restart(struct macb_queue *queue)
{
struct macb *bp = queue->bp;
unsigned int head_idx, tbqp;
spin_lock(&queue->tx_ptr_lock);
if (queue->tx_head == queue->tx_tail)
goto out_tx_ptr_unlock;
tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, queue->tx_head));
if (tbqp == head_idx)
goto out_tx_ptr_unlock;
spin_lock_irq(&bp->lock);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
spin_unlock_irq(&bp->lock);
out_tx_ptr_unlock:
spin_unlock(&queue->tx_ptr_lock);
}
static bool macb_tx_complete_pending(struct macb_queue *queue)
{
bool retval = false;
spin_lock(&queue->tx_ptr_lock);
if (queue->tx_head != queue->tx_tail) {
/* Make hw descriptor updates visible to CPU */
rmb();
if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED))
retval = true;
}
spin_unlock(&queue->tx_ptr_lock);
return retval;
}
static int macb_tx_poll(struct napi_struct *napi, int budget)
{
struct macb_queue *queue = container_of(napi, struct macb_queue, napi_tx);
struct macb *bp = queue->bp;
int work_done;
work_done = macb_tx_complete(queue, budget);
rmb(); // ensure txubr_pending is up to date
if (queue->txubr_pending) {
queue->txubr_pending = false;
netdev_vdbg(bp->dev, "poll: tx restart\n");
macb_tx_restart(queue);
}
netdev_vdbg(bp->dev, "TX poll: queue = %u, work_done = %d, budget = %d\n",
(unsigned int)(queue - bp->queues), work_done, budget);
if (work_done < budget && napi_complete_done(napi, work_done)) {
queue_writel(queue, IER, MACB_BIT(TCOMP));
/* Packet completions only seem to propagate to raise
* interrupts when interrupts are enabled at the time, so if
* packets were sent while interrupts were disabled,
* they will not cause another interrupt to be generated when
* interrupts are re-enabled.
* Check for this case here to avoid losing a wakeup. This can
* potentially race with the interrupt handler doing the same
* actions if an interrupt is raised just after enabling them,
* but this should be harmless.
*/
if (macb_tx_complete_pending(queue)) {
queue_writel(queue, IDR, MACB_BIT(TCOMP));
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(TCOMP));
netdev_vdbg(bp->dev, "TX poll: packets pending, reschedule\n");
napi_schedule(napi);
}
}
return work_done;
}
static void macb_hresp_error_task(struct tasklet_struct *t)
{
struct macb *bp = from_tasklet(bp, t, hresp_err_tasklet);
@@ -1649,29 +1728,6 @@ static void macb_hresp_error_task(struct tasklet_struct *t)
netif_tx_start_all_queues(dev);
}
static void macb_tx_restart(struct macb_queue *queue)
{
unsigned int head = queue->tx_head;
unsigned int tail = queue->tx_tail;
struct macb *bp = queue->bp;
unsigned int head_idx, tbqp;
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(TXUBR));
if (head == tail)
return;
tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp);
tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp));
head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head));
if (tbqp == head_idx)
return;
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
}
static irqreturn_t macb_wol_interrupt(int irq, void *dev_id)
{
struct macb_queue *queue = dev_id;
@@ -1768,9 +1824,27 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(RCOMP));
if (napi_schedule_prep(&queue->napi)) {
if (napi_schedule_prep(&queue->napi_rx)) {
netdev_vdbg(bp->dev, "scheduling RX softirq\n");
__napi_schedule(&queue->napi);
__napi_schedule(&queue->napi_rx);
}
}
if (status & (MACB_BIT(TCOMP) |
MACB_BIT(TXUBR))) {
queue_writel(queue, IDR, MACB_BIT(TCOMP));
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(TCOMP) |
MACB_BIT(TXUBR));
if (status & MACB_BIT(TXUBR)) {
queue->txubr_pending = true;
wmb(); // ensure softirq can see update
}
if (napi_schedule_prep(&queue->napi_tx)) {
netdev_vdbg(bp->dev, "scheduling TX softirq\n");
__napi_schedule(&queue->napi_tx);
}
}
@@ -1784,12 +1858,6 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
break;
}
if (status & MACB_BIT(TCOMP))
macb_tx_interrupt(queue);
if (status & MACB_BIT(TXUBR))
macb_tx_restart(queue);
/* Link change detection isn't possible with RMII, so we'll
* add that if/when we get our hands on a full-blown MII PHY.
*/
@@ -2022,7 +2090,7 @@ static unsigned int macb_tx_map(struct macb *bp,
for (i = queue->tx_head; i != tx_head; i++) {
tx_skb = macb_tx_skb(queue, i);
macb_tx_unmap(bp, tx_skb);
macb_tx_unmap(bp, tx_skb, 0);
}
return 0;
@@ -2144,7 +2212,6 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
u16 queue_index = skb_get_queue_mapping(skb);
struct macb *bp = netdev_priv(dev);
struct macb_queue *queue = &bp->queues[queue_index];
unsigned long flags;
unsigned int desc_cnt, nr_frags, frag_size, f;
unsigned int hdrlen;
bool is_lso;
@@ -2201,16 +2268,16 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length);
}
spin_lock_irqsave(&bp->lock, flags);
spin_lock_bh(&queue->tx_ptr_lock);
/* This is a hard error, log it. */
if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
bp->tx_ring_size) < desc_cnt) {
netif_stop_subqueue(dev, queue_index);
spin_unlock_irqrestore(&bp->lock, flags);
netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
queue->tx_head, queue->tx_tail);
return NETDEV_TX_BUSY;
ret = NETDEV_TX_BUSY;
goto unlock;
}
/* Map socket buffer for DMA transfer */
@@ -2223,13 +2290,15 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
wmb();
skb_tx_timestamp(skb);
spin_lock_irq(&bp->lock);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
spin_unlock_irq(&bp->lock);
if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
netif_stop_subqueue(dev, queue_index);
unlock:
spin_unlock_irqrestore(&bp->lock, flags);
spin_unlock_bh(&queue->tx_ptr_lock);
return ret;
}
@@ -2763,8 +2832,10 @@ static int macb_open(struct net_device *dev)
goto pm_exit;
}
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
napi_enable(&queue->napi);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_enable(&queue->napi_rx);
napi_enable(&queue->napi_tx);
}
macb_init_hw(bp);
@@ -2788,8 +2859,10 @@ static int macb_open(struct net_device *dev)
reset_hw:
macb_reset_hw(bp);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
napi_disable(&queue->napi);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_disable(&queue->napi_rx);
napi_disable(&queue->napi_tx);
}
macb_free_consistent(bp);
pm_exit:
pm_runtime_put_sync(&bp->pdev->dev);
@@ -2805,8 +2878,10 @@ static int macb_close(struct net_device *dev)
netif_tx_stop_all_queues(dev);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
napi_disable(&queue->napi);
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_disable(&queue->napi_rx);
napi_disable(&queue->napi_tx);
}
phylink_stop(bp->phylink);
phylink_disconnect_phy(bp->phylink);
@@ -3868,7 +3943,9 @@ static int macb_init(struct platform_device *pdev)
queue = &bp->queues[q];
queue->bp = bp;
netif_napi_add(dev, &queue->napi, macb_poll, NAPI_POLL_WEIGHT);
spin_lock_init(&queue->tx_ptr_lock);
netif_napi_add(dev, &queue->napi_rx, macb_rx_poll, NAPI_POLL_WEIGHT);
netif_napi_add(dev, &queue->napi_tx, macb_tx_poll, NAPI_POLL_WEIGHT);
if (hw_q) {
queue->ISR = GEM_ISR(hw_q - 1);
queue->IER = GEM_IER(hw_q - 1);
@@ -4975,8 +5052,10 @@ static int __maybe_unused macb_suspend(struct device *dev)
netif_device_detach(netdev);
for (q = 0, queue = bp->queues; q < bp->num_queues;
++q, ++queue)
napi_disable(&queue->napi);
++q, ++queue) {
napi_disable(&queue->napi_rx);
napi_disable(&queue->napi_tx);
}
if (!(bp->wol & MACB_WOL_ENABLED)) {
rtnl_lock();
@@ -5054,8 +5133,10 @@ static int __maybe_unused macb_resume(struct device *dev)
}
for (q = 0, queue = bp->queues; q < bp->num_queues;
++q, ++queue)
napi_enable(&queue->napi);
++q, ++queue) {
napi_enable(&queue->napi_rx);
napi_enable(&queue->napi_tx);
}
if (netdev->hw_features & NETIF_F_NTUPLE)
gem_writel_n(bp, ETHT, SCRT2_ETHT, bp->pm_data.scrt2);