Commit 2b56b3d9 authored by Jakub Kicinski's avatar Jakub Kicinski

eth: bnxt: handle invalid Tx completions more gracefully

Invalid Tx completions should never happen (tm) but when they do
they crash the host, because driver blindly trusts that there is
a valid skb pointer on the ring.

The completions I've seen appear to be some form of FW / HW
miscalculation or staleness, they have typical (small) values
(<100), but they are most often higher than number of queued
descriptors. They usually happen after boot.

Instead of crashing, print a warning and schedule a reset.

Link: https://lore.kernel.org/r/20230720010440.1967136-4-kuba@kernel.orgReviewed-by: default avatarMichael Chan <michael.chan@broadcom.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 9b1a00fd
...@@ -331,6 +331,22 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) ...@@ -331,6 +331,22 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
rxr->rx_next_cons = 0xffff; rxr->rx_next_cons = 0xffff;
} }
void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
int idx)
{
struct bnxt_napi *bnapi = txr->bnapi;
if (bnapi->tx_fault)
return;
netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
txr->txq_index, bnapi->tx_pkts,
txr->tx_cons, txr->tx_prod, idx);
WARN_ON_ONCE(1);
bnapi->tx_fault = 1;
bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
}
const u16 bnxt_lhint_arr[] = { const u16 bnxt_lhint_arr[] = {
TX_BD_FLAGS_LHINT_512_AND_SMALLER, TX_BD_FLAGS_LHINT_512_AND_SMALLER,
TX_BD_FLAGS_LHINT_512_TO_1023, TX_BD_FLAGS_LHINT_512_TO_1023,
...@@ -690,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) ...@@ -690,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
skb = tx_buf->skb; skb = tx_buf->skb;
tx_buf->skb = NULL; tx_buf->skb = NULL;
if (unlikely(!skb)) {
bnxt_sched_reset_txr(bp, txr, i);
return;
}
tx_bytes += skb->len; tx_bytes += skb->len;
if (tx_buf->is_push) { if (tx_buf->is_push) {
...@@ -2576,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, ...@@ -2576,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
{ {
if (bnapi->tx_pkts) { if (bnapi->tx_pkts && !bnapi->tx_fault) {
bnapi->tx_int(bp, bnapi, bnapi->tx_pkts); bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
bnapi->tx_pkts = 0; bnapi->tx_pkts = 0;
} }
...@@ -9429,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp) ...@@ -9429,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr; struct bnxt_cp_ring_info *cpr;
bnapi->tx_fault = 0;
cpr = &bnapi->cp_ring; cpr = &bnapi->cp_ring;
if (bnapi->in_reset) if (bnapi->in_reset)
cpr->sw_stats.rx.rx_resets++; cpr->sw_stats.rx.rx_resets++;
......
...@@ -1008,6 +1008,7 @@ struct bnxt_napi { ...@@ -1008,6 +1008,7 @@ struct bnxt_napi {
int); int);
int tx_pkts; int tx_pkts;
u8 events; u8 events;
u8 tx_fault:1;
u32 flags; u32 flags;
#define BNXT_NAPI_FLAG_XDP 0x1 #define BNXT_NAPI_FLAG_XDP 0x1
...@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num); ...@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_disable(struct bnxt *bp);
void bnxt_tx_enable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp);
void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
int idx);
void bnxt_report_link(struct bnxt *bp); void bnxt_report_link(struct bnxt *bp);
int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_pause(struct bnxt *);
......
...@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) ...@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
tx_buf->action = 0; tx_buf->action = 0;
tx_buf->xdpf = NULL; tx_buf->xdpf = NULL;
} else if (tx_buf->action == XDP_TX) { } else if (tx_buf->action == XDP_TX) {
tx_buf->action = 0;
rx_doorbell_needed = true; rx_doorbell_needed = true;
last_tx_cons = tx_cons; last_tx_cons = tx_cons;
...@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) ...@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
tx_buf = &txr->tx_buf_ring[tx_cons]; tx_buf = &txr->tx_buf_ring[tx_cons];
page_pool_recycle_direct(rxr->page_pool, tx_buf->page); page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
} }
} else {
bnxt_sched_reset_txr(bp, txr, i);
return;
} }
tx_cons = NEXT_TX(tx_cons); tx_cons = NEXT_TX(tx_cons);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment