Commit 243d9f43, authored by Don Hiatt, committed by Doug Ledford

IB/hfi1: Add transmit fault injection feature

Add ability to fault packets on transmit by opcode.
Dropping by packet can be achieved by setting the mask to 0.

In order to drop non-verbs traffic we set PbcInsertHcrc
to NONE (0x2). The packet will still be delivered to
the receiving node, but a KHdrHCRCErr (KDETH packet
with a bad HCRC) will be triggered and the packet will
not be delivered to the correct context.

In order to drop regular verbs traffic we set the
PbcTestEbp flag. The packet will still be delivered
to the receiving node, but a 'late ebp error' will
be triggered and the packet will be dropped.

A global toggle (/sys/kernel/debug/hfi1/hfi1_X/fault_suppress_err)
has been added to suppress the error messages on the receive
node when a packet was faulted on the sending node.
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 0181ce31
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include "platform.h" #include "platform.h"
#include "aspm.h" #include "aspm.h"
#include "affinity.h" #include "affinity.h"
#include "debugfs.h"
#define NUM_IB_PORTS 1 #define NUM_IB_PORTS 1
...@@ -7898,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) ...@@ -7898,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK; reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
} }
if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
/* report any remaining errors */ /* report any remaining errors */
if (reg) if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n", dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
......
...@@ -1240,6 +1240,11 @@ static int fault_init_debugfs(struct hfi1_ibdev *ibd) ...@@ -1240,6 +1240,11 @@ static int fault_init_debugfs(struct hfi1_ibdev *ibd)
return ret; return ret;
} }
/**
 * hfi1_dbg_fault_suppress_err - read the fault_suppress_err toggle
 * @ibd: the hfi1 IB device whose toggle is queried
 *
 * Return: the current value of @ibd->fault_suppress_err.
 */
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
	const bool suppress = ibd->fault_suppress_err;

	return suppress;
}
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{ {
bool ret = false; bool ret = false;
...@@ -1329,6 +1334,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) ...@@ -1329,6 +1334,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
} }
#ifdef CONFIG_FAULT_INJECTION #ifdef CONFIG_FAULT_INJECTION
debugfs_create_bool("fault_suppress_err", 0600,
ibd->hfi1_ibdev_dbg,
&ibd->fault_suppress_err);
fault_init_debugfs(ibd); fault_init_debugfs(ibd);
#endif #endif
} }
......
...@@ -75,6 +75,7 @@ struct fault_packet { ...@@ -75,6 +75,7 @@ struct fault_packet {
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
#else #else
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{ {
...@@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, ...@@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
{ {
return false; return false;
} }
/*
 * Stub for the #else branch, where the real
 * hfi1_dbg_fault_suppress_err() is not declared: error suppression
 * is always reported as off. @ibd is not examined.
 */
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif #endif
#else #else
...@@ -115,6 +121,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, ...@@ -115,6 +121,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
{ {
return false; return false;
} }
/*
 * Stub variant of hfi1_dbg_fault_suppress_err(): always reports that
 * error suppression is off and does not examine @ibd.
 * NOTE(review): the enclosing preprocessor condition is not visible
 * in this hunk -- confirm which configuration selects this stub.
 */
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif #endif
#endif /* _HFI1_DEBUGFS_H */ #endif /* _HFI1_DEBUGFS_H */
...@@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet) ...@@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr, packet->updegr,
rhf_egr_index(packet->rhf)); rhf_egr_index(packet->rhf));
if (unlikely(
(hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(packet->rhf & RHF_DC_ERR))))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf))) { if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet); handle_eflags(packet);
return RHF_RCV_CONTINUE; return RHF_RCV_CONTINUE;
...@@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet) ...@@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet) int process_receive_error(struct hfi1_packet *packet)
{ {
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
handle_eflags(packet); handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf))) if (unlikely(rhf_err_flags(packet->rhf)))
......
...@@ -518,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) ...@@ -518,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL; return NULL;
} }
/**
 * hfi1_fault_tx - add fault bits to the PBC of an outgoing packet
 * @qp: the sending QP (not referenced by this function)
 * @opcode: opcode of the packet being sent
 * @pbc: PBC flags built so far
 *
 * With CONFIG_FAULT_INJECTION set, or's in the bit(s) that cause the
 * receiving node to discard the packet; which bits depends on whether
 * @opcode carries all of the IB_OPCODE_MSP bits.
 *
 * Return: the updated PBC flags, or @pbc unchanged when
 * CONFIG_FAULT_INJECTION is not set.
 */
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
	if ((opcode & IB_OPCODE_MSP) != IB_OPCODE_MSP) {
		/*
		 * Regular verbs traffic: set the PbcTestEbp flag.  The
		 * packet is still delivered to the receiving node, but
		 * a 'late ebp error' is triggered there and the packet
		 * is dropped.
		 */
		return pbc | PBC_TEST_EBP;
	}

	/*
	 * Non-IB traffic: set the PBC insert-HCRC field to NONE (0x2).
	 * The packet is still delivered to the receiving node, but a
	 * KHdrHCRCErr (KDETH packet with a bad HCRC) is triggered and
	 * the packet is not delivered to the correct context.
	 */
	return pbc | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT);
#else
	return pbc;
#endif
}
/** /**
* hfi1_ib_rcv - process an incoming packet * hfi1_ib_rcv - process an incoming packet
* @packet: data packet information * @packet: data packet information
...@@ -803,7 +832,6 @@ static int build_verbs_tx_desc( ...@@ -803,7 +832,6 @@ static int build_verbs_tx_desc(
if (ret) if (ret)
goto bail_txadd; goto bail_txadd;
} }
/* add the ulp payload - if any. tx->ss can be NULL for acks */ /* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss) if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx); ret = build_verbs_ulp_payload(sde, length, tx);
...@@ -822,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -822,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev; struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd; struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx; struct verbs_txreq *tx;
u64 pbc_flags = 0;
u8 sc5 = priv->s_sc; u8 sc5 = priv->s_sc;
int ret; int ret;
...@@ -831,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -831,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) { if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) { if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
u8 opcode = get_opcode(&tx->phdr.hdr);
/* No vl15 here */ /* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc = create_pbc(ppd,
pbc_flags, pbc,
qp->srate_mbps, qp->srate_mbps,
vl, vl,
plen); plen);
...@@ -939,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -939,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */ u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd; struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
u64 pbc_flags = 0;
u8 sc5; u8 sc5;
unsigned long flags = 0; unsigned long flags = 0;
struct send_context *sc; struct send_context *sc;
...@@ -964,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -964,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) { if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
struct verbs_txreq *tx = ps->s_txreq;
u8 opcode = get_opcode(&tx->phdr.hdr);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
} }
if (cb) if (cb)
iowait_pio_inc(&priv->s_iowait); iowait_pio_inc(&priv->s_iowait);
......
...@@ -198,6 +198,7 @@ struct hfi1_ibdev { ...@@ -198,6 +198,7 @@ struct hfi1_ibdev {
#ifdef CONFIG_FAULT_INJECTION #ifdef CONFIG_FAULT_INJECTION
struct fault_opcode *fault_opcode; struct fault_opcode *fault_opcode;
struct fault_packet *fault_packet; struct fault_packet *fault_packet;
bool fault_suppress_err;
#endif #endif
#endif #endif
}; };
......
...@@ -80,6 +80,8 @@ enum { ...@@ -80,6 +80,8 @@ enum {
IB_OPCODE_UD = 0x60, IB_OPCODE_UD = 0x60,
/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80, IB_OPCODE_CNP = 0x80,
/* Manufacturer specific */
IB_OPCODE_MSP = 0xe0,
/* operations -- just used to define real constants */ /* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00, IB_OPCODE_SEND_FIRST = 0x00,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment