Commit 9e93e967 authored by Kaike Wan, committed by Doug Ledford

IB/hfi1: Add a function to receive TID RDMA ACK packet

This patch adds a function to receive the TID RDMA ACK packet, which can
acknowledge either a TID RDMA WRITE DATA packet or a TID RDMA RESYNC
packet. For an ACK to a TID RDMA WRITE DATA packet, the request
segments are completed appropriately. For an ACK to a TID RDMA RESYNC
packet, any pending segment flow information is updated accordingly.
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 0f75e325
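The new receive handler, hfi1_rc_rcv_tid_rdma_ack(), branches on the AETH
dword of the incoming packet. As a standalone sketch of that decoding
(illustration only, not part of the patch; classify_aeth() is a hypothetical
helper, while the IB_AETH_* constants mirror include/rdma/ib_hdrs.h), the top
three bits of the AETH select ACK versus NAK, and for a NAK the credit field
carries the NAK code:

/*
 * Sketch (not part of the patch): how the AETH dword is decoded in
 * hfi1_rc_rcv_tid_rdma_ack() below. classify_aeth() is a made-up
 * helper for illustration; the IB_AETH_* values mirror the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

#define IB_AETH_CREDIT_SHIFT 24
#define IB_AETH_CREDIT_MASK 0x1F

static void classify_aeth(uint32_t aeth)
{
	switch (aeth >> 29) {	/* bits 31:29 are the AETH opcode */
	case 0:			/* ACK */
		printf("ACK\n");
		break;
	case 3:			/* NAK: low credit bits hold the NAK code */
		if (((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) == 0)
			printf("NAK: PSN sequence error\n");
		else
			printf("NAK: other code\n");
		break;
	default:		/* RNR NAK and reserved opcodes ignored here */
		break;
	}
}

int main(void)
{
	classify_aeth(0x00000000);	/* plain ACK */
	classify_aeth(3u << 29);	/* NAK, PSN sequence error */
	return 0;
}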
@@ -245,6 +245,9 @@ void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
if (attr_mask & IB_QP_RETRY_CNT)
priv->s_retry = attr->retry_cnt;
spin_lock_irqsave(&priv->opfn.lock, flags);
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
struct tid_rdma_params *local = &priv->tid_rdma.local;
...
@@ -65,6 +65,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
* HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
* HFI1_S_WAIT_HALT - halt the first leg send engine
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
*/
#define HFI1_S_AHG_VALID 0x80000000
@@ -72,6 +73,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
#define HFI1_S_WAIT_TID_SPACE 0x10000000
#define HFI1_S_WAIT_TID_RESP 0x08000000
#define HFI1_S_WAIT_HALT 0x04000000
#define HFI1_S_MIN_BIT_MASK 0x01000000
/*
...
@@ -319,6 +319,7 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
qpriv->s_state = TID_OP(WRITE_RESP);
qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
@@ -327,6 +328,7 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
atomic_set(&qpriv->n_tid_requests, 0);
timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
INIT_LIST_HEAD(&qpriv->tid_wait);
@@ -4318,3 +4320,213 @@ u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
}
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
{
struct ib_other_headers *ohdr = packet->ohdr;
struct rvt_qp *qp = packet->qp;
struct hfi1_qp_priv *qpriv = qp->priv;
struct rvt_swqe *wqe;
struct tid_rdma_request *req;
struct tid_rdma_flow *flow;
u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
bool is_fecn;
unsigned long flags;
u16 fidx;
is_fecn = process_ecn(qp, packet);
psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
spin_lock_irqsave(&qp->s_lock, flags);
/* If we are waiting for an ACK to RESYNC, drop any other packets */
if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
cmp_psn(psn, qpriv->s_resync_psn))
goto ack_op_err;
ack_psn = req_psn;
if (hfi1_tid_rdma_is_resync_psn(psn))
ack_kpsn = resync_psn;
else
ack_kpsn = psn;
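/*
 * A non-zero AETH opcode indicates a NAK; the reported PSN is the
 * first packet not accepted, so step back to the last confirmed PSN.
 */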
if (aeth >> 29) {
ack_psn--;
ack_kpsn--;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
goto ack_op_err;
req = wqe_to_tid_req(wqe);
flow = &req->flows[req->acked_tail];
/* Drop stale ACK/NAK */
if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0)
goto ack_op_err;
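/* Complete every segment whose last flow PSN this ACK covers. */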
while (cmp_psn(ack_kpsn,
full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
req->ack_seg < req->cur_seg) {
req->ack_seg++;
/* advance acked segment pointer */
req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
req->r_last_acked = flow->flow_state.resp_ib_psn;
if (req->ack_seg == req->total_segs) {
req->state = TID_REQUEST_COMPLETE;
wqe = do_rc_completion(qp, wqe,
to_iport(qp->ibqp.device,
qp->port_num));
atomic_dec(&qpriv->n_tid_requests);
if (qp->s_acked == qp->s_tail)
break;
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
req = wqe_to_tid_req(wqe);
}
flow = &req->flows[req->acked_tail];
}
switch (aeth >> 29) {
case 0: /* ACK */
if (qpriv->s_flags & RVT_S_WAIT_ACK)
qpriv->s_flags &= ~RVT_S_WAIT_ACK;
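/*
 * A plain ACK simply restarts the send engine; an ACK carrying a
 * RESYNC PSN requires the flow PSN state to be rebuilt below.
 */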
if (!hfi1_tid_rdma_is_resync_psn(psn)) {
hfi1_schedule_send(qp);
} else {
u32 spsn, fpsn, last_acked, generation;
struct tid_rdma_request *rptr;
/* Allow new requests (see hfi1_make_tid_rdma_pkt) */
qp->s_flags &= ~HFI1_S_WAIT_HALT;
/*
* Clear RVT_S_SEND_ONE flag in case that the TID RDMA
* ACK is received after the TID retry timer is fired
* again. In this case, do not send any more TID
* RESYNC request or wait for any more TID ACK packet.
*/
qpriv->s_flags &= ~RVT_S_SEND_ONE;
hfi1_schedule_send(qp);
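/*
 * Pick the state from which the send engine will resume, based on
 * how much of the request stream this RESYNC ACK covers.
 */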
if ((qp->s_acked == qpriv->s_tid_tail &&
req->ack_seg == req->total_segs) ||
qp->s_acked == qp->s_tail) {
qpriv->s_state = TID_OP(WRITE_DATA_LAST);
goto done;
}
if (req->ack_seg == req->comp_seg) {
qpriv->s_state = TID_OP(WRITE_DATA);
goto done;
}
/*
* The PSN to start with is the next PSN after the
* RESYNC PSN.
*/
psn = mask_psn(psn + 1);
generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
spsn = 0;
/*
* Update to the correct WQE when we get an ACK(RESYNC)
* in the middle of a request.
*/
if (delta_psn(ack_psn, wqe->lpsn))
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
req = wqe_to_tid_req(wqe);
flow = &req->flows[req->acked_tail];
/*
* RESYNC re-numbers the PSN ranges of all remaining
* segments. Also, PSN's start from 0 in the middle of a
* segment and the first segment size is less than the
* default number of packets. flow->resync_npkts is used
* to track the number of packets from the start of the
* real segment to the point of 0 PSN after the RESYNC
* in order to later correctly rewind the SGE.
*/
fpsn = full_flow_psn(flow, flow->flow_state.spsn);
req->r_ack_psn = psn;
flow->resync_npkts +=
delta_psn(mask_psn(resync_psn + 1), fpsn);
/*
* Renumber all packet sequence number ranges
* based on the new generation.
*/
last_acked = qp->s_acked;
rptr = req;
while (1) {
/* start from last acked segment */
for (fidx = rptr->acked_tail;
CIRC_CNT(rptr->setup_head, fidx,
MAX_FLOWS);
fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
u32 lpsn;
u32 gen;
flow = &rptr->flows[fidx];
gen = flow->flow_state.generation;
if (WARN_ON(gen == generation &&
flow->flow_state.spsn !=
spsn))
continue;
lpsn = flow->flow_state.lpsn;
lpsn = full_flow_psn(flow, lpsn);
flow->npkts =
delta_psn(lpsn,
mask_psn(resync_psn)
);
flow->flow_state.generation =
generation;
flow->flow_state.spsn = spsn;
flow->flow_state.lpsn =
flow->flow_state.spsn +
flow->npkts - 1;
flow->pkt = 0;
spsn += flow->npkts;
resync_psn += flow->npkts;
}
if (++last_acked == qpriv->s_tid_cur + 1)
break;
if (last_acked == qp->s_size)
last_acked = 0;
wqe = rvt_get_swqe_ptr(qp, last_acked);
rptr = wqe_to_tid_req(wqe);
}
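/* Resend from the last ACKed segment using the renumbered PSNs. */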
req->cur_seg = req->ack_seg;
qpriv->s_tid_tail = qp->s_acked;
qpriv->s_state = TID_OP(WRITE_REQ);
}
done:
qpriv->s_retry = qp->s_retry_cnt;
break;
case 3: /* NAK */
switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
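/*
 * Rewind to the last ACKed segment and fall back to sending TID
 * RDMA WRITE requests from there.
 */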
flow = &req->flows[req->acked_tail];
fspsn = full_flow_psn(flow, flow->flow_state.spsn);
req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
req->cur_seg = req->ack_seg;
qpriv->s_tid_tail = qp->s_acked;
qpriv->s_state = TID_OP(WRITE_REQ);
qpriv->s_retry = qp->s_retry_cnt;
break;
default:
break;
}
break;
default:
break;
}
ack_op_err:
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -101,6 +101,7 @@ struct tid_rdma_request {
u32 seg_len;
u32 total_len;
u32 r_ack_psn; /* next expected ack PSN */
u32 r_flow_psn; /* IB PSN of next segment start */
u32 r_last_acked; /* IB PSN of last ACK'ed packet */
u32 s_next_psn; /* IB PSN of next segment start for read */
@@ -285,4 +286,6 @@ u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u16 iflow,
u32 *bth1, u32 *bth2);
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
#endif /* HFI1_TID_RDMA_H */
@@ -52,7 +52,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
#define TID_READ_SENDER_PRN "[%s] qpn 0x%x newreq %u tid_r_reqs %u " \
"tid_r_comp %u pending_tid_r_segs %u " \
"s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
"s_state 0x%x hw_flow_index %u generation 0x%x " \
"fpsn 0x%x flow_flags 0x%x"
#define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
@@ -844,6 +844,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
__field(u8, s_state)
__field(u32, hw_flow_index)
__field(u32, generation)
__field(u32, fpsn)
@@ -861,6 +862,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__entry->s_flags = qp->s_flags;
__entry->ps_flags = priv->s_flags;
__entry->iow_flags = priv->s_iowait.flags;
__entry->s_state = priv->s_state;
__entry->hw_flow_index = priv->flow_state.index;
__entry->generation = priv->flow_state.generation;
__entry->fpsn = priv->flow_state.psn;
@@ -877,6 +879,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags,
__entry->s_state,
__entry->hw_flow_index,
__entry->generation,
__entry->fpsn,
...
@@ -170,9 +170,12 @@ struct hfi1_qp_priv {
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
unsigned long tid_timer_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u8 s_state;
u8 s_retry;
u8 rnr_nak_state; /* RNR NAK state */
u8 s_nak_state;
u32 s_nak_psn;
@@ -197,6 +200,7 @@ struct hfi1_qp_priv {
u32 r_next_psn_kdeth;
u32 r_next_psn_kdeth_save;
u32 s_resync_psn;
u8 sync_pt; /* Set when QP reaches sync point */
u8 resync;
};
...