Commit 7b0b01aa, authored by Dean Luick, committed by Doug Ledford

staging/rdma/hfi1: Split last 8 bytes of copy to user buffer

Copy the last 8 bytes of user mode RC WRITE_ONLY and WRITE_LAST
opcodes separately from the rest of the data.

It is a de-facto standard for some MPI implementations to poll on
the last few bytes of a verbs message to detect that the message
has been received, rather than using the required function call.
The driver uses the kernel memcpy routine, which becomes
"rep movsb" on modern machines.  This copy, while very fast, does
not guarantee in-order copy completion, and the result is an
occasional perceived corrupted packet.  Avoid the issue by
splitting off the last 8 bytes of the copy for the verbs opcodes
where it matters and copying those bytes with an in-order byte copy.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 53f449e4
...@@ -1539,7 +1539,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, ...@@ -1539,7 +1539,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
qp->s_rdma_read_len -= pmtu; qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn); update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
goto bail; goto bail;
case OP(RDMA_READ_RESPONSE_ONLY): case OP(RDMA_READ_RESPONSE_ONLY):
...@@ -1583,7 +1583,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, ...@@ -1583,7 +1583,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
if (unlikely(tlen != qp->s_rdma_read_len)) if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err; goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth); aeth = be32_to_cpu(ohdr->u.aeth);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
WARN_ON(qp->s_rdma_read_sge.num_sge); WARN_ON(qp->s_rdma_read_sge.num_sge);
(void) do_rc_ack(qp, aeth, psn, (void) do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd); OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
...@@ -1977,6 +1977,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ...@@ -1977,6 +1977,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
unsigned long flags; unsigned long flags;
u32 bth1; u32 bth1;
int ret, is_fecn = 0; int ret, is_fecn = 0;
int copy_last = 0;
bth0 = be32_to_cpu(ohdr->bth[0]); bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
...@@ -2081,7 +2082,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ...@@ -2081,7 +2082,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv; goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, pmtu, 1); hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
break; break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
...@@ -2109,8 +2110,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ...@@ -2109,8 +2110,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
wc.ex.imm_data = ohdr->u.imm_data; wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM; wc.wc_flags = IB_WC_WITH_IMM;
goto send_last; goto send_last;
case OP(SEND_LAST):
case OP(RDMA_WRITE_LAST): case OP(RDMA_WRITE_LAST):
copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
/* fall through */
case OP(SEND_LAST):
no_immediate_data: no_immediate_data:
wc.wc_flags = 0; wc.wc_flags = 0;
wc.ex.imm_data = 0; wc.ex.imm_data = 0;
...@@ -2126,7 +2129,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ...@@ -2126,7 +2129,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
wc.byte_len = tlen + qp->r_rcv_len; wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv; goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, tlen, 1); hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
qp->r_msn++; qp->r_msn++;
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
...@@ -2163,8 +2166,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ...@@ -2163,8 +2166,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
(bth0 & IB_BTH_SOLICITED) != 0); (bth0 & IB_BTH_SOLICITED) != 0);
break; break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY): case OP(RDMA_WRITE_ONLY):
copy_last = 1;
/* fall through */
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto nack_inv; goto nack_inv;
......
...@@ -370,6 +370,7 @@ static void ruc_loopback(struct rvt_qp *sqp) ...@@ -370,6 +370,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
enum ib_wc_status send_status; enum ib_wc_status send_status;
int release; int release;
int ret; int ret;
int copy_last = 0;
rcu_read_lock(); rcu_read_lock();
...@@ -459,10 +460,13 @@ static void ruc_loopback(struct rvt_qp *sqp) ...@@ -459,10 +460,13 @@ static void ruc_loopback(struct rvt_qp *sqp)
goto op_err; goto op_err;
if (!ret) if (!ret)
goto rnr_nak; goto rnr_nak;
/* FALLTHROUGH */ /* skip copy_last set and qp_access_flags recheck */
goto do_write;
case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE:
copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err; goto inv_err;
do_write:
if (wqe->length == 0) if (wqe->length == 0)
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr, wqe->rdma_wr.remote_addr,
...@@ -526,7 +530,7 @@ static void ruc_loopback(struct rvt_qp *sqp) ...@@ -526,7 +530,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
if (len > sge->sge_length) if (len > sge->sge_length)
len = sge->sge_length; len = sge->sge_length;
WARN_ON_ONCE(len == 0); WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release); hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
sge->vaddr += len; sge->vaddr += len;
sge->length -= len; sge->length -= len;
sge->sge_length -= len; sge->sge_length -= len;
......
...@@ -418,7 +418,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) ...@@ -418,7 +418,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind; goto rewind;
hfi1_copy_sge(&qp->r_sge, data, pmtu, 0); hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
break; break;
case OP(SEND_LAST_WITH_IMMEDIATE): case OP(SEND_LAST_WITH_IMMEDIATE):
...@@ -443,7 +443,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) ...@@ -443,7 +443,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
if (unlikely(wc.byte_len > qp->r_len)) if (unlikely(wc.byte_len > qp->r_len))
goto rewind; goto rewind;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
hfi1_copy_sge(&qp->r_sge, data, tlen, 0); hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
rvt_put_ss(&qp->s_rdma_read_sge); rvt_put_ss(&qp->s_rdma_read_sge);
last_imm: last_imm:
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
...@@ -518,7 +518,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) ...@@ -518,7 +518,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop; goto drop;
hfi1_copy_sge(&qp->r_sge, data, pmtu, 1); hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
break; break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
...@@ -547,7 +547,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) ...@@ -547,7 +547,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
} }
wc.byte_len = qp->r_len; wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
hfi1_copy_sge(&qp->r_sge, data, tlen, 1); hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
goto last_imm; goto last_imm;
...@@ -563,7 +563,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) ...@@ -563,7 +563,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
tlen -= (hdrsize + pad + 4); tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop; goto drop;
hfi1_copy_sge(&qp->r_sge, data, tlen, 1); hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
break; break;
......
...@@ -187,7 +187,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ...@@ -187,7 +187,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (ah_attr->ah_flags & IB_AH_GRH) { if (ah_attr->ah_flags & IB_AH_GRH) {
hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
sizeof(struct ib_grh), 1); sizeof(struct ib_grh), 1, 0);
wc.wc_flags |= IB_WC_GRH; wc.wc_flags |= IB_WC_GRH;
} else } else
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
...@@ -203,7 +203,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ...@@ -203,7 +203,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length) if (len > sge->sge_length)
len = sge->sge_length; len = sge->sge_length;
WARN_ON_ONCE(len == 0); WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1); hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
sge->vaddr += len; sge->vaddr += len;
sge->length -= len; sge->length -= len;
sge->sge_length -= len; sge->sge_length -= len;
...@@ -836,11 +836,12 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) ...@@ -836,11 +836,12 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} }
if (has_grh) { if (has_grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh), 1); sizeof(struct ib_grh), 1, 0);
wc.wc_flags |= IB_WC_GRH; wc.wc_flags |= IB_WC_GRH;
} else } else
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
1, 0);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return; return;
......
...@@ -242,14 +242,28 @@ __be64 ib_hfi1_sys_image_guid; ...@@ -242,14 +242,28 @@ __be64 ib_hfi1_sys_image_guid;
* @ss: the SGE state * @ss: the SGE state
* @data: the data to copy * @data: the data to copy
* @length: the length of the data * @length: the length of the data
* @copy_last: do a separate copy of the last 8 bytes
*/ */
void hfi1_copy_sge( void hfi1_copy_sge(
struct rvt_sge_state *ss, struct rvt_sge_state *ss,
void *data, u32 length, void *data, u32 length,
int release) int release,
int copy_last)
{ {
struct rvt_sge *sge = &ss->sge; struct rvt_sge *sge = &ss->sge;
int in_last = 0;
int i;
if (copy_last) {
if (length > 8) {
length -= 8;
} else {
copy_last = 0;
in_last = 1;
}
}
again:
while (length) { while (length) {
u32 len = sge->length; u32 len = sge->length;
...@@ -258,7 +272,13 @@ void hfi1_copy_sge( ...@@ -258,7 +272,13 @@ void hfi1_copy_sge(
if (len > sge->sge_length) if (len > sge->sge_length)
len = sge->sge_length; len = sge->sge_length;
WARN_ON_ONCE(len == 0); WARN_ON_ONCE(len == 0);
if (in_last) {
/* enforce byte transfer ordering */
for (i = 0; i < len; i++)
((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
} else {
memcpy(sge->vaddr, data, len); memcpy(sge->vaddr, data, len);
}
sge->vaddr += len; sge->vaddr += len;
sge->length -= len; sge->length -= len;
sge->sge_length -= len; sge->sge_length -= len;
...@@ -281,6 +301,13 @@ void hfi1_copy_sge( ...@@ -281,6 +301,13 @@ void hfi1_copy_sge(
data += len; data += len;
length -= len; length -= len;
} }
if (copy_last) {
copy_last = 0;
in_last = 1;
length = 8;
goto again;
}
} }
/** /**
......
...@@ -398,7 +398,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx); ...@@ -398,7 +398,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
int release); int release, int copy_last);
void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release); void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment