Commit 46a80d62 authored by Mike Marciniszyn, committed by Doug Ledford

IB/qib, staging/rdma/hfi1: add s_hlock for use in post send

This patch adds an additional lock to reduce contention on the s_lock.

This lock is used in post_send() so that post_send() is not
serialized with the send engine and other send-related processing.

To do this, s_next_psn is now maintained in post_send(), and the
post_send() related fields are moved to a new cache line.  An s_avail
count is maintained for post_send() to mitigate trading cache lines
with the send engine.  The s_lock is released and reacquired around
handing the just-built packet to the egress mechanism.
Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 20f333b6
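
For readers of the log, a minimal sketch of the post-send fast path this patch aims for under the new s_hlock. This is not the literal rvt_post_send()/rvt_post_one_wr() from the diff below: the function name is made up, the WQE build is elided, and only qp_get_savail() and the field names come from the patch.

	/* Sketch only; assumes rdmavt's struct rvt_qp and qp_get_savail() from this patch. */
	static int post_one_send_sketch(struct rvt_qp *qp, struct ib_send_wr *wr)
	{
		unsigned long flags;
		u32 next;
		int ret = 0;

		/* s_hlock guards only the producer side of the send queue */
		spin_lock_irqsave(&qp->s_hlock, flags);

		if (unlikely(!qp->s_avail))
			qp->s_avail = qp_get_savail(qp);	/* lazily re-read s_last */
		if (unlikely(!qp->s_avail)) {
			ret = -ENOMEM;
			goto unlock;
		}

		next = qp->s_head + 1;
		if (next >= qp->s_size)
			next = 0;

		/* ... copy *wr into the swqe at s_head (elided) ... */

		/* PSNs are assigned here now, not by the send engine:     */
		/*   wqe->psn  = qp->s_next_psn;                           */
		/*   wqe->lpsn = wqe->psn +                                 */
		/*               (length ? (length - 1) >> log_pmtu : 0);   */
		/*   qp->s_next_psn = wqe->lpsn + 1;                        */

		qp->s_avail--;
		smp_wmb();		/* order wqe contents before s_head update */
		qp->s_head = next;
	unlock:
		spin_unlock_irqrestore(&qp->s_hlock, flags);
		/* caller then invokes rdi->driver_f.schedule_send_no_lock(qp)
		 * (or do_send) without taking s_lock.
		 */
		return ret;
	}
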
...@@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) ...@@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth)
} }
} }
/**
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
*
* validate wr/wqe. This is called
* prior to inserting the wqe into
* the ring but after the wqe has been
* setup.
*
* Returns 0 on success, -EINVAL on failure
*/
int qib_check_send_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
struct rvt_ah *ah;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
break;
default:
break;
}
return 0;
}
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct qib_qp_iter { struct qib_qp_iter {
......
...@@ -226,6 +226,8 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, ...@@ -226,6 +226,8 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
* qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP * @qp: a pointer to the QP
* *
* Assumes the s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int qib_make_rc_req(struct rvt_qp *qp) int qib_make_rc_req(struct rvt_qp *qp)
...@@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
u32 bth2; u32 bth2;
u32 pmtu = qp->pmtu; u32 pmtu = qp->pmtu;
char newreq; char newreq;
unsigned long flags;
int ret = 0; int ret = 0;
int delta; int delta;
...@@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth; ohdr = &priv->s_hdr->u.l.oth;
/*
* The lock is needed to synchronize between the sending tasklet,
* the receive interrupt handler, and timeout resends.
*/
spin_lock_irqsave(&qp->s_lock, flags);
/* Sending responses has higher priority over sending requests. */ /* Sending responses has higher priority over sending requests. */
if ((qp->s_flags & RVT_S_RESP_PENDING) && if ((qp->s_flags & RVT_S_RESP_PENDING) &&
qib_make_rc_ack(dev, qp, ohdr, pmtu)) qib_make_rc_ack(dev, qp, ohdr, pmtu))
...@@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) { if (atomic_read(&priv->s_dma_busy)) {
...@@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_FENCE; qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail; goto bail;
} }
wqe->psn = qp->s_next_psn;
newreq = 1; newreq = 1;
qp->s_psn = wqe->psn;
} }
/* /*
* Note that we have to be careful not to modify the * Note that we have to be careful not to modify the
...@@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail; goto bail;
} }
wqe->lpsn = wqe->psn;
if (len > pmtu) { if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST); qp->s_state = OP(SEND_FIRST);
len = pmtu; len = pmtu;
break; break;
...@@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey); cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len); ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32); hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
if (len > pmtu) { if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST); qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu; len = pmtu;
break; break;
...@@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++; qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++; qp->s_lsn++;
/*
* Adjust s_next_psn to count the
* expected number of responses.
*/
if (len > pmtu)
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
} }
ohdr->u.rc.reth.vaddr = ohdr->u.rc.reth.vaddr =
...@@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++; qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++; qp->s_lsn++;
wqe->lpsn = wqe->psn;
} }
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP); qp->s_state = OP(COMPARE_SWAP);
...@@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp)
} }
if (wqe->wr.opcode == IB_WR_RDMA_READ) if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1; qp->s_psn = wqe->lpsn + 1;
else { else
qp->s_psn++; qp->s_psn++;
if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
}
break; break;
case OP(RDMA_READ_RESPONSE_FIRST): case OP(RDMA_READ_RESPONSE_FIRST):
...@@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */ /* FALLTHROUGH */
case OP(SEND_MIDDLE): case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK; bth2 = qp->s_psn++ & QIB_PSN_MASK;
if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge; ss = &qp->s_sge;
len = qp->s_len; len = qp->s_len;
if (len > pmtu) { if (len > pmtu) {
...@@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */ /* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE): case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & QIB_PSN_MASK; bth2 = qp->s_psn++ & QIB_PSN_MASK;
if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge; ss = &qp->s_sge;
len = qp->s_len; len = qp->s_len;
if (len > pmtu) { if (len > pmtu) {
...@@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp) ...@@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp)
qp->s_cur_size = len; qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
...@@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, ...@@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
goto ack_done; goto ack_done;
/* Ignore invalid responses. */ /* Ignore invalid responses. */
if (qib_cmp24(psn, qp->s_next_psn) >= 0) smp_read_barrier_depends(); /* see post_one_send */
if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done; goto ack_done;
/* Ignore duplicate responses. */ /* Ignore duplicate responses. */
......
...@@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) ...@@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
sqp->s_flags |= RVT_S_BUSY; sqp->s_flags |= RVT_S_BUSY;
again: again:
if (sqp->s_last == sqp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
goto clr_busy; goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
...@@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp) ...@@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp)
qp->s_flags |= RVT_S_BUSY; qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
do { do {
/* Check for a constructed packet to be sent. */ /* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) { if (qp->s_hdrwords != 0) {
spin_unlock_irqrestore(&qp->s_lock, flags);
/* /*
* If the packet cannot be sent now, return and * If the packet cannot be sent now, return and
* the send tasklet will be woken up later. * the send tasklet will be woken up later.
*/ */
if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords, if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) qp->s_cur_sge, qp->s_cur_size))
break; return;
/* Record that s_hdr is empty. */ /* Record that s_hdr is empty. */
qp->s_hdrwords = 0; qp->s_hdrwords = 0;
spin_lock_irqsave(&qp->s_lock, flags);
} }
} while (make_req(qp)); } while (make_req(qp));
spin_unlock_irqrestore(&qp->s_lock, flags);
} }
/* /*
......
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
* qib_make_uc_req - construct a request packet (SEND, RDMA write) * qib_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP * @qp: a pointer to the QP
* *
* Assumes the s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int qib_make_uc_req(struct rvt_qp *qp) int qib_make_uc_req(struct rvt_qp *qp)
...@@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp) ...@@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp)
struct qib_qp_priv *priv = qp->priv; struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr; struct qib_other_headers *ohdr;
struct rvt_swqe *wqe; struct rvt_swqe *wqe;
unsigned long flags;
u32 hwords; u32 hwords;
u32 bth0; u32 bth0;
u32 len; u32 len;
u32 pmtu = qp->pmtu; u32 pmtu = qp->pmtu;
int ret = 0; int ret = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) { if (atomic_read(&priv->s_dma_busy)) {
...@@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp) ...@@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp)
RVT_PROCESS_NEXT_SEND_OK)) RVT_PROCESS_NEXT_SEND_OK))
goto bail; goto bail;
/* Check if send work queue is empty. */ /* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* /*
* Start a new request. * Start a new request.
*/ */
wqe->psn = qp->s_next_psn; qp->s_psn = wqe->psn;
qp->s_psn = qp->s_next_psn;
qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.num_sge = wqe->wr.num_sge;
...@@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp) ...@@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp)
qp->s_cur_sge = &qp->s_sge; qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len; qp->s_cur_size = len;
qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & QIB_PSN_MASK); qp->s_psn++ & QIB_PSN_MASK);
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
......
...@@ -234,6 +234,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ...@@ -234,6 +234,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
* qib_make_ud_req - construct a UD request packet * qib_make_ud_req - construct a UD request packet
* @qp: the QP * @qp: the QP
* *
* Assumes the s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int qib_make_ud_req(struct rvt_qp *qp) int qib_make_ud_req(struct rvt_qp *qp)
...@@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp)
struct qib_pportdata *ppd; struct qib_pportdata *ppd;
struct qib_ibport *ibp; struct qib_ibport *ibp;
struct rvt_swqe *wqe; struct rvt_swqe *wqe;
unsigned long flags;
u32 nwords; u32 nwords;
u32 extra_bytes; u32 extra_bytes;
u32 bth0; u32 bth0;
...@@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp)
int ret = 0; int ret = 0;
int next_cur; int next_cur;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) { if (atomic_read(&priv->s_dma_busy)) {
...@@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
goto done; goto done;
} }
if (qp->s_cur == qp->s_head) /* see post_one_send() */
smp_read_barrier_depends();
if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
wqe = rvt_get_swqe_ptr(qp, qp->s_cur); wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
...@@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
this_cpu_inc(ibp->pmastats->n_unicast_xmit); this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) { if (unlikely(lid == ppd->lid)) {
unsigned long flags;
/* /*
* If DMAs are in progress, we can't generate * If DMAs are in progress, we can't generate
* a completion for the loopback packet since * a completion for the loopback packet since
...@@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
goto bail; goto bail;
} }
qp->s_cur = next_cur; qp->s_cur = next_cur;
local_irq_save(flags);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
qib_ud_loopback(qp, wqe); qib_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
...@@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ? ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) : cpu_to_be32(QIB_MULTICAST_QPN) :
cpu_to_be32(wqe->ud_wr.remote_qpn); cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
/* /*
* Qkeys with the high order bit set mean use the * Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5). * qkey from the QP context instead of the WR (see 10.2.5).
...@@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp) ...@@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
......
...@@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ...@@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
...@@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ...@@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
...@@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd) ...@@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
dev->pio_hdrs, dev->pio_hdrs_phys); dev->pio_hdrs, dev->pio_hdrs_phys);
} }
/* /**
* This must be called with s_lock held. * _qib_schedule_send - schedule progress
* @qp - the qp
*
* This schedules progress w/o regard to the s_flags.
*
* It is only used in post send, which doesn't hold
* the s_lock.
*/ */
void qib_schedule_send(struct rvt_qp *qp) void _qib_schedule_send(struct rvt_qp *qp)
{ {
struct qib_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_qp_priv *priv = qp->priv; struct qib_qp_priv *priv = qp->priv;
if (qib_send_ok(qp)) {
struct qib_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
queue_work(ppd->qib_wq, &priv->s_work); queue_work(ppd->qib_wq, &priv->s_work);
} }
/**
* qib_schedule_send - schedule progress
* @qp - the qp
*
* This schedules qp progress. The s_lock
* should be held.
*/
void qib_schedule_send(struct rvt_qp *qp)
{
if (qib_send_ok(qp))
_qib_schedule_send(qp);
} }
...@@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp) ...@@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp)
!(qp->s_flags & RVT_S_ANY_WAIT_SEND)); !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
} }
/* void _qib_schedule_send(struct rvt_qp *qp);
* This must be called with s_lock held.
*/
void qib_schedule_send(struct rvt_qp *qp); void qib_schedule_send(struct rvt_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
...@@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, ...@@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg); void qib_rc_rnr_retry(unsigned long arg);
......
...@@ -401,6 +401,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, ...@@ -401,6 +401,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
rdi->driver_f.flush_qp_waiters(qp); rdi->driver_f.flush_qp_waiters(qp);
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
spin_unlock(&qp->s_lock); spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
spin_unlock_irq(&qp->r_lock); spin_unlock_irq(&qp->r_lock);
/* Stop the send queue and the retry timer */ /* Stop the send queue and the retry timer */
...@@ -415,6 +416,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, ...@@ -415,6 +416,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/* grab the lock b/c it was locked at call time */ /* grab the lock b/c it was locked at call time */
spin_lock_irq(&qp->r_lock); spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock); spin_lock(&qp->s_lock);
rvt_clear_mr_refs(qp, 1); rvt_clear_mr_refs(qp, 1);
...@@ -610,6 +612,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, ...@@ -610,6 +612,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
* except for qp->ibqp.qp_num. * except for qp->ibqp.qp_num.
*/ */
spin_lock_init(&qp->r_lock); spin_lock_init(&qp->r_lock);
spin_lock_init(&qp->s_hlock);
spin_lock_init(&qp->s_lock); spin_lock_init(&qp->s_lock);
spin_lock_init(&qp->r_rq.lock); spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0); atomic_set(&qp->refcount, 0);
...@@ -620,6 +623,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, ...@@ -620,6 +623,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->state = IB_QPS_RESET; qp->state = IB_QPS_RESET;
qp->s_wq = swq; qp->s_wq = swq;
qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_avail = init_attr->cap.max_send_wr;
qp->s_max_sge = init_attr->cap.max_send_sge; qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
qp->s_flags = RVT_S_SIGNAL_REQ_WR; qp->s_flags = RVT_S_SIGNAL_REQ_WR;
...@@ -779,6 +783,7 @@ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) ...@@ -779,6 +783,7 @@ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
wqe->ud_wr.ah)->refcount); wqe->ud_wr.ah)->refcount);
if (++qp->s_last >= qp->s_size) if (++qp->s_last >= qp->s_size)
qp->s_last = 0; qp->s_last = 0;
smp_wmb(); /* see qp_set_savail */
} }
if (qp->s_rdma_mr) { if (qp->s_rdma_mr) {
rvt_put_mr(qp->s_rdma_mr); rvt_put_mr(qp->s_rdma_mr);
...@@ -833,7 +838,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) ...@@ -833,7 +838,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
rdi->driver_f.notify_error_qp(qp); rdi->driver_f.notify_error_qp(qp);
/* Schedule the sending tasklet to drain the send work queue. */ /* Schedule the sending tasklet to drain the send work queue. */
if (qp->s_last != qp->s_head) if (ACCESS_ONCE(qp->s_last) != qp->s_head)
rdi->driver_f.schedule_send(qp); rdi->driver_f.schedule_send(qp);
rvt_clear_mr_refs(qp, 0); rvt_clear_mr_refs(qp, 0);
...@@ -979,6 +984,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -979,6 +984,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
link = rdma_port_get_link_layer(ibqp->device, qp->port_num); link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
spin_lock_irq(&qp->r_lock); spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock); spin_lock(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ? cur_state = attr_mask & IB_QP_CUR_STATE ?
...@@ -1151,6 +1157,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1151,6 +1157,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_PATH_MTU) { if (attr_mask & IB_QP_PATH_MTU) {
qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
qp->log_pmtu = ilog2(qp->pmtu);
} }
if (attr_mask & IB_QP_RETRY_CNT) { if (attr_mask & IB_QP_RETRY_CNT) {
...@@ -1186,6 +1193,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1186,6 +1193,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
spin_unlock(&qp->s_lock); spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
spin_unlock_irq(&qp->r_lock); spin_unlock_irq(&qp->r_lock);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
...@@ -1207,6 +1215,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1207,6 +1215,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
inval: inval:
spin_unlock(&qp->s_lock); spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
spin_unlock_irq(&qp->r_lock); spin_unlock_irq(&qp->r_lock);
return -EINVAL; return -EINVAL;
} }
...@@ -1226,9 +1235,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp) ...@@ -1226,9 +1235,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
spin_lock_irq(&qp->r_lock); spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock); spin_lock(&qp->s_lock);
rvt_reset_qp(rdi, qp, ibqp->qp_type); rvt_reset_qp(rdi, qp, ibqp->qp_type);
spin_unlock(&qp->s_lock); spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
spin_unlock_irq(&qp->r_lock); spin_unlock_irq(&qp->r_lock);
/* qpn is now available for use again */ /* qpn is now available for use again */
...@@ -1357,6 +1368,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, ...@@ -1357,6 +1368,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return 0; return 0;
} }
/**
* qp_get_savail - return number of avail send entries
*
* @qp - the qp
*
* This assumes the s_hlock is held but the s_last
* qp variable is uncontrolled.
*/
static inline u32 qp_get_savail(struct rvt_qp *qp)
{
u32 slast;
u32 ret;
smp_read_barrier_depends(); /* see rc.c */
slast = ACCESS_ONCE(qp->s_last);
if (qp->s_head >= slast)
ret = qp->s_size - (qp->s_head - slast);
else
ret = slast - qp->s_head;
return ret - 1;
}
/** /**
* rvt_post_one_wr - post one RC, UC, or UD send work request * rvt_post_one_wr - post one RC, UC, or UD send work request
* @qp: the QP to post on * @qp: the QP to post on
...@@ -1372,6 +1405,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) ...@@ -1372,6 +1405,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
struct rvt_lkey_table *rkt; struct rvt_lkey_table *rkt;
struct rvt_pd *pd; struct rvt_pd *pd;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
u8 log_pmtu;
int ret;
/* IB spec says that num_sge == 0 is OK. */ /* IB spec says that num_sge == 0 is OK. */
if (unlikely(wr->num_sge > qp->s_max_sge)) if (unlikely(wr->num_sge > qp->s_max_sge))
...@@ -1403,16 +1438,16 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) ...@@ -1403,16 +1438,16 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
return -EINVAL; return -EINVAL;
} }
/* check for avail */
if (unlikely(!qp->s_avail)) {
qp->s_avail = qp_get_savail(qp);
WARN_ON(qp->s_avail > (qp->s_size - 1));
if (!qp->s_avail)
return -ENOMEM;
}
next = qp->s_head + 1; next = qp->s_head + 1;
if (next >= qp->s_size) if (next >= qp->s_size)
next = 0; next = 0;
if (next == qp->s_last)
return -ENOMEM;
if (rdi->driver_f.check_send_wr &&
rdi->driver_f.check_send_wr(qp, wr))
return -EINVAL;
rkt = &rdi->lkey_table; rkt = &rdi->lkey_table;
pd = ibpd_to_rvtpd(qp->ibqp.pd); pd = ibpd_to_rvtpd(qp->ibqp.pd);
...@@ -1444,21 +1479,39 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) ...@@ -1444,21 +1479,39 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
continue; continue;
ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
&wr->sg_list[i], acc); &wr->sg_list[i], acc);
if (!ok) if (!ok) {
ret = -EINVAL;
goto bail_inval_free; goto bail_inval_free;
}
wqe->length += length; wqe->length += length;
j++; j++;
} }
wqe->wr.num_sge = j; wqe->wr.num_sge = j;
} }
if (qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) { /* general part of wqe valid - allow for driver checks */
if (wqe->length > 0x80000000U) if (rdi->driver_f.check_send_wqe) {
ret = rdi->driver_f.check_send_wqe(qp, wqe);
if (ret)
goto bail_inval_free; goto bail_inval_free;
} else { }
log_pmtu = qp->log_pmtu;
if (qp->ibqp.qp_type != IB_QPT_UC &&
qp->ibqp.qp_type != IB_QPT_RC) {
struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
log_pmtu = ah->log_pmtu;
atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
} }
wqe->ssn = qp->s_ssn++; wqe->ssn = qp->s_ssn++;
wqe->psn = qp->s_next_psn;
wqe->lpsn = wqe->psn +
(wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0);
qp->s_next_psn = wqe->lpsn + 1;
smp_wmb(); /* see request builders */
qp->s_avail--;
qp->s_head = next; qp->s_head = next;
return 0; return 0;
...@@ -1470,7 +1523,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) ...@@ -1470,7 +1523,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
rvt_put_mr(sge->mr); rvt_put_mr(sge->mr);
} }
return -EINVAL; return ret;
} }
/** /**
...@@ -1491,14 +1544,14 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1491,14 +1544,14 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
unsigned nreq = 0; unsigned nreq = 0;
int err = 0; int err = 0;
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_hlock, flags);
/* /*
* Ensure QP state is such that we can send. If not bail out early, * Ensure QP state is such that we can send. If not bail out early,
* there is no need to do this every time we post a send. * there is no need to do this every time we post a send.
*/ */
if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_hlock, flags);
return -EINVAL; return -EINVAL;
} }
...@@ -1518,11 +1571,13 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1518,11 +1571,13 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
nreq++; nreq++;
} }
bail: bail:
if (nreq && !call_send) spin_unlock_irqrestore(&qp->s_hlock, flags);
rdi->driver_f.schedule_send(qp); if (nreq) {
spin_unlock_irqrestore(&qp->s_lock, flags); if (call_send)
if (nreq && call_send) rdi->driver_f.schedule_send_no_lock(qp);
rdi->driver_f.do_send(qp); else
rdi->driver_f.do_send(qp);
}
return err; return err;
} }
......
...@@ -226,16 +226,45 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, ...@@ -226,16 +226,45 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
} }
} }
int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr) /**
* hfi1_check_send_wqe - validate wqe
* @qp - The qp
* @wqe - The built wqe
*
* validate wqe. This is called
* prior to inserting the wqe into
* the ring but after the wqe has been
* setup.
*
* Returns 0 on success, -EINVAL on failure
*
*/
int hfi1_check_send_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{ {
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah = ibah_to_rvtah(ud_wr(wr)->ah); struct rvt_ah *ah;
if (qp->ibqp.qp_type != IB_QPT_RC && switch (qp->ibqp.qp_type) {
qp->ibqp.qp_type != IB_QPT_UC && case IB_QPT_RC:
qp->ibqp.qp_type != IB_QPT_SMI && case IB_QPT_UC:
ibp->sl_to_sc[ah->attr.sl] == 0xf) { if (wqe->length > 0x80000000U)
return -EINVAL; return -EINVAL;
break;
case IB_QPT_SMI:
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
break;
case IB_QPT_GSI:
case IB_QPT_UD:
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
return -EINVAL;
default:
break;
} }
return 0; return 0;
} }
...@@ -301,6 +330,42 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) ...@@ -301,6 +330,42 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp)
return cpu_to_be32(aeth); return cpu_to_be32(aeth);
} }
/**
* _hfi1_schedule_send - schedule progress
* @qp: the QP
*
* This schedules qp progress w/o regard to the s_flags.
*
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
void _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
priv->s_sde->cpu :
cpumask_first(cpumask_of_node(dd->node)));
}
/**
* hfi1_schedule_send - schedule progress
* @qp: the QP
*
* This schedules qp progress and caller should hold
* the s_lock.
*/
void hfi1_schedule_send(struct rvt_qp *qp)
{
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
/** /**
* hfi1_get_credit - flush the send work queue of a QP * hfi1_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush * @qp: the qp who's send work queue to flush
......
...@@ -137,41 +137,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter); ...@@ -137,41 +137,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
*/ */
void qp_comm_est(struct rvt_qp *qp); void qp_comm_est(struct rvt_qp *qp);
/** void _hfi1_schedule_send(struct rvt_qp *qp);
* _hfi1_schedule_send - schedule progress void hfi1_schedule_send(struct rvt_qp *qp);
* @qp: the QP
*
* This schedules qp progress w/o regard to the s_flags.
*
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
static inline void _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
priv->s_sde->cpu :
cpumask_first(cpumask_of_node(dd->node)));
}
/**
* hfi1_schedule_send - schedule progress
* @qp: the QP
*
* This schedules qp progress and caller should hold
* the s_lock.
*/
static inline void hfi1_schedule_send(struct rvt_qp *qp)
{
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
void hfi1_migrate_qp(struct rvt_qp *qp); void hfi1_migrate_qp(struct rvt_qp *qp);
......
...@@ -367,6 +367,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, ...@@ -367,6 +367,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP * @qp: a pointer to the QP
* *
* Assumes s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int hfi1_make_rc_req(struct rvt_qp *qp) int hfi1_make_rc_req(struct rvt_qp *qp)
...@@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
u32 bth2; u32 bth2;
u32 pmtu = qp->pmtu; u32 pmtu = qp->pmtu;
char newreq; char newreq;
unsigned long flags;
int ret = 0; int ret = 0;
int middle = 0; int middle = 0;
int delta; int delta;
...@@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &priv->s_hdr->ibh.u.l.oth; ohdr = &priv->s_hdr->ibh.u.l.oth;
/*
* The lock is needed to synchronize between the sending tasklet,
* the receive interrupt handler, and timeout re-sends.
*/
spin_lock_irqsave(&qp->s_lock, flags);
/* Sending responses has higher priority over sending requests. */ /* Sending responses has higher priority over sending requests. */
if ((qp->s_flags & RVT_S_RESP_PENDING) && if ((qp->s_flags & RVT_S_RESP_PENDING) &&
make_rc_ack(dev, qp, ohdr, pmtu)) make_rc_ack(dev, qp, ohdr, pmtu))
...@@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_iowait.sdma_busy)) { if (atomic_read(&priv->s_iowait.sdma_busy)) {
...@@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_FENCE; qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail; goto bail;
} }
wqe->psn = qp->s_next_psn;
newreq = 1; newreq = 1;
qp->s_psn = wqe->psn;
} }
/* /*
* Note that we have to be careful not to modify the * Note that we have to be careful not to modify the
...@@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail; goto bail;
} }
wqe->lpsn = wqe->psn;
if (len > pmtu) { if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(SEND_FIRST); qp->s_state = OP(SEND_FIRST);
len = pmtu; len = pmtu;
break; break;
...@@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
cpu_to_be32(wqe->rdma_wr.rkey); cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len); ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / sizeof(u32); hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
if (len > pmtu) { if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
qp->s_state = OP(RDMA_WRITE_FIRST); qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu; len = pmtu;
break; break;
...@@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++; qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++; qp->s_lsn++;
/*
* Adjust s_next_psn to count the
* expected number of responses.
*/
if (len > pmtu)
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
} }
ohdr->u.rc.reth.vaddr = ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->rdma_wr.remote_addr); cpu_to_be64(wqe->rdma_wr.remote_addr);
...@@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
qp->s_num_rd_atomic++; qp->s_num_rd_atomic++;
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++; qp->s_lsn++;
wqe->lpsn = wqe->psn;
} }
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP); qp->s_state = OP(COMPARE_SWAP);
...@@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
} }
if (wqe->wr.opcode == IB_WR_RDMA_READ) if (wqe->wr.opcode == IB_WR_RDMA_READ)
qp->s_psn = wqe->lpsn + 1; qp->s_psn = wqe->lpsn + 1;
else { else
qp->s_psn++; qp->s_psn++;
if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
}
break; break;
case OP(RDMA_READ_RESPONSE_FIRST): case OP(RDMA_READ_RESPONSE_FIRST):
...@@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */ /* FALLTHROUGH */
case OP(SEND_MIDDLE): case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++); bth2 = mask_psn(qp->s_psn++);
if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge; ss = &qp->s_sge;
len = qp->s_len; len = qp->s_len;
if (len > pmtu) { if (len > pmtu) {
...@@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
/* FALLTHROUGH */ /* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE): case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++); bth2 = mask_psn(qp->s_psn++);
if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge; ss = &qp->s_sge;
len = qp->s_len; len = qp->s_len;
if (len > pmtu) { if (len > pmtu) {
...@@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) ...@@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp)
bth2, bth2,
middle); middle);
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
...@@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, ...@@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
trace_hfi1_rc_ack(qp, psn); trace_hfi1_rc_ack(qp, psn);
/* Ignore invalid responses. */ /* Ignore invalid responses. */
if (cmp_psn(psn, qp->s_next_psn) >= 0) smp_read_barrier_depends(); /* see post_one_send */
if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
goto ack_done; goto ack_done;
/* Ignore duplicate responses. */ /* Ignore duplicate responses. */
......
...@@ -392,7 +392,8 @@ static void ruc_loopback(struct rvt_qp *sqp) ...@@ -392,7 +392,8 @@ static void ruc_loopback(struct rvt_qp *sqp)
sqp->s_flags |= RVT_S_BUSY; sqp->s_flags |= RVT_S_BUSY;
again: again:
if (sqp->s_last == sqp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
goto clr_busy; goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
...@@ -871,40 +872,43 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -871,40 +872,43 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_flags |= RVT_S_BUSY; qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
timeout = jiffies + (timeout_int) / 8; timeout = jiffies + (timeout_int) / 8;
cpu = priv->s_sde ? priv->s_sde->cpu : cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node)); cpumask_first(cpumask_of_node(ps.ppd->dd->node));
do { do {
/* Check for a constructed packet to be sent. */ /* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) { if (qp->s_hdrwords != 0) {
spin_unlock_irqrestore(&qp->s_lock, flags);
/* /*
* If the packet cannot be sent now, return and * If the packet cannot be sent now, return and
* the send tasklet will be woken up later. * the send tasklet will be woken up later.
*/ */
if (hfi1_verbs_send(qp, &ps)) if (hfi1_verbs_send(qp, &ps))
break; return;
/* Record that s_hdr is empty. */ /* Record that s_hdr is empty. */
qp->s_hdrwords = 0; qp->s_hdrwords = 0;
} /* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
/* allow other tasks to run */ if (workqueue_congested(cpu,
if (unlikely(time_after(jiffies, timeout))) { ps.ppd->hfi1_wq)) {
if (workqueue_congested(cpu, ps.ppd->hfi1_wq)) { spin_lock_irqsave(&qp->s_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags); qp->s_flags &= ~RVT_S_BUSY;
qp->s_flags &= ~RVT_S_BUSY; hfi1_schedule_send(qp);
hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock,
spin_unlock_irqrestore(&qp->s_lock, flags);
flags); this_cpu_inc(
*ps.ppd->dd->send_schedule);
return;
}
cond_resched();
this_cpu_inc(*ps.ppd->dd->send_schedule); this_cpu_inc(*ps.ppd->dd->send_schedule);
return; timeout = jiffies + (timeout_int) / 8;
} }
cond_resched(); spin_lock_irqsave(&qp->s_lock, flags);
this_cpu_inc(*ps.ppd->dd->send_schedule);
timeout = jiffies + (timeout_int) / 8;
} }
} while (make_req(qp)); } while (make_req(qp));
spin_unlock_irqrestore(&qp->s_lock, flags);
} }
/* /*
......
...@@ -59,6 +59,8 @@ ...@@ -59,6 +59,8 @@
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP * @qp: a pointer to the QP
* *
* Assume s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int hfi1_make_uc_req(struct rvt_qp *qp) int hfi1_make_uc_req(struct rvt_qp *qp)
...@@ -66,7 +68,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp) ...@@ -66,7 +68,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr; struct hfi1_other_headers *ohdr;
struct rvt_swqe *wqe; struct rvt_swqe *wqe;
unsigned long flags;
u32 hwords = 5; u32 hwords = 5;
u32 bth0 = 0; u32 bth0 = 0;
u32 len; u32 len;
...@@ -74,13 +75,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) ...@@ -74,13 +75,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp)
int ret = 0; int ret = 0;
int middle = 0; int middle = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_iowait.sdma_busy)) { if (atomic_read(&priv->s_iowait.sdma_busy)) {
...@@ -106,15 +106,15 @@ int hfi1_make_uc_req(struct rvt_qp *qp) ...@@ -106,15 +106,15 @@ int hfi1_make_uc_req(struct rvt_qp *qp)
RVT_PROCESS_NEXT_SEND_OK)) RVT_PROCESS_NEXT_SEND_OK))
goto bail; goto bail;
/* Check if send work queue is empty. */ /* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head) { smp_read_barrier_depends(); /* see post_one_send() */
if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
clear_ahg(qp); clear_ahg(qp);
goto bail; goto bail;
} }
/* /*
* Start a new request. * Start a new request.
*/ */
wqe->psn = qp->s_next_psn; qp->s_psn = wqe->psn;
qp->s_psn = qp->s_next_psn;
qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.num_sge = wqe->wr.num_sge;
...@@ -235,15 +235,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) ...@@ -235,15 +235,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp)
qp->s_cur_sge = &qp->s_sge; qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len; qp->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_next_psn++), middle); mask_psn(qp->s_psn++), middle);
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
......
...@@ -261,6 +261,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ...@@ -261,6 +261,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
* hfi1_make_ud_req - construct a UD request packet * hfi1_make_ud_req - construct a UD request packet
* @qp: the QP * @qp: the QP
* *
* Assume s_lock is held.
*
* Return 1 if constructed; otherwise, return 0. * Return 1 if constructed; otherwise, return 0.
*/ */
int hfi1_make_ud_req(struct rvt_qp *qp) int hfi1_make_ud_req(struct rvt_qp *qp)
...@@ -271,7 +273,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -271,7 +273,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp; struct hfi1_ibport *ibp;
struct rvt_swqe *wqe; struct rvt_swqe *wqe;
unsigned long flags;
u32 nwords; u32 nwords;
u32 extra_bytes; u32 extra_bytes;
u32 bth0; u32 bth0;
...@@ -281,13 +282,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -281,13 +282,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
int next_cur; int next_cur;
u8 sc5; u8 sc5;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail; goto bail;
/* We are in the error state, flush the work request. */ /* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head) smp_read_barrier_depends(); /* see post_one_send */
if (qp->s_last == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
/* If DMAs are in progress, we can't flush immediately. */ /* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_iowait.sdma_busy)) { if (atomic_read(&priv->s_iowait.sdma_busy)) {
...@@ -299,7 +299,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -299,7 +299,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
goto done; goto done;
} }
if (qp->s_cur == qp->s_head) /* see post_one_send() */
smp_read_barrier_depends();
if (qp->s_cur == ACCESS_ONCE(qp->s_head))
goto bail; goto bail;
wqe = rvt_get_swqe_ptr(qp, qp->s_cur); wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
...@@ -317,6 +319,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -317,6 +319,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
if (unlikely(!loopback && (lid == ppd->lid || if (unlikely(!loopback && (lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) && (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
qp->ibqp.qp_type == IB_QPT_GSI)))) { qp->ibqp.qp_type == IB_QPT_GSI)))) {
unsigned long flags;
/* /*
* If DMAs are in progress, we can't generate * If DMAs are in progress, we can't generate
* a completion for the loopback packet since * a completion for the loopback packet since
...@@ -329,6 +332,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -329,6 +332,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
goto bail; goto bail;
} }
qp->s_cur = next_cur; qp->s_cur = next_cur;
local_irq_save(flags);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
ud_loopback(qp, wqe); ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
...@@ -408,7 +412,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -408,7 +412,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
/* /*
* Qkeys with the high order bit set mean use the * Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5). * qkey from the QP context instead of the WR (see 10.2.5).
...@@ -423,13 +427,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) ...@@ -423,13 +427,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp)
priv->s_hdr->sde = NULL; priv->s_hdr->sde = NULL;
done: done:
ret = 1; return 1;
goto unlock;
bail: bail:
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret; return ret;
} }
......
...@@ -1533,6 +1533,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ...@@ -1533,6 +1533,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
...@@ -1543,7 +1544,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ...@@ -1543,7 +1544,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.check_send_wr = hfi1_check_send_wr; dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
/* completeion queue */ /* completeion queue */
snprintf(dd->verbs_dev.rdi.dparms.cq_name, snprintf(dd->verbs_dev.rdi.dparms.cq_name,
......
...@@ -427,7 +427,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, ...@@ -427,7 +427,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata); int attr_mask, struct ib_udata *udata);
int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr); int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0); int has_grh, struct rvt_qp *qp, u32 bth0);
......
...@@ -220,6 +220,7 @@ struct rvt_ah { ...@@ -220,6 +220,7 @@ struct rvt_ah {
}; };
struct rvt_dev_info; struct rvt_dev_info;
struct rvt_swqe;
struct rvt_driver_provided { struct rvt_driver_provided {
/* /*
* The work to create port files in /sys/class Infiniband is different * The work to create port files in /sys/class Infiniband is different
...@@ -240,6 +241,7 @@ struct rvt_driver_provided { ...@@ -240,6 +241,7 @@ struct rvt_driver_provided {
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void (*notify_qp_reset)(struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp);
void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp);
void (*schedule_send_no_lock)(struct rvt_qp *qp);
void (*do_send)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp);
int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_attr *attr); struct ib_qp_attr *attr);
...@@ -273,7 +275,7 @@ struct rvt_driver_provided { ...@@ -273,7 +275,7 @@ struct rvt_driver_provided {
void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata); int attr_mask, struct ib_udata *udata);
int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
......
...@@ -250,11 +250,12 @@ struct rvt_qp { ...@@ -250,11 +250,12 @@ struct rvt_qp {
enum ib_mtu path_mtu; enum ib_mtu path_mtu;
int srate_mbps; /* s_srate (below) converted to Mbit/s */ int srate_mbps; /* s_srate (below) converted to Mbit/s */
u32 remote_qpn; u32 remote_qpn;
u32 pmtu; /* decoded from path_mtu */
u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 qkey; /* QKEY for this QP (for UD or RD) */
u32 s_size; /* send work queue size */ u32 s_size; /* send work queue size */
u32 s_ahgpsn; /* set to the psn in the copy of the header */ u32 s_ahgpsn; /* set to the psn in the copy of the header */
u16 pmtu; /* decoded from path_mtu */
u8 log_pmtu; /* shift for pmtu */
u8 state; /* QP state */ u8 state; /* QP state */
u8 allowed_ops; /* high order bits of allowed opcodes */ u8 allowed_ops; /* high order bits of allowed opcodes */
u8 qp_access_flags; u8 qp_access_flags;
...@@ -299,6 +300,13 @@ struct rvt_qp { ...@@ -299,6 +300,13 @@ struct rvt_qp {
struct rvt_sge_state r_sge; /* current receive data */ struct rvt_sge_state r_sge; /* current receive data */
struct rvt_rq r_rq; /* receive work queue */ struct rvt_rq r_rq; /* receive work queue */
/* post send line */
spinlock_t s_hlock ____cacheline_aligned_in_smp;
u32 s_head; /* new entries added here */
u32 s_next_psn; /* PSN for next request */
u32 s_avail; /* number of entries avail */
u32 s_ssn; /* SSN of tail entry */
spinlock_t s_lock ____cacheline_aligned_in_smp; spinlock_t s_lock ____cacheline_aligned_in_smp;
struct rvt_sge_state *s_cur_sge; struct rvt_sge_state *s_cur_sge;
u32 s_flags; u32 s_flags;
...@@ -308,19 +316,16 @@ struct rvt_qp { ...@@ -308,19 +316,16 @@ struct rvt_qp {
u32 s_cur_size; /* size of send packet in bytes */ u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */ u32 s_len; /* total length of s_sge */
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */ u32 s_last_psn; /* last response PSN processed */
u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_psn; /* lowest PSN that is being sent */
u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */
u32 s_psn; /* current packet sequence number */ u32 s_psn; /* current packet sequence number */
u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
u32 s_head; /* new entries added here */
u32 s_tail; /* next entry to process */ u32 s_tail; /* next entry to process */
u32 s_cur; /* current work queue entry */ u32 s_cur; /* current work queue entry */
u32 s_acked; /* last un-ACK'ed entry */ u32 s_acked; /* last un-ACK'ed entry */
u32 s_last; /* last completed entry */ u32 s_last; /* last completed entry */
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */ u32 s_lsn; /* limit sequence number (credit) */
u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u16 s_rdma_ack_cnt; u16 s_rdma_ack_cnt;
......