Commit c7cbf2fa authored by Mitko Haralanov, committed by Doug Ledford

staging/rdma/hfi1: Properly determine error status of SDMA slots

To ensure correct operation between the driver and PSM
with respect to managing the SDMA request ring, it is
important that the status for a particular request slot
be set at the correct time. Otherwise, PSM can get out
of sync with the driver, which could lead to hangs or
errors on new requests.

Properly determining when to set the error status of
an SDMA slot depends on knowing exactly when the last txreq
for that request has been completed. This, in turn, requires
that the driver knows exactly how many txreqs have been
generated for a request and how many of those have been
successfully submitted to the SDMA queue.
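
As a rough, self-contained sketch of the bookkeeping this implies
(field names mirror the driver's counters, but the code below is a
user-space illustration, not the actual driver logic):

	#include <stdio.h>
	#include <stdbool.h>

	/* Illustrative model: seqnum counts txreqs generated for the
	 * request, seqsubmitted counts txreqs accepted by the SDMA
	 * ring, and seqcomp records the sequence number of the last
	 * completed txreq. send_done models "no further txreqs will be
	 * submitted", whether the request finished or hit an error. */
	struct request {
		unsigned long seqnum;
		unsigned long seqsubmitted;
		unsigned long seqcomp;
		bool send_done;
	};

	/* The slot's final (error) status may be set only once the
	 * last *submitted* txreq has completed; waiting for the last
	 * *generated* txreq would wait forever if a submission
	 * failed. */
	static bool can_complete(const struct request *req)
	{
		return req->send_done &&
		       req->seqcomp == req->seqsubmitted - 1;
	}

	int main(void)
	{
		/* 10 txreqs generated, only 7 reached the ring (0..6) */
		struct request req = { 10, 7, 6, true };

		printf("can complete: %d\n", can_complete(&req));
		return 0;
	}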

The previous implementation of the mid-layer SDMA API did not
provide a way for the caller of sdma_send_txlist() to know how
many of the txreqs in the input list have actually been submitted
without traversing the list and counting. Since sdma_send_txlist()
already traverses the list in order to process it, requiring
such a traversal in the caller is unnecessary. Therefore,
sdma_send_txlist() is enhanced to return the number of
successfully submitted txreqs.

This, in turn, allows the caller to accurately determine the
progress of the SDMA request and, therefore, correctly set the
error status at the right time.
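
A minimal sketch of the resulting caller pattern (again user space;
send_txlist() is a hypothetical stand-in for sdma_send_txlist() that
returns the number of txreqs submitted, or a negative errno):

	#include <errno.h>
	#include <stdio.h>

	/* stand-in: submits as many of 'n' txreqs as 'space' allows */
	static int send_txlist(int n, int space)
	{
		int submitted = n < space ? n : space;

		return submitted > 0 ? submitted : -EBUSY;
	}

	int main(void)
	{
		unsigned long seqsubmitted = 0;
		int ret = send_txlist(10, 7);

		if (ret > 0) {
			/* accurate progress without re-walking the list */
			seqsubmitted += ret;
			ret = 0;
		}
		printf("submitted=%lu ret=%d\n", seqsubmitted, ret);
		return ret;
	}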
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent ecd42f8d
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c
@@ -2144,8 +2144,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
  * side locking.
  *
  * Return:
- * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring
- * (wait == NULL)
+ * > 0 - Success (value is number of sdma_txreq's submitted),
+ * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
 int sdma_send_txlist(struct sdma_engine *sde,
@@ -2185,7 +2185,7 @@ int sdma_send_txlist(struct sdma_engine *sde,
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
-	return ret;
+	return ret == 0 ? count : ret;
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
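
Design note on the hunk above: count is the tally of txreqs taken off
tx_list and queued to the ring during this call (presumably maintained
inside the submit loop), so on success the function now reports exactly
how many entries it consumed. Callers that only test for ret < 0 keep
working unchanged.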
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -234,6 +234,7 @@ struct user_sdma_request {
 	u32 sent;
 	u64 seqnum;
 	u64 seqcomp;
+	u64 seqsubmitted;
 	struct list_head txps;
 	spinlock_t txcmp_lock; /* protect txcmp list */
 	struct list_head txcmp;
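
With this addition the request carries three counters: seqnum (txreqs
generated), seqsubmitted (txreqs accepted by the SDMA ring), and
seqcomp (sequence number of the last completed txreq). The completion
callback further below keys off seqsubmitted rather than seqnum.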
@@ -1001,18 +1002,19 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 				TXREQ_FLAGS_IOVEC_LAST_PKT;
 		}
-		list_add_tail(&tx->txreq.list, &req->txps);
 		/*
 		 * It is important to increment this here as it is used to
 		 * generate the BTH.PSN and, therefore, can't be bulk-updated
 		 * outside of the loop.
 		 */
 		tx->seqnum = req->seqnum++;
+		list_add_tail(&tx->txreq.list, &req->txps);
 		npkts++;
 	}
 dosend:
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-	if (list_empty(&req->txps))
+	if (list_empty(&req->txps)) {
+		req->seqsubmitted = req->seqnum;
 		if (req->seqnum == req->info.npkts) {
 			set_bit(SDMA_REQ_SEND_DONE, &req->flags);
 			/*
@@ -1024,6 +1026,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 			if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
 				sdma_ahg_free(req->sde, req->ahg_idx);
 		}
+	} else if (ret > 0) {
+		req->seqsubmitted += ret;
+		ret = 0;
+	}
 	return ret;
 free_txreq:
@@ -1406,8 +1412,9 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
 	} else {
 		if (status != SDMA_TXREQ_S_OK)
 			req->status = status;
-		if (req->seqcomp == ACCESS_ONCE(req->seqnum) &&
-		    test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
+		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
 			user_sdma_free_request(req, false);
 			pq_update(pq);
 			set_comp_state(pq, cq, idx, ERROR, req->status);
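
Worked example (numbers assumed for illustration): for a request of 10
packets where only 7 txreqs are successfully submitted before an error,
the submitted txreqs carry seqnum 0 through 6 and req->seqsubmitted
ends at 7. The callback can then free the request and report the ERROR
completion as soon as req->seqcomp reaches 6, i.e. seqsubmitted - 1;
the old check against req->seqnum (10) would never be satisfied in this
case, leaving the slot stuck and PSM out of sync with the driver.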