Commit 0f2d87d2 authored by Mitko Haralanov, committed by Doug Ledford

staging/rdma/hfi1: Improve performance of SDMA transfers

Commit a0d40693 ("staging/rdma/hfi1: Add page lock limit
check for SDMA requests") added a mechanism to
delay the clean-up of user SDMA requests in order to facilitate
proper locked page counting.

This delayed processing was done using a kernel workqueue, which
meant that a kernel thread would have to spin up and take CPU
cycles to do the clean-up.

This proved detrimental to performance because it put two execution
threads (the workqueue worker and the user process) in contention for
cycles on the same CPU.

Performance-wise, it is much better to do as much of the clean-up as
possible in interrupt context (during the SDMA callback) and to do the
remaining work in-line during subsequent calls by the user process into
the driver.
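
That split can be sketched as follows. This is a minimal illustration
only, with assumed, abbreviated names (struct pkt_q, struct sdma_iovec,
queue_completed_iovec() and drain_completed_iovecs() are hypothetical
stand-ins, not the driver's actual identifiers): the completion
callback, running in interrupt context, only moves a finished io vector
onto a spinlock-protected per-queue list, and the next call by the user
process into the driver drains that list and does the expensive page
unpinning in-line.

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct pkt_q {
            struct list_head iovec_list;    /* completed, not yet unpinned */
            spinlock_t iovec_lock;          /* protects iovec_list */
    };

    struct sdma_iovec {
            struct list_head list;
    };

    /* Interrupt context (SDMA completion callback): only queue the vector. */
    static void queue_completed_iovec(struct pkt_q *pq, struct sdma_iovec *vec)
    {
            spin_lock(&pq->iovec_lock);
            list_add_tail(&vec->list, &pq->iovec_list);
            spin_unlock(&pq->iovec_lock);
    }

    /* Process context (next call into the driver): unpin pages in-line. */
    static void drain_completed_iovecs(struct pkt_q *pq,
                                       void (*unpin)(struct sdma_iovec *))
    {
            struct sdma_iovec *vec;
            unsigned long flags;

            while (!list_empty(&pq->iovec_list)) {
                    spin_lock_irqsave(&pq->iovec_lock, flags);
                    vec = list_first_entry(&pq->iovec_list,
                                           struct sdma_iovec, list);
                    list_del_init(&vec->list);
                    spin_unlock_irqrestore(&pq->iovec_lock, flags);
                    unpin(vec);
            }
    }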

The changes required to implement the above also significantly
simplify the entire SDMA completion processing code and eliminate a
memory corruption that caused the following observed crash:

    [ 2881.703362] BUG: unable to handle kernel NULL pointer dereference at        (null)
    [ 2881.703389] IP: [<ffffffffa02897e4>] user_sdma_send_pkts+0xcd4/0x18e0 [hfi1]
    [ 2881.703422] PGD 7d4d25067 PUD 77d96d067 PMD 0
    [ 2881.703427] Oops: 0000 [#1] SMP
    [ 2881.703431] Modules linked in:
    [ 2881.703504] CPU: 28 PID: 6668 Comm: mpi_stress Tainted: G           OENX 3.12.28-4-default #1
    [ 2881.703508] Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0044.090
    [ 2881.703512] task: ffff88077da8e0c0 ti: ffff880856772000 task.ti: ffff880856772000
    [ 2881.703515] RIP: 0010:[<ffffffffa02897e4>]  [<ffffffffa02897e4>] user_sdma_send_pkts+0xcd4/0x
    [ 2881.703529] RSP: 0018:ffff880856773c48  EFLAGS: 00010287
    [ 2881.703531] RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000002000
    [ 2881.703534] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000002000
    [ 2881.703537] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
    [ 2881.703540] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
    [ 2881.703543] R13: 0000000000000000 R14: ffff88071e782e68 R15: ffff8810532955c0
    [ 2881.703546] FS:  00007f9c4375e700(0000) GS:ffff88107eec0000(0000) knlGS:0000000000000000
    [ 2881.703549] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [ 2881.703551] CR2: 0000000000000000 CR3: 00000007d4cba000 CR4: 00000000003407e0
    [ 2881.703554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [ 2881.703556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    [ 2881.703558] Stack:
    [ 2881.703559]  ffffffff00002000 ffff881000001800 ffffffff00000000 00000000000080d0
    [ 2881.703570]  0000000000000000 0000200000000000 0000000000000000 ffff88071e782db8
    [ 2881.703580]  ffff8807d4d08d80 ffff881053295600 0000000000000008 ffff88071e782fc8
    [ 2881.703589] Call Trace:
    [ 2881.703691]  [<ffffffffa028b5da>] hfi1_user_sdma_process_request+0x84a/0xab0 [hfi1]
    [ 2881.703777]  [<ffffffffa0255412>] hfi1_aio_write+0xd2/0x110 [hfi1]
    [ 2881.703828]  [<ffffffff8119e3d8>] do_sync_readv_writev+0x48/0x80
    [ 2881.703837]  [<ffffffff8119f78b>] do_readv_writev+0xbb/0x230
    [ 2881.703843]  [<ffffffff8119fab8>] SyS_writev+0x48/0xc0

This commit also addresses an issue with how user processes are
notified of SDMA request slot availability: a slot must be cleaned up
before the user process is notified that it is available.
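
In the reworked completion callback, that ordering looks roughly like
the condensed excerpt below (success path only, error handling and
surrounding code omitted):

    /* Condensed from the new user_sdma_txreq_cb(): free the request slot
     * and update the packet queue before the completion entry that user
     * space polls is marked COMPLETE. */
    if (req->seqcomp == req->info.npkts - 1) {
            req->status = 0;
            user_sdma_free_request(req, false);       /* 1. clean up the slot */
            pq_update(pq);                            /* 2. drop n_reqs, wake waiters */
            set_comp_state(pq, cq, idx, COMPLETE, 0); /* 3. notify user space */
    }
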
Reviewed-by: Arthur Kepner <arthur.kepner@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent d24bc648
@@ -147,6 +147,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 /* Last packet in the request */
 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+/* Last packet that uses a particular io vector */
 #define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
 
 #define SDMA_REQ_IN_USE 0
@@ -171,6 +173,7 @@ static unsigned initial_pkt_count = 8;
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
 struct user_sdma_iovec {
+        struct list_head list;
         struct iovec iov;
         /* number of pages in this vector */
         unsigned npages;
@@ -213,15 +216,6 @@ struct user_sdma_request {
          * to 0.
          */
         u8 omfactor;
-        /*
-         * pointer to the user's mm_struct. We are going to
-         * get a reference to it so it doesn't get freed
-         * since we might not be in process context when we
-         * are processing the iov's.
-         * Using this mm_struct, we can get vma based on the
-         * iov's address (find_vma()).
-         */
-        struct mm_struct *user_mm;
         /*
         * We copy the iovs for this request (based on
         * info.iovcnt). These are only the data vectors
@@ -239,13 +233,13 @@ struct user_sdma_request {
         u16 tididx;
         u32 sent;
         u64 seqnum;
+        u64 seqcomp;
         struct list_head txps;
         spinlock_t txcmp_lock; /* protect txcmp list */
         struct list_head txcmp;
         unsigned long flags;
         /* status of the last txreq completed */
         int status;
-        struct work_struct worker;
 };
 
 /*
@@ -281,20 +275,20 @@ struct user_sdma_txreq {
 static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
 static int num_user_pages(const struct iovec *);
 static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
 static int pin_vector_pages(struct user_sdma_request *,
                             struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
-                               struct user_sdma_iovec *);
+static void unpin_vector_pages(struct user_sdma_iovec *);
 static int check_header_template(struct user_sdma_request *,
                                  struct hfi1_pkt_header *, u32, u32);
 static int set_txreq_header(struct user_sdma_request *,
                             struct user_sdma_txreq *, u32);
 static int set_txreq_header_ahg(struct user_sdma_request *,
                                 struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
-                                  enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+                                  struct hfi1_user_sdma_comp_q *,
+                                  u16, enum hfi1_sdma_comp_state, int);
 static inline u32 set_pkt_bth_psn(__be32, u8, u32);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
@@ -381,17 +375,19 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
                 goto pq_nomem;
 
         memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
-        pq->reqs = kmalloc(memsize, GFP_KERNEL);
+        pq->reqs = kzalloc(memsize, GFP_KERNEL);
         if (!pq->reqs)
                 goto pq_reqs_nomem;
 
         INIT_LIST_HEAD(&pq->list);
+        INIT_LIST_HEAD(&pq->iovec_list);
         pq->dd = dd;
         pq->ctxt = uctxt->ctxt;
         pq->subctxt = fd->subctxt;
         pq->n_max_reqs = hfi1_sdma_comp_ring_size;
         pq->state = SDMA_PKT_Q_INACTIVE;
         atomic_set(&pq->n_reqs, 0);
+        spin_lock_init(&pq->iovec_lock);
         init_waitqueue_head(&pq->wait);
 
         iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
@@ -447,6 +443,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 {
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
         struct hfi1_user_sdma_pkt_q *pq;
+        struct user_sdma_iovec *iov;
         unsigned long flags;
 
         hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
@@ -462,6 +459,15 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
                 wait_event_interruptible(
                         pq->wait,
                         (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+                /* Unpin any left over buffers. */
+                while (!list_empty(&pq->iovec_list)) {
+                        spin_lock_irqsave(&pq->iovec_lock, flags);
+                        iov = list_first_entry(&pq->iovec_list,
+                                               struct user_sdma_iovec, list);
+                        list_del_init(&iov->list);
+                        spin_unlock_irqrestore(&pq->iovec_lock, flags);
+                        unpin_vector_pages(iov);
+                }
                 kfree(pq->reqs);
                 kmem_cache_destroy(pq->txreq_cache);
                 kfree(pq);
@@ -479,16 +485,17 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                                    unsigned long dim, unsigned long *count)
 {
-        int ret = 0, i = 0, sent;
+        int ret = 0, i = 0;
         struct hfi1_filedata *fd = fp->private_data;
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
         struct hfi1_user_sdma_pkt_q *pq = fd->pq;
         struct hfi1_user_sdma_comp_q *cq = fd->cq;
         struct hfi1_devdata *dd = pq->dd;
-        unsigned long idx = 0;
+        unsigned long idx = 0, flags;
         u8 pcount = initial_pkt_count;
         struct sdma_req_info info;
         struct user_sdma_request *req;
+        struct user_sdma_iovec *ioptr;
         u8 opcode, sc, vl;
 
         if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
@@ -505,9 +512,21 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                           dd->unit, uctxt->ctxt, fd->subctxt, ret);
                 return -EFAULT;
         }
+
+        /* Process any completed vectors */
+        while (!list_empty(&pq->iovec_list)) {
+                spin_lock_irqsave(&pq->iovec_lock, flags);
+                ioptr = list_first_entry(&pq->iovec_list,
+                                         struct user_sdma_iovec, list);
+                list_del_init(&ioptr->list);
+                spin_unlock_irqrestore(&pq->iovec_lock, flags);
+                unpin_vector_pages(ioptr);
+        }
+
         trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
                                      (u16 *)&info);
-        if (cq->comps[info.comp_idx].status == QUEUED) {
+        if (cq->comps[info.comp_idx].status == QUEUED ||
+            test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
                 hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
                           dd->unit, uctxt->ctxt, fd->subctxt,
                           info.comp_idx);
@@ -534,10 +553,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
         req->cq = cq;
         req->status = -1;
         INIT_LIST_HEAD(&req->txps);
-        INIT_LIST_HEAD(&req->txcmp);
-        INIT_WORK(&req->worker, user_sdma_delayed_completion);
-        spin_lock_init(&req->txcmp_lock);
         memcpy(&req->info, &info, sizeof(info));
 
         if (req_opcode(info.ctrl) == EXPECTED)
@@ -606,6 +622,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
         /* Save all the IO vector structures */
         while (i < req->data_iovs) {
+                INIT_LIST_HEAD(&req->iovs[i].list);
                 memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
                 req->iovs[i].offset = 0;
                 req->data_len += req->iovs[i++].iov.iov_len;
@@ -671,21 +688,25 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                 }
         }
 
-        set_comp_state(req, QUEUED, 0);
+        set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+        atomic_inc(&pq->n_reqs);
         /* Send the first N packets in the request to buy us some time */
-        sent = user_sdma_send_pkts(req, pcount);
-        if (unlikely(sent < 0)) {
-                if (sent != -EBUSY) {
-                        req->status = sent;
-                        set_comp_state(req, ERROR, req->status);
-                        return sent;
-                } else
-                        sent = 0;
+        ret = user_sdma_send_pkts(req, pcount);
+        if (unlikely(ret < 0 && ret != -EBUSY)) {
+                req->status = ret;
+                atomic_dec(&pq->n_reqs);
+                goto free_req;
         }
-        atomic_inc(&pq->n_reqs);
+
+        /*
+         * It is possible that the SDMA engine would have processed all the
+         * submitted packets by the time we get here. Therefore, only set
+         * packet queue state to ACTIVE if there are still uncompleted
+         * requests.
+         */
+        if (atomic_read(&pq->n_reqs))
                 xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
-        if (sent < req->info.npkts) {
+
         /*
          * This is a somewhat blocking send implementation.
          * The driver will block the caller until all packets of the
@@ -697,6 +718,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                 if (ret < 0) {
                         if (ret != -EBUSY) {
                                 req->status = ret;
+                                set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
                                 return ret;
                         }
                         wait_event_interruptible_timeout(
@@ -705,13 +727,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
                                 msecs_to_jiffies(
                                         SDMA_IOWAIT_TIMEOUT));
                         }
                 }
-        }
 
         *count += idx;
         return 0;
 free_req:
-        user_sdma_free_request(req);
+        user_sdma_free_request(req, true);
+        set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
         return ret;
 }
@@ -937,16 +959,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
                                               iovec->pages[pageidx],
                                               offset, len);
                         if (ret) {
-                                int i;
-
                                 SDMA_DBG(req, "SDMA txreq add page failed %d\n",
                                          ret);
-                                /* Mark all assigned vectors as complete so they
-                                 * are unpinned in the callback. */
-                                for (i = tx->idx; i >= 0; i--) {
-                                        tx->iovecs[i].flags |=
-                                                TXREQ_FLAGS_IOVEC_LAST_PKT;
-                                }
                                 goto free_txreq;
                         }
                         iov_offset += len;
@@ -1043,12 +1057,6 @@ static int pin_vector_pages(struct user_sdma_request *req,
                 return -ENOMEM;
         }
 
-        /*
-         * Get a reference to the process's mm so we can use it when
-         * unpinning the io vectors.
-         */
-        req->pq->user_mm = get_task_mm(current);
-
         pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
                                          npages, 0, iovec->pages);
@@ -1058,34 +1066,20 @@ static int pin_vector_pages(struct user_sdma_request *req,
         iovec->npages = pinned;
         if (pinned != npages) {
                 SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
-                unpin_vector_pages(req, iovec);
+                unpin_vector_pages(iovec);
                 return -EFAULT;
         }
-        /*
-         * Get a reference to the process's mm so we can use it when
-         * unpinning the io vectors.
-         */
         return 0;
 }
 
-static void unpin_vector_pages(struct user_sdma_request *req,
-                               struct user_sdma_iovec *iovec)
+static void unpin_vector_pages(struct user_sdma_iovec *iovec)
 {
-        /*
-         * Unpinning is done through the workqueue so use the
-         * process's mm if we have a reference to it.
-         */
-        if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
-                use_mm(req->pq->user_mm);
-
         hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
 
-        /*
-         * Unuse the user's mm (see above) and release the
-         * reference to it.
-         */
-        if (req->pq->user_mm) {
-                if (current->flags & PF_KTHREAD)
-                        unuse_mm(req->pq->user_mm);
-                mmput(req->pq->user_mm);
-        }
         kfree(iovec->pages);
         iovec->pages = NULL;
         iovec->npages = 0;
@@ -1365,18 +1359,17 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
         struct user_sdma_txreq *tx =
                 container_of(txreq, struct user_sdma_txreq, txreq);
         struct user_sdma_request *req;
-        bool defer;
+        struct hfi1_user_sdma_pkt_q *pq;
+        struct hfi1_user_sdma_comp_q *cq;
+        u16 idx;
         int i;
 
         if (!tx->req)
                 return;
 
         req = tx->req;
-        /*
-         * If this is the callback for the last packet of the request,
-         * queue up the request for clean up.
-         */
-        defer = (tx->seqnum == req->info.npkts - 1);
+        pq = req->pq;
+        cq = req->cq;
 
         /*
          * If we have any io vectors associated with this txreq,
@@ -1385,87 +1378,52 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
          */
         for (i = tx->idx; i >= 0; i--) {
                 if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
-                        defer = true;
-                        break;
+                        spin_lock(&pq->iovec_lock);
+                        list_add_tail(&tx->iovecs[i].vec->list,
+                                      &pq->iovec_list);
+                        spin_unlock(&pq->iovec_lock);
                 }
         }
 
-        req->status = status;
         if (status != SDMA_TXREQ_S_OK) {
                 SDMA_DBG(req, "SDMA completion with error %d",
                          status);
                 set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
-                defer = true;
         }
 
-        /*
-         * Defer the clean up of the iovectors and the request until later
-         * so it can be done outside of interrupt context.
-         */
-        if (defer) {
-                spin_lock(&req->txcmp_lock);
-                list_add_tail(&tx->list, &req->txcmp);
-                spin_unlock(&req->txcmp_lock);
-                schedule_work(&req->worker);
-        } else {
-                kmem_cache_free(req->pq->txreq_cache, tx);
-        }
-}
-
-static void user_sdma_delayed_completion(struct work_struct *work)
-{
-        struct user_sdma_request *req =
-                container_of(work, struct user_sdma_request, worker);
-        struct hfi1_user_sdma_pkt_q *pq = req->pq;
-        struct user_sdma_txreq *tx = NULL;
-        unsigned long flags;
-        u64 seqnum;
-        int i;
-
-        while (1) {
-                spin_lock_irqsave(&req->txcmp_lock, flags);
-                if (!list_empty(&req->txcmp)) {
-                        tx = list_first_entry(&req->txcmp,
-                                              struct user_sdma_txreq, list);
-                        list_del(&tx->list);
-                }
-                spin_unlock_irqrestore(&req->txcmp_lock, flags);
-                if (!tx)
-                        break;
-
-                for (i = tx->idx; i >= 0; i--)
-                        if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
-                                unpin_vector_pages(req, tx->iovecs[i].vec);
-
-                seqnum = tx->seqnum;
-                kmem_cache_free(pq->txreq_cache, tx);
-                tx = NULL;
-
-                if (req->status != SDMA_TXREQ_S_OK) {
-                        if (seqnum == ACCESS_ONCE(req->seqnum) &&
-                            test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
-                                atomic_dec(&pq->n_reqs);
-                                set_comp_state(req, ERROR, req->status);
-                                user_sdma_free_request(req);
-                                break;
-                        }
-                } else {
-                        if (seqnum == req->info.npkts - 1) {
-                                atomic_dec(&pq->n_reqs);
-                                set_comp_state(req, COMPLETE, 0);
-                                user_sdma_free_request(req);
-                                break;
-                        }
+        req->seqcomp = tx->seqnum;
+        kmem_cache_free(pq->txreq_cache, tx);
+        tx = NULL;
+
+        idx = req->info.comp_idx;
+        if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+                if (req->seqcomp == req->info.npkts - 1) {
+                        req->status = 0;
+                        user_sdma_free_request(req, false);
+                        pq_update(pq);
+                        set_comp_state(pq, cq, idx, COMPLETE, 0);
+                }
+        } else {
+                if (status != SDMA_TXREQ_S_OK)
+                        req->status = status;
+                if (req->seqcomp == ACCESS_ONCE(req->seqnum) &&
+                    test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
+                        user_sdma_free_request(req, false);
+                        pq_update(pq);
+                        set_comp_state(pq, cq, idx, ERROR, req->status);
                 }
         }
+}
 
-        if (!atomic_read(&pq->n_reqs)) {
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
+{
+        if (atomic_dec_and_test(&pq->n_reqs)) {
                 xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
                 wake_up(&pq->wait);
         }
 }
 
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 {
         if (!list_empty(&req->txps)) {
                 struct sdma_txreq *t, *p;
@@ -1478,26 +1436,27 @@ static void user_sdma_free_request(struct user_sdma_request *req)
                         kmem_cache_free(req->pq->txreq_cache, tx);
                 }
         }
-        if (req->data_iovs) {
+        if (req->data_iovs && unpin) {
                 int i;
 
                 for (i = 0; i < req->data_iovs; i++)
                         if (req->iovs[i].npages && req->iovs[i].pages)
-                                unpin_vector_pages(req, &req->iovs[i]);
+                                unpin_vector_pages(&req->iovs[i]);
         }
         kfree(req->tids);
         clear_bit(SDMA_REQ_IN_USE, &req->flags);
 }
 
-static inline void set_comp_state(struct user_sdma_request *req,
-                                  enum hfi1_sdma_comp_state state,
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+                                  struct hfi1_user_sdma_comp_q *cq,
+                                  u16 idx, enum hfi1_sdma_comp_state state,
                                   int ret)
 {
-        SDMA_DBG(req, "Setting completion status %u %d", state, ret);
-        req->cq->comps[req->info.comp_idx].status = state;
+        hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
                  pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+        cq->comps[idx].status = state;
         if (state == ERROR)
-                req->cq->comps[req->info.comp_idx].errcode = -ret;
-        trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
-                                        req->pq->subctxt, req->info.comp_idx,
-                                        state, ret);
+                cq->comps[idx].errcode = -ret;
+        trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
                                        idx, state, ret);
 }
@@ -69,7 +69,8 @@ struct hfi1_user_sdma_pkt_q {
         struct iowait busy;
         unsigned state;
         wait_queue_head_t wait;
-        struct mm_struct *user_mm;
+        struct list_head iovec_list;
+        spinlock_t iovec_lock; /* protect iovec_list */
 };
 
 struct hfi1_user_sdma_comp_q {