Commit 2ece9ec6 authored by Jack Wang, committed by Jason Gunthorpe

RDMA/rtrs-clt: Write path fast memory registration

With fast memory registration in the write path, we can reduce memory
consumption by using a smaller max_send_sge, support IO larger than
116 KB (29 segments * 4 KB) without splitting, and make the IO path
more symmetric.
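
To make the segment arithmetic concrete, here is an illustrative sketch
(not part of the patch; the helper name and the max_send_sge = 30 figure
are assumptions for the example). Without fast registration each
scatterlist segment needs its own send SGE, plus one SGE for the trailing
rtrs user message, so 29 data segments of 4 KB cap a write at 116 KB.
With fast registration the whole payload collapses into a single MR SGE:

#include <stdbool.h>

/* Hypothetical helper mirroring the num_sge logic in the diff below:
 * SGEs consumed by one write request.
 */
static unsigned int write_sges_needed(bool fr_en, unsigned int sg_cnt)
{
	if (fr_en)
		return 2;	/* one fast-reg MR SGE + user-message SGE */
	return sg_cnt + 1;	/* one SGE per segment + user-message SGE */
}

/* write_sges_needed(false, 29) == 30, i.e. at most 29 * 4 KB = 116 KB per
 * IO on a QP sized for 30 SGEs; write_sges_needed(true, n) == 2 for any n,
 * so the IO size no longer depends on max_send_sge.
 */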

To avoid occasional MR registration failures, wait for the invalidation
to finish before registering the new MR. Introduce a refcount and only
finish the request when both the local invalidation and the IO reply
have completed.
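
As a hedged sketch of that lifetime rule (simplified names; only the
refcount pattern, not the real rtrs request handling):

#include <linux/refcount.h>

/* Simplified sketch of the request lifetime this patch introduces. */
struct sketch_req {
	refcount_t ref;
};

static void sketch_init_req(struct sketch_req *req)
{
	/* base reference, dropped on the IO reply path */
	refcount_set(&req->ref, 1);
}

static void sketch_post_local_inv(struct sketch_req *req)
{
	/* second reference, dropped by the LOCAL_INV completion */
	refcount_inc(&req->ref);
	/* ...post the signalled IB_WR_LOCAL_INV work request here... */
}

/* Called from both completion paths; only the caller that drops the
 * last reference may unmap the scatterlist and complete the request.
 */
static bool sketch_req_put(struct sketch_req *req)
{
	return refcount_dec_and_test(&req->ref);
}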

Link: https://lore.kernel.org/r/20210621055340.11789-3-jinpu.wang@ionos.com
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Dima Stepanov <dmitrii.stepanov@ionos.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 630e438f
@@ -412,6 +412,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 				req->inv_errno = errno;
 			}
 
+			refcount_inc(&req->ref);
 			err = rtrs_inv_rkey(req);
 			if (unlikely(err)) {
 				rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n",
@@ -427,10 +428,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 
 				return;
 			}
+			if (!refcount_dec_and_test(&req->ref))
+				return;
 		}
 		ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
 				req->sg_cnt, req->dir);
 	}
+	if (!refcount_dec_and_test(&req->ref))
+		return;
 	if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
 		atomic_dec(&sess->stats->inflight);
@@ -438,10 +443,9 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno,
 	req->con = NULL;
 
 	if (errno) {
-		rtrs_err_rl(con->c.sess,
-			    "IO request failed: error=%d path=%s [%s:%u]\n",
-			    errno, kobject_name(&sess->kobj), sess->hca_name,
-			    sess->hca_port);
+		rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n",
+			    errno, kobject_name(&sess->kobj), sess->hca_name,
+			    sess->hca_port, notify);
 	}
 
 	if (notify)
@@ -956,6 +960,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req,
 	req->need_inv = false;
 	req->need_inv_comp = false;
 	req->inv_errno = 0;
+	refcount_set(&req->ref, 1);
 
 	iov_iter_kvec(&iter, READ, vec, 1, usr_len);
 	len = _copy_from_iter(req->iu->buf, usr_len, &iter);
@@ -1000,7 +1005,7 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess,
 
 static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
 				   struct rtrs_clt_io_req *req,
-				   struct rtrs_rbuf *rbuf,
+				   struct rtrs_rbuf *rbuf, bool fr_en,
 				   u32 size, u32 imm, struct ib_send_wr *wr,
 				   struct ib_send_wr *tail)
 {
@@ -1012,17 +1017,26 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
 	int i;
 	struct ib_send_wr *ptail = NULL;
 
-	for_each_sg(req->sglist, sg, req->sg_cnt, i) {
-		sge[i].addr = sg_dma_address(sg);
-		sge[i].length = sg_dma_len(sg);
-		sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
-	}
+	if (fr_en) {
+		i = 0;
+		sge[i].addr = req->mr->iova;
+		sge[i].length = req->mr->length;
+		sge[i].lkey = req->mr->lkey;
+		i++;
+		num_sge = 2;
+		ptail = tail;
+	} else {
+		for_each_sg(req->sglist, sg, req->sg_cnt, i) {
+			sge[i].addr = sg_dma_address(sg);
+			sge[i].length = sg_dma_len(sg);
+			sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
+		}
+		num_sge = 1 + req->sg_cnt;
+	}
 	sge[i].addr = req->iu->dma_addr;
 	sge[i].length = size;
 	sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey;
 
-	num_sge = 1 + req->sg_cnt;
-
 	/*
 	 * From time to time we have to post signalled sends,
 	 * or send queue will fill up and only QP reset can help.
@@ -1038,6 +1052,21 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
 				    flags, wr, ptail);
 }
 
+static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
+{
+	int nr;
+
+	/* Align the MR to a 4K page size to match the block virt boundary */
+	nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
+	if (nr < 0)
+		return nr;
+	if (unlikely(nr < req->sg_cnt))
+		return -EINVAL;
+	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
+
+	return nr;
+}
+
 static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 {
 	struct rtrs_clt_con *con = req->con;
@@ -1048,6 +1077,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 	struct rtrs_rbuf *rbuf;
 	int ret, count = 0;
 	u32 imm, buf_id;
+	struct ib_reg_wr rwr;
+	struct ib_send_wr inv_wr;
+	struct ib_send_wr *wr = NULL;
+	bool fr_en = false;
 
 	const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len;
@@ -1076,15 +1109,43 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 	req->sg_size = tsize;
 	rbuf = &sess->rbufs[buf_id];
 
+	if (count) {
+		ret = rtrs_map_sg_fr(req, count);
+		if (ret < 0) {
+			rtrs_err_rl(s,
+				    "Write request failed, failed to map fast reg. data, err: %d\n",
+				    ret);
+			ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist,
+					req->sg_cnt, req->dir);
+			return ret;
+		}
+		inv_wr = (struct ib_send_wr) {
+			.opcode		    = IB_WR_LOCAL_INV,
+			.wr_cqe		    = &req->inv_cqe,
+			.send_flags	    = IB_SEND_SIGNALED,
+			.ex.invalidate_rkey = req->mr->rkey,
+		};
+		req->inv_cqe.done = rtrs_clt_inv_rkey_done;
+		rwr = (struct ib_reg_wr) {
+			.wr.opcode = IB_WR_REG_MR,
+			.wr.wr_cqe = &fast_reg_cqe,
+			.mr = req->mr,
+			.key = req->mr->rkey,
+			.access = (IB_ACCESS_LOCAL_WRITE),
+		};
+		wr = &rwr.wr;
+		fr_en = true;
+		refcount_inc(&req->ref);
+	}
+
 	/*
 	 * Update stats now, after request is successfully sent it is not
 	 * safe anymore to touch it.
 	 */
 	rtrs_clt_update_all_stats(req, WRITE);
 
-	ret = rtrs_post_rdma_write_sg(req->con, req, rbuf,
+	ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en,
 				      req->usr_len + sizeof(*msg),
-				      imm, NULL, NULL);
+				      imm, wr, &inv_wr);
 	if (unlikely(ret)) {
 		rtrs_err_rl(s,
 			    "Write request failed: error=%d path=%s [%s:%u]\n",
@@ -1100,21 +1161,6 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
 	return ret;
 }
 
-static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
-{
-	int nr;
-
-	/* Align the MR to a 4K page size to match the block virt boundary */
-	nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
-	if (nr < 0)
-		return nr;
-	if (unlikely(nr < req->sg_cnt))
-		return -EINVAL;
-	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
-
-	return nr;
-}
-
 static int rtrs_clt_read_req(struct rtrs_clt_io_req *req)
 {
 	struct rtrs_clt_con *con = req->con;
...
@@ -116,6 +116,7 @@ struct rtrs_clt_io_req {
 	int inv_errno;
 	bool need_inv_comp;
 	bool need_inv;
+	refcount_t ref;
 };
 
 struct rtrs_rbuf {
...