Commit 68a0bd67 authored by Jakub Kicinski

Merge branch 'net-smc-send-and-write-inline-optimization-for-smc'

Guangguan Wang says:

====================
net/smc: send and write inline optimization for smc

Send CDC msgs and write data inline if the QP has sufficient inline
space, which helps reduce latency.

In my test environment, 2 VMs running on the same physical host with
NICs (ConnectX-4Lx) working in SR-IOV mode, qperf shows a 0.4us-1.3us
improvement in latency.

Test command:
server: smc_run taskset -c 1 qperf
client: smc_run taskset -c 1 qperf <server ip> -oo \
		msg_size:1:2K:*2 -t 30 -vu tcp_lat

The results are shown below:
msgsize     before       after
1B          11.9 us      10.6 us (-1.3 us)
2B          11.7 us      10.7 us (-1.0 us)
4B          11.7 us      10.7 us (-1.0 us)
8B          11.6 us      10.6 us (-1.0 us)
16B         11.7 us      10.7 us (-1.0 us)
32B         11.7 us      10.6 us (-1.1 us)
64B         11.7 us      11.2 us (-0.5 us)
128B        11.6 us      11.2 us (-0.4 us)
256B        11.8 us      11.2 us (-0.6 us)
512B        11.8 us      11.3 us (-0.5 us)
1KB         11.9 us      11.5 us (-0.4 us)
2KB         12.1 us      11.5 us (-0.6 us)
====================
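
For readers unfamiliar with the verbs API, the sketch below (not part of
this series; names are illustrative) shows what an inline send looks like
in kernel verbs: with IB_SEND_INLINE set, the provider copies the payload
into the work queue entry at post time, so the buffer needs no DMA mapping
and the SGE's lkey is ignored, which is what saves the extra DMA read for
small payloads.

#include <rdma/ib_verbs.h>

static int post_small_send_inline(struct ib_qp *qp, void *buf, u32 len)
{
	struct ib_sge sge = {
		.addr   = (uintptr_t)buf, /* CPU virtual address, not a DMA address */
		.length = len,
		/* .lkey is not used for inline sends */
	};
	struct ib_send_wr wr = {
		.opcode     = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED | IB_SEND_INLINE,
		.sg_list    = &sge,
		.num_sge    = 1,
	};

	/* only valid while len fits the QP's negotiated max_inline_data */
	return ib_post_send(qp, &wr, NULL);
}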

Link: https://lore.kernel.org/r/20220516055137.51873-1-guangguan.wang@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 65a9dedc 793a7df6

net/smc/smc_ib.c
@@ -671,6 +671,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 			.max_recv_wr = SMC_WR_BUF_CNT * 3,
 			.max_send_sge = SMC_IB_MAX_SEND_SGE,
 			.max_recv_sge = sges_per_buf,
+			.max_inline_data = 0,
 		},
 		.sq_sig_type = IB_SIGNAL_REQ_WR,
 		.qp_type = IB_QPT_RC,
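
Note that the QP is created with max_inline_data = 0; the inline capacity
the device actually grants is read back afterwards and cached in
lnk->qp_attr.cap.max_inline_data (a provider may grant more inline space
than was requested). A minimal sketch of that read-back, assuming only the
standard ib_query_qp() verb (the helper name is hypothetical):

#include <rdma/ib_verbs.h>

static u32 effective_max_inline(struct ib_qp *qp)
{
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	/* query the capabilities the provider actually granted */
	if (ib_query_qp(qp, &attr, IB_QP_CAP, &init_attr))
		return 0;	/* query failed: assume no inline support */
	return attr.cap.max_inline_data;
}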
net/smc/smc_tx.c
@@ -391,12 +391,20 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 	int rc;
 
 	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
-		struct ib_sge *sge =
-			wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+		struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
+		struct ib_sge *sge = wr->wr.sg_list;
+		u64 base_addr = dma_addr;
+
+		if (dst_len < link->qp_attr.cap.max_inline_data) {
+			base_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
+			wr->wr.send_flags |= IB_SEND_INLINE;
+		} else {
+			wr->wr.send_flags &= ~IB_SEND_INLINE;
+		}
 
 		num_sges = 0;
 		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sge[srcchunk].addr = dma_addr + src_off;
+			sge[srcchunk].addr = base_addr + src_off;
 			sge[srcchunk].length = src_len;
 			num_sges++;
 
@@ -410,8 +418,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 			src_len = dst_len - src_len; /* remainder */
 			src_len_sum += src_len;
 		}
-		rc = smc_tx_rdma_write(conn, dst_off, num_sges,
-				       &wr_rdma_buf->wr_tx_rdma[dstchunk]);
+		rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
 		if (rc)
 			return rc;
 		if (dst_len_sum == len)
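
The hunk above switches the SGE between two address spaces per chunk: a
chunk small enough to go inline is described by the send buffer's kernel
virtual address (cpu_addr), since the provider memcpys inline payloads,
while larger chunks keep the DMA address for the HCA to fetch. A
simplified sketch of that pattern (single SGE; the helper name is
hypothetical):

#include <rdma/ib_verbs.h>

static void fill_sge_for_chunk(struct ib_rdma_wr *wr, struct ib_sge *sge,
			       u64 dma_addr, void *cpu_addr,
			       u32 off, u32 len, u32 max_inline)
{
	if (len < max_inline) {
		sge->addr = (uintptr_t)cpu_addr + off;	/* provider copies from here */
		wr->wr.send_flags |= IB_SEND_INLINE;
	} else {
		sge->addr = dma_addr + off;		/* HCA DMAs from here */
		wr->wr.send_flags &= ~IB_SEND_INLINE;
	}
	sge->length = len;
}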
net/smc/smc_wr.c
@@ -554,10 +554,11 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
 static void smc_wr_init_sge(struct smc_link *lnk)
 {
 	int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
+	bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
 	u32 i;
 
 	for (i = 0; i < lnk->wr_tx_cnt; i++) {
-		lnk->wr_tx_sges[i].addr =
+		lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) :
 			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
 		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
 		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
@@ -575,6 +576,8 @@ static void smc_wr_init_sge(struct smc_link *lnk)
 		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
 		lnk->wr_tx_ibs[i].send_flags =
 			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+		if (send_inline)
+			lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE;
 		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
 		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
 		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
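
Unlike the RDMA-write path, which decides per chunk, the CDC send path can
decide once per link: every CDC message occupies a fixed SMC_WR_TX_SIZE
bytes, so a single comparison against the QP's inline capacity at init
time fixes both the SGE address (virtual vs. DMA) and the IB_SEND_INLINE
flag for all subsequent sends. Expressed as a sketch (hypothetical helper;
assumes the net/smc internal headers):

static bool smc_link_can_send_cdc_inline(struct smc_link *lnk)
{
	/* qp_attr was filled via ib_query_qp() in smc_wr_remember_qp_attr() */
	return lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE;
}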