Commit 3cae32b4 authored by Paolo Abeni

Merge branch 'separate-smc-parameter-settings-from-tcp-sysctls'

Wen Gu says:

====================
Separate SMC parameter settings from TCP sysctls

SMC shares some sysctls with TCP, but considering the difference
between these two protocols, it may not be very suitable for SMC
to reuse TCP parameter settings in some cases, such as keepalive
time or buffer size.

So this patch set aims to introduce some SMC specific sysctls to
independently and flexibly set the parameters that suit SMC.
====================

Link: https://lore.kernel.org/r/1663667542-119851-1-git-send-email-guwen@linux.alibaba.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 9b17dbd9 0227f058
...@@ -34,3 +34,28 @@ smcr_buf_type - INTEGER ...@@ -34,3 +34,28 @@ smcr_buf_type - INTEGER
- 1 - Use virtually contiguous buffers - 1 - Use virtually contiguous buffers
- 2 - Mixed use of the two types. Try physically contiguous buffers first. - 2 - Mixed use of the two types. Try physically contiguous buffers first.
If not available, use virtually contiguous buffers then. If not available, use virtually contiguous buffers then.
smcr_testlink_time - INTEGER
How frequently an SMC-R link sends out TEST_LINK LLC messages to confirm
its viability, after the last activity of the connections on it. A value of 0
disables TEST_LINK.
Default: 30 seconds.
wmem - INTEGER
Initial size of send buffer used by SMC sockets.
The default value is inherited from net.ipv4.tcp_wmem[1].
The minimum value is 16KiB. There is no hard limit on the maximum value, but
at most 512KiB is allowed for SMC-R and 1MiB for SMC-D.
Default: 16K
rmem - INTEGER
Initial size of receive buffer (RMB) used by SMC sockets.
The default value is inherited from net.ipv4.tcp_rmem[1].
The minimum value is 16KiB. There is no hard limit on the maximum value, but
at most 512KiB is allowed for SMC-R and 1MiB for SMC-D.
Default: 128K
...@@ -19,5 +19,8 @@ struct netns_smc { ...@@ -19,5 +19,8 @@ struct netns_smc {
#endif #endif
unsigned int sysctl_autocorking_size; unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type; unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
}; };
#endif #endif
...@@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, ...@@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT; sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct; sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol; sk->sk_protocol = protocol;
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk); smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work); INIT_WORK(&smc->connect_work, smc_connect_work);
...@@ -3253,9 +3255,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, ...@@ -3253,9 +3255,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
smc->clcsock = clcsock; smc->clcsock = clcsock;
} }
smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
out: out:
return rc; return rc;
} }
......
...@@ -2307,10 +2307,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) ...@@ -2307,10 +2307,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */ /* use socket recv buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_rcvbuf / 2; sk_buf_size = smc->sk.sk_rcvbuf;
else else
/* use socket send buffer size (w/o overhead) as start value */ /* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2; sk_buf_size = smc->sk.sk_sndbuf;
for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) { bufsize_short >= 0; bufsize_short--) {
...@@ -2369,7 +2369,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) ...@@ -2369,7 +2369,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) { if (is_rmb) {
conn->rmb_desc = buf_desc; conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short; conn->rmbe_size_short = bufsize_short;
smc->sk.sk_rcvbuf = bufsize * 2; smc->sk.sk_rcvbuf = bufsize;
atomic_set(&conn->bytes_to_rcv, 0); atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit = conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len); smc_rmb_wnd_update_limit(buf_desc->len);
...@@ -2377,7 +2377,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) ...@@ -2377,7 +2377,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else { } else {
conn->sndbuf_desc = buf_desc; conn->sndbuf_desc = buf_desc;
smc->sk.sk_sndbuf = bufsize * 2; smc->sk.sk_sndbuf = bufsize;
atomic_set(&conn->sndbuf_space, bufsize); atomic_set(&conn->sndbuf_space, bufsize);
} }
return 0; return 0;
......
...@@ -2127,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) ...@@ -2127,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter); init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex); mutex_init(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
} }
/* called after lgr was removed from lgr_list */ /* called after lgr was removed from lgr_list */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
#define SMC_LLC_WAIT_TIME (2 * HZ) #define SMC_LLC_WAIT_TIME (2 * HZ)
#define SMC_LLC_TESTLINK_DEFAULT_TIME (30 * HZ)
enum smc_llc_reqresp { enum smc_llc_reqresp {
SMC_LLC_REQ, SMC_LLC_REQ,
......
...@@ -16,8 +16,12 @@ ...@@ -16,8 +16,12 @@
#include "smc.h" #include "smc.h"
#include "smc_core.h" #include "smc_core.h"
#include "smc_llc.h"
#include "smc_sysctl.h" #include "smc_sysctl.h"
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
static struct ctl_table smc_table[] = { static struct ctl_table smc_table[] = {
{ {
.procname = "autocorking_size", .procname = "autocorking_size",
...@@ -35,6 +39,29 @@ static struct ctl_table smc_table[] = { ...@@ -35,6 +39,29 @@ static struct ctl_table smc_table[] = {
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO, .extra2 = SYSCTL_TWO,
}, },
{
.procname = "smcr_testlink_time",
.data = &init_net.smc.sysctl_smcr_testlink_time,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "wmem",
.data = &init_net.smc.sysctl_wmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
},
{
.procname = "rmem",
.data = &init_net.smc.sysctl_rmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
},
{ } { }
}; };
...@@ -60,6 +87,9 @@ int __net_init smc_sysctl_net_init(struct net *net) ...@@ -60,6 +87,9 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment