Commit 356d1833 authored by Eric Dumazet, committed by David S. Miller

tcp: Namespace-ify sysctl_tcp_rmem and sysctl_tcp_wmem

Note that when a new netns is created, it inherits its
sysctl_tcp_rmem and sysctl_tcp_wmem values from the initial netns.

This change is needed so that we can refine TCP rcvbuf autotuning,
to take RTT into consideration.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a3dcaf17
...@@ -155,6 +155,8 @@ struct netns_ipv4 { ...@@ -155,6 +155,8 @@ struct netns_ipv4 {
int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_pacing_ca_ratio;
int sysctl_tcp_wmem[3];
int sysctl_tcp_rmem[3];
struct inet_timewait_death_row tcp_death_row; struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog; int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen; int sysctl_tcp_fastopen;
......
...@@ -242,8 +242,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -242,8 +242,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* sysctl variables for tcp */ /* sysctl variables for tcp */
extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_max_orphans;
extern long sysctl_tcp_mem[3]; extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
......
...@@ -440,22 +440,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -440,22 +440,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_doulongvec_minmax, .proc_handler = proc_doulongvec_minmax,
}, },
{
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{ {
.procname = "tcp_low_latency", .procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency, .data = &sysctl_tcp_low_latency,
...@@ -1164,6 +1148,22 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1164,6 +1148,22 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero, .extra1 = &zero,
.extra2 = &thousand, .extra2 = &thousand,
}, },
{
.procname = "tcp_wmem",
.data = &init_net.ipv4.sysctl_tcp_wmem,
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_rmem",
.data = &init_net.ipv4.sysctl_tcp_rmem,
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{ } { }
}; };
......
...@@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count; ...@@ -289,12 +289,7 @@ struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count); EXPORT_SYMBOL_GPL(tcp_orphan_count);
long sysctl_tcp_mem[3] __read_mostly; long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated); EXPORT_SYMBOL(tcp_memory_allocated);
...@@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk) ...@@ -456,8 +451,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss; icsk->icsk_sync_mss = tcp_sync_mss;
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk); sk_sockets_allocated_inc(sk);
} }
...@@ -3636,13 +3631,13 @@ void __init tcp_init(void) ...@@ -3636,13 +3631,13 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit); max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_wmem[1] = 16*1024; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
sysctl_tcp_wmem[2] = max(64*1024, max_wshare); init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
sysctl_tcp_rmem[1] = 87380; init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
sysctl_tcp_rmem[2] = max(87380, max_rshare); init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
pr_info("Hash tables configured (established %u bind %u)\n", pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
......
...@@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk) ...@@ -320,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss; sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem) if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
} }
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh) /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
...@@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) ...@@ -354,7 +354,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */ /* Optimize this! */
int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1; int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) { while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len) if (truesize <= skb->len)
...@@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) ...@@ -409,7 +409,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
rcvmem <<= 2; rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem) if (sk->sk_rcvbuf < rcvmem)
sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
} }
/* 4. Try to fixup all. It is made immediately after connection enters /* 4. Try to fixup all. It is made immediately after connection enters
...@@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk) ...@@ -457,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
icsk->icsk_ack.quick = 0; icsk->icsk_ack.quick = 0;
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) && !tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]); net->ipv4.sysctl_tcp_rmem[2]);
} }
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
...@@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -623,7 +624,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
while (tcp_win_from_space(sk, rcvmem) < tp->advmss) while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128; rcvmem += 128;
rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); rcvbuf = min(rcvwin / tp->advmss * rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) { if (rcvbuf > sk->sk_rcvbuf) {
sk->sk_rcvbuf = rcvbuf; sk->sk_rcvbuf = rcvbuf;
......
...@@ -2409,8 +2409,8 @@ struct proto tcp_prot = { ...@@ -2409,8 +2409,8 @@ struct proto tcp_prot = {
.memory_allocated = &tcp_memory_allocated, .memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure, .memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem, .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem = sysctl_tcp_rmem, .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER, .max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock), .obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU, .slab_flags = SLAB_TYPESAFE_BY_RCU,
...@@ -2509,7 +2509,14 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2509,7 +2509,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
if (net != &init_net) {
memcpy(net->ipv4.sysctl_tcp_rmem,
init_net.ipv4.sysctl_tcp_rmem,
sizeof(init_net.ipv4.sysctl_tcp_rmem));
memcpy(net->ipv4.sysctl_tcp_wmem,
init_net.ipv4.sysctl_tcp_wmem,
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
......
...@@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, ...@@ -220,7 +220,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale) = 0; (*rcv_wscale) = 0;
if (wscale_ok) { if (wscale_ok) {
/* Set window scaling on max possible window */ /* Set window scaling on max possible window */
space = max_t(u32, space, sysctl_tcp_rmem[2]); space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max); space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp); space = min_t(u32, space, *window_clamp);
while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
......
...@@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = { ...@@ -1940,8 +1940,8 @@ struct proto tcpv6_prot = {
.memory_pressure = &tcp_memory_pressure, .memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count, .orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem, .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem = sysctl_tcp_rmem, .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER, .max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock), .obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU, .slab_flags = SLAB_TYPESAFE_BY_RCU,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment