Commit 871da0a7 authored by David S. Miller

Merge branch 'tcp-more-perns-sysctls'

Eric Dumazet says:

====================
tcp: move 12 sysctls to namespaces

Ideally all TCP sysctls should be per netns.
This patch series takes care of 12 sysctls.

The remaining ones still need discussion:

sysctl_tcp_mem, sysctl_tcp_rmem, sysctl_tcp_wmem, and sysctl_tcp_max_orphans
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 267146d4 c26e91f8
...@@ -142,6 +142,18 @@ struct netns_ipv4 { ...@@ -142,6 +142,18 @@ struct netns_ipv4 {
int sysctl_tcp_app_win; int sysctl_tcp_app_win;
int sysctl_tcp_adv_win_scale; int sysctl_tcp_adv_win_scale;
int sysctl_tcp_frto; int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf;
int sysctl_tcp_tso_win_divisor;
int sysctl_tcp_workaround_signed_windows;
int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
int sysctl_tcp_min_tso_segs;
int sysctl_tcp_min_rtt_wlen;
int sysctl_tcp_autocorking;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
struct inet_timewait_death_row tcp_death_row; struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog; int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen; int sysctl_tcp_fastopen;
......
...@@ -247,22 +247,9 @@ extern int sysctl_tcp_max_orphans; ...@@ -247,22 +247,9 @@ extern int sysctl_tcp_max_orphans;
extern long sysctl_tcp_mem[3]; extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_workaround_signed_windows;
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_min_rtt_wlen;
extern int sysctl_tcp_autocorking;
extern int sysctl_tcp_invalid_ratelimit;
extern int sysctl_tcp_pacing_ss_ratio;
extern int sysctl_tcp_pacing_ca_ratio;
extern atomic_long_t tcp_memory_allocated; extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated; extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure; extern unsigned long tcp_memory_pressure;
...@@ -1305,7 +1292,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) ...@@ -1305,7 +1292,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
} }
/* Determine a window scaling and initial window to offer. */ /* Determine a window scaling and initial window to offer. */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, void tcp_select_initial_window(const struct sock *sk, int __space,
__u32 mss, __u32 *rcv_wnd,
__u32 *window_clamp, int wscale_ok, __u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd); __u8 *rcv_wscale, __u32 init_rcv_wnd);
......
...@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ...@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */ /* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss, tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp, &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale, ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND)); dst_metric(&rt->dst, RTAX_INITRWND));
......
...@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
.extra1 = &one, .extra1 = &one,
}, },
{
.procname = "tcp_min_rtt_wlen",
.data = &sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_low_latency", .procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency, .data = &sysctl_tcp_low_latency,
...@@ -451,54 +444,12 @@ static struct ctl_table ipv4_table[] = { ...@@ -451,54 +444,12 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_no_metrics_save",
.data = &sysctl_tcp_nometrics_save,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_moderate_rcvbuf",
.data = &sysctl_tcp_moderate_rcvbuf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_tso_win_divisor",
.data = &sysctl_tcp_tso_win_divisor,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ {
.procname = "tcp_congestion_control", .procname = "tcp_congestion_control",
.mode = 0644, .mode = 0644,
.maxlen = TCP_CA_NAME_MAX, .maxlen = TCP_CA_NAME_MAX,
.proc_handler = proc_tcp_congestion_control, .proc_handler = proc_tcp_congestion_control,
}, },
{
.procname = "tcp_workaround_signed_windows",
.data = &sysctl_tcp_workaround_signed_windows,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_limit_output_bytes",
.data = &sysctl_tcp_limit_output_bytes,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_challenge_ack_limit",
.data = &sysctl_tcp_challenge_ack_limit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_NETLABEL #ifdef CONFIG_NETLABEL
{ {
.procname = "cipso_cache_enable", .procname = "cipso_cache_enable",
...@@ -541,49 +492,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -541,49 +492,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_allowed_congestion_control, .proc_handler = proc_allowed_congestion_control,
}, },
{
.procname = "tcp_min_tso_segs",
.data = &sysctl_tcp_min_tso_segs,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &gso_max_segs,
},
{
.procname = "tcp_pacing_ss_ratio",
.data = &sysctl_tcp_pacing_ss_ratio,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &thousand,
},
{
.procname = "tcp_pacing_ca_ratio",
.data = &sysctl_tcp_pacing_ca_ratio,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &thousand,
},
{
.procname = "tcp_autocorking",
.data = &sysctl_tcp_autocorking,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "tcp_invalid_ratelimit",
.data = &sysctl_tcp_invalid_ratelimit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
{ {
.procname = "tcp_available_ulp", .procname = "tcp_available_ulp",
.maxlen = TCP_ULP_BUF_MAX, .maxlen = TCP_ULP_BUF_MAX,
...@@ -1145,6 +1053,98 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1145,6 +1053,98 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_no_metrics_save",
.data = &init_net.ipv4.sysctl_tcp_nometrics_save,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_moderate_rcvbuf",
.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_tso_win_divisor",
.data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_workaround_signed_windows",
.data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_limit_output_bytes",
.data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_challenge_ack_limit",
.data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_min_tso_segs",
.data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
.extra2 = &gso_max_segs,
},
{
.procname = "tcp_min_rtt_wlen",
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_autocorking",
.data = &init_net.ipv4.sysctl_tcp_autocorking,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "tcp_invalid_ratelimit",
.data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
{
.procname = "tcp_pacing_ss_ratio",
.data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &thousand,
},
{
.procname = "tcp_pacing_ca_ratio",
.data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &thousand,
},
{ } { }
}; };
......
...@@ -285,10 +285,6 @@ ...@@ -285,10 +285,6 @@
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1;
struct percpu_counter tcp_orphan_count; struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count); EXPORT_SYMBOL_GPL(tcp_orphan_count);
...@@ -699,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, ...@@ -699,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal) int size_goal)
{ {
return skb->len < size_goal && return skb->len < size_goal &&
sysctl_tcp_autocorking && sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
skb != tcp_write_queue_head(sk) && skb != tcp_write_queue_head(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize; refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
} }
......
...@@ -79,13 +79,7 @@ ...@@ -79,13 +79,7 @@
#include <linux/unaligned/access_ok.h> #include <linux/unaligned/access_ok.h>
#include <linux/static_key.h> #include <linux/static_key.h>
/* rfc5961 challenge ack rate limiting */
int sysctl_tcp_challenge_ack_limit = 1000;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
...@@ -411,7 +405,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) ...@@ -411,7 +405,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
* Allow enough cushion so that sender is not limited by our window * Allow enough cushion so that sender is not limited by our window
*/ */
if (sysctl_tcp_moderate_rcvbuf) if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2; rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem) if (sk->sk_rcvbuf < rcvmem)
...@@ -602,7 +596,7 @@ void tcp_rcv_space_adjust(struct sock *sk) ...@@ -602,7 +596,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... > * <prev RTT . ><current RTT .. ><next RTT .... >
*/ */
if (sysctl_tcp_moderate_rcvbuf && if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvwin, rcvmem, rcvbuf; int rcvwin, rcvmem, rcvbuf;
...@@ -773,15 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) ...@@ -773,15 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->srtt_us = max(1U, srtt); tp->srtt_us = max(1U, srtt);
} }
/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
* Note: TCP stack does not yet implement pacing.
* FQ packet scheduler can be used to implement cheap but effective
* TCP pacing, to smooth the burst on large writes when packets
* in flight is significantly lower than cwnd (or rwin)
*/
int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
static void tcp_update_pacing_rate(struct sock *sk) static void tcp_update_pacing_rate(struct sock *sk)
{ {
const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_sock *tp = tcp_sk(sk);
...@@ -799,9 +784,9 @@ static void tcp_update_pacing_rate(struct sock *sk) ...@@ -799,9 +784,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
* end of slow start and should slow down. * end of slow start and should slow down.
*/ */
if (tp->snd_cwnd < tp->snd_ssthresh / 2) if (tp->snd_cwnd < tp->snd_ssthresh / 2)
rate *= sysctl_tcp_pacing_ss_ratio; rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else else
rate *= sysctl_tcp_pacing_ca_ratio; rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out); rate *= max(tp->snd_cwnd, tp->packets_out);
...@@ -2919,8 +2904,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, ...@@ -2919,8 +2904,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{ {
u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1)); rtt_us ? : jiffies_to_usecs(1));
...@@ -3408,7 +3393,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, ...@@ -3408,7 +3393,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) { if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx); NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */ return true; /* rate-limited: don't send yet! */
} }
...@@ -3444,10 +3429,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) ...@@ -3444,10 +3429,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp; static u32 challenge_timestamp;
static unsigned int challenge_count; static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
u32 count, now; u32 count, now;
/* First check our per-socket dupack rate limit. */ /* First check our per-socket dupack rate limit. */
if (__tcp_oow_rate_limited(sock_net(sk), if (__tcp_oow_rate_limited(net,
LINUX_MIB_TCPACKSKIPPEDCHALLENGE, LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
&tp->last_oow_ack_time)) &tp->last_oow_ack_time))
return; return;
...@@ -3455,16 +3441,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) ...@@ -3455,16 +3441,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
/* Then check host-wide RFC 5961 rate limit. */ /* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ; now = jiffies / HZ;
if (now != challenge_timestamp) { if (now != challenge_timestamp) {
u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now; challenge_timestamp = now;
WRITE_ONCE(challenge_count, half + WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
prandom_u32_max(sysctl_tcp_challenge_ack_limit));
} }
count = READ_ONCE(challenge_count); count = READ_ONCE(challenge_count);
if (count > 0) { if (count > 0) {
WRITE_ONCE(challenge_count, count - 1); WRITE_ONCE(challenge_count, count - 1);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk); tcp_send_ack(sk);
} }
} }
......
...@@ -2493,6 +2493,22 @@ static int __net_init tcp_sk_init(struct net *net) ...@@ -2493,6 +2493,22 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_app_win = 31;
net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_adv_win_scale = 1;
net->ipv4.sysctl_tcp_frto = 2; net->ipv4.sysctl_tcp_frto = 2;
net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
/* Default TSQ limit of four TSO segments */
net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
/* rfc5961 challenge ack rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
net->ipv4.sysctl_tcp_autocorking = 1;
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
......
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/genetlink.h> #include <net/genetlink.h>
int sysctl_tcp_nometrics_save __read_mostly;
static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
const struct inetpeer_addr *daddr, const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash); struct net *net, unsigned int hash);
...@@ -330,7 +328,7 @@ void tcp_update_metrics(struct sock *sk) ...@@ -330,7 +328,7 @@ void tcp_update_metrics(struct sock *sk)
int m; int m;
sk_dst_confirm(sk); sk_dst_confirm(sk);
if (sysctl_tcp_nometrics_save || !dst) if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
return; return;
rcu_read_lock(); rcu_read_lock();
......
...@@ -369,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, ...@@ -369,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
full_space = rcv_wnd * mss; full_space = rcv_wnd * mss;
/* tcp_full_space because it is guaranteed to be the first packet */ /* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(full_space, tcp_select_initial_window(sk_listener, full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
&req->rsk_rcv_wnd, &req->rsk_rcv_wnd,
&req->rsk_window_clamp, &req->rsk_window_clamp,
......
...@@ -45,20 +45,6 @@ ...@@ -45,20 +45,6 @@
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
/* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity.
*/
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
/* Default TSQ limit of four TSO segments */
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp); int push_one, gfp_t gfp);
...@@ -202,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss) ...@@ -202,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
* be a multiple of mss if possible. We assume here that mss >= 1. * be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers. * This MUST be enforced by all callers.
*/ */
void tcp_select_initial_window(int __space, __u32 mss, void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp, __u32 *rcv_wnd, __u32 *window_clamp,
int wscale_ok, __u8 *rcv_wscale, int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd) __u32 init_rcv_wnd)
...@@ -226,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, ...@@ -226,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not * which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity. * misinterpreting the window field as a signed quantity.
*/ */
if (sysctl_tcp_workaround_signed_windows) if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW); (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else else
(*rcv_wnd) = space; (*rcv_wnd) = space;
...@@ -286,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk) ...@@ -286,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
/* Make sure we do not exceed the maximum possible /* Make sure we do not exceed the maximum possible
* scaled window. * scaled window.
*/ */
if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) if (!tp->rx_opt.rcv_wscale &&
sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW); new_win = min(new_win, MAX_TCP_WINDOW);
else else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
...@@ -1771,7 +1758,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) ...@@ -1771,7 +1758,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
return tso_segs ? : return tso_segs ? :
tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); tcp_tso_autosize(sk, mss_now,
sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
} }
/* Returns the portion of skb which can be sent right away */ /* Returns the portion of skb which can be sent right away */
...@@ -1988,7 +1976,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, ...@@ -1988,7 +1976,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now; goto send_now;
win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) { if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
...@@ -2225,7 +2213,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, ...@@ -2225,7 +2213,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
unsigned int limit; unsigned int limit;
limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit = min_t(u32, limit,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor; limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) { if (refcount_read(&sk->sk_wmem_alloc) > limit) {
...@@ -3355,7 +3344,7 @@ static void tcp_connect_init(struct sock *sk) ...@@ -3355,7 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
if (rcv_wnd == 0) if (rcv_wnd == 0)
rcv_wnd = dst_metric(dst, RTAX_INITRWND); rcv_wnd = dst_metric(dst, RTAX_INITRWND);
tcp_select_initial_window(tcp_full_space(sk), tcp_select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd, &tp->rcv_wnd,
&tp->window_clamp, &tp->window_clamp,
......
...@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ...@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
} }
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss, tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp, &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale, ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND)); dst_metric(dst, RTAX_INITRWND));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment