Commit b8fa4100 authored by David S. Miller

Merge branch 'tcp_tsoffset'

Andrey Vagin says:

====================
If a TCP socket is live-migrated from one box to another, the
timestamps (which are typically ON) get screwed up -- the new
kernel will generate TS values that have nothing to do with what they
were at dump time. The solution is to yet again fix the kernel and put
a "timestamp offset" on a socket.

The socket offset is applied in places where the externally visible TCP
timestamp option is parsed/initialized.

Connections in the SYN_RECV state are not supported; the global
tcp_time_stamp is used for them, because repair mode doesn't support
this state. In the future this can be implemented in a similar way as
for TIME_WAIT sockets.

For time-wait sockets the offset is inherited from the corresponding tcp_sock.

A per-socket offset can be set only for sockets in repair mode.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d0023f82 ee684b6f
...@@ -162,6 +162,8 @@ struct tcp_sock { ...@@ -162,6 +162,8 @@ struct tcp_sock {
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
u32 tsoffset; /* timestamp offset */
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
unsigned long tsq_flags; unsigned long tsq_flags;
...@@ -353,6 +355,7 @@ struct tcp_timewait_sock { ...@@ -353,6 +355,7 @@ struct tcp_timewait_sock {
u32 tw_rcv_nxt; u32 tw_rcv_nxt;
u32 tw_snd_nxt; u32 tw_snd_nxt;
u32 tw_rcv_wnd; u32 tw_rcv_wnd;
u32 tw_ts_offset;
u32 tw_ts_recent; u32 tw_ts_recent;
long tw_ts_recent_stamp; long tw_ts_recent_stamp;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
......
...@@ -111,6 +111,7 @@ enum { ...@@ -111,6 +111,7 @@ enum {
#define TCP_QUEUE_SEQ 21 #define TCP_QUEUE_SEQ 21
#define TCP_REPAIR_OPTIONS 22 #define TCP_REPAIR_OPTIONS 22
#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
#define TCP_TIMESTAMP 24
struct tcp_repair_opt { struct tcp_repair_opt {
__u32 opt_code; __u32 opt_code;
......
...@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk) ...@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
tcp_enable_early_retrans(tp); tcp_enable_early_retrans(tp);
icsk->icsk_ca_ops = &tcp_init_congestion_ops; icsk->icsk_ca_ops = &tcp_init_congestion_ops;
tp->tsoffset = 0;
sk->sk_state = TCP_CLOSE; sk->sk_state = TCP_CLOSE;
sk->sk_write_space = sk_stream_write_space; sk->sk_write_space = sk_stream_write_space;
...@@ -2712,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, ...@@ -2712,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else else
err = -EINVAL; err = -EINVAL;
break; break;
case TCP_TIMESTAMP:
if (!tp->repair)
err = -EPERM;
else
tp->tsoffset = val - tcp_time_stamp;
break;
default: default:
err = -ENOPROTOOPT; err = -ENOPROTOOPT;
break; break;
...@@ -2960,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, ...@@ -2960,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_USER_TIMEOUT: case TCP_USER_TIMEOUT:
val = jiffies_to_msecs(icsk->icsk_user_timeout); val = jiffies_to_msecs(icsk->icsk_user_timeout);
break; break;
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
default: default:
return -ENOPROTOOPT; return -ENOPROTOOPT;
} }
......
...@@ -3860,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ...@@ -3860,7 +3860,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
++ptr; ++ptr;
tp->rx_opt.rcv_tsval = ntohl(*ptr); tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr; ++ptr;
tp->rx_opt.rcv_tsecr = ntohl(*ptr); tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
return true; return true;
} }
return false; return false;
...@@ -3884,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, ...@@ -3884,7 +3884,11 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
if (tcp_parse_aligned_timestamp(tp, th)) if (tcp_parse_aligned_timestamp(tp, th))
return true; return true;
} }
tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
if (tp->rx_opt.saw_tstamp)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
return true; return true;
} }
...@@ -5665,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5665,6 +5669,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp; int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
if (tp->rx_opt.saw_tstamp)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
if (th->ack) { if (th->ack) {
/* rfc793: /* rfc793:
......
...@@ -726,7 +726,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ...@@ -726,7 +726,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
*/ */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts, int oif, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, struct tcp_md5sig_key *key,
int reply_flags, u8 tos) int reply_flags, u8 tos)
{ {
...@@ -747,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ...@@ -747,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th); arg.iov[0].iov_len = sizeof(rep.th);
if (ts) { if (tsecr) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | (TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP); TCPOLEN_TIMESTAMP);
rep.opt[1] = htonl(tcp_time_stamp); rep.opt[1] = htonl(tsval);
rep.opt[2] = htonl(ts); rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
} }
...@@ -767,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ...@@ -767,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
if (key) { if (key) {
int offset = (ts) ? 3 : 0; int offset = (tsecr) ? 3 : 0;
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) | (TCPOPT_NOP << 16) |
...@@ -802,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) ...@@ -802,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tcptw->tw_ts_recent,
tw->tw_bound_dev_if, tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw), tcp_twsk_md5_key(tcptw),
...@@ -821,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, ...@@ -821,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt, req->rcv_wnd, tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
tcp_time_stamp,
req->ts_recent, req->ts_recent,
0, 0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
......
...@@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -102,6 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) { if (tmp_opt.saw_tstamp) {
tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent = tcptw->tw_ts_recent;
tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst); paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
...@@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) ...@@ -288,6 +289,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_rcv_wnd = tcp_receive_window(tp); tcptw->tw_rcv_wnd = tcp_receive_window(tp);
tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) { if (tw->tw_family == PF_INET6) {
...@@ -499,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -499,6 +501,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.ts_recent_stamp = 0; newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr); newtp->tcp_header_len = sizeof(struct tcphdr);
} }
newtp->tsoffset = 0;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/ newtp->md5sig_info = NULL; /*XXX*/
if (newtp->af_specific->md5_lookup(sk, newsk)) if (newtp->af_specific->md5_lookup(sk, newsk))
......
...@@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, ...@@ -622,7 +622,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
opts->options |= OPTION_TS; opts->options |= OPTION_TS;
opts->tsval = TCP_SKB_CB(skb)->when; opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent; opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED; remaining -= TCPOLEN_TSTAMP_ALIGNED;
} }
...@@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb ...@@ -806,7 +806,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
if (likely(tp->rx_opt.tstamp_ok)) { if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS; opts->options |= OPTION_TS;
opts->tsval = tcb ? tcb->when : 0; opts->tsval = tcb ? tcb->when + tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent; opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED; size += TCPOLEN_TSTAMP_ALIGNED;
} }
......
...@@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { ...@@ -713,7 +713,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
#endif #endif
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) u32 tsval, u32 tsecr,
struct tcp_md5sig_key *key, int rst, u8 tclass)
{ {
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1; struct tcphdr *t1;
...@@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, ...@@ -725,7 +726,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
struct dst_entry *dst; struct dst_entry *dst;
__be32 *topt; __be32 *topt;
if (ts) if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED; tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
if (key) if (key)
...@@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, ...@@ -755,11 +756,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
topt = (__be32 *)(t1 + 1); topt = (__be32 *)(t1 + 1);
if (ts) { if (tsecr) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
*topt++ = htonl(tcp_time_stamp); *topt++ = htonl(tsval);
*topt++ = htonl(ts); *topt++ = htonl(tsecr);
} }
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
...@@ -860,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ...@@ -860,7 +861,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2); (th->doff << 2);
tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
release_sk1: release_sk1:
...@@ -871,10 +872,11 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ...@@ -871,10 +872,11 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
#endif #endif
} }
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr,
struct tcp_md5sig_key *key, u8 tclass) struct tcp_md5sig_key *key, u8 tclass)
{ {
tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
} }
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
...@@ -884,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) ...@@ -884,6 +886,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
tw->tw_tclass); tw->tw_tclass);
...@@ -893,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) ...@@ -893,7 +896,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req) struct request_sock *req)
{ {
tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
req->rcv_wnd, tcp_time_stamp, req->ts_recent,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment