Commit 5532946e authored by David S. Miller

Merge branch 'tcp-fix-handling-of-stale-syncookies-timestamps'

Guillaume Nault says:

====================
tcp: fix handling of stale syncookies timestamps

The synflood timestamps (->ts_recent_stamp and ->synq_overflow_ts) are
only refreshed when the syncookie protection triggers. Therefore, their
value can become very far apart from jiffies if no synflood happens for
a long time.

If jiffies grows too much and wraps while the synflood timestamp isn't
refreshed, then time_after32() might consider the latter to be in the
future. This can trick tcp_synq_no_recent_overflow() into returning
erroneous values and rejecting valid ACKs.

Patch 1 handles the case of ACKs using legitimate syncookies.
Patch 2 handles the case of stray ACKs.
Patch 3 annotates lockless timestamp operations with READ_ONCE() and
WRITE_ONCE().

Changes from v3:
  - Fix description of time_between32() (found by Eric Dumazet).
  - Use more accurate Fixes tag in patch 3 (suggested by Eric Dumazet).

Changes from v2:
  - Define and use time_between32() instead of a pair of
    time_before32/time_after32 (suggested by Eric Dumazet).
  - Use 'last_overflow - HZ' as lower bound in
    tcp_synq_no_recent_overflow(), to accommodate concurrent
    timestamp updates (found by Eric Dumazet).
  - Add a third patch to annotate lockless accesses to .ts_recent_stamp.

Changes from v1:
  - Initialising timestamps at socket creation time is not enough
    because jiffies wraps in 24 days with HZ=1000 (Eric Dumazet).
    Handle stale timestamps in tcp_synq_overflow() and
    tcp_synq_no_recent_overflow() instead.
  - Rework commit description.
  - Add a second patch to handle the case of stray ACKs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 537d0779 721c8daf
...@@ -97,4 +97,17 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) ...@@ -97,4 +97,17 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its)
*/ */
#define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) #define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0)
#define time_before32(b, a) time_after32(a, b) #define time_before32(b, a) time_after32(a, b)
/**
 * time_between32 - test whether a 32-bit timestamp lies inside a closed range
 * @t: the timestamp being tested against [l,h]
 * @l: the lower bound of the range
 * @h: the upper bound of the range
 *
 * time_between32(t, l, h) returns true if @l <= @t <= @h. All operands are
 * treated as 32-bit integers: both sides of the comparison are distances
 * from @l, so the check is made on offsets rather than on absolute values.
 *
 * When the range [l,h] spans less than 2^31, this is equivalent to
 * !(time_before32(@t, @l) || time_after32(@t, @h)).
 */
#define time_between32(t, l, h) ((u32)(t) - (u32)(l) <= (u32)(h) - (u32)(l))
#endif #endif
...@@ -494,15 +494,16 @@ static inline void tcp_synq_overflow(const struct sock *sk) ...@@ -494,15 +494,16 @@ static inline void tcp_synq_overflow(const struct sock *sk)
reuse = rcu_dereference(sk->sk_reuseport_cb); reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) { if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts); last_overflow = READ_ONCE(reuse->synq_overflow_ts);
if (time_after32(now, last_overflow + HZ)) if (!time_between32(now, last_overflow,
last_overflow + HZ))
WRITE_ONCE(reuse->synq_overflow_ts, now); WRITE_ONCE(reuse->synq_overflow_ts, now);
return; return;
} }
} }
last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
if (time_after32(now, last_overflow + HZ)) if (!time_between32(now, last_overflow, last_overflow + HZ))
tcp_sk(sk)->rx_opt.ts_recent_stamp = now; WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
} }
/* syncookies: no recent synqueue overflow on this listening socket? */ /* syncookies: no recent synqueue overflow on this listening socket? */
...@@ -517,13 +518,23 @@ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) ...@@ -517,13 +518,23 @@ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
reuse = rcu_dereference(sk->sk_reuseport_cb); reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) { if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts); last_overflow = READ_ONCE(reuse->synq_overflow_ts);
return time_after32(now, last_overflow + return !time_between32(now, last_overflow - HZ,
last_overflow +
TCP_SYNCOOKIE_VALID); TCP_SYNCOOKIE_VALID);
} }
} }
last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
/* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
* then we're under synflood. However, we have to use
* 'last_overflow - HZ' as lower bound. That's because a concurrent
* tcp_synq_overflow() could update .ts_recent_stamp after we read
* jiffies but before we store .ts_recent_stamp into last_overflow,
* which could lead to rejecting a valid syncookie.
*/
return !time_between32(now, last_overflow - HZ,
last_overflow + TCP_SYNCOOKIE_VALID);
} }
static inline u32 tcp_cookie_time(void) static inline u32 tcp_cookie_time(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment