Commit 2c667d40 authored by David S. Miller

Merge branch 'tcp-readd-hp'

Florian Westphal says:

====================
tcp: re-add header prediction

Eric reported a performance regression caused by header prediction
removal.

We now call tcp_ack() much more frequently, for some workloads
this brings in enough cache line misses to become noticeable.

We could possibly still kill HP provided we find a different
way to suppress unneeded tcp_ack, but given we're late in
the cycle it seems preferable to revert.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0da93d2e 31770e34
......@@ -147,6 +147,12 @@ struct tcp_sock {
u16 tcp_header_len; /* Bytes of tcp header to send */
u16 gso_segs; /* Max number of segs per GSO packet */
/*
* Header prediction flags
* 0x5?10 << 16 + snd_wnd in net byte order
*/
__be32 pred_flags;
/*
* RFC793 variables by their proper names. This means you can
* read the code and the spec side by side (and laugh ...)
......
......@@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}
/*
 * Build the header-prediction word and store it in tp->pred_flags.
 *
 * The word is assembled in host byte order and converted with htonl().
 * It is the OR of:
 *   - tp->tcp_header_len (a byte count) shifted left by 26; for a length
 *     that is a multiple of 4 this equals the length in 32-bit words
 *     shifted left by 28;
 *   - TCP_FLAG_ACK converted to host order;
 *   - snd_wnd exactly as supplied by the caller.
 *
 * @tp:      socket whose pred_flags field is written
 * @snd_wnd: window value OR-ed into the word
 */
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	/*
	 * tcp_header_len is a u16 and would otherwise be promoted to signed
	 * int; a length of 32 bytes or more shifted by 26 is not
	 * representable in a 32-bit int. Widen to u32 first so the shift is
	 * carried out on an unsigned operand.
	 */
	tp->pred_flags = htonl(((u32)tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}
static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}
/*
 * Enable header prediction on @sk only when every precondition holds:
 * the out-of-order queue is empty, rcv_wnd is non-zero, sk_rmem_alloc is
 * below sk_rcvbuf, and no urgent data is pending. The checks are made in
 * that order and the first failing one ends the function.
 */
static inline void tcp_fast_path_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
		return;
	if (!tp->rcv_wnd)
		return;
	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		return;
	if (tp->urg_data)
		return;

	tcp_fast_path_on(tp);
}
/* Compute the actual rto_min value */
static inline u32 tcp_rto_min(struct sock *sk)
{
......@@ -910,8 +933,9 @@ enum tcp_ca_event {
/*
 * Information about inbound ACK, passed to cong_ops->in_ack_event().
 *
 * Each enumerator is a distinct single bit, so the values can be OR-ed
 * together and tested individually with '&'.
 */
enum tcp_ca_ack_event_flags {
	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
};
/*
......
......@@ -184,7 +184,9 @@ enum
LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */
LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */
LINUX_MIB_LISTENDROPS, /* ListenDrops */
LINUX_MIB_TCPHPHITS, /* TCPHPHits */
LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */
LINUX_MIB_TCPHPACKS, /* TCPHPAcks */
LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */
LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */
LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */
......
......@@ -206,7 +206,9 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
......
......@@ -1963,8 +1963,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
tcp_rcv_space_adjust(sk);
skip_copy:
if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
tp->urg_data = 0;
tcp_fast_path_check(sk);
}
if (used + offset < skb->len)
continue;
......
This diff is collapsed.
......@@ -436,6 +436,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_sock *newtp = tcp_sk(newsk);
/* Now setup tcp_sock */
newtp->pred_flags = 0;
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
newtp->segs_in = 1;
......
......@@ -295,7 +295,9 @@ static u16 tcp_select_window(struct sock *sk)
/* RFC1323 scaling applied */
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
if (new_win == 0) {
tp->pred_flags = 0;
if (old_win)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPTOZEROWINDOWADV);
......
......@@ -153,6 +153,24 @@ static inline void update_rtt_min(struct westwood *w)
w->rtt_min = min(w->rtt, w->rtt_min);
}
/*
 * @westwood_fast_bw
 * Fast-path bandwidth bookkeeping. tcp_westwood_ack() calls this when the
 * ACK is not flagged CA_ACK_SLOWPATH, i.e. when header prediction was
 * successful. In that case the update is straightforward and doesn't need
 * any particular care.
 *
 * The statements below are order-sensitive: the window update runs first,
 * then the byte counter and the saved snd_una are refreshed, and finally
 * the minimum RTT is updated.
 */
static inline void westwood_fast_bw(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
westwood_update_window(sk);
/* Add the distance tp->snd_una has moved past the value saved in w. */
w->bk += tp->snd_una - w->snd_una;
/* Save the current snd_una as the baseline for the next call. */
w->snd_una = tp->snd_una;
update_rtt_min(w);
}
/*
* @westwood_acked_count
* This function evaluates cumul_ack for evaluating bk in case of
......@@ -205,12 +223,17 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
/*
 * Per-ACK bandwidth accounting for Westwood.
 *
 * @sk:        socket the ACK belongs to
 * @ack_flags: CA_ACK_* bits describing the ACK
 *
 * When CA_ACK_SLOWPATH is set, the window is updated, the count returned
 * by westwood_acked_count() is added to w->bk, and the minimum RTT is
 * refreshed. Otherwise the work is delegated to westwood_fast_bw().
 */
static void tcp_westwood_ack(struct sock *sk, u32 ack_flags)
{
	if (ack_flags & CA_ACK_SLOWPATH) {
		struct westwood *w = inet_csk_ca(sk);

		/* Each step must run exactly once per ACK. */
		westwood_update_window(sk);
		w->bk += westwood_acked_count(sk);

		update_rtt_min(w);
		return;
	}

	westwood_fast_bw(sk);
}
static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment