Commit 43702406 authored by David S. Miller

Merge branch 'tcpflags'

Eric Dumazet says:

====================
tcp: no longer keep around headers in input path

Looking at tcp_try_coalesce(), I was wondering why I did:

if (tcp_hdr(from)->fin)
     return false;

The answer is that the aggregation can be allowed if we simply OR the FIN and PSH
flags possibly present in @from into the @to packet. (Note that a change is also
needed in skb_try_coalesce() to avoid calling skb_put() with a 0 len.)
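
As a rough illustration of that coalesce pattern, here is a userspace sketch (struct seg
and seg_coalesce are invented names; the real kernel change is in the diff below): OR the
flag byte of the appended segment into the head segment, and skip the copy when the
appended segment carries no payload (e.g. a pure FIN).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FLAG_FIN 0x01
#define FLAG_PSH 0x08

struct seg {
	uint8_t  flags;              /* TCP flag byte, like TCP_SKB_CB(skb)->tcp_flags */
	uint32_t len;                /* payload bytes currently held */
	uint8_t  data[2048];
};

static bool seg_coalesce(struct seg *to, const struct seg *from)
{
	if (from->len > sizeof(to->data) - to->len)
		return false;        /* not enough room: keep the segments separate */
	if (from->len)               /* a pure FIN has len == 0: nothing to copy */
		memcpy(to->data + to->len, from->data, from->len);
	to->len   += from->len;
	to->flags |= from->flags;    /* FIN/PSH survive the aggregation */
	return true;
}

int main(void)
{
	struct seg head = { .flags = FLAG_PSH, .len = 3, .data = "abc" };
	struct seg fin  = { .flags = FLAG_FIN, .len = 0 };

	seg_coalesce(&head, &fin);
	printf("len=%u fin=%d\n", (unsigned)head.len, !!(head.flags & FLAG_FIN)); /* len=3 fin=1 */
	return 0;
}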

Then, looking at tcp_recvmsg(), I realized we access tcp_hdr(skb)->syn
(and maybe tcp_hdr(skb)->fin) for every packet we process from the socket
receive queue.

We have to understand that TCP flags are cold in CPU caches most of the time
(assuming TCP timestamps are in use, and that the application calls recvmsg()
long after the incoming packet was processed), so bringing in a whole
cache line only to access one bit is not very nice.

It would make sense to use TCP_SKB_CB(skb)->tcp_flags in the TCP input path,
as we already do in the output path.
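
Concretely, every input-path test on the header flags becomes a bit test on the flag
byte cached in the skb control block; this is exactly the pattern repeated in the diff
below (TCPHDR_SYN and TCPHDR_FIN are the existing kernel masks):

-	if (tcp_hdr(skb)->fin)				/* old: dereferences the (cold) TCP header */
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)	/* new: reads the flag byte cached in skb->cb */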

This saves one cache line miss, and tcp_collapse() no longer has to deal
with the headers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 13bb5180 b3d6cb92
@@ -3936,7 +3936,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		return false;
 
 	if (len <= skb_tailroom(to)) {
-		BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
+		if (len)
+			BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
 		*delta_truesize = 0;
 		return true;
 	}
...
@@ -1510,9 +1510,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
 		offset = seq - TCP_SKB_CB(skb)->seq;
-		if (tcp_hdr(skb)->syn)
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 			offset--;
-		if (offset < skb->len || tcp_hdr(skb)->fin) {
+		if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
 			*off = offset;
 			return skb;
 		}
@@ -1585,7 +1585,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			if (offset + 1 != skb->len)
 				continue;
 		}
-		if (tcp_hdr(skb)->fin) {
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
 			sk_eat_skb(sk, skb, false);
 			++seq;
 			break;
@@ -1722,11 +1722,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				break;
 
 			offset = *seq - TCP_SKB_CB(skb)->seq;
-			if (tcp_hdr(skb)->syn)
+			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 				offset--;
 			if (offset < skb->len)
 				goto found_ok_skb;
-			if (tcp_hdr(skb)->fin)
+			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 				goto found_fin_ok;
 			WARN(!(flags & MSG_PEEK),
 			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@@ -1959,7 +1959,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if (used + offset < skb->len)
 			continue;
 
-		if (tcp_hdr(skb)->fin)
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, copied_early);
@@ -2160,8 +2160,10 @@ void tcp_close(struct sock *sk, long timeout)
 	 *  reader process may not have drained the data yet!
 	 */
 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
-			  tcp_hdr(skb)->fin;
+		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
+
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+			len--;
 		data_was_unread += len;
 		__kfree_skb(skb);
 	}
...
@@ -4093,7 +4093,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if (tcp_hdr(skb)->fin)
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			tcp_fin(sk);
 	}
 }
@@ -4143,9 +4143,6 @@ static bool tcp_try_coalesce(struct sock *sk,
 
 	*fragstolen = false;
 
-	if (tcp_hdr(from)->fin)
-		return false;
-
 	/* Its possible this segment overlaps with prior segment in queue */
 	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
 		return false;
@@ -4158,6 +4155,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
 	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
 	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
 	return true;
 }
@@ -4513,7 +4511,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		 * - bloated or contains data before "start" or
 		 *   overlaps to the next one.
 		 */
-		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
+		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
 		    (tcp_win_from_space(skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start))) {
 			end_of_skbs = false;
@@ -4532,30 +4530,18 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		/* Decided to skip this, advance start seq. */
 		start = TCP_SKB_CB(skb)->end_seq;
 	}
-	if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
+	if (end_of_skbs ||
+	    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 		return;
 
 	while (before(start, end)) {
+		int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
 		struct sk_buff *nskb;
-		unsigned int header = skb_headroom(skb);
-		int copy = SKB_MAX_ORDER(header, 0);
 
-		/* Too big header? This can happen with IPv6. */
-		if (copy < 0)
-			return;
-		if (end - start < copy)
-			copy = end - start;
-		nskb = alloc_skb(copy + header, GFP_ATOMIC);
+		nskb = alloc_skb(copy, GFP_ATOMIC);
 		if (!nskb)
 			return;
 
-		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		skb_set_network_header(nskb, (skb_network_header(skb) -
-					      skb->head));
-		skb_set_transport_header(nskb, (skb_transport_header(skb) -
-						skb->head));
-		skb_reserve(nskb, header);
-		memcpy(nskb->head, skb->head, header);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_queue_before(list, skb, nskb);
@@ -4579,8 +4565,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			skb = tcp_collapse_one(sk, skb, list);
 			if (!skb ||
 			    skb == tail ||
-			    tcp_hdr(skb)->syn ||
-			    tcp_hdr(skb)->fin)
+			    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 				return;
 		}
 	}
...
@@ -1638,6 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff * 4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
 	TCP_SKB_CB(skb)->sacked = 0;
...
@@ -1415,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
...