Commit 53c33a16 authored by David S. Miller

Merge branch 'tcp-drop-reason-additions'

Eric Dumazet says:

====================
tcp: drop reason additions

Currently, TCP is either missing drop reasons,
or pretending that some useful packets are dropped.

This patch series makes "perf record -a -e skb:kfree_skb"
much more usable.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7925c2d9 8fbf1957
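
For reference, the tcp_input.c changes below lean on the SKB_DR()/SKB_DR_SET()
helpers, small macros over enum skb_drop_reason defined next to the enum. A
simplified sketch of how they expand (paraphrased; consult the tree for the
exact definitions):

	/* declare a drop-reason variable, defaulting to NOT_SPECIFIED */
	#define SKB_DR_INIT(name, reason) \
		enum skb_drop_reason name = SKB_DROP_REASON_##reason
	#define SKB_DR(name)		SKB_DR_INIT(name, NOT_SPECIFIED)

	/* refine it once the precise drop cause is known */
	#define SKB_DR_SET(name, reason) (name = SKB_DROP_REASON_##reason)

Each annotated function declares SKB_DR(reason) once, sets it at the failure
site, and hands it to tcp_drop_reason() from a single discard label.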
@@ -381,6 +381,19 @@ enum skb_drop_reason {
 					 * the ofo queue, corresponding to
 					 * LINUX_MIB_TCPOFOMERGE
 					 */
+	SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to
+					   * LINUX_MIB_PAWSESTABREJECTED
+					   */
+	SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */
+	SKB_DROP_REASON_TCP_RESET,	/* Invalid RST packet */
+	SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */
+	SKB_DROP_REASON_TCP_CLOSE,	/* TCP socket in CLOSE state */
+	SKB_DROP_REASON_TCP_FASTOPEN,	/* dropped by FASTOPEN request socket */
+	SKB_DROP_REASON_TCP_OLD_ACK,	/* TCP ACK is old, but in window */
+	SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */
+	SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */
+	SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */
+	SKB_DROP_REASON_TCP_OFO_DROP,	/* data already in receive queue */
 	SKB_DROP_REASON_IP_OUTNOROUTES,	/* route lookup failed */
 	SKB_DROP_REASON_BPF_CGROUP_EGRESS,	/* dropped by
 						 * BPF_PROG_TYPE_CGROUP_SKB
@@ -37,6 +37,20 @@
 	EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA)		\
 	EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW)	\
 	EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE)		\
+	EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP)		\
+	EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS)	\
+	EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE,		\
+	   TCP_INVALID_SEQUENCE)				\
+	EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET)		\
+	EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN)	\
+	EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE)		\
+	EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN)		\
+	EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK)		\
+	EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK)	\
+	EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,			\
+	   TCP_ACK_UNSENT_DATA)					\
+	EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,			\
+	   TCP_OFO_QUEUE_PRUNE)					\
 	EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES)	\
 	EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS,			\
 	   BPF_CGROUP_EGRESS)					\
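
The EM() entries above feed the usual trace-event X-macro pattern: the same
list is expanded once to register the enum values and once to build the name
table that __print_symbolic() uses when the tracepoint is printed. A
self-contained userspace sketch of the technique (names and values here are
illustrative, not the kernel's):

	#include <stdio.h>

	/* one authoritative list, expanded twice */
	#define DROP_REASONS(EM)		\
		EM(TCP_RFC7323_PAWS)		\
		EM(TCP_INVALID_SEQUENCE)	\
		EM(TCP_RESET)

	#define AS_ENUM(name)	SKB_DROP_REASON_##name,
	#define AS_NAME(name)	#name,

	enum skb_drop_reason { DROP_REASONS(AS_ENUM) };
	static const char * const drop_names[] = { DROP_REASONS(AS_NAME) };

	int main(void)
	{
		/* prints "TCP_RESET", the way trace output stringifies reasons */
		printf("%s\n", drop_names[SKB_DROP_REASON_TCP_RESET]);
		return 0;
	}

Keeping the enum and its printable names in one list is what lets a new drop
reason show up in trace output by adding exactly one EM() entry.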
@@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		if (before(ack, prior_snd_una - tp->max_window)) {
 			if (!(flag & FLAG_NO_CHALLENGE_ACK))
 				tcp_send_challenge_ack(sk);
-			return -1;
+			return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
 		}
 		goto old_ack;
 	}
@@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	 * this segment (RFC793 Section 3.9).
 	 */
 	if (after(ack, tp->snd_nxt))
-		return -1;
+		return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
 
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
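
Note the convention the two hunks above introduce: tcp_ack() still returns a
non-negative value on success, but on a dropped ACK it now returns the negated
drop reason, so a single int carries both outcomes. A minimal userspace sketch
of that encoding (the sequence checks are toy stand-ins, not the kernel's
before()/after() arithmetic):

	#include <stdio.h>

	enum skb_drop_reason {
		SKB_DROP_REASON_NOT_SPECIFIED = 1,
		SKB_DROP_REASON_TCP_TOO_OLD_ACK,
		SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,
	};

	/* toy tcp_ack(): 0 on success, -reason when the ACK is dropped */
	static int toy_tcp_ack(unsigned int ack, unsigned int snd_una,
			       unsigned int snd_nxt)
	{
		if (ack < snd_una)	/* ACK is older than the window */
			return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
		if (ack > snd_nxt)	/* ACKs data that was never sent */
			return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
		return 0;
	}

	int main(void)
	{
		int ret = toy_tcp_ack(200, 100, 150);

		if (ret < 0)	/* negate to recover the drop reason */
			printf("dropped, reason=%d\n", -ret);
		return 0;
	}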
@@ -4674,7 +4674,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
 {
 	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
 
-	/* In case tcp_drop() is called later, update to->gso_segs */
+	/* In case tcp_drop_reason() is called later, update to->gso_segs */
 	if (res) {
 		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
 			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
@@ -4691,11 +4691,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
 	kfree_skb_reason(skb, reason);
 }
 
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
-{
-	tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
-}
-
 /* This one checks to see if we can put data from the
  * out_of_order queue into the receive_queue.
  */
@@ -4723,7 +4718,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			tcp_drop(sk, skb);
+			tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
 			continue;
 		}
@@ -5334,7 +5329,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
 		goal -= rb_to_skb(node)->truesize;
-		tcp_drop(sk, rb_to_skb(node));
+		tcp_drop_reason(sk, rb_to_skb(node),
+				SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
 		if (!prev || goal <= 0) {
 			sk_mem_reclaim(sk);
 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5667,7 +5663,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				  const struct tcphdr *th, int syn_inerr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	bool rst_seq_match = false;
+	SKB_DR(reason);
 
 	/* RFC1323: H1. Apply PAWS check first. */
 	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
@@ -5679,6 +5675,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  LINUX_MIB_TCPACKSKIPPEDPAWS,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
+			SKB_DR_SET(reason, TCP_RFC7323_PAWS);
 			goto discard;
 		}
 		/* Reset is accepted even if it did not pass PAWS. */
@@ -5700,8 +5697,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
 		} else if (tcp_reset_check(sk, skb)) {
-			tcp_reset(sk, skb);
+			goto reset;
 		}
+		SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
 		goto discard;
 	}
@@ -5717,9 +5715,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		 * Send a challenge ACK
 		 */
 		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
-		    tcp_reset_check(sk, skb)) {
-			rst_seq_match = true;
-		} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+		    tcp_reset_check(sk, skb))
+			goto reset;
+
+		if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
 			struct tcp_sack_block *sp = &tp->selective_acks[0];
 			int max_sack = sp[0].end_seq;
 			int this_sack;
@@ -5732,21 +5731,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 			}
 
 			if (TCP_SKB_CB(skb)->seq == max_sack)
-				rst_seq_match = true;
+				goto reset;
 		}
 
-		if (rst_seq_match)
-			tcp_reset(sk, skb);
-		else {
-			/* Disable TFO if RST is out-of-order
-			 * and no data has been received
-			 * for current active TFO socket
-			 */
-			if (tp->syn_fastopen && !tp->data_segs_in &&
-			    sk->sk_state == TCP_ESTABLISHED)
-				tcp_fastopen_active_disable(sk);
-			tcp_send_challenge_ack(sk);
-		}
+		/* Disable TFO if RST is out-of-order
+		 * and no data has been received
+		 * for current active TFO socket
+		 */
+		if (tp->syn_fastopen && !tp->data_segs_in &&
+		    sk->sk_state == TCP_ESTABLISHED)
+			tcp_fastopen_active_disable(sk);
+		tcp_send_challenge_ack(sk);
+		SKB_DR_SET(reason, TCP_RESET);
 		goto discard;
 	}
@@ -5761,6 +5757,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
 		tcp_send_challenge_ack(sk);
+		SKB_DR_SET(reason, TCP_INVALID_SYN);
 		goto discard;
 	}
@@ -5769,7 +5766,12 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 	return true;
 
 discard:
-	tcp_drop(sk, skb);
+	tcp_drop_reason(sk, skb, reason);
 	return false;
+
+reset:
+	tcp_reset(sk, skb);
+	__kfree_skb(skb);
+	return false;
 }
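
Note the split the new reset: label encodes: a packet carrying a valid RST did
real work, so it is freed with __kfree_skb() and never appears on the
skb:kfree_skb drop tracepoint, while the discard: path reports a reason via
tcp_drop_reason()/kfree_skb_reason(). A self-contained userspace sketch of that
accounting split (all names below are stand-ins, not the kernel's
implementations):

	#include <stdio.h>

	enum skb_drop_reason { SKB_DROP_REASON_TCP_RESET = 1 };

	struct sk_buff { const char *payload; };

	/* stand-in for kfree_skb_reason(): this is what perf would see */
	static void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason r)
	{
		printf("trace kfree_skb: reason=%d\n", r);
	}

	/* stand-in for __kfree_skb(): silent free, no drop event emitted */
	static void __kfree_skb(struct sk_buff *skb)
	{
	}

	static void validate_incoming(struct sk_buff *skb, int rst_is_valid)
	{
		if (rst_is_valid) {
			/* "reset:" path - the RST was acted upon, skb consumed */
			__kfree_skb(skb);
			return;
		}
		/* "discard:" path - a genuine drop, annotated with its cause */
		kfree_skb_reason(skb, SKB_DROP_REASON_TCP_RESET);
	}

	int main(void)
	{
		struct sk_buff skb = { "out-of-window RST" };

		validate_incoming(&skb, 0);	/* prints a drop event */
		return 0;
	}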
@@ -5956,7 +5958,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 		return;
 
 step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+	reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
+	if (reason < 0)
 		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -6136,6 +6139,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	struct tcp_fastopen_cookie foc = { .len = -1 };
 	int saved_clamp = tp->rx_opt.mss_clamp;
 	bool fastopen_fail;
+	SKB_DR(reason);
 
 	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -6178,7 +6182,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		if (th->rst) {
 			tcp_reset(sk, skb);
-			goto discard;
+consume:
+			__kfree_skb(skb);
+			return 0;
 		}
 
 		/* rfc793:
@@ -6188,9 +6194,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	 *    See note below!
 	 *                                        --ANK(990513)
 	 */
-	if (!th->syn)
+	if (!th->syn) {
+		SKB_DR_SET(reason, TCP_FLAGS);
 		goto discard_and_undo;
+	}
 
 	/* rfc793:
 	 *   "If the SYN bit is on ...
 	 *    are acceptable then ...
@@ -6267,13 +6274,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
-
-discard:
-			tcp_drop(sk, skb);
-			return 0;
-		} else {
-			tcp_send_ack(sk);
+			goto consume;
 		}
+		tcp_send_ack(sk);
 		return -1;
 	}
@@ -6285,15 +6288,16 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 *
 		 *    Otherwise (no ACK) drop the segment and return."
 		 */
+		SKB_DR_SET(reason, TCP_RESET);
 		goto discard_and_undo;
 	}
 
 	/* PAWS check. */
 	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_reject(&tp->rx_opt, 0))
+	    tcp_paws_reject(&tp->rx_opt, 0)) {
+		SKB_DR_SET(reason, TCP_RFC7323_PAWS);
 		goto discard_and_undo;
+	}
 
 	if (th->syn) {
 		/* We see SYN without ACK. It is attempt of
 		 * simultaneous connect with crossed SYNs.
@@ -6342,7 +6346,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		return -1;
 #else
-		goto discard;
+		goto consume;
 #endif
 	}
 
 	/* "fifth, if neither of the SYN or RST bits is set then
@@ -6352,7 +6356,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 discard_and_undo:
 	tcp_clear_options(&tp->rx_opt);
 	tp->rx_opt.mss_clamp = saved_clamp;
-	goto discard;
+	tcp_drop_reason(sk, skb, reason);
+	return 0;
 
 reset_and_undo:
 	tcp_clear_options(&tp->rx_opt);
@@ -6407,21 +6412,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	struct request_sock *req;
 	int queued = 0;
 	bool acceptable;
+	SKB_DR(reason);
 
 	switch (sk->sk_state) {
 	case TCP_CLOSE:
+		SKB_DR_SET(reason, TCP_CLOSE);
 		goto discard;
 
 	case TCP_LISTEN:
 		if (th->ack)
 			return 1;
 
-		if (th->rst)
+		if (th->rst) {
+			SKB_DR_SET(reason, TCP_RESET);
 			goto discard;
-
+		}
 		if (th->syn) {
-			if (th->fin)
+			if (th->fin) {
+				SKB_DR_SET(reason, TCP_FLAGS);
 				goto discard;
+			}
 			/* It is possible that we process SYN packets from backlog,
 			 * so we need to make sure to disable BH and RCU right there.
 			 */
@@ -6436,6 +6446,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			consume_skb(skb);
 			return 0;
 		}
+		SKB_DR_SET(reason, TCP_FLAGS);
 		goto discard;
 
 	case TCP_SYN_SENT:
@@ -6462,13 +6473,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 			     sk->sk_state != TCP_FIN_WAIT1);
 
-		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
+		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+			SKB_DR_SET(reason, TCP_FASTOPEN);
 			goto discard;
+		}
 	}
 
-	if (!th->ack && !th->rst && !th->syn)
+	if (!th->ack && !th->rst && !th->syn) {
+		SKB_DR_SET(reason, TCP_FLAGS);
 		goto discard;
+	}
 
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
@@ -6481,6 +6495,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_SYN_RECV)
 			return 1;	/* send one RST */
 		tcp_send_challenge_ack(sk);
+		SKB_DR_SET(reason, TCP_OLD_ACK);
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
@@ -6574,7 +6589,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			inet_csk_reset_keepalive_timer(sk, tmo);
 		} else {
 			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-			goto discard;
+			goto consume;
 		}
 		break;
 	}
@@ -6582,7 +6597,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_CLOSING:
 		if (tp->snd_una == tp->write_seq) {
 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			goto discard;
+			goto consume;
 		}
 		break;
@@ -6590,7 +6605,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->snd_una == tp->write_seq) {
 			tcp_update_metrics(sk);
 			tcp_done(sk);
-			goto discard;
+			goto consume;
 		}
 		break;
 	}
@@ -6641,9 +6656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	if (!queued) {
 discard:
-		tcp_drop(sk, skb);
+		tcp_drop_reason(sk, skb, reason);
 	}
 	return 0;
+
+consume:
+	__kfree_skb(skb);
+	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_state_process);
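
With the series applied, each of these reasons is visible from userspace via
the skb:kfree_skb tracepoint, e.g. through the perf one-liner quoted in the
cover letter. As an alternative, a minimal C reader over tracefs (this assumes
tracefs mounted at /sys/kernel/tracing and root privileges; adjust paths for
your system):

	#include <stdio.h>

	int main(void)
	{
		FILE *en = fopen("/sys/kernel/tracing/events/skb/kfree_skb/enable", "w");
		FILE *tp;
		int c;

		if (!en) {
			perror("enable");
			return 1;
		}
		fputs("1", en);	/* turn the tracepoint on */
		fclose(en);

		/* stream events; TCP drops now carry a symbolic reason=... field */
		tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
		if (!tp) {
			perror("trace_pipe");
			return 1;
		}
		while ((c = fgetc(tp)) != EOF)
			putchar(c);
		return 0;
	}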