Commit 4c4fde21 authored by David S. Miller

Merge branch 'tcp-tracepoints'

Song Liu says:

====================
net: add a set of tracepoints to tcp stack

Changes from v1:

Fix build error (with ipv6 as ko) by adding EXPORT_TRACEPOINT_SYMBOL_GPL
for trace_tcp_send_reset.

These patches add the following tracepoints to tcp stack.

tcp_send_reset
tcp_receive_reset
tcp_destroy_sock
tcp_set_state

These tracepoints can be used to track TCP state changes. Such state
changes include, but are not limited to: connection establishment,
connection termination, transmission and reception of RST, and various
retransmits.

Currently, we use the following kprobes to trace these events:

int kprobe__tcp_validate_incoming
int kprobe__tcp_send_active_reset
int kprobe__tcp_v4_send_reset
int kprobe__tcp_v6_send_reset
int kprobe__tcp_v4_destroy_sock
int kprobe__tcp_set_state
int kprobe__tcp_retransmit_skb

These tracepoints will help us simplify this work.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bded4422 e8fce239
...@@ -9,15 +9,37 @@ ...@@ -9,15 +9,37 @@
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <net/ipv6.h> #include <net/ipv6.h>
TRACE_EVENT(tcp_retransmit_skb, #define tcp_state_name(state) { state, #state }
/*
 * show_tcp_state_name() - symbolic-name lookup for a TCP state value.
 *
 * Hands __print_symbolic() one { value, "NAME" } pair per TCP_* state, each
 * pair built by tcp_state_name(). The tcp_set_state event prints the result
 * with "%s" for both its oldstate and newstate fields.
 *
 * NOTE(review): the TCP_* values look like enum constants; user-space
 * parsers of the event format may need them exported through
 * TRACE_DEFINE_ENUM() to resolve the names - confirm.
 */
#define show_tcp_state_name(val) \
__print_symbolic(val, \
tcp_state_name(TCP_ESTABLISHED), \
tcp_state_name(TCP_SYN_SENT), \
tcp_state_name(TCP_SYN_RECV), \
tcp_state_name(TCP_FIN_WAIT1), \
tcp_state_name(TCP_FIN_WAIT2), \
tcp_state_name(TCP_TIME_WAIT), \
tcp_state_name(TCP_CLOSE), \
tcp_state_name(TCP_CLOSE_WAIT), \
tcp_state_name(TCP_LAST_ACK), \
tcp_state_name(TCP_LISTEN), \
tcp_state_name(TCP_CLOSING), \
tcp_state_name(TCP_NEW_SYN_RECV))
TP_PROTO(struct sock *sk, struct sk_buff *skb), /*
* tcp event with arguments sk and skb
*
* Note: this class requires a valid sk pointer; while skb pointer could
* be NULL.
*/
DECLARE_EVENT_CLASS(tcp_event_sk_skb,
TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
TP_ARGS(sk, skb), TP_ARGS(sk, skb),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(void *, skbaddr) __field(const void *, skbaddr)
__field(void *, skaddr) __field(const void *, skaddr)
__field(__u16, sport) __field(__u16, sport)
__field(__u16, dport) __field(__u16, dport)
__array(__u8, saddr, 4) __array(__u8, saddr, 4)
...@@ -64,6 +86,157 @@ TRACE_EVENT(tcp_retransmit_skb, ...@@ -64,6 +86,157 @@ TRACE_EVENT(tcp_retransmit_skb,
__entry->saddr_v6, __entry->daddr_v6) __entry->saddr_v6, __entry->daddr_v6)
); );
/*
 * tcp_retransmit_skb: instance of the tcp_event_sk_skb class, taking the
 * socket and the skb as arguments.
 */
DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
TP_ARGS(sk, skb)
);
/*
 * tcp_send_reset: instance of the tcp_event_sk_skb class.
 *
 * skb is the packet that caused the RST to be sent, not the RST itself.
 * For an active reset there is no such packet, so the caller passes
 * skb == NULL. sk must always be valid; the v4 and v6 send-reset paths
 * only fire this event when sk is non-NULL.
 */
DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
TP_ARGS(sk, skb)
);
/*
 * tcp event with arguments sk
 *
 * Records the socket address, both ports, and the source/destination
 * addresses in IPv4 and IPv6 form.
 *
 * Note: this class requires a valid sk pointer; sk is dereferenced
 * unconditionally in TP_fast_assign.
 */
DECLARE_EVENT_CLASS(tcp_event_sk,
TP_PROTO(const struct sock *sk),
TP_ARGS(sk),
TP_STRUCT__entry(
__field(const void *, skaddr)
__field(__u16, sport)
__field(__u16, dport)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
__array(__u8, daddr_v6, 16)
),
TP_fast_assign(
struct inet_sock *inet = inet_sk(sk);
struct in6_addr *pin6;
__be32 *p32;
__entry->skaddr = sk;
/* Ports are converted with ntohs() before being stored. */
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
/*
 * The IPv4 fields are filled from inet_saddr/inet_daddr for every
 * socket, whatever its family.
 */
p32 = (__be32 *) __entry->saddr;
*p32 = inet->inet_saddr;
p32 = (__be32 *) __entry->daddr;
*p32 = inet->inet_daddr;
/*
 * AF_INET6 sockets (only when IPv6 is enabled) copy their own IPv6
 * addresses; every other case falls into the block after #endif.
 */
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
pin6 = (struct in6_addr *)__entry->saddr_v6;
*pin6 = sk->sk_v6_rcv_saddr;
pin6 = (struct in6_addr *)__entry->daddr_v6;
*pin6 = sk->sk_v6_daddr;
} else
#endif
{
/* Derive the IPv6 fields from the IPv4 ones via ipv6_addr_set_v4mapped(). */
pin6 = (struct in6_addr *)__entry->saddr_v6;
ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
pin6 = (struct in6_addr *)__entry->daddr_v6;
ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
}
),
/* skaddr is recorded in the entry but is not part of the printed format. */
TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6)
);
/*
 * tcp_receive_reset: instance of the tcp_event_sk class; fired at the top
 * of tcp_reset() with the socket that received the reset.
 */
DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
TP_PROTO(const struct sock *sk),
TP_ARGS(sk)
);
/*
 * tcp_destroy_sock: instance of the tcp_event_sk class; fired from
 * tcp_v4_destroy_sock() before its cleanup calls run.
 */
DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock,
TP_PROTO(const struct sock *sk),
TP_ARGS(sk)
);
/*
 * tcp_set_state: records a TCP state transition for a socket.
 *
 * @sk:       socket changing state; must be valid, it is dereferenced
 *            unconditionally in TP_fast_assign
 * @oldstate: state before the change (the caller passes sk->sk_state)
 * @newstate: state being set
 *
 * Besides the two states, the entry stores the same socket address, port
 * and IPv4/IPv6 address fields as the tcp_event_sk class.
 */
TRACE_EVENT(tcp_set_state,
TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
TP_ARGS(sk, oldstate, newstate),
TP_STRUCT__entry(
__field(const void *, skaddr)
__field(int, oldstate)
__field(int, newstate)
__field(__u16, sport)
__field(__u16, dport)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
__array(__u8, daddr_v6, 16)
),
TP_fast_assign(
struct inet_sock *inet = inet_sk(sk);
struct in6_addr *pin6;
__be32 *p32;
__entry->skaddr = sk;
__entry->oldstate = oldstate;
__entry->newstate = newstate;
/* Ports are converted with ntohs() before being stored. */
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
/*
 * The IPv4 fields are filled from inet_saddr/inet_daddr for every
 * socket, whatever its family.
 */
p32 = (__be32 *) __entry->saddr;
*p32 = inet->inet_saddr;
p32 = (__be32 *) __entry->daddr;
*p32 = inet->inet_daddr;
/*
 * AF_INET6 sockets (only when IPv6 is enabled) copy their own IPv6
 * addresses; every other case falls into the block after #endif.
 */
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
pin6 = (struct in6_addr *)__entry->saddr_v6;
*pin6 = sk->sk_v6_rcv_saddr;
pin6 = (struct in6_addr *)__entry->daddr_v6;
*pin6 = sk->sk_v6_daddr;
} else
#endif
{
/* Derive the IPv6 fields from the IPv4 ones via ipv6_addr_set_v4mapped(). */
pin6 = (struct in6_addr *)__entry->saddr_v6;
ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
pin6 = (struct in6_addr *)__entry->daddr_v6;
ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
}
),
/*
 * States are printed by name through show_tcp_state_name(); skaddr is
 * recorded in the entry but is not part of the printed format.
 */
TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
show_tcp_state_name(__entry->oldstate),
show_tcp_state_name(__entry->newstate))
);
#endif /* _TRACE_TCP_H */ #endif /* _TRACE_TCP_H */
/* This part must be outside protection */ /* This part must be outside protection */
......
...@@ -49,3 +49,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); ...@@ -49,3 +49,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
...@@ -282,6 +282,8 @@ ...@@ -282,6 +282,8 @@
#include <asm/ioctls.h> #include <asm/ioctls.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <trace/events/tcp.h>
int sysctl_tcp_min_tso_segs __read_mostly = 2; int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1; int sysctl_tcp_autocorking __read_mostly = 1;
...@@ -2040,6 +2042,8 @@ void tcp_set_state(struct sock *sk, int state) ...@@ -2040,6 +2042,8 @@ void tcp_set_state(struct sock *sk, int state)
{ {
int oldstate = sk->sk_state; int oldstate = sk->sk_state;
trace_tcp_set_state(sk, oldstate, state);
switch (state) { switch (state) {
case TCP_ESTABLISHED: case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED) if (oldstate != TCP_ESTABLISHED)
......
...@@ -75,6 +75,7 @@ ...@@ -75,6 +75,7 @@
#include <linux/ipsec.h> #include <linux/ipsec.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <linux/errqueue.h> #include <linux/errqueue.h>
#include <trace/events/tcp.h>
int sysctl_tcp_fack __read_mostly; int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_max_reordering __read_mostly = 300;
...@@ -4010,6 +4011,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) ...@@ -4010,6 +4011,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
/* When we get a reset we do this. */ /* When we get a reset we do this. */
void tcp_reset(struct sock *sk) void tcp_reset(struct sock *sk)
{ {
trace_tcp_receive_reset(sk);
/* We want the right error as BSD sees it (and indeed as we do). */ /* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) { switch (sk->sk_state) {
case TCP_SYN_SENT: case TCP_SYN_SENT:
......
...@@ -85,6 +85,8 @@ ...@@ -85,6 +85,8 @@
#include <crypto/hash.h> #include <crypto/hash.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <trace/events/tcp.h>
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th); __be32 daddr, __be32 saddr, const struct tcphdr *th);
...@@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force * routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route. * input interface, we will misroute in case of asymmetric route.
*/ */
if (sk) if (sk) {
arg.bound_dev_if = sk->sk_bound_dev_if; arg.bound_dev_if = sk->sk_bound_dev_if;
trace_tcp_send_reset(sk, skb);
}
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if)); offsetof(struct inet_timewait_sock, tw_bound_dev_if));
...@@ -1865,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk) ...@@ -1865,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
trace_tcp_destroy_sock(sk);
tcp_clear_xmit_timers(sk); tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(sk); tcp_cleanup_congestion_control(sk);
......
...@@ -3084,6 +3084,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) ...@@ -3084,6 +3084,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Send it off. */ /* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority)) if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
trace_tcp_send_reset(sk, NULL);
} }
/* Send a crossed SYN-ACK during socket establishment. /* Send a crossed SYN-ACK during socket establishment.
......
...@@ -69,6 +69,8 @@ ...@@ -69,6 +69,8 @@
#include <crypto/hash.h> #include <crypto/hash.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <trace/events/tcp.h>
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req); struct request_sock *req);
...@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
int genhash; int genhash;
struct sock *sk1 = NULL; struct sock *sk1 = NULL;
#endif #endif
int oif; int oif = 0;
if (th->rst) if (th->rst)
return; return;
...@@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ...@@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2); (th->doff << 2);
oif = sk ? sk->sk_bound_dev_if : 0; if (sk) {
oif = sk->sk_bound_dev_if;
trace_tcp_send_reset(sk, skb);
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment