Commit 618896e6 authored by David S. Miller

Merge branch 'net-timestamp-next'

Willem de Bruijn says:

====================
net-timestamp: new tx tstamps and tcp

Extend socket tx timestamping:
- allow multiple types of software timestamps aside from send (1)
- add software timestamp on enter packet scheduling (4)
- add software timestamp for TCP (5)
- add software timestamp for TCP on ACK (6)

The sk_flags option space is nearly exhausted. Therefore, move the
many timestamp options to a new field, sk->sk_tsflags (2).

To disambiguate data when tstamps may arrive out of order,
optionally return a sequential ID assigned at send (3).

Extend Linux tx timestamping to monitoring of latency
incurred within the kernel stack and to protocols embedded in TCP.
Complex kernel setups may have multiple layers of queueing, including
multiple instances of packet scheduling, and many classes per layer.
Many applications embed discrete payloads into TCP bytestreams for
reliability, flow control, etcetera. Detecting application tail
latency in such scenarios relies on identifying the exact queue
responsible if on the host, or the network latency if otherwise.

Changelog:
v4->v5
  - define SCM_TSTAMP_SND == 0, for legacy behavior
  - add TCP tstamps without changing the generated byte stream
    - modify GSO and ACK to find offset: slightly more complex
      than previous invariant that it is the last byte
  - consistent naming of packet scheduling
    - rename SCM_TSTAMP_ENQ to SCM_TSTAMP_SCHED
  - add unique key in ee_data
  - add id field in ee_info to disambiguate tstamps
    - optional, only on new flag SOF_TIMESTAMPING_OPT_ID
    - for bytestream, in bytes

v3->v4
  - (v3 review comment) removed skb->mark packet identification (*A)
  - (v3 review comment) fixed indentation
  - tcp: fixed poll() to return POLLERR on non-zero queue
  - rebased to work without syststamp
  - comments: removed all traces of MSG_TSTAMP_.. (*B)

v2->v3
  - extend the SO_TIMESTAMPING API, instead of defining a new one.
  - add protocol independent support to correlate tstamps with data,
    based on returning skb->mark.
  - removed no-payload optimization and documentation (for now):

    I have a follow-on patch that reintroduces MSG_TSTAMP along with a
    new socket option SOF_TIMESTAMPING_OPT_ONFLAG. This is equivalent
    to the sequence setsockopt(<enable>); send(..); setsockopt(<disable>),
    but avoids the need to define a MSG_TSTAMP_<TYPE> for each type.

    I will leave these three patches as follow-on, as this patchset is
    large enough as is.

v1->v2
  - expand timestamping (existing and new) to SOCK_RAW and ping sockets
  - rename sock_errqueue_timestamping to scm_timestamping
  - change timestamp data format: do not add fields to scm_timestamping.
      Doing so could break legacy applications. Instead, communicate
      through an existing, but unused, field in the error message.
  - rename SOF_.._OPT_TX_NO_PAYLOAD to shorter SOF_.._OPT_TSONLY
  - move msg_tstamp test app out of patchset and to github
      git://github.com/wdebruij/kerneltools.git
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a2b81b35 e1c8a607
...@@ -229,7 +229,7 @@ enum { ...@@ -229,7 +229,7 @@ enum {
/* generate hardware time stamp */ /* generate hardware time stamp */
SKBTX_HW_TSTAMP = 1 << 0, SKBTX_HW_TSTAMP = 1 << 0,
/* generate software time stamp */ /* generate software time stamp when queueing packet to NIC */
SKBTX_SW_TSTAMP = 1 << 1, SKBTX_SW_TSTAMP = 1 << 1,
/* device driver is going to provide hardware time stamp */ /* device driver is going to provide hardware time stamp */
...@@ -247,8 +247,19 @@ enum { ...@@ -247,8 +247,19 @@ enum {
* all frags to avoid possible bad checksum * all frags to avoid possible bad checksum
*/ */
SKBTX_SHARED_FRAG = 1 << 5, SKBTX_SHARED_FRAG = 1 << 5,
/* generate software time stamp when entering packet scheduling */
SKBTX_SCHED_TSTAMP = 1 << 6,
/* generate software timestamp on peer data acknowledgment */
SKBTX_ACK_TSTAMP = 1 << 7,
}; };
#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
SKBTX_SCHED_TSTAMP | \
SKBTX_ACK_TSTAMP)
#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
/* /*
* The callback notifies userspace to release buffers when skb DMA is done in * The callback notifies userspace to release buffers when skb DMA is done in
* lower device, the skb last reference should be 0 when calling this. * lower device, the skb last reference should be 0 when calling this.
...@@ -275,6 +286,7 @@ struct skb_shared_info { ...@@ -275,6 +286,7 @@ struct skb_shared_info {
unsigned short gso_type; unsigned short gso_type;
struct sk_buff *frag_list; struct sk_buff *frag_list;
struct skb_shared_hwtstamps hwtstamps; struct skb_shared_hwtstamps hwtstamps;
u32 tskey;
__be32 ip6_frag_id; __be32 ip6_frag_id;
/* /*
...@@ -2691,6 +2703,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) ...@@ -2691,6 +2703,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
void skb_complete_tx_timestamp(struct sk_buff *skb, void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps); struct skb_shared_hwtstamps *hwtstamps);
void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps,
struct sock *sk, int tstype);
/** /**
* skb_tstamp_tx - queue clone of skb with send time stamps * skb_tstamp_tx - queue clone of skb with send time stamps
* @orig_skb: the original outgoing packet * @orig_skb: the original outgoing packet
......
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
#include <linux/atomic.h> #include <linux/atomic.h>
#include <net/dst.h> #include <net/dst.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <linux/net_tstamp.h>
struct cgroup; struct cgroup;
struct cgroup_subsys; struct cgroup_subsys;
...@@ -278,6 +279,8 @@ struct cg_proto; ...@@ -278,6 +279,8 @@ struct cg_proto;
* @sk_protinfo: private area, net family specific, when not using slab * @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer * @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received * @sk_stamp: time stamp of last packet received
* @sk_tsflags: SO_TIMESTAMPING socket options
* @sk_tskey: counter to disambiguate concurrent tstamp requests
* @sk_socket: Identd and reporting IO signals * @sk_socket: Identd and reporting IO signals
* @sk_user_data: RPC layer private data * @sk_user_data: RPC layer private data
* @sk_frag: cached page frag * @sk_frag: cached page frag
...@@ -411,6 +414,8 @@ struct sock { ...@@ -411,6 +414,8 @@ struct sock {
void *sk_protinfo; void *sk_protinfo;
struct timer_list sk_timer; struct timer_list sk_timer;
ktime_t sk_stamp; ktime_t sk_stamp;
u16 sk_tsflags;
u32 sk_tskey;
struct socket *sk_socket; struct socket *sk_socket;
void *sk_user_data; void *sk_user_data;
struct page_frag sk_frag; struct page_frag sk_frag;
...@@ -701,12 +706,7 @@ enum sock_flags { ...@@ -701,12 +706,7 @@ enum sock_flags {
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
SOCK_MEMALLOC, /* VM depends on this socket for swapping */ SOCK_MEMALLOC, /* VM depends on this socket for swapping */
SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */
SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */
SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */
SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */
SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */
SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
SOCK_FASYNC, /* fasync() active */ SOCK_FASYNC, /* fasync() active */
SOCK_RXQ_OVFL, SOCK_RXQ_OVFL,
SOCK_ZEROCOPY, /* buffers from userspace */ SOCK_ZEROCOPY, /* buffers from userspace */
...@@ -2160,18 +2160,17 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) ...@@ -2160,18 +2160,17 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
/* /*
* generate control messages if * generate control messages if
* - receive time stamping in software requested (SOCK_RCVTSTAMP * - receive time stamping in software requested
* or SOCK_TIMESTAMPING_RX_SOFTWARE)
* - software time stamp available and wanted * - software time stamp available and wanted
* (SOCK_TIMESTAMPING_SOFTWARE)
* - hardware time stamps available and wanted * - hardware time stamps available and wanted
* SOCK_TIMESTAMPING_RAW_HARDWARE
*/ */
if (sock_flag(sk, SOCK_RCVTSTAMP) || if (sock_flag(sk, SOCK_RCVTSTAMP) ||
sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
(kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || (kt.tv64 &&
(sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE ||
skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP)) ||
(hwtstamps->hwtstamp.tv64 && (hwtstamps->hwtstamp.tv64 &&
sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb); __sock_recv_timestamp(msg, sk, skb);
else else
sk->sk_stamp = kt; sk->sk_stamp = kt;
...@@ -2187,11 +2186,11 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, ...@@ -2187,11 +2186,11 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \ (1UL << SOCK_RCVTSTAMP))
(1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
(1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)) SOF_TIMESTAMPING_RAW_HARDWARE)
if (sk->sk_flags & FLAGS_TS_OR_DROPS) if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
__sock_recv_ts_and_drops(msg, sk, skb); __sock_recv_ts_and_drops(msg, sk, skb);
else else
sk->sk_stamp = skb->tstamp; sk->sk_stamp = skb->tstamp;
...@@ -2201,8 +2200,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, ...@@ -2201,8 +2200,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet * @sk: socket sending this packet
* @tx_flags: filled with instructions for time stamping * @tx_flags: filled with instructions for time stamping
*
* Currently only depends on SOCK_TIMESTAMPING* flags.
*/ */
void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
......
...@@ -22,5 +22,25 @@ struct sock_extended_err { ...@@ -22,5 +22,25 @@ struct sock_extended_err {
#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
/**
 * struct scm_timestamping - timestamps exposed through cmsg
 * @ts: array of three timestamps:
 *      ts[0] receives the software timestamp,
 *      ts[1] is not filled in by the timestamping code shown alongside
 *            this definition and is left zeroed,
 *      ts[2] receives the raw hardware timestamp.
 *
 * The SO_TIMESTAMPING interface communicates network timestamps by
 * passing this struct in a SCM_TIMESTAMPING cmsg with recvmsg().
 * See Documentation/networking/timestamping.txt for details.
 */
struct scm_timestamping {
struct timespec ts[3];
};
/* The type of scm_timestamping, passed in sock_extended_err ee_info.
 * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0]
 * is zero, then this is a hardware timestamp and recorded in ts[2].
 *
 * SCM_TSTAMP_SND is the first enumerator and therefore has the value 0,
 * which matches the zeroed ee_info that legacy send timestamps carried.
 * Do not reorder these values.
 */
enum {
SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */
SCM_TSTAMP_SCHED, /* data entered the packet scheduler */
SCM_TSTAMP_ACK, /* data acknowledged by peer */
};
#endif /* _UAPI_LINUX_ERRQUEUE_H */ #endif /* _UAPI_LINUX_ERRQUEUE_H */
...@@ -20,9 +20,13 @@ enum { ...@@ -20,9 +20,13 @@ enum {
SOF_TIMESTAMPING_SOFTWARE = (1<<4), SOF_TIMESTAMPING_SOFTWARE = (1<<4),
SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
SOF_TIMESTAMPING_MASK = SOF_TIMESTAMPING_OPT_ID = (1<<7),
(SOF_TIMESTAMPING_RAW_HARDWARE - 1) | SOF_TIMESTAMPING_TX_SCHED = (1<<8),
SOF_TIMESTAMPING_RAW_HARDWARE SOF_TIMESTAMPING_TX_ACK = (1<<9),
SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
}; };
/** /**
......
...@@ -132,6 +132,7 @@ ...@@ -132,6 +132,7 @@
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/if_macvlan.h> #include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include "net-sysfs.h" #include "net-sysfs.h"
...@@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) ...@@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
/* Disable soft irqs for various locks below. Also /* Disable soft irqs for various locks below. Also
* stops preemption for RCU. * stops preemption for RCU.
*/ */
......
...@@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) ...@@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
} }
EXPORT_SYMBOL(sock_queue_err_skb); EXPORT_SYMBOL(sock_queue_err_skb);
void skb_tstamp_tx(struct sk_buff *orig_skb, void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps) struct skb_shared_hwtstamps *hwtstamps,
struct sock *sk, int tstype)
{ {
struct sock *sk = orig_skb->sk;
struct sock_exterr_skb *serr; struct sock_exterr_skb *serr;
struct sk_buff *skb; struct sk_buff *skb;
int err; int err;
...@@ -3521,12 +3521,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, ...@@ -3521,12 +3521,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
memset(serr, 0, sizeof(*serr)); memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG; serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP)
serr->ee.ee_data -= sk->sk_tskey;
}
err = sock_queue_err_skb(sk, skb); err = sock_queue_err_skb(sk, skb);
if (err) if (err)
kfree_skb(skb); kfree_skb(skb);
} }
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
/**
 * skb_tstamp_tx - queue a send (SCM_TSTAMP_SND) timestamp report for an skb
 * @orig_skb: the original outgoing packet
 * @hwtstamps: forwarded unchanged to __skb_tstamp_tx()
 *
 * Thin wrapper around __skb_tstamp_tx() that reports against the socket
 * attached to @orig_skb and tags the report as SCM_TSTAMP_SND.
 */
void skb_tstamp_tx(struct sk_buff *orig_skb,
		   struct skb_shared_hwtstamps *hwtstamps)
{
	/* Plain call, not "return <expr>;": ISO C does not allow a return
	 * statement with an expression in a function returning void.
	 */
	__skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND);
}
EXPORT_SYMBOL_GPL(skb_tstamp_tx); EXPORT_SYMBOL_GPL(skb_tstamp_tx);
void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
......
...@@ -848,22 +848,25 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ...@@ -848,22 +848,25 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, if (val & SOF_TIMESTAMPING_OPT_ID &&
val & SOF_TIMESTAMPING_TX_HARDWARE); !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, if (sk->sk_protocol == IPPROTO_TCP) {
val & SOF_TIMESTAMPING_TX_SOFTWARE); if (sk->sk_state != TCP_ESTABLISHED) {
sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, ret = -EINVAL;
val & SOF_TIMESTAMPING_RX_HARDWARE); break;
}
sk->sk_tskey = tcp_sk(sk)->snd_una;
} else {
sk->sk_tskey = 0;
}
}
sk->sk_tsflags = val;
if (val & SOF_TIMESTAMPING_RX_SOFTWARE) if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk, sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE); SOCK_TIMESTAMPING_RX_SOFTWARE);
else else
sock_disable_timestamp(sk, sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
val & SOF_TIMESTAMPING_SOFTWARE);
sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
val & SOF_TIMESTAMPING_RAW_HARDWARE);
break; break;
case SO_RCVLOWAT: case SO_RCVLOWAT:
...@@ -1089,19 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1089,19 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break; break;
case SO_TIMESTAMPING: case SO_TIMESTAMPING:
v.val = 0; v.val = sk->sk_tsflags;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
v.val |= SOF_TIMESTAMPING_SOFTWARE;
if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
break; break;
case SO_RCVTIMEO: case SO_RCVTIMEO:
......
...@@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk, ...@@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize; unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE; int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst; struct rtable *rt = (struct rtable *)cork->dst;
u32 tskey = 0;
skb = skb_peek_tail(queue); skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0; exthdrlen = !skb ? rt->dst.header_len : 0;
mtu = cork->fragsize; mtu = cork->fragsize;
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
hh_len = LL_RESERVED_SPACE(rt->dst.dev); hh_len = LL_RESERVED_SPACE(rt->dst.dev);
...@@ -976,6 +980,8 @@ static int __ip_append_data(struct sock *sk, ...@@ -976,6 +980,8 @@ static int __ip_append_data(struct sock *sk,
/* only the initial fragment is time stamped */ /* only the initial fragment is time stamped */
skb_shinfo(skb)->tx_flags = cork->tx_flags; skb_shinfo(skb)->tx_flags = cork->tx_flags;
cork->tx_flags = 0; cork->tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
/* /*
* Find where to start putting bytes. * Find where to start putting bytes.
......
...@@ -426,6 +426,15 @@ void tcp_init_sock(struct sock *sk) ...@@ -426,6 +426,15 @@ void tcp_init_sock(struct sock *sk)
} }
EXPORT_SYMBOL(tcp_init_sock); EXPORT_SYMBOL(tcp_init_sock);
/*
 * Fill in the tx timestamping flags of @skb from the settings of @sk.
 * If any software timestamp ends up requested, also store the sequence
 * number of the last byte currently held in @skb as the timestamp key.
 */
void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
{
	struct skb_shared_info *si = skb_shinfo(skb);

	sock_tx_timestamp(sk, &si->tx_flags);

	/* Nothing more to do unless a software timestamp was requested. */
	if (!(si->tx_flags & SKBTX_ANY_SW_TSTAMP))
		return;

	si->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
}
/* /*
* Wait for a TCP event. * Wait for a TCP event.
* *
...@@ -523,7 +532,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) ...@@ -523,7 +532,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
} }
/* This barrier is coupled with smp_wmb() in tcp_reset() */ /* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb(); smp_rmb();
if (sk->sk_err) if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR; mask |= POLLERR;
return mask; return mask;
...@@ -959,8 +968,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, ...@@ -959,8 +968,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
copied += copy; copied += copy;
offset += copy; offset += copy;
if (!(size -= copy)) if (!(size -= copy)) {
tcp_tx_timestamp(sk, skb);
goto out; goto out;
}
if (skb->len < size_goal || (flags & MSG_OOB)) if (skb->len < size_goal || (flags & MSG_OOB))
continue; continue;
...@@ -1252,8 +1263,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1252,8 +1263,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
from += copy; from += copy;
copied += copy; copied += copy;
if ((seglen -= copy) == 0 && iovlen == 0) if ((seglen -= copy) == 0 && iovlen == 0) {
tcp_tx_timestamp(sk, skb);
goto out; goto out;
}
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
continue; continue;
...@@ -1617,6 +1630,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1617,6 +1630,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct sk_buff *skb; struct sk_buff *skb;
u32 urg_hole = 0; u32 urg_hole = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return ip_recv_error(sk, msg, len, addr_len);
if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED)) (sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock); sk_busy_loop(sk, nonblock);
......
...@@ -74,6 +74,7 @@ ...@@ -74,6 +74,7 @@
#include <linux/ipsec.h> #include <linux/ipsec.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <net/netdma.h> #include <net/netdma.h>
#include <linux/errqueue.h>
int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1;
...@@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ...@@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->retrans_stamp = 0; tp->retrans_stamp = 0;
} }
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) &&
between(skb_shinfo(skb)->tskey, prior_snd_una,
tp->snd_una + 1))
__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
if (!fully_acked) if (!fully_acked)
break; break;
......
...@@ -14,6 +14,21 @@ ...@@ -14,6 +14,21 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/protocol.h> #include <net/protocol.h>
/*
 * Tag the one segment of a segment list that carries the byte with
 * sequence number @ts_seq for software tx timestamping.
 *
 * @skb:    head of the segment list, walked through ->next
 * @ts_seq: sequence number of the byte to timestamp; stored as the tskey
 * @seq:    sequence number of the first byte of the first segment
 * @mss:    amount the sequence number advances from one segment to the next
 *
 * If no segment matches, the list is left untouched.
 */
void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq,
		    unsigned int mss)
{
	while (skb) {
		/* TCP sequence numbers wrap at 2^32, so compare with the
		 * wrap-safe before() helper instead of a plain '<'.
		 */
		if (before(ts_seq, seq + mss)) {
			/* OR the bit in so that any other tx_flags already
			 * present on this segment are preserved.
			 */
			skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
			skb_shinfo(skb)->tskey = ts_seq;
			return;
		}

		skb = skb->next;
		seq += mss;
	}
}
struct sk_buff *tcp_gso_segment(struct sk_buff *skb, struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features) netdev_features_t features)
{ {
...@@ -91,6 +106,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, ...@@ -91,6 +106,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th = tcp_hdr(skb); th = tcp_hdr(skb);
seq = ntohl(th->seq); seq = ntohl(th->seq);
if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta)); (__force u32)delta));
......
...@@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
int err; int err;
int offset = 0; int offset = 0;
__u8 tx_flags = 0; __u8 tx_flags = 0;
u32 tskey = 0;
if (flags&MSG_PROBE) if (flags&MSG_PROBE)
return 0; return 0;
...@@ -1272,8 +1273,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1272,8 +1273,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
} }
} }
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags); sock_tx_timestamp(sk, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
}
/* /*
* Let's try using as much space as possible. * Let's try using as much space as possible.
...@@ -1397,6 +1402,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, ...@@ -1397,6 +1402,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
/* Only the initial fragment is time stamped */ /* Only the initial fragment is time stamped */
skb_shinfo(skb)->tx_flags = tx_flags; skb_shinfo(skb)->tx_flags = tx_flags;
tx_flags = 0; tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
/* /*
* Find where to start putting bytes * Find where to start putting bytes
......
...@@ -106,6 +106,7 @@ ...@@ -106,6 +106,7 @@
#include <linux/sockios.h> #include <linux/sockios.h>
#include <linux/atalk.h> #include <linux/atalk.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <linux/errqueue.h>
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_read __read_mostly;
...@@ -612,10 +613,15 @@ EXPORT_SYMBOL(sock_release); ...@@ -612,10 +613,15 @@ EXPORT_SYMBOL(sock_release);
void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
{ {
*tx_flags = 0; *tx_flags = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
*tx_flags |= SKBTX_HW_TSTAMP; *tx_flags |= SKBTX_HW_TSTAMP;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
*tx_flags |= SKBTX_SW_TSTAMP; *tx_flags |= SKBTX_SW_TSTAMP;
if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
*tx_flags |= SKBTX_SCHED_TSTAMP;
if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
*tx_flags |= SKBTX_ACK_TSTAMP;
if (sock_flag(sk, SOCK_WIFI_STATUS)) if (sock_flag(sk, SOCK_WIFI_STATUS))
*tx_flags |= SKBTX_WIFI_STATUS; *tx_flags |= SKBTX_WIFI_STATUS;
} }
...@@ -697,7 +703,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, ...@@ -697,7 +703,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb) struct sk_buff *skb)
{ {
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
struct timespec ts[3]; struct scm_timestamping tss;
int empty = 1; int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps = struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb); skb_hwtstamps(skb);
...@@ -714,24 +720,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, ...@@ -714,24 +720,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
sizeof(tv), &tv); sizeof(tv), &tv);
} else { } else {
skb_get_timestampns(skb, &ts[0]); struct timespec ts;
skb_get_timestampns(skb, &ts);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
sizeof(ts[0]), &ts[0]); sizeof(ts), &ts);
} }
} }
memset(&tss, 0, sizeof(tss));
memset(ts, 0, sizeof(ts)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE ||
if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) &&
ktime_to_timespec_cond(skb->tstamp, ts + 0)) ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
empty = 0; empty = 0;
if (shhwtstamps && if (shhwtstamps &&
sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
empty = 0; empty = 0;
if (!empty) if (!empty)
put_cmsg(msg, SOL_SOCKET, put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(ts), &ts); SCM_TIMESTAMPING, sizeof(tss), &tss);
} }
EXPORT_SYMBOL_GPL(__sock_recv_timestamp); EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment