Commit 1693c5db, authored by Abhishek Chauhan, committed by Martin KaFai Lau

net: Add additional bit to support clockid_t timestamp type

tstamp_type is now set based on actual clockid_t compressed
into 2 bits.

To make the design scalable for future needs, this commit extends
tstamp_type:1 to tstamp_type:2 so that other clockid_t timestamp
types can be supported.

With this commit, tstamp_type supports CLOCK_TAI in addition to the
existing CLOCK_MONOTONIC and CLOCK_REALTIME.
Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parent 4d25ca2d
...@@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; ...@@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t;
enum skb_tstamp_type { enum skb_tstamp_type {
SKB_CLOCK_REALTIME, SKB_CLOCK_REALTIME,
SKB_CLOCK_MONOTONIC, SKB_CLOCK_MONOTONIC,
SKB_CLOCK_TAI,
__SKB_CLOCK_MAX = SKB_CLOCK_TAI,
}; };
/** /**
...@@ -957,7 +959,7 @@ struct sk_buff { ...@@ -957,7 +959,7 @@ struct sk_buff {
/* private: */ /* private: */
__u8 __mono_tc_offset[0]; __u8 __mono_tc_offset[0];
/* public: */ /* public: */
__u8 tstamp_type:1; /* See skb_tstamp_type */ __u8 tstamp_type:2; /* See skb_tstamp_type */
#ifdef CONFIG_NET_XGRESS #ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1; __u8 tc_skip_classify:1;
...@@ -1087,15 +1089,16 @@ struct sk_buff { ...@@ -1087,15 +1089,16 @@ struct sk_buff {
#endif #endif
#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset)
/* if you move tc_at_ingress or mono_delivery_time /* if you move tc_at_ingress or tstamp_type
* around, you also must adapt these constants. * around, you also must adapt these constants.
*/ */
#ifdef __BIG_ENDIAN_BITFIELD #ifdef __BIG_ENDIAN_BITFIELD
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) #define SKB_TSTAMP_TYPE_MASK (3 << 6)
#define TC_AT_INGRESS_MASK (1 << 6) #define SKB_TSTAMP_TYPE_RSHIFT (6)
#define TC_AT_INGRESS_MASK (1 << 5)
#else #else
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) #define SKB_TSTAMP_TYPE_MASK (3)
#define TC_AT_INGRESS_MASK (1 << 1) #define TC_AT_INGRESS_MASK (1 << 2)
#endif #endif
#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
...@@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, ...@@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
case CLOCK_MONOTONIC: case CLOCK_MONOTONIC:
tstamp_type = SKB_CLOCK_MONOTONIC; tstamp_type = SKB_CLOCK_MONOTONIC;
break; break;
case CLOCK_TAI:
tstamp_type = SKB_CLOCK_TAI;
break;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
kt = 0; kt = 0;
......
...@@ -6207,12 +6207,17 @@ union { \ ...@@ -6207,12 +6207,17 @@ union { \
__u64 :64; \ __u64 :64; \
} __attribute__((aligned(8))) } __attribute__((aligned(8)))
/* The enum used in skb->tstamp_type. It specifies the clock type
* of the time stored in the skb->tstamp.
*/
enum { enum {
BPF_SKB_TSTAMP_UNSPEC, BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, BPF_SKB_CLOCK_REALTIME = 0,
* the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC BPF_SKB_CLOCK_MONOTONIC = 1,
* and try to deduce it by ingress, egress or skb->sk->sk_clockid. BPF_SKB_CLOCK_TAI = 2,
/* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
* the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
*/ */
}; };
......
...@@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, ...@@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
return -EOPNOTSUPP; return -EOPNOTSUPP;
switch (tstamp_type) { switch (tstamp_type) {
case BPF_SKB_TSTAMP_DELIVERY_MONO: case BPF_SKB_CLOCK_REALTIME:
skb->tstamp = tstamp;
skb->tstamp_type = SKB_CLOCK_REALTIME;
break;
case BPF_SKB_CLOCK_MONOTONIC:
if (!tstamp) if (!tstamp)
return -EINVAL; return -EINVAL;
skb->tstamp = tstamp; skb->tstamp = tstamp;
skb->tstamp_type = SKB_CLOCK_MONOTONIC; skb->tstamp_type = SKB_CLOCK_MONOTONIC;
break; break;
case BPF_SKB_TSTAMP_UNSPEC: case BPF_SKB_CLOCK_TAI:
if (tstamp) if (!tstamp)
return -EINVAL; return -EINVAL;
skb->tstamp = 0; skb->tstamp = tstamp;
skb->tstamp_type = SKB_CLOCK_REALTIME; skb->tstamp_type = SKB_CLOCK_TAI;
break; break;
default: default:
return -EINVAL; return -EINVAL;
...@@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, ...@@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
{ {
__u8 value_reg = si->dst_reg; __u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg; __u8 skb_reg = si->src_reg;
/* AX is needed because src_reg and dst_reg could be the same */ BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI);
__u8 tmp_reg = BPF_REG_AX; BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME);
BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC);
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI);
SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
SKB_MONO_DELIVERY_TIME_MASK, 2); #ifdef __BIG_ENDIAN_BITFIELD
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT);
*insn++ = BPF_JMP_A(1); #else
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
#endif
return insn; return insn;
} }
...@@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, ...@@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 tmp_reg = BPF_REG_AX; __u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, /* check if ingress mask bits is set */
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, *insn++ = BPF_JMP_A(4);
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1);
*insn++ = BPF_JMP_A(2);
/* skb->tc_at_ingress && skb->tstamp_type, /* skb->tc_at_ingress && skb->tstamp_type,
* read 0 as the (rcv) timestamp. * read 0 as the (rcv) timestamp.
*/ */
...@@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, ...@@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
/* goto <store> */ /* goto <store> */
*insn++ = BPF_JMP_A(2); *insn++ = BPF_JMP_A(2);
/* <clear>: skb->tstamp_type */ /* <clear>: skb->tstamp_type */
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
} }
#endif #endif
......
...@@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ...@@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark; skb->mark = cork->mark;
skb->tstamp = cork->transmit_time; if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
else
skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid);
/* /*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount * on dst refcount
......
...@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, ...@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->protocol = htons(ETH_P_IP); skb->protocol = htons(ETH_P_IP);
skb->priority = READ_ONCE(sk->sk_priority); skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark; skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_dst_set(skb, &rt->dst); skb_dst_set(skb, &rt->dst);
*rtp = NULL; *rtp = NULL;
......
...@@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void) ...@@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void)
*/ */
inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
sk->sk_clockid = CLOCK_MONOTONIC;
per_cpu(ipv4_tcp_sk, cpu) = sk; per_cpu(ipv4_tcp_sk, cpu) = sk;
} }
if (register_pernet_subsys(&tcp_sk_ops)) if (register_pernet_subsys(&tcp_sk_ops))
......
...@@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ...@@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = READ_ONCE(sk->sk_priority); skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark; skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time; if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
else
skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
ip6_cork_steal_dst(skb, cork); ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
......
...@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, ...@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
skb->priority = READ_ONCE(sk->sk_priority); skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark; skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_put(skb, length); skb_put(skb, length);
skb_reset_network_header(skb); skb_reset_network_header(skb);
......
...@@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = { ...@@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = {
static int __net_init tcpv6_net_init(struct net *net) static int __net_init tcpv6_net_init(struct net *net)
{ {
return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, int res;
res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
SOCK_RAW, IPPROTO_TCP, net); SOCK_RAW, IPPROTO_TCP, net);
if (!res)
net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
return res;
} }
static void __net_exit tcpv6_net_exit(struct net *net) static void __net_exit tcpv6_net_exit(struct net *net)
......
...@@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, ...@@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
skb->dev = dev; skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority); skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark); skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
skb_setup_tx_timestamp(skb, sockc.tsflags); skb_setup_tx_timestamp(skb, sockc.tsflags);
if (unlikely(extra_len == 4)) if (unlikely(extra_len == 4))
...@@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, ...@@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev; skb->dev = dev;
skb->priority = READ_ONCE(po->sk.sk_priority); skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark); skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
skb_setup_tx_timestamp(skb, sockc->tsflags); skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw); skb_zcopy_set_nouarg(skb, ph.raw);
...@@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) ...@@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev; skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority); skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark; skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
if (unlikely(extra_len == 4)) if (unlikely(extra_len == 4))
skb->no_fcs = 1; skb->no_fcs = 1;
......
...@@ -6207,12 +6207,17 @@ union { \ ...@@ -6207,12 +6207,17 @@ union { \
__u64 :64; \ __u64 :64; \
} __attribute__((aligned(8))) } __attribute__((aligned(8)))
/* The enum used in skb->tstamp_type. It specifies the clock type
* of the time stored in the skb->tstamp.
*/
enum { enum {
BPF_SKB_TSTAMP_UNSPEC, BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, BPF_SKB_CLOCK_REALTIME = 0,
* the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC BPF_SKB_CLOCK_MONOTONIC = 1,
* and try to deduce it by ingress, egress or skb->sk->sk_clockid. BPF_SKB_CLOCK_TAI = 2,
/* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
* the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
*/ */
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment