Commit 60695896 authored by Daniel Borkmann

Merge branch 'bpf-tstamp-follow-ups'

Martin KaFai Lau says:

====================
This set is a follow-up on the bpf side, based on the discussion in [0].

Patch 1 is to remove some skbuff macros that are used in bpf filter.c.

Patches 2 and 3 are to simplify the bpf insn rewrite on __sk_buff->tstamp.

Patch 4 is to simplify the bpf uapi by modeling the __sk_buff->tstamp
and __sk_buff->tstamp_type (previously delivery_time_type) the same way as
their kernel counterparts skb->tstamp and skb->mono_delivery_time.

Patch 5 is to adjust the bpf selftests due to changes in patch 4.

  [0]: https://lore.kernel.org/bpf/419d994e-ff61-7c11-0ec7-11fefcb0186e@iogearbox.net/
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 743bec1b 3daf0896
@@ -573,7 +573,7 @@ struct bpf_prog {
 				enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
 				call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
 				call_get_func_ip:1, /* Do we call get_func_ip() */
-				delivery_time_access:1; /* Accessed __sk_buff->delivery_time_type */
+				tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
...
@@ -960,10 +960,10 @@ struct sk_buff {
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			dst_pending_confirm:1;
-	__u8			mono_delivery_time:1;
+	__u8			mono_delivery_time:1;	/* See SKB_MONO_DELIVERY_TIME_MASK */
 #ifdef CONFIG_NET_CLS_ACT
 	__u8			tc_skip_classify:1;
-	__u8			tc_at_ingress:1;
+	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 #endif
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
@@ -1062,7 +1062,9 @@ struct sk_buff {
 #endif
 #define PKT_TYPE_OFFSET		offsetof(struct sk_buff, __pkt_type_offset)
-/* if you move pkt_vlan_present around you also must adapt these constants */
+/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time
+ * around, you also must adapt these constants.
+ */
 #ifdef __BIG_ENDIAN_BITFIELD
 #define PKT_VLAN_PRESENT_BIT	7
 #define TC_AT_INGRESS_MASK		(1 << 0)
@@ -1073,8 +1075,6 @@ struct sk_buff {
 #define SKB_MONO_DELIVERY_TIME_MASK	(1 << 5)
 #endif
 #define PKT_VLAN_PRESENT_OFFSET	offsetof(struct sk_buff, __pkt_vlan_present_offset)
-#define TC_AT_INGRESS_OFFSET	offsetof(struct sk_buff, __pkt_vlan_present_offset)
-#define SKB_MONO_DELIVERY_TIME_OFFSET	offsetof(struct sk_buff, __pkt_vlan_present_offset)
 #ifdef __KERNEL__
 /*
...
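The two defines removed above were both offsetof(struct sk_buff, __pkt_vlan_present_offset), i.e. tc_at_ingress and mono_delivery_time already sit in the same byte as pkt_vlan_present, so PKT_VLAN_PRESENT_OFFSET plus the per-bit masks is enough for the bpf insn rewrite. A stand-alone sketch (fake struct and illustrative mask values, not kernel code) of the one-shared-offset idea:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for the sk_buff byte that carries the flag bits; the mask values
 * below are illustrative only (the real ones depend on bit-endianness).
 */
struct fake_skb {
	unsigned char __pkt_vlan_present_offset;
	unsigned long long tstamp;
};

#define PKT_VLAN_PRESENT_OFFSET		offsetof(struct fake_skb, __pkt_vlan_present_offset)
#define TC_AT_INGRESS_MASK		(1 << 6)
#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 5)

int main(void)
{
	struct fake_skb skb = { .__pkt_vlan_present_offset = SKB_MONO_DELIVERY_TIME_MASK };
	unsigned char flags = *((unsigned char *)&skb + PKT_VLAN_PRESENT_OFFSET);

	/* One load of the shared byte is enough to test either flag. */
	printf("mono_delivery_time=%d tc_at_ingress=%d\n",
	       !!(flags & SKB_MONO_DELIVERY_TIME_MASK),
	       !!(flags & TC_AT_INGRESS_MASK));
	return 0;
}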
@@ -5090,23 +5090,22 @@ union bpf_attr {
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
  *
- * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type)
+ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type)
  *	Description
- *		Set a *dtime* (delivery time) to the __sk_buff->tstamp and also
- *		change the __sk_buff->delivery_time_type to *dtime_type*.
+ *		Change the __sk_buff->tstamp_type to *tstamp_type*
+ *		and set *tstamp* to the __sk_buff->tstamp together.
  *
- *		When setting a delivery time (non zero *dtime*) to
- *		__sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type*
- *		is supported. It is the only delivery_time_type that will be
- *		kept after bpf_redirect_*().
- *
- *		If there is no need to change the __sk_buff->delivery_time_type,
- *		the delivery time can be directly written to __sk_buff->tstamp
+ *		If there is no need to change the __sk_buff->tstamp_type,
+ *		the tstamp value can be directly written to __sk_buff->tstamp
  *		instead.
  *
- *		*dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE
- *		can be used to clear any delivery time stored in
- *		__sk_buff->tstamp.
+ *		BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ *		will be kept during bpf_redirect_*(). A non zero
+ *		*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ *		*tstamp_type*.
+ *
+ *		A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ *		with a zero *tstamp*.
  *
  *		Only IPv4 and IPv6 skb->protocol are supported.
  *
@@ -5119,7 +5118,7 @@ union bpf_attr {
  *	Return
  *		0 on success.
  *		**-EINVAL** for invalid input
- *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *		**-EOPNOTSUPP** for unsupported protocol
  */
 #define __BPF_FUNC_MAPPER(FN)	\
 	FN(unspec),		\
@@ -5314,7 +5313,7 @@ union bpf_attr {
 	FN(xdp_load_bytes),	\
 	FN(xdp_store_bytes),	\
 	FN(copy_from_user_task),	\
-	FN(skb_set_delivery_time),	\
+	FN(skb_set_tstamp),	\
 	/* */
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5505,9 +5504,12 @@ union { \
 } __attribute__((aligned(8)))
 enum {
-	BPF_SKB_DELIVERY_TIME_NONE,
-	BPF_SKB_DELIVERY_TIME_UNSPEC,
-	BPF_SKB_DELIVERY_TIME_MONO,
+	BPF_SKB_TSTAMP_UNSPEC,
+	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
+	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
+	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
+	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+	 */
 };
 /* user accessible mirror of in-kernel sk_buff.
@@ -5550,7 +5552,7 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
-	__u8  delivery_time_type;
+	__u8  tstamp_type;
 	__u32 :24;	/* Padding, future use. */
 	__u64 hwtstamp;
 };
...
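For illustration only (not part of this set): a minimal tc-bpf program using the renamed uapi, assuming a libbpf/bpf.h new enough to carry BPF_SKB_TSTAMP_*, the bpf_skb_set_tstamp() helper, and the SEC("tc") section name:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int set_mono_tstamp(struct __sk_buff *skb)
{
	/* bpf_ktime_get_ns() is CLOCK_MONOTONIC, so it pairs naturally
	 * with BPF_SKB_TSTAMP_DELIVERY_MONO.
	 */
	__u64 now = bpf_ktime_get_ns();

	/* Any tstamp_type this prog does not recognize is treated like
	 * BPF_SKB_TSTAMP_UNSPEC, as the new enum comment suggests.
	 */
	if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
	    bpf_skb_set_tstamp(skb, now, BPF_SKB_TSTAMP_DELIVERY_MONO) < 0)
		/* e.g. non IPv4/IPv6 skb->protocol: helper returns -EOPNOTSUPP */
		bpf_printk("bpf_skb_set_tstamp failed");

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";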
@@ -7388,36 +7388,36 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
-BPF_CALL_3(bpf_skb_set_delivery_time, struct sk_buff *, skb,
-	   u64, dtime, u32, dtime_type)
+BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
+	   u64, tstamp, u32, tstamp_type)
 {
 	/* skb_clear_delivery_time() is done for inet protocol */
 	if (skb->protocol != htons(ETH_P_IP) &&
 	    skb->protocol != htons(ETH_P_IPV6))
 		return -EOPNOTSUPP;
-	switch (dtime_type) {
-	case BPF_SKB_DELIVERY_TIME_MONO:
-		if (!dtime)
+	switch (tstamp_type) {
+	case BPF_SKB_TSTAMP_DELIVERY_MONO:
+		if (!tstamp)
			return -EINVAL;
-		skb->tstamp = dtime;
+		skb->tstamp = tstamp;
 		skb->mono_delivery_time = 1;
 		break;
-	case BPF_SKB_DELIVERY_TIME_NONE:
-		if (dtime)
+	case BPF_SKB_TSTAMP_UNSPEC:
+		if (tstamp)
			return -EINVAL;
 		skb->tstamp = 0;
 		skb->mono_delivery_time = 0;
 		break;
 	default:
-		return -EOPNOTSUPP;
+		return -EINVAL;
 	}
 	return 0;
 }
-static const struct bpf_func_proto bpf_skb_set_delivery_time_proto = {
-	.func		= bpf_skb_set_delivery_time,
+static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
+	.func		= bpf_skb_set_tstamp,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
@@ -7786,8 +7786,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_gen_syncookie_proto;
 	case BPF_FUNC_sk_assign:
 		return &bpf_sk_assign_proto;
-	case BPF_FUNC_skb_set_delivery_time:
-		return &bpf_skb_set_delivery_time_proto;
+	case BPF_FUNC_skb_set_tstamp:
+		return &bpf_skb_set_tstamp_proto;
 #endif
 	default:
 		return bpf_sk_base_func_proto(func_id);
@@ -8127,9 +8127,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 			return false;
 		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
 		break;
-	case offsetof(struct __sk_buff, delivery_time_type):
+	case offsetof(struct __sk_buff, tstamp_type):
 		return false;
-	case offsetofend(struct __sk_buff, delivery_time_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+	case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
 		/* Explicitly prohibit access to padding in __sk_buff. */
 		return false;
 	default:
@@ -8484,14 +8484,14 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		break;
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
-	case offsetof(struct __sk_buff, delivery_time_type):
+	case offsetof(struct __sk_buff, tstamp_type):
 		/* The convert_ctx_access() on reading and writing
 		 * __sk_buff->tstamp depends on whether the bpf prog
-		 * has used __sk_buff->delivery_time_type or not.
-		 * Thus, we need to set prog->delivery_time_access
+		 * has used __sk_buff->tstamp_type or not.
+		 * Thus, we need to set prog->tstamp_type_access
		 * earlier during is_valid_access() here.
		 */
-		((struct bpf_prog *)prog)->delivery_time_access = 1;
+		((struct bpf_prog *)prog)->tstamp_type_access = 1;
 		return size == sizeof(__u8);
 	}
@@ -8888,42 +8888,22 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
-static struct bpf_insn *bpf_convert_dtime_type_read(const struct bpf_insn *si,
-						    struct bpf_insn *insn)
+static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
+						     struct bpf_insn *insn)
 {
 	__u8 value_reg = si->dst_reg;
 	__u8 skb_reg = si->src_reg;
+	/* AX is needed because src_reg and dst_reg could be the same */
 	__u8 tmp_reg = BPF_REG_AX;
 	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
-			      SKB_MONO_DELIVERY_TIME_OFFSET);
-	*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
-				SKB_MONO_DELIVERY_TIME_MASK);
-	*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
-	/* value_reg = BPF_SKB_DELIVERY_TIME_MONO */
-	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_MONO);
-	*insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 10 : 5);
-	*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, skb_reg,
-			      offsetof(struct sk_buff, tstamp));
-	*insn++ = BPF_JMP_IMM(BPF_JNE, tmp_reg, 0, 2);
-	/* value_reg = BPF_SKB_DELIVERY_TIME_NONE */
-	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_NONE);
-	*insn++ = BPF_JMP_A(IS_ENABLED(CONFIG_NET_CLS_ACT) ? 6 : 1);
-#ifdef CONFIG_NET_CLS_ACT
-	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
-	*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
-	*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
-	/* At ingress, value_reg = 0 */
-	*insn++ = BPF_MOV32_IMM(value_reg, 0);
+			      PKT_VLAN_PRESENT_OFFSET);
+	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+				SKB_MONO_DELIVERY_TIME_MASK, 2);
+	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
 	*insn++ = BPF_JMP_A(1);
-#endif
-	/* value_reg = BPF_SKB_DELIVERYT_TIME_UNSPEC */
-	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_DELIVERY_TIME_UNSPEC);
-	/* 15 insns with CONFIG_NET_CLS_ACT */
+	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
 	return insn;
 }
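A sketch (plain C with an illustrative mask value, not kernel code) of what the new five-instruction rewrite for reading __sk_buff->tstamp_type computes, where flags is the byte loaded from PKT_VLAN_PRESENT_OFFSET:

#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 5)	/* illustrative; arch/endian dependent */

enum { BPF_SKB_TSTAMP_UNSPEC, BPF_SKB_TSTAMP_DELIVERY_MONO };

static inline int tstamp_type_from_flags(unsigned char flags)
{
	/* mono_delivery_time set -> DELIVERY_MONO, otherwise UNSPEC */
	return (flags & SKB_MONO_DELIVERY_TIME_MASK) ?
		BPF_SKB_TSTAMP_DELIVERY_MONO : BPF_SKB_TSTAMP_UNSPEC;
}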
@@ -8956,21 +8936,22 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
 	__u8 skb_reg = si->src_reg;
 #ifdef CONFIG_NET_CLS_ACT
-	if (!prog->delivery_time_access) {
+	/* If the tstamp_type is read,
+	 * the bpf prog is aware the tstamp could have delivery time.
+	 * Thus, read skb->tstamp as is if tstamp_type_access is true.
+	 */
+	if (!prog->tstamp_type_access) {
+		/* AX is needed because src_reg and dst_reg could be the same */
 		__u8 tmp_reg = BPF_REG_AX;
-		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
-		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 5);
-		/* @ingress, read __sk_buff->tstamp as the (rcv) timestamp,
-		 * so check the skb->mono_delivery_time.
-		 */
-		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
-				      SKB_MONO_DELIVERY_TIME_OFFSET);
+		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
-					SKB_MONO_DELIVERY_TIME_MASK);
-		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 2);
-		/* skb->mono_delivery_time is set, read 0 as the (rcv) timestamp. */
+					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
+		*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
+					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+		/* skb->tc_at_ingress && skb->mono_delivery_time,
+		 * read 0 as the (rcv) timestamp.
+		 */
 		*insn++ = BPF_MOV64_IMM(value_reg, 0);
 		*insn++ = BPF_JMP_A(1);
 	}
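Likewise, a sketch of what the read rewrite above does for a prog that never touched tstamp_type: both flag bits are now tested with a single AND against the shared byte instead of two separate loads (mask values illustrative):

#ifndef TC_AT_INGRESS_MASK
#define TC_AT_INGRESS_MASK		(1 << 6)	/* illustrative only */
#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 5)	/* illustrative only */
#endif

static inline unsigned long long
tstamp_read_emulated(unsigned char flags, unsigned long long skb_tstamp)
{
	/* at ingress and tstamp holds a mono delivery time: hide it, read 0 */
	if ((flags & (TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK)) ==
	    (TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK))
		return 0;
	return skb_tstamp;
}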
@@ -8989,25 +8970,27 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
 	__u8 skb_reg = si->dst_reg;
 #ifdef CONFIG_NET_CLS_ACT
-	if (!prog->delivery_time_access) {
+	/* If the tstamp_type is read,
+	 * the bpf prog is aware the tstamp could have delivery time.
+	 * Thus, write skb->tstamp as is if tstamp_type_access is true.
+	 * Otherwise, writing at ingress will have to clear the
+	 * mono_delivery_time bit also.
+	 */
+	if (!prog->tstamp_type_access) {
 		__u8 tmp_reg = BPF_REG_AX;
-		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, TC_AT_INGRESS_OFFSET);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK);
-		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0, 3);
-		/* Writing __sk_buff->tstamp at ingress as the (rcv) timestamp.
-		 * Clear the skb->mono_delivery_time.
-		 */
-		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
-				      SKB_MONO_DELIVERY_TIME_OFFSET);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
-					~SKB_MONO_DELIVERY_TIME_MASK);
-		*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg,
-				      SKB_MONO_DELIVERY_TIME_OFFSET);
+		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+		/* Writing __sk_buff->tstamp as ingress, goto <clear> */
+		*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
+		/* goto <store> */
+		*insn++ = BPF_JMP_A(2);
+		/* <clear>: mono_delivery_time */
+		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+		*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
 	}
 #endif
-	/* skb->tstamp = tstamp */
+	/* <store>: skb->tstamp = tstamp */
 	*insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
 			      offsetof(struct sk_buff, tstamp));
 	return insn;
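And a sketch of the write rewrite above for the same case: an ingress write clears the mono_delivery_time bit in the shared flags byte before the tstamp store, otherwise the byte is left untouched (mask values illustrative):

#ifndef TC_AT_INGRESS_MASK
#define TC_AT_INGRESS_MASK		(1 << 6)	/* illustrative only */
#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 5)	/* illustrative only */
#endif

static inline void
tstamp_write_emulated(unsigned char *flags, unsigned long long *skb_tstamp,
		      unsigned long long new_tstamp)
{
	if (*flags & TC_AT_INGRESS_MASK)		/* <clear> */
		*flags &= ~SKB_MONO_DELIVERY_TIME_MASK;
	*skb_tstamp = new_tstamp;			/* <store> */
}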
@@ -9326,8 +9309,8 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		insn = bpf_convert_tstamp_read(prog, si, insn);
 		break;
-	case offsetof(struct __sk_buff, delivery_time_type):
-		insn = bpf_convert_dtime_type_read(si, insn);
+	case offsetof(struct __sk_buff, tstamp_type):
+		insn = bpf_convert_tstamp_type_read(si, insn);
 		break;
 	case offsetof(struct __sk_buff, gso_segs):
...
@@ -5090,23 +5090,22 @@ union bpf_attr {
  *		0 on success, or a negative error in case of failure. On error
  *		*dst* buffer is zeroed out.
  *
- * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type)
+ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type)
  *	Description
- *		Set a *dtime* (delivery time) to the __sk_buff->tstamp and also
- *		change the __sk_buff->delivery_time_type to *dtime_type*.
+ *		Change the __sk_buff->tstamp_type to *tstamp_type*
+ *		and set *tstamp* to the __sk_buff->tstamp together.
  *
- *		When setting a delivery time (non zero *dtime*) to
- *		__sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type*
- *		is supported. It is the only delivery_time_type that will be
- *		kept after bpf_redirect_*().
- *
- *		If there is no need to change the __sk_buff->delivery_time_type,
- *		the delivery time can be directly written to __sk_buff->tstamp
+ *		If there is no need to change the __sk_buff->tstamp_type,
+ *		the tstamp value can be directly written to __sk_buff->tstamp
 *		instead.
  *
- *		*dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE
- *		can be used to clear any delivery time stored in
- *		__sk_buff->tstamp.
+ *		BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ *		will be kept during bpf_redirect_*(). A non zero
+ *		*tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ *		*tstamp_type*.
+ *
+ *		A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ *		with a zero *tstamp*.
  *
  *		Only IPv4 and IPv6 skb->protocol are supported.
  *
@@ -5119,7 +5118,7 @@ union bpf_attr {
  *	Return
  *		0 on success.
  *		**-EINVAL** for invalid input
- *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *		**-EOPNOTSUPP** for unsupported protocol
  */
 #define __BPF_FUNC_MAPPER(FN)	\
 	FN(unspec),		\
@@ -5314,7 +5313,7 @@ union bpf_attr {
 	FN(xdp_load_bytes),	\
 	FN(xdp_store_bytes),	\
 	FN(copy_from_user_task),	\
-	FN(skb_set_delivery_time),	\
+	FN(skb_set_tstamp),	\
 	/* */
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5505,9 +5504,12 @@ union { \
 } __attribute__((aligned(8)))
 enum {
-	BPF_SKB_DELIVERY_TIME_NONE,
-	BPF_SKB_DELIVERY_TIME_UNSPEC,
-	BPF_SKB_DELIVERY_TIME_MONO,
+	BPF_SKB_TSTAMP_UNSPEC,
+	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
+	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
+	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
+	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+	 */
 };
 /* user accessible mirror of in-kernel sk_buff.
@@ -5550,7 +5552,7 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
-	__u8  delivery_time_type;
+	__u8  tstamp_type;
 	__u32 :24;	/* Padding, future use. */
 	__u64 hwtstamp;
 };
...
@@ -174,13 +174,13 @@ int egress_host(struct __sk_buff *skb)
 		return TC_ACT_OK;
 	if (skb_proto(skb_type) == IPPROTO_TCP) {
-		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+		if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
 		    skb->tstamp)
 			inc_dtimes(EGRESS_ENDHOST);
 		else
 			inc_errs(EGRESS_ENDHOST);
 	} else {
-		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_UNSPEC &&
+		if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
 		    skb->tstamp)
 			inc_dtimes(EGRESS_ENDHOST);
 		else
@@ -204,7 +204,7 @@ int ingress_host(struct __sk_buff *skb)
 	if (!skb_type)
 		return TC_ACT_OK;
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
 	    skb->tstamp == EGRESS_FWDNS_MAGIC)
 		inc_dtimes(INGRESS_ENDHOST);
 	else
@@ -226,7 +226,7 @@ int ingress_fwdns_prio100(struct __sk_buff *skb)
 		return TC_ACT_OK;
 	/* delivery_time is only available to the ingress
-	 * if the tc-bpf checks the skb->delivery_time_type.
+	 * if the tc-bpf checks the skb->tstamp_type.
	 */
 	if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
 		inc_errs(INGRESS_FWDNS_P100);
@@ -250,7 +250,7 @@ int egress_fwdns_prio100(struct __sk_buff *skb)
 		return TC_ACT_OK;
 	/* delivery_time is always available to egress even
-	 * the tc-bpf did not use the delivery_time_type.
+	 * the tc-bpf did not use the tstamp_type.
	 */
 	if (skb->tstamp == INGRESS_FWDNS_MAGIC)
 		inc_dtimes(EGRESS_FWDNS_P100);
@@ -278,9 +278,9 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
 	if (skb_proto(skb_type) == IPPROTO_UDP)
 		expected_dtime = 0;
-	if (skb->delivery_time_type) {
+	if (skb->tstamp_type) {
 		if (fwdns_clear_dtime() ||
-		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
 		    skb->tstamp != expected_dtime)
 			inc_errs(INGRESS_FWDNS_P101);
 		else
@@ -290,14 +290,14 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
 			inc_errs(INGRESS_FWDNS_P101);
 	}
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
 		skb->tstamp = INGRESS_FWDNS_MAGIC;
 	} else {
-		if (bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
-					      BPF_SKB_DELIVERY_TIME_MONO))
+		if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+				       BPF_SKB_TSTAMP_DELIVERY_MONO))
 			inc_errs(SET_DTIME);
-		if (!bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
-					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+					BPF_SKB_TSTAMP_UNSPEC))
 			inc_errs(SET_DTIME);
 	}
@@ -320,9 +320,9 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
 		/* Should have handled in prio100 */
 		return TC_ACT_SHOT;
-	if (skb->delivery_time_type) {
+	if (skb->tstamp_type) {
 		if (fwdns_clear_dtime() ||
-		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
 		    skb->tstamp != INGRESS_FWDNS_MAGIC)
 			inc_errs(EGRESS_FWDNS_P101);
 		else
@@ -332,14 +332,14 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
 			inc_errs(EGRESS_FWDNS_P101);
 	}
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
 		skb->tstamp = EGRESS_FWDNS_MAGIC;
 	} else {
-		if (bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
-					      BPF_SKB_DELIVERY_TIME_MONO))
+		if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
+				       BPF_SKB_TSTAMP_DELIVERY_MONO))
 			inc_errs(SET_DTIME);
-		if (!bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
-					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+					BPF_SKB_TSTAMP_UNSPEC))
 			inc_errs(SET_DTIME);
 	}
...