Commit f11216b2 authored by Vlad Dumitrescu, committed by Alexei Starovoitov

bpf: add skb->tstamp r/w access from tc clsact and cg skb progs

This can be used to rate-limit egress traffic in concert with a qdisc that
supports Earliest Departure Time (EDT), such as fq: the program writes a
per-packet departure time into skb->tstamp and the qdisc does not release
the packet before that time.
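
For illustration only (not part of this commit), a tc clsact egress program
could use the new field roughly as follows. The section name, the BTF map
syntax, and the 1 Gbit/s rate are assumptions for the sketch and use
present-day libbpf conventions rather than the toolchain contemporary with
this patch:

// SPDX-License-Identifier: GPL-2.0
/* Sketch only: pace egress to an assumed rate by writing an Earliest
 * Departure Time into skb->tstamp; an EDT-aware qdisc such as fq then
 * holds each packet until that time.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define RATE_BYTES_PER_SEC (125 * 1000 * 1000)	/* ~1 Gbit/s, example value */
#define NSEC_PER_SEC 1000000000ULL

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} next_tstamp SEC(".maps");

SEC("tc")
int edt_pace(struct __sk_buff *skb)
{
	__u64 now = bpf_ktime_get_ns();
	__u64 delay, *next;
	__u32 key = 0;

	next = bpf_map_lookup_elem(&next_tstamp, &key);
	if (!next)
		return TC_ACT_OK;

	/* Wire time of this packet at the target rate. Single global
	 * slot, no locking -- illustration only, not production code.
	 */
	delay = (__u64)skb->len * NSEC_PER_SEC / RATE_BYTES_PER_SEC;

	if (*next < now)
		*next = now;

	skb->tstamp = *next;	/* departure time consumed by fq */
	*next += delay;

	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";

Such a program would typically be attached along the lines of
"tc qdisc add dev eth0 clsact" plus
"tc filter add dev eth0 egress bpf da obj edt.o sec tc", with
"tc qdisc replace dev eth0 root fq" providing the EDT-aware scheduler;
exact commands depend on the tc and libbpf versions in use.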

Write access from cg skb progs is allowed only with CAP_SYS_ADMIN, since the
value will be consumed by downstream qdiscs. It might make sense to relax
this restriction later.
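
Again for illustration only (not part of this commit), a cgroup skb egress
program can read the field without the extra capability; only the store
requires CAP_SYS_ADMIN per cg_skb_is_valid_access() below. The section name
and the bpf_printk() usage are assumptions for the sketch:

// SPDX-License-Identifier: GPL-2.0
/* Sketch only: read skb->tstamp from a cgroup skb egress hook.
 * Replacing the load with a store would additionally require
 * CAP_SYS_ADMIN at program load time.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int observe_tstamp(struct __sk_buff *skb)
{
	/* Non-zero when a delivery/departure time was already set. */
	if (skb->tstamp)
		bpf_printk("egress tstamp=%llu\n", skb->tstamp);

	return 1;	/* 1 = allow the packet */
}

char _license[] SEC("license") = "GPL";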

Changes v1 -> v2:
  - allow access from cg skb, write only with CAP_SYS_ADMIN
Signed-off-by: Vlad Dumitrescu <vladum@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent ab85b014
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 struct bpf_tunnel_key {
......
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(struct bpf_flow_keys *))
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (size != sizeof(__u64))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
@@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, priority):
 	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 		break;
+	case bpf_ctx_range(struct __sk_buff, tstamp):
+		if (!capable(CAP_SYS_ADMIN))
+			return false;
+		break;
 	default:
 		return false;
 	}
@@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
@@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, priority):
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		break;
 	default:
 		return false;
@@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
@@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, tstamp):
 		return false;
 	}
@@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
 				      si->src_reg, off);
 		break;
+	case offsetof(struct __sk_buff, tstamp):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_DW,
+					      si->dst_reg, si->src_reg,
+					      bpf_target_off(struct sk_buff,
+							     tstamp, 8,
+							     target_size));
+		break;
 	}
 	return insn - insn_buf;
......
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 	__u32 data_meta;
 	struct bpf_flow_keys *flow_keys;
+	__u64 tstamp;
 };
 struct bpf_tunnel_key {
......
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, tc_index)),
 			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
 				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
 			BPF_EXIT_INSN(),
 		},
 		.errstr_unpriv = "",
@@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R2 leaks addr into helper function",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
+	{
+		"write tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "invalid bpf_context access off=152 size=8",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"read tstamp from CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tstamp)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
 	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {
......