Commit 4e950747 authored by Alexei Starovoitov

Merge branch 'bpf: allow cgroup progs to export custom retval to userspace'

YiFei Zhu says:

====================

Right now, most cgroup hooks are best used for permission checks. They
can only reject a syscall with -EPERM, so the cause of a rejection, if
the syscall is rejected by eBPF cgroup hooks, is ambiguous to userspace.
Additionally, if the syscalls are implemented in eBPF, all permission
checks and the implementation have to happen within the same filter,
as programs executed later in the series of progs are unaware of the
return values returned by the previous progs.

This patch series adds two helpers, bpf_get_retval and bpf_set_retval,
that allow hooks to get/set the return value of the syscall to userspace.
This also allows later progs to retrieve the retval set by previous progs.

For legacy programs that reject a syscall without setting the retval,
for backwards compatibility, if a prog rejects without itself or a
prior prog setting retval to an -err, the retval is set by the kernel
to -EPERM.

For getsockopt hooks that have ctx->retval, this variable mirrors the
value accessed by the helpers.

Additionally, the following user-visible behavior for getsockopt
hooks has changed:
  - If a prior filter rejected the syscall, it will be visible
    in ctx->retval.
  - Attempting to change the retval arbitrarily is now allowed and
    will not cause an -EFAULT.
  - If kernel rejects a getsockopt syscall before running the hooks,
    the error will be visible in ctx->retval. Returning 0 from the
    prog will not overwrite the error to -EPERM unless there is an
    explicit call of bpf_set_retval(-EPERM)

Tests have been added in this series to test the behavior of the helpers
with cgroup setsockopt and getsockopt hooks.

Patch 1 changes the API of macros to prepare for the next patch and
  should be a no-op.
Patch 2 moves ctx->retval to a struct pointed to by current
  task_struct.
Patch 3 implements the helpers.
Patch 4 tests the behaviors of the helpers.
Patch 5 updates a test after the test broke due to the visible changes.

v1 -> v2:
  - errno -> retval
  - split one helper to get & set helpers
  - allow retval to be set arbitrarily in the general case
  - made the helper retval and context retval mirror each other
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents d81283d2 1080ef5c
...@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {}; ...@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx { struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx; struct bpf_run_ctx run_ctx;
const struct bpf_prog_array_item *prog_item; const struct bpf_prog_array_item *prog_item;
int retval;
}; };
struct bpf_trace_run_ctx { struct bpf_trace_run_ctx {
...@@ -1277,19 +1278,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) ...@@ -1277,19 +1278,19 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
static __always_inline u32 static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog, const void *ctx, bpf_prog_run_fn run_prog,
u32 *ret_flags) int retval, u32 *ret_flags)
{ {
const struct bpf_prog_array_item *item; const struct bpf_prog_array_item *item;
const struct bpf_prog *prog; const struct bpf_prog *prog;
const struct bpf_prog_array *array; const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx; struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx; struct bpf_cg_run_ctx run_ctx;
u32 ret = 1;
u32 func_ret; u32 func_ret;
run_ctx.retval = retval;
migrate_disable(); migrate_disable();
rcu_read_lock(); rcu_read_lock();
array = rcu_dereference(array_rcu); array = rcu_dereference(array_rcu);
...@@ -1298,27 +1299,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1298,27 +1299,29 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
while ((prog = READ_ONCE(item->prog))) { while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item; run_ctx.prog_item = item;
func_ret = run_prog(prog, ctx); func_ret = run_prog(prog, ctx);
ret &= (func_ret & 1); if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
run_ctx.retval = -EPERM;
*(ret_flags) |= (func_ret >> 1); *(ret_flags) |= (func_ret >> 1);
item++; item++;
} }
bpf_reset_run_ctx(old_run_ctx); bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock(); rcu_read_unlock();
migrate_enable(); migrate_enable();
return ret; return run_ctx.retval;
} }
static __always_inline u32 static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
const void *ctx, bpf_prog_run_fn run_prog) const void *ctx, bpf_prog_run_fn run_prog,
int retval)
{ {
const struct bpf_prog_array_item *item; const struct bpf_prog_array_item *item;
const struct bpf_prog *prog; const struct bpf_prog *prog;
const struct bpf_prog_array *array; const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx; struct bpf_run_ctx *old_run_ctx;
struct bpf_cg_run_ctx run_ctx; struct bpf_cg_run_ctx run_ctx;
u32 ret = 1;
run_ctx.retval = retval;
migrate_disable(); migrate_disable();
rcu_read_lock(); rcu_read_lock();
array = rcu_dereference(array_rcu); array = rcu_dereference(array_rcu);
...@@ -1326,13 +1329,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1326,13 +1329,14 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
while ((prog = READ_ONCE(item->prog))) { while ((prog = READ_ONCE(item->prog))) {
run_ctx.prog_item = item; run_ctx.prog_item = item;
ret &= run_prog(prog, ctx); if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
run_ctx.retval = -EPERM;
item++; item++;
} }
bpf_reset_run_ctx(old_run_ctx); bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock(); rcu_read_unlock();
migrate_enable(); migrate_enable();
return ret; return run_ctx.retval;
} }
static __always_inline u32 static __always_inline u32
...@@ -1385,19 +1389,21 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, ...@@ -1385,19 +1389,21 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
* 0: NET_XMIT_SUCCESS skb should be transmitted * 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn * 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn * 2: NET_XMIT_CN skb should be transmitted and cn
* 3: -EPERM skb should be dropped * 3: -err skb should be dropped
*/ */
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \ ({ \
u32 _flags = 0; \ u32 _flags = 0; \
bool _cn; \ bool _cn; \
u32 _ret; \ u32 _ret; \
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \ _cn = _flags & BPF_RET_SET_CN; \
if (_ret) \ if (_ret && !IS_ERR_VALUE((long)_ret)) \
_ret = -EFAULT; \
if (!_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \ else \
_ret = (_cn ? NET_XMIT_DROP : -EPERM); \ _ret = (_cn ? NET_XMIT_DROP : _ret); \
_ret; \ _ret; \
}) })
......
...@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern { ...@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
s32 level; s32 level;
s32 optname; s32 optname;
s32 optlen; s32 optlen;
s32 retval; /* for retval in struct bpf_cg_run_ctx */
struct task_struct *current_task;
/* Temporary "register" used to save a scratch BPF register during indirect stores to retval. */
u64 tmp_reg;
}; };
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
......
...@@ -5033,6 +5033,22 @@ union bpf_attr { ...@@ -5033,6 +5033,22 @@ union bpf_attr {
* *
* Return * Return
* The number of arguments of the traced function. * The number of arguments of the traced function.
*
* int bpf_get_retval(void)
* Description
* Get the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* The syscall's return value.
*
* int bpf_set_retval(int retval)
* Description
* Set the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* 0 on success, or a negative error in case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5221,6 +5237,8 @@ union bpf_attr { ...@@ -5221,6 +5237,8 @@ union bpf_attr {
FN(get_func_arg), \ FN(get_func_arg), \
FN(get_func_ret), \ FN(get_func_ret), \
FN(get_func_arg_cnt), \ FN(get_func_arg_cnt), \
FN(get_retval), \
FN(set_retval), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
...@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, ...@@ -1044,7 +1044,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
* NET_XMIT_CN (2) - continue with packet output and notify TCP * NET_XMIT_CN (2) - continue with packet output and notify TCP
* to call cwr * to call cwr
* -EPERM - drop packet * -err - drop packet
* *
* For ingress packets, this function will return -EPERM if any * For ingress packets, this function will return -EPERM if any
* attached program was found and if it returned != 1 during execution. * attached program was found and if it returned != 1 during execution.
...@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, ...@@ -1079,8 +1079,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else { } else {
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
__bpf_prog_run_save_cb); __bpf_prog_run_save_cb, 0);
ret = (ret == 1 ? 0 : -EPERM); if (ret && !IS_ERR_VALUE((long)ret))
ret = -EFAULT;
} }
bpf_restore_data_end(skb, saved_data_end); bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset); __skb_pull(skb, offset);
...@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, ...@@ -1107,10 +1108,9 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum cgroup_bpf_attach_type atype) enum cgroup_bpf_attach_type atype)
{ {
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run); return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
return ret == 1 ? 0 : -EPERM; bpf_prog_run, 0);
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
...@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ...@@ -1142,7 +1142,6 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
}; };
struct sockaddr_storage unspec; struct sockaddr_storage unspec;
struct cgroup *cgrp; struct cgroup *cgrp;
int ret;
/* Check socket family since not all sockets represent network /* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX). * endpoint (e.g. AF_UNIX).
...@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ...@@ -1156,10 +1155,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
} }
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, flags); bpf_prog_run, 0, flags);
return ret == 1 ? 0 : -EPERM;
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
...@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ...@@ -1184,11 +1181,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
enum cgroup_bpf_attach_type atype) enum cgroup_bpf_attach_type atype)
{ {
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
bpf_prog_run); bpf_prog_run, 0);
return ret == 1 ? 0 : -EPERM;
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
...@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ...@@ -1201,17 +1196,47 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
.major = major, .major = major,
.minor = minor, .minor = minor,
}; };
int allow; int ret;
rcu_read_lock(); rcu_read_lock();
cgrp = task_dfl_cgroup(current); cgrp = task_dfl_cgroup(current);
allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run); bpf_prog_run, 0);
rcu_read_unlock(); rcu_read_unlock();
return !allow; return ret;
}
/*
 * bpf_get_retval helper: return the retval field of the struct
 * bpf_cg_run_ctx that embeds the run context referenced by
 * current->bpf_ctx.
 *
 * NOTE(review): this assumes current->bpf_ctx points at the run_ctx member
 * of a struct bpf_cg_run_ctx whenever the helper is invoked - confirm that
 * every program type offered this helper runs under such a context.
 */
BPF_CALL_0(bpf_get_retval)
{
/* Recover the enclosing cgroup run context from its embedded run_ctx. */
struct bpf_cg_run_ctx *ctx =
container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
return ctx->retval;
}
/*
 * Prototype descriptor for the bpf_get_retval helper: not GPL-only,
 * declares an integer return type and no arguments.
 */
static const struct bpf_func_proto bpf_get_retval_proto = {
.func = bpf_get_retval,
.gpl_only = false,
.ret_type = RET_INTEGER,
};
/*
 * bpf_set_retval helper: store @retval into the retval field of the struct
 * bpf_cg_run_ctx that embeds the run context referenced by
 * current->bpf_ctx. The value is stored as given, with no range check.
 * Always returns 0.
 *
 * NOTE(review): this assumes current->bpf_ctx points at the run_ctx member
 * of a struct bpf_cg_run_ctx whenever the helper is invoked - confirm that
 * every program type offered this helper runs under such a context.
 */
BPF_CALL_1(bpf_set_retval, int, retval)
{
/* Recover the enclosing cgroup run context from its embedded run_ctx. */
struct bpf_cg_run_ctx *ctx =
container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
ctx->retval = retval;
return 0;
} }
/*
 * Prototype descriptor for the bpf_set_retval helper: not GPL-only,
 * declares an integer return type and a single argument of type
 * ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_set_retval_proto = {
.func = bpf_set_retval,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
};
static const struct bpf_func_proto * static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{ {
...@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -1224,6 +1249,10 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_cgroup_id_proto; return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto; return &bpf_event_output_data_proto;
case BPF_FUNC_get_retval:
return &bpf_get_retval_proto;
case BPF_FUNC_set_retval:
return &bpf_set_retval_proto;
default: default:
return bpf_base_func_proto(func_id); return bpf_base_func_proto(func_id);
} }
...@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ...@@ -1337,7 +1366,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock(); rcu_read_lock();
cgrp = task_dfl_cgroup(current); cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
bpf_prog_run, 0);
rcu_read_unlock(); rcu_read_unlock();
kfree(ctx.cur_val); kfree(ctx.cur_val);
...@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ...@@ -1350,7 +1380,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
kfree(ctx.new_val); kfree(ctx.new_val);
} }
return ret == 1 ? 0 : -EPERM; return ret;
} }
#ifdef CONFIG_NET #ifdef CONFIG_NET
...@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -1452,13 +1482,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
lock_sock(sk); lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, 0);
release_sock(sk); release_sock(sk);
if (!ret) { if (ret)
ret = -EPERM;
goto out; goto out;
}
if (ctx.optlen == -1) { if (ctx.optlen == -1) {
/* optlen set to -1, bypass kernel */ /* optlen set to -1, bypass kernel */
...@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1518,7 +1546,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
.sk = sk, .sk = sk,
.level = level, .level = level,
.optname = optname, .optname = optname,
.retval = retval, .current_task = current,
}; };
int ret; int ret;
...@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1562,27 +1590,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
lock_sock(sk); lock_sock(sk);
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, retval);
release_sock(sk); release_sock(sk);
if (!ret) { if (ret < 0)
ret = -EPERM;
goto out; goto out;
}
if (ctx.optlen > max_optlen || ctx.optlen < 0) { if (ctx.optlen > max_optlen || ctx.optlen < 0) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval) {
ret = -EFAULT;
goto out;
}
if (ctx.optlen != 0) { if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) || if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) { put_user(ctx.optlen, optlen)) {
...@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1591,8 +1609,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
} }
} }
ret = ctx.retval;
out: out:
sockopt_free_buf(&ctx, &buf); sockopt_free_buf(&ctx, &buf);
return ret; return ret;
...@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, ...@@ -1607,10 +1623,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
.sk = sk, .sk = sk,
.level = level, .level = level,
.optname = optname, .optname = optname,
.retval = retval,
.optlen = *optlen, .optlen = *optlen,
.optval = optval, .optval = optval,
.optval_end = optval + *optlen, .optval_end = optval + *optlen,
.current_task = current,
}; };
int ret; int ret;
...@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, ...@@ -1623,25 +1639,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
*/ */
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
&ctx, bpf_prog_run); &ctx, bpf_prog_run, retval);
if (!ret) if (ret < 0)
return -EPERM; return ret;
if (ctx.optlen > *optlen) if (ctx.optlen > *optlen)
return -EFAULT; return -EFAULT;
/* BPF programs only allowed to set retval to 0, not some
* arbitrary value.
*/
if (ctx.retval != 0 && ctx.retval != retval)
return -EFAULT;
/* BPF programs can shrink the buffer, export the modifications. /* BPF programs can shrink the buffer, export the modifications.
*/ */
if (ctx.optlen != 0) if (ctx.optlen != 0)
*optlen = ctx.optlen; *optlen = ctx.optlen;
return ctx.retval; return ret;
} }
#endif #endif
...@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, ...@@ -2057,10 +2067,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
break; break;
case offsetof(struct bpf_sockopt, retval): case offsetof(struct bpf_sockopt, retval):
if (type == BPF_WRITE) BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
else if (type == BPF_WRITE) {
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); int treg = BPF_REG_9;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
treg, treg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
treg, si->src_reg,
offsetof(struct bpf_cg_run_ctx, retval));
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
} else {
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sockopt_kern, current_task));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
si->dst_reg, si->dst_reg,
offsetof(struct task_struct, bpf_ctx));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
si->dst_reg, si->dst_reg,
offsetof(struct bpf_cg_run_ctx, retval));
}
break; break;
case offsetof(struct bpf_sockopt, optval): case offsetof(struct bpf_sockopt, optval):
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
......
...@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access) ...@@ -838,7 +838,7 @@ int devcgroup_check_permission(short type, u32 major, u32 minor, short access)
int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);
if (rc) if (rc)
return -EPERM; return rc;
#ifdef CONFIG_CGROUP_DEVICE #ifdef CONFIG_CGROUP_DEVICE
return devcgroup_legacy_check_permission(type, major, minor, access); return devcgroup_legacy_check_permission(type, major, minor, access);
......
...@@ -5033,6 +5033,22 @@ union bpf_attr { ...@@ -5033,6 +5033,22 @@ union bpf_attr {
* *
* Return * Return
* The number of arguments of the traced function. * The number of arguments of the traced function.
*
* int bpf_get_retval(void)
* Description
* Get the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* The syscall's return value.
*
* int bpf_set_retval(int retval)
* Description
* Set the syscall's return value that will be returned to userspace.
*
* This helper is currently supported by cgroup programs only.
* Return
* 0 on success, or a negative error in case of failure.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -5221,6 +5237,8 @@ union bpf_attr { ...@@ -5221,6 +5237,8 @@ union bpf_attr {
FN(get_func_arg), \ FN(get_func_arg), \
FN(get_func_ret), \ FN(get_func_ret), \
FN(get_func_arg_cnt), \ FN(get_func_arg_cnt), \
FN(get_retval), \
FN(set_retval), \
/* */ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
......
This diff is collapsed.
...@@ -173,11 +173,11 @@ static int getsetsockopt(void) ...@@ -173,11 +173,11 @@ static int getsetsockopt(void)
} }
memset(&buf, 0, sizeof(buf)); memset(&buf, 0, sizeof(buf));
buf.zc.address = 12345; /* rejected by BPF */ buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
optlen = sizeof(buf.zc); optlen = sizeof(buf.zc);
errno = 0; errno = 0;
err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
if (errno != EPERM) { if (errno != EINVAL) {
log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
err, errno); err, errno);
goto err; goto err;
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2021 Google LLC.
*/
#include <errno.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
/*
 * Result variables written by the programs below.
 * NOTE(review): presumably read back by the userspace half of the test -
 * that side is not visible here.
 */
/* Incremented atomically once per program run. */
__u32 invocations = 0;
/* Set to 1 if bpf_set_retval() reports a non-zero result. */
__u32 assertion_error = 0;
/* Last value obtained from bpf_get_retval(). */
__u32 retval_value = 0;
/* Last value read from ctx->retval. */
__u32 ctx_retval_value = 0;
/*
 * Observer for the getsockopt hook: snapshot the pending return value as
 * seen through both the bpf_get_retval() helper and ctx->retval, count the
 * run, and let the call proceed (return 1) without touching the retval.
 */
SEC("cgroup/getsockopt")
int get_retval(struct bpf_sockopt *ctx)
{
	__u32 via_helper = bpf_get_retval();
	__u32 via_ctx = ctx->retval;

	/* Publish both views so they can be compared against each other. */
	retval_value = via_helper;
	ctx_retval_value = via_ctx;

	__sync_fetch_and_add(&invocations, 1);

	return 1;
}
/*
 * getsockopt hook that overrides the pending return value with -EISCONN
 * via bpf_set_retval() and still returns 1, so the override is made by the
 * helper alone. A non-zero helper result is recorded in assertion_error.
 */
SEC("cgroup/getsockopt")
int set_eisconn(struct bpf_sockopt *ctx)
{
	__sync_fetch_and_add(&invocations, 1);

	if (bpf_set_retval(-EISCONN) != 0) {
		/* The helper is expected to succeed; flag it if it does not. */
		assertion_error = 1;
	}

	return 1;
}
/*
 * getsockopt hook that counts the run, resets the pending return value to 0
 * by writing ctx->retval directly (not through bpf_set_retval()), and
 * returns 1.
 */
SEC("cgroup/getsockopt")
int clear_retval(struct bpf_sockopt *ctx)
{
__sync_fetch_and_add(&invocations, 1);
/* Direct context write; the series makes this mirror the helper's view. */
ctx->retval = 0;
return 1;
}
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2021 Google LLC.
*/
#include <errno.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
/*
 * Result variables written by the programs below.
 * NOTE(review): presumably read back by the userspace half of the test -
 * that side is not visible here.
 */
/* Incremented atomically once per program run. */
__u32 invocations = 0;
/* Set to 1 if bpf_set_retval() reports a non-zero result. */
__u32 assertion_error = 0;
/* Last value obtained from bpf_get_retval(). */
__u32 retval_value = 0;
/*
 * Observer for the setsockopt hook: record what bpf_get_retval() reports
 * at this point in the program chain, count the run, and let the call
 * proceed (return 1) without modifying the retval.
 */
SEC("cgroup/setsockopt")
int get_retval(struct bpf_sockopt *ctx)
{
	__u32 via_helper = bpf_get_retval();

	retval_value = via_helper;

	__sync_fetch_and_add(&invocations, 1);

	return 1;
}
/*
 * setsockopt hook that rejects the call (return 0) after setting the
 * return value to -EUNATCH with bpf_set_retval(). Because an error is
 * already stored, the kernel's -EPERM fallback for rejecting programs does
 * not replace it. A non-zero helper result is recorded in assertion_error.
 */
SEC("cgroup/setsockopt")
int set_eunatch(struct bpf_sockopt *ctx)
{
	__sync_fetch_and_add(&invocations, 1);

	if (bpf_set_retval(-EUNATCH) != 0) {
		/* The helper is expected to succeed; flag it if it does not. */
		assertion_error = 1;
	}

	return 0;
}
/*
 * setsockopt hook that rejects the call (return 0) after setting the
 * return value to -EISCONN with bpf_set_retval(). Because an error is
 * already stored, the kernel's -EPERM fallback for rejecting programs does
 * not replace it. A non-zero helper result is recorded in assertion_error.
 */
SEC("cgroup/setsockopt")
int set_eisconn(struct bpf_sockopt *ctx)
{
	__sync_fetch_and_add(&invocations, 1);

	if (bpf_set_retval(-EISCONN) != 0) {
		/* The helper is expected to succeed; flag it if it does not. */
		assertion_error = 1;
	}

	return 0;
}
/*
 * setsockopt hook modelling a legacy program: it counts the run and rejects
 * the call (return 0) without calling bpf_set_retval(). Per the series
 * description, the kernel then sets the retval to -EPERM unless this or an
 * earlier program already set it to an error.
 */
SEC("cgroup/setsockopt")
int legacy_eperm(struct bpf_sockopt *ctx)
{
__sync_fetch_and_add(&invocations, 1);
return 0;
}
...@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx) ...@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
*/ */
if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
if (((struct tcp_zerocopy_receive *)optval)->address != 0) if (((struct tcp_zerocopy_receive *)optval)->address != 0)
return 0; /* EPERM, unexpected data */ return 0; /* unexpected data */
return 1; return 1;
} }
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end) if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */ ctx->retval = 0; /* Reset system call return value to zero */
...@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx) ...@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
* bytes of data. * bytes of data.
*/ */
if (optval_end - optval != page_size) if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */ return 0; /* unexpected data size */
return 1; return 1;
} }
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end) if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE); BPF_SK_STORAGE_GET_F_CREATE);
if (!storage) if (!storage)
return 0; /* EPERM, couldn't get sk storage */ return 0; /* couldn't get sk storage */
if (!ctx->retval) if (!ctx->retval)
return 0; /* EPERM, kernel should not have handled return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong! * SOL_CUSTOM, something is wrong!
*/ */
ctx->retval = 0; /* Reset system call return value to zero */ ctx->retval = 0; /* Reset system call return value to zero */
...@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Overwrite SO_SNDBUF value */ /* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end) if (optval + sizeof(__u32) > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
*(__u32 *)optval = 0x55AA; *(__u32 *)optval = 0x55AA;
ctx->optlen = 4; ctx->optlen = 4;
...@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Always use cubic */ /* Always use cubic */
if (optval + 5 > optval_end) if (optval + 5 > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
memcpy(optval, "cubic", 5); memcpy(optval, "cubic", 5);
ctx->optlen = 5; ctx->optlen = 5;
...@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */ /* Original optlen is larger than PAGE_SIZE. */
if (ctx->optlen != page_size * 2) if (ctx->optlen != page_size * 2)
return 0; /* EPERM, unexpected data size */ return 0; /* unexpected data size */
if (optval + 1 > optval_end) if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
/* Make sure we can trim the buffer. */ /* Make sure we can trim the buffer. */
optval[0] = 0; optval[0] = 0;
...@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
* bytes of data. * bytes of data.
*/ */
if (optval_end - optval != page_size) if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */ return 0; /* unexpected data size */
return 1; return 1;
} }
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end) if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE); BPF_SK_STORAGE_GET_F_CREATE);
if (!storage) if (!storage)
return 0; /* EPERM, couldn't get sk storage */ return 0; /* couldn't get sk storage */
storage->val = optval[0]; storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment