Commit b00fa38a authored by Joanne Koong, committed by Alexei Starovoitov

bpf: Enable non-atomic allocations in local storage

Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.
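
For illustration, here is a minimal sketch (not part of this commit; the map layout, LSM hook, and names are assumptions) of a sleepable program that benefits from the new GFP_KERNEL path:

```c
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

/* Hypothetical per-task counter map. */
struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} opens SEC(".maps");

/* "lsm.s/" marks the program sleepable, so the verifier patches the
 * bpf_task_storage_get() call below to pass GFP_KERNEL as the hidden
 * 5th argument; in a non-sleepable program it would pass GFP_ATOMIC.
 */
SEC("lsm.s/file_open")
int BPF_PROG(count_file_open, struct file *file)
{
	__u64 *cnt;

	cnt = bpf_task_storage_get(&opens, bpf_get_current_task_btf(), 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	return 0;
}
```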

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.
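
For contrast, a userspace update via the syscall path (illustrative only; the function and fd names are assumptions) always lands on the GFP_ATOMIC branch:

```c
#include <bpf/bpf.h>	/* libbpf syscall wrappers */

/* The key of a task storage map is a pidfd; the kernel resolves it to a
 * task and calls bpf_local_storage_update(..., GFP_ATOMIC) internally.
 */
int bump_task_counter(int map_fd, int pidfd)
{
	__u64 value = 0;

	return bpf_map_update_elem(map_fd, &pidfd, &value, BPF_NOEXIST);
}
```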
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com
parent a8fee962
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem);
+		bool charge_mem, gfp_t gfp_flags);
 
 int
 bpf_local_storage_alloc(void *owner,
 			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem);
+			struct bpf_local_storage_elem *first_selem,
+			gfp_t gfp_flags);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags);
+			 void *value, u64 map_flags, gfp_t gfp_flags);
 
 void bpf_local_storage_free_rcu(struct rcu_head *rcu);
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(f->f_inode,
 					 (struct bpf_local_storage_map *)map,
-					 value, map_flags);
+					 value, map_flags, GFP_ATOMIC);
 	fput(f);
 	return PTR_ERR_OR_ZERO(sdata);
 }
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
 		sdata = bpf_local_storage_update(
 			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 		return IS_ERR(sdata) ? (unsigned long)NULL :
 			(unsigned long)sdata->data;
 	}
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool charge_mem)
+		void *value, bool charge_mem, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_elem *selem;
 
@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 		return NULL;
 
 	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-				GFP_ATOMIC | __GFP_NOWARN);
+				gfp_flags | __GFP_NOWARN);
 	if (selem) {
 		if (value)
 			memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
 			    struct bpf_local_storage_map *smap,
-			    struct bpf_local_storage_elem *first_selem)
+			    struct bpf_local_storage_elem *first_selem,
+			    gfp_t gfp_flags)
 {
 	struct bpf_local_storage *prev_storage, *storage;
 	struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
 		return err;
 
 	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-				  GFP_ATOMIC | __GFP_NOWARN);
+				  gfp_flags | __GFP_NOWARN);
 	if (!storage) {
 		err = -ENOMEM;
 		goto uncharge;
@@ -350,10 +351,10 @@ int bpf_local_storage_alloc(void *owner,
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags)
+			 void *value, u64 map_flags, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
-	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_elem *selem = NULL;
 	struct bpf_local_storage *local_storage;
 	unsigned long flags;
 	int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	    !map_value_has_spin_lock(&smap->map)))
 		return ERR_PTR(-EINVAL);
 
+	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+		return ERR_PTR(-EINVAL);
+
 	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
 					      bpf_rcu_lock_held());
 	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
-		err = bpf_local_storage_alloc(owner, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
 		if (err) {
 			kfree(selem);
 			mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
+	if (gfp_flags == GFP_KERNEL) {
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+		if (!selem)
+			return ERR_PTR(-ENOMEM);
+	}
+
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
@@ -429,6 +439,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		goto unlock;
 	}
 
+	if (gfp_flags != GFP_KERNEL) {
 	/* local_storage->lock is held.  Hence, we are sure
 	 * we can unlink and uncharge the old_sdata successfully
 	 * later.  Hence, instead of charging the new selem now
@@ -438,11 +449,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	 * old_sdata will not be uncharged later during
 	 * bpf_selem_unlink_storage_nolock().
 	 */
-	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
-	if (!selem) {
-		err = -ENOMEM;
-		goto unlock_err;
-	}
+		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+		if (!selem) {
+			err = -ENOMEM;
+			goto unlock_err;
+		}
+	}
 
 	/* First, link the new selem to the map */
 	bpf_selem_link_map(smap, selem);
@@ -463,6 +475,10 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 
 unlock_err:
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	if (selem) {
+		mem_uncharge(smap, owner, smap->elem_size);
+		kfree(selem);
+	}
 	return ERR_PTR(err);
 }
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
 	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
-		task, (struct bpf_local_storage_map *)map, value, map_flags);
+		task, (struct bpf_local_storage_map *)map, value, map_flags,
+		GFP_ATOMIC);
 	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-	   task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 
 unlock:
 	bpf_task_storage_unlock();
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
+		if (insn->imm == BPF_FUNC_task_storage_get ||
+		    insn->imm == BPF_FUNC_sk_storage_get ||
+		    insn->imm == BPF_FUNC_inode_storage_get) {
+			if (env->prog->aux->sleepable)
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
+			else
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
 		 * and other inlining handlers are currently limited to 64 bit
 		 * only.
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
 	if (sock) {
 		sdata = bpf_local_storage_update(
 			sock->sk, (struct bpf_local_storage_map *)map, value,
-			map_flags);
+			map_flags, GFP_ATOMIC);
 		sockfd_put(sock);
 		return PTR_ERR_OR_ZERO(sdata);
 	}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
 	struct bpf_local_storage_elem *copy_selem;
 
-	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
 	if (!copy_selem)
 		return NULL;
 
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 			bpf_selem_link_map(smap, copy_selem);
 			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
 		} else {
-			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
 			if (ret) {
 				kfree(copy_selem);
 				atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
 	return ret;
 }
 
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	struct bpf_local_storage_data *sdata;
 
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	    refcount_inc_not_zero(&sk->sk_refcnt)) {
 		sdata = bpf_local_storage_update(
 			sk, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 
 		/* sk must be a fullsock (guaranteed by verifier),
 		 * so sock_gen_put() is unnecessary.
 		 */
@@ -417,14 +418,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 	return false;
 }
 
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
 	WARN_ON_ONCE(!bpf_rcu_lock_held());
 	if (in_hardirq() || in_nmi())
 		return (unsigned long)NULL;
 
-	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+						     gfp_flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,