Commit d168286d authored by Alexei Starovoitov

Merge branch 'map-charge-cleanup'

Roman Gushchin says:

====================
During my work on memcg-based memory accounting for bpf maps
I've done some cleanups and refactoring of the existing
memlock rlimit-based code. The result is more robust and unifies
the size-to-pages conversion, the size checks and the
corresponding error codes. It also adds coverage for cgroup local
storage and socket local storage maps.

It looks like some preliminary work on the mm side might be
required before work on the memcg-based accounting can start,
so I'm sending these patches as a separate patchset.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 576240cf c85d6913
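
For orientation, here is a minimal sketch (not part of this commit) of the allocation pattern the series converts every map type to. The map type, its cost formula and the function names below are hypothetical; only the bpf_map_charge_init()/bpf_map_charge_move()/bpf_map_charge_finish() calls follow the helpers introduced in the diff below.

/* Hypothetical map type, for illustration only. */
struct example_map {
	struct bpf_map map;	/* embedded first, as in the real map types */
	void *data;
};

static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	struct bpf_map_memory mem;	/* temporary holder for the charge */
	struct example_map *emap;
	u64 cost;
	int err;

	/* Charge the whole map size up front; the overflow check and the
	 * size-to-pages rounding now live inside bpf_map_charge_init().
	 */
	cost = sizeof(*emap) + (u64)attr->max_entries * attr->value_size;
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	emap = bpf_map_area_alloc(sizeof(*emap), bpf_map_attr_numa_node(attr));
	if (!emap) {
		/* Allocation failed: drop the charge taken above. */
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_init_from_attr(&emap->map, attr);
	/* Success: hand the charge over to the map. Map teardown
	 * (bpf_map_free_deferred()) uncharges it via bpf_map_charge_finish().
	 */
	bpf_map_charge_move(&emap->map.memory, &mem);

	return &emap->map;
}
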
@@ -66,6 +66,11 @@ struct bpf_map_ops {
 				  u64 imm, u32 *off);
 };
 
+struct bpf_map_memory {
+	u32 pages;
+	struct user_struct *user;
+};
+
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -86,7 +91,7 @@ struct bpf_map {
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
-	u32 pages;
+	struct bpf_map_memory memory;
 	bool unpriv_array;
 	bool frozen; /* write-once */
 	/* 48 bytes hole */
@@ -94,8 +99,7 @@ struct bpf_map {
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
-	struct user_struct *user ____cacheline_aligned;
-	atomic_t refcnt;
+	atomic_t refcnt ____cacheline_aligned;
 	atomic_t usercnt;
 	struct work_struct work;
 	char name[BPF_OBJ_NAME_LEN];
@@ -646,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_precharge_memlock(u32 pages);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
+int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
+void bpf_map_charge_finish(struct bpf_map_memory *mem);
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
......
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	u32 elem_size, index_mask, max_entries;
 	bool unpriv = !capable(CAP_SYS_ADMIN);
 	u64 cost, array_size, mask64;
+	struct bpf_map_memory mem;
 	struct bpf_array *array;
 
 	elem_size = round_up(attr->value_size, 8);
@@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	/* make sure there is no u32 overflow later in round_up() */
 	cost = array_size;
-	if (cost >= U32_MAX - PAGE_SIZE)
-		return ERR_PTR(-ENOMEM);
-	if (percpu) {
+	if (percpu)
 		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
-		if (cost >= U32_MAX - PAGE_SIZE)
-			return ERR_PTR(-ENOMEM);
-	}
-	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	ret = bpf_map_precharge_memlock(cost);
+	ret = bpf_map_charge_init(&mem, cost);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
 	/* allocate all map elements and zero-initialize them */
 	array = bpf_map_area_alloc(array_size, numa_node);
-	if (!array)
+	if (!array) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 	array->index_mask = index_mask;
 	array->map.unpriv_array = unpriv;
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&array->map, attr);
-	array->map.pages = cost;
+	bpf_map_charge_move(&array->map.memory, &mem);
 	array->elem_size = elem_size;
 
 	if (percpu && bpf_array_alloc_percpu(array)) {
+		bpf_map_charge_finish(&array->map.memory);
 		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
......
@@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	/* make sure page count doesn't overflow */
 	cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
 	cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_cmap;
-	cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
 	/* Notice returns -EPERM on if map size is larger than memlock limit */
-	ret = bpf_map_precharge_memlock(cmap->map.pages);
+	ret = bpf_map_charge_init(&cmap->map.memory, cost);
 	if (ret) {
 		err = ret;
 		goto free_cmap;
@@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
 					    __alignof__(unsigned long));
 	if (!cmap->flush_needed)
-		goto free_cmap;
+		goto free_charge;
 
 	/* Alloc array for possible remote "destination" CPUs */
 	cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	return &cmap->map;
 free_percpu:
 	free_percpu(cmap->flush_needed);
+free_charge:
+	bpf_map_charge_finish(&cmap->map.memory);
 free_cmap:
 	kfree(cmap);
 	return ERR_PTR(err);
......
@@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	/* make sure page count doesn't overflow */
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += dev_map_bitmap_size(attr) * num_possible_cpus();
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_dtab;
-
-	dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(dtab->map.pages);
+	/* if map size is larger than memlock limit, reject it */
+	err = bpf_map_charge_init(&dtab->map.memory, cost);
 	if (err)
 		goto free_dtab;
@@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 					__alignof__(unsigned long),
 					GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
-		goto free_dtab;
+		goto free_charge;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
-		goto free_dtab;
+		goto free_charge;
 
 	spin_lock(&dev_map_lock);
 	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
 
 	return &dtab->map;
 
+free_charge:
+	bpf_map_charge_finish(&dtab->map.memory);
 free_dtab:
 	free_percpu(dtab->flush_needed);
 	kfree(dtab);
......
@@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	else
 	       cost += (u64) htab->elem_size * num_possible_cpus();
 
-	if (cost >= U32_MAX - PAGE_SIZE)
-		/* make sure page count doesn't overflow */
-		goto free_htab;
-
-	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(htab->map.pages);
+	/* if map size is larger than memlock limit, reject it */
+	err = bpf_map_charge_init(&htab->map.memory, cost);
 	if (err)
 		goto free_htab;
 
@@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 					   sizeof(struct bucket),
 					   htab->map.numa_node);
 	if (!htab->buckets)
-		goto free_htab;
+		goto free_charge;
 
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
@@ -409,6 +403,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
+free_charge:
+	bpf_map_charge_finish(&htab->map.memory);
 free_htab:
 	kfree(htab);
 	return ERR_PTR(err);
......
@@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_cgroup_storage_map *map;
+	struct bpf_map_memory mem;
+	int ret;
 
 	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
 		return ERR_PTR(-EINVAL);
@@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 		/* max_entries is not used and enforced to be 0 */
 		return ERR_PTR(-EINVAL);
 
+	ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
 			   __GFP_ZERO | GFP_USER, numa_node);
-	if (!map)
+	if (!map) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
-	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
-				  PAGE_SIZE) >> PAGE_SHIFT;
+	bpf_map_charge_move(&map->map.memory, &mem);
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&map->map, attr);
......
@@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	cost_per_node = sizeof(struct lpm_trie_node) +
 			attr->value_size + trie->data_size;
 	cost += (u64) attr->max_entries * cost_per_node;
-	if (cost >= U32_MAX - PAGE_SIZE) {
-		ret = -E2BIG;
-		goto out_err;
-	}
-
-	trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	ret = bpf_map_precharge_memlock(trie->map.pages);
+	ret = bpf_map_charge_init(&trie->map.memory, cost);
 	if (ret)
 		goto out_err;
......
@@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
 static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 {
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_map_memory mem = {0};
 	struct bpf_queue_stack *qs;
 	u64 size, queue_size, cost;
 
 	size = (u64) attr->max_entries + 1;
 	cost = queue_size = sizeof(*qs) + size * attr->value_size;
-	if (cost >= U32_MAX - PAGE_SIZE)
-		return ERR_PTR(-E2BIG);
 
-	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	ret = bpf_map_precharge_memlock(cost);
+	ret = bpf_map_charge_init(&mem, cost);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
 	qs = bpf_map_area_alloc(queue_size, numa_node);
-	if (!qs)
+	if (!qs) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	memset(qs, 0, sizeof(*qs));
 
 	bpf_map_init_from_attr(&qs->map, attr);
 
-	qs->map.pages = cost;
+	bpf_map_charge_move(&qs->map.memory, &mem);
 	qs->size = size;
 
 	raw_spin_lock_init(&qs->lock);
......
@@ -151,7 +151,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 {
 	int err, numa_node = bpf_map_attr_numa_node(attr);
 	struct reuseport_array *array;
-	u64 cost, array_size;
+	struct bpf_map_memory mem;
+	u64 array_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
@@ -159,24 +160,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 	array_size = sizeof(*array);
 	array_size += (u64)attr->max_entries * sizeof(struct sock *);
 
-	/* make sure there is no u32 overflow later in round_up() */
-	cost = array_size;
-	if (cost >= U32_MAX - PAGE_SIZE)
-		return ERR_PTR(-ENOMEM);
-	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	err = bpf_map_precharge_memlock(cost);
+	err = bpf_map_charge_init(&mem, array_size);
 	if (err)
 		return ERR_PTR(err);
 
 	/* allocate all map elements and zero-initialize them */
 	array = bpf_map_area_alloc(array_size, numa_node);
-	if (!array)
+	if (!array) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&array->map, attr);
-	array->map.pages = cost;
+	bpf_map_charge_move(&array->map.memory, &mem);
 
 	return &array->map;
 }
......
@@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 {
 	u32 value_size = attr->value_size;
 	struct bpf_stack_map *smap;
+	struct bpf_map_memory mem;
 	u64 cost, n_buckets;
 	int err;
 
@@ -116,40 +117,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	n_buckets = roundup_pow_of_two(attr->max_entries);
 
 	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
-	if (cost >= U32_MAX - PAGE_SIZE)
-		return ERR_PTR(-E2BIG);
+	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
+	err = bpf_map_charge_init(&mem, cost);
+	if (err)
+		return ERR_PTR(err);
 
 	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
-	if (!smap)
+	if (!smap) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
-
-	err = -E2BIG;
-	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_smap;
+	}
 
 	bpf_map_init_from_attr(&smap->map, attr);
 	smap->map.value_size = value_size;
 	smap->n_buckets = n_buckets;
-	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	err = bpf_map_precharge_memlock(smap->map.pages);
-	if (err)
-		goto free_smap;
 
 	err = get_callchain_buffers(sysctl_perf_event_max_stack);
 	if (err)
-		goto free_smap;
+		goto free_charge;
 
 	err = prealloc_elems_and_freelist(smap);
 	if (err)
 		goto put_buffers;
 
+	bpf_map_charge_move(&smap->map.memory, &mem);
+
 	return &smap->map;
 
 put_buffers:
 	put_callchain_buffers();
-free_smap:
+free_charge:
+	bpf_map_charge_finish(&mem);
 	bpf_map_area_free(smap);
 	return ERR_PTR(err);
 }
......
@@ -188,19 +188,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 	map->numa_node = bpf_map_attr_numa_node(attr);
 }
 
-int bpf_map_precharge_memlock(u32 pages)
-{
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit, cur;
-
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	cur = atomic_long_read(&user->locked_vm);
-	free_uid(user);
-	if (cur + pages > memlock_limit)
-		return -EPERM;
-	return 0;
-}
-
 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
 	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -214,45 +201,62 @@ static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
 {
-	atomic_long_sub(pages, &user->locked_vm);
+	if (user)
+		atomic_long_sub(pages, &user->locked_vm);
 }
 
-static int bpf_map_init_memlock(struct bpf_map *map)
+int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
 {
-	struct user_struct *user = get_current_user();
+	u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
+	struct user_struct *user;
 	int ret;
 
-	ret = bpf_charge_memlock(user, map->pages);
+	if (size >= U32_MAX - PAGE_SIZE)
+		return -E2BIG;
+
+	user = get_current_user();
+	ret = bpf_charge_memlock(user, pages);
 	if (ret) {
 		free_uid(user);
 		return ret;
 	}
-	map->user = user;
-	return ret;
+
+	mem->pages = pages;
+	mem->user = user;
+
+	return 0;
 }
 
-static void bpf_map_release_memlock(struct bpf_map *map)
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
 {
-	struct user_struct *user = map->user;
-	bpf_uncharge_memlock(user, map->pages);
-	free_uid(user);
+	bpf_uncharge_memlock(mem->user, mem->pages);
+	free_uid(mem->user);
+}
+
+void bpf_map_charge_move(struct bpf_map_memory *dst,
+			 struct bpf_map_memory *src)
+{
+	*dst = *src;
+
+	/* Make sure src will not be used for the redundant uncharging. */
+	memset(src, 0, sizeof(struct bpf_map_memory));
 }
 
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
 {
 	int ret;
 
-	ret = bpf_charge_memlock(map->user, pages);
+	ret = bpf_charge_memlock(map->memory.user, pages);
 	if (ret)
 		return ret;
-	map->pages += pages;
+	map->memory.pages += pages;
 	return ret;
 }
 
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
 {
-	bpf_uncharge_memlock(map->user, pages);
-	map->pages -= pages;
+	bpf_uncharge_memlock(map->memory.user, pages);
+	map->memory.pages -= pages;
 }
 
 static int bpf_map_alloc_id(struct bpf_map *map)
@@ -303,11 +307,13 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
+	struct bpf_map_memory mem;
 
-	bpf_map_release_memlock(map);
+	bpf_map_charge_move(&mem, &map->memory);
 	security_bpf_map_free(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -395,7 +401,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   map->value_size,
 		   map->max_entries,
 		   map->map_flags,
-		   map->pages * 1ULL << PAGE_SHIFT,
+		   map->memory.pages * 1ULL << PAGE_SHIFT,
 		   map->id,
 		   READ_ONCE(map->frozen));
@@ -549,6 +555,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 static int map_create(union bpf_attr *attr)
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_map_memory mem;
 	struct bpf_map *map;
 	int f_flags;
 	int err;
@@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr)
 	err = bpf_obj_name_cpy(map->name, attr->map_name);
 	if (err)
-		goto free_map_nouncharge;
+		goto free_map;
 
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
@@ -583,20 +590,20 @@ static int map_create(union bpf_attr *attr)
 		if (!attr->btf_value_type_id) {
 			err = -EINVAL;
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		btf = btf_get_by_fd(attr->btf_fd);
 		if (IS_ERR(btf)) {
 			err = PTR_ERR(btf);
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		err = map_check_btf(map, btf, attr->btf_key_type_id,
 				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
-			goto free_map_nouncharge;
+			goto free_map;
 		}
 
 		map->btf = btf;
@@ -608,15 +615,11 @@ static int map_create(union bpf_attr *attr)
 	err = security_bpf_map_alloc(map);
 	if (err)
-		goto free_map_nouncharge;
-
-	err = bpf_map_init_memlock(map);
-	if (err)
-		goto free_map_sec;
+		goto free_map;
 
 	err = bpf_map_alloc_id(map);
 	if (err)
-		goto free_map;
+		goto free_map_sec;
 
 	err = bpf_map_new_fd(map, f_flags);
 	if (err < 0) {
@@ -632,13 +635,13 @@ static int map_create(union bpf_attr *attr)
 	return err;
 
-free_map:
-	bpf_map_release_memlock(map);
 free_map_sec:
 	security_bpf_map_free(map);
-free_map_nouncharge:
+free_map:
 	btf_put(map->btf);
+	bpf_map_charge_move(&mem, &map->memory);
 	map->ops->map_free(map);
+	bpf_map_charge_finish(&mem);
 	return err;
 }
......
@@ -37,13 +37,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
 	cost += sizeof(struct list_head) * num_possible_cpus();
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_m;
-
-	m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
 	/* Notice returns -EPERM on if map size is larger than memlock limit */
-	err = bpf_map_precharge_memlock(m->map.pages);
+	err = bpf_map_charge_init(&m->map.memory, cost);
 	if (err)
 		goto free_m;
@@ -51,7 +47,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 	m->flush_list = alloc_percpu(struct list_head);
 	if (!m->flush_list)
-		goto free_m;
+		goto free_charge;
 
 	for_each_possible_cpu(cpu)
 		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
@@ -65,6 +61,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 free_percpu:
 	free_percpu(m->flush_list);
+free_charge:
+	bpf_map_charge_finish(&m->map.memory);
 free_m:
 	kfree(m);
 	return ERR_PTR(err);
......
@@ -627,6 +627,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 	unsigned int i;
 	u32 nbuckets;
 	u64 cost;
+	int ret;
 
 	smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
 	if (!smap)
@@ -635,13 +636,21 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 	smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
 	nbuckets = 1U << smap->bucket_log;
+	cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
+
+	ret = bpf_map_charge_init(&smap->map.memory, cost);
+	if (ret < 0) {
+		kfree(smap);
+		return ERR_PTR(ret);
+	}
+
 	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
 				 GFP_USER | __GFP_NOWARN);
 	if (!smap->buckets) {
+		bpf_map_charge_finish(&smap->map.memory);
 		kfree(smap);
 		return ERR_PTR(-ENOMEM);
 	}
-	cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
 
 	for (i = 0; i < nbuckets; i++) {
 		INIT_HLIST_HEAD(&smap->buckets[i].list);
@@ -651,7 +660,6 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 	smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
 	smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
 		BPF_SK_STORAGE_CACHE_SIZE;
-	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
 	return &smap->map;
 }
......
@@ -44,13 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	/* Make sure page count doesn't overflow. */
 	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
-	if (cost >= U32_MAX - PAGE_SIZE) {
-		err = -EINVAL;
-		goto free_stab;
-	}
-
-	stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-	err = bpf_map_precharge_memlock(stab->map.pages);
+	err = bpf_map_charge_init(&stab->map.memory, cost);
 	if (err)
 		goto free_stab;
@@ -60,6 +54,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	if (stab->sks)
 		return &stab->map;
 	err = -ENOMEM;
+	bpf_map_charge_finish(&stab->map.memory);
 free_stab:
 	kfree(stab);
 	return ERR_PTR(err);
......