Commit 7c7982cb authored by Martin KaFai Lau, committed by Alexei Starovoitov

bpf: sk_storage: Prefer to get a free cache_idx

The cache_idx is currently picked by round-robin (RR).  There is a chance
that the same cache_idx will be picked by multiple sk_storage_maps while
other cache_idx values are still unused.  E.g. this could happen when the
sk_storage_map is recreated during the restart of the user
space process.

This patch tracks the usage count for each cache_idx.  There are
16 of them now (defined in BPF_SK_STORAGE_CACHE_SIZE).
It will try to pick a free cache_idx.  If none is found,
it picks the one with the minimal usage count.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200617174226.2301909-1-kafai@fb.com
parent 7bd3a33a
...@@ -11,8 +11,6 @@ ...@@ -11,8 +11,6 @@
#include <uapi/linux/sock_diag.h> #include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h> #include <uapi/linux/btf.h>
static atomic_t cache_idx;
#define SK_STORAGE_CREATE_FLAG_MASK \ #define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE) (BPF_F_NO_PREALLOC | BPF_F_CLONE)
...@@ -81,6 +79,9 @@ struct bpf_sk_storage_elem { ...@@ -81,6 +79,9 @@ struct bpf_sk_storage_elem {
#define SDATA(_SELEM) (&(_SELEM)->sdata) #define SDATA(_SELEM) (&(_SELEM)->sdata)
#define BPF_SK_STORAGE_CACHE_SIZE 16 #define BPF_SK_STORAGE_CACHE_SIZE 16
/* Held around the reads and writes of cache_idx_usage_counts[] done by
 * cache_idx_get() and cache_idx_free().
 */
static DEFINE_SPINLOCK(cache_idx_lock);
/* One usage counter per cache_idx: incremented when cache_idx_get() hands
 * the index out, decremented by cache_idx_free().
 */
static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
struct bpf_sk_storage { struct bpf_sk_storage {
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_sk_storage_elem */ struct hlist_head list; /* List of bpf_sk_storage_elem */
...@@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) ...@@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0; return 0;
} }
/* Pick a cache_idx and account for its new user.
 *
 * Scans cache_idx_usage_counts[] under cache_idx_lock and selects the
 * lowest-numbered index holding the smallest usage count.  The scan stops
 * as soon as an index with a zero count is seen.  The chosen index has its
 * usage count incremented before the lock is released.
 *
 * Return: the selected index, in [0, BPF_SK_STORAGE_CACHE_SIZE).
 */
static u16 cache_idx_get(void)
{
	u64 lowest = U64_MAX;
	u16 slot, picked = 0;

	spin_lock(&cache_idx_lock);

	for (slot = 0; slot < BPF_SK_STORAGE_CACHE_SIZE; slot++) {
		u64 usage = cache_idx_usage_counts[slot];

		/* Only a strictly smaller count replaces the current pick,
		 * so ties resolve to the lowest index.
		 */
		if (usage >= lowest)
			continue;

		lowest = usage;
		picked = slot;

		/* A zero count cannot be beaten; stop searching. */
		if (lowest == 0)
			break;
	}

	cache_idx_usage_counts[picked] += 1;

	spin_unlock(&cache_idx_lock);

	return picked;
}
/* Drop one user from the usage count of @idx.
 *
 * The decrement is done under cache_idx_lock.  @idx is used directly as
 * an index into cache_idx_usage_counts[] without a range check.
 */
static void cache_idx_free(u16 idx)
{
	u64 *usage = &cache_idx_usage_counts[idx];

	spin_lock(&cache_idx_lock);
	*usage -= 1;
	spin_unlock(&cache_idx_lock);
}
/* Called by __sk_destruct() & bpf_sk_storage_clone() */ /* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk) void bpf_sk_storage_free(struct sock *sk)
{ {
...@@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) ...@@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map; smap = (struct bpf_sk_storage_map *)map;
cache_idx_free(smap->cache_idx);
/* Note that this map might be concurrently cloned from /* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU * RCU read section to finish before proceeding. New RCU
...@@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) ...@@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
} }
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % smap->cache_idx = cache_idx_get();
BPF_SK_STORAGE_CACHE_SIZE;
return &smap->map; return &smap->map;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment