Commit 23d86c8e authored by Martin KaFai Lau

Merge branch 'Use this_cpu_xxx for preemption-safety'

Hou Tao says:

====================

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to make the updates of the per-cpu prog->active and the
per-cpu bpf_task_storage_busy preemption-safe. The problem is that on some
architectures (e.g. arm64), __this_cpu_{inc|dec|inc_return} are neither
preemption-safe nor IRQ-safe, so under a fully preemptible kernel concurrent
updates on these per-cpu variables may be interleaved and their final values
may not be zero.
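
As a rough illustration (not taken from the patches; tmp just stands in for
the compiler-generated temporary), __this_cpu_inc() on such an architecture
is a plain load/add/store, so two tasks preempting each other on the same
CPU can interleave it:

	/* sketch of __this_cpu_inc(bpf_task_storage_busy) without
	 * preemption-safe per-cpu ops
	 */
	int tmp;

	tmp = bpf_task_storage_busy;	/* task A reads 0, then is preempted */
					/* task B runs on the same CPU and   */
					/* does a full inc/dec: 0 -> 1 -> 0  */
	tmp += 1;
	bpf_task_storage_busy = tmp;	/* task A resumes and stores 1, so   */
					/* the counter stays non-zero even   */
					/* though nobody holds it any more   */

The this_cpu_{inc|dec|inc_return} helpers use preemption- and IRQ-safe
primitives, so the read-modify-write above cannot be split this way.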

Patch 1 & 2 use the preemption-safe per-cpu helpers to manipulate
prog->active and bpf_task_storage_busy. Patch 3 & 4 add a test case in
map_tests to show that concurrent updates of the per-cpu
bpf_task_storage_busy through __this_cpu_{inc|dec} are not atomic.

Comments are always welcome.

Regards,
Tao

Change Log:
v2:
* Patch 1: update the commit message to indicate that the problem is only
  possible on a fully preemptible kernel
* Patch 2: a new patch which fixes the problem for prog->active
* Patch 3 & 4: move the test to test_maps and make it depend on CONFIG_PREEMPT

v1: https://lore.kernel.org/bpf/20220829142752.330094-1-houtao@huaweicloud.com/
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents c9ae8c96 73b97bc7
@@ -555,11 +555,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
 					struct bpf_local_storage_elem, map_node))) {
 		if (busy_counter) {
 			migrate_disable();
-			__this_cpu_inc(*busy_counter);
+			this_cpu_inc(*busy_counter);
 		}
 		bpf_selem_unlink(selem, false);
 		if (busy_counter) {
-			__this_cpu_dec(*busy_counter);
+			this_cpu_dec(*busy_counter);
 			migrate_enable();
 		}
 		cond_resched_rcu();
@@ -26,20 +26,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy);
 static void bpf_task_storage_lock(void)
 {
 	migrate_disable();
-	__this_cpu_inc(bpf_task_storage_busy);
+	this_cpu_inc(bpf_task_storage_busy);
 }
 
 static void bpf_task_storage_unlock(void)
 {
-	__this_cpu_dec(bpf_task_storage_busy);
+	this_cpu_dec(bpf_task_storage_busy);
 	migrate_enable();
 }
 
 static bool bpf_task_storage_trylock(void)
 {
 	migrate_disable();
-	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
-		__this_cpu_dec(bpf_task_storage_busy);
+	if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+		this_cpu_dec(bpf_task_storage_busy);
 		migrate_enable();
 		return false;
 	}
@@ -895,7 +895,7 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *ru
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
-	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
+	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
 	}
@@ -930,7 +930,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	__this_cpu_dec(*(prog->active));
+	this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
@@ -966,7 +966,7 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r
 	migrate_disable();
 	might_fault();
 
-	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
+	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
 	}
@@ -982,7 +982,7 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	__this_cpu_dec(*(prog->active));
+	this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock_trace();
 }
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>
#include <string.h>
#include <pthread.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include "test_maps.h"
#include "task_local_storage_helpers.h"
#include "read_bpf_task_storage_busy.skel.h"

struct lookup_ctx {
	bool start;
	bool stop;
	int pid_fd;
	int map_fd;
	int loop;
};

static void *lookup_fn(void *arg)
{
	struct lookup_ctx *ctx = arg;
	long value;
	int i = 0;

	while (!ctx->start)
		usleep(1);

	while (!ctx->stop && i++ < ctx->loop)
		bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
	return NULL;
}

static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
{
	unsigned int i;

	ctx->stop = true;
	ctx->start = true;
	for (i = 0; i < nr; i++)
		pthread_join(tids[i], NULL);
}

void test_task_storage_map_stress_lookup(void)
{
#define MAX_NR_THREAD 4096
	unsigned int i, nr = 256, loop = 8192, cpu = 0;
	struct read_bpf_task_storage_busy *skel;
	pthread_t tids[MAX_NR_THREAD];
	struct lookup_ctx ctx;
	cpu_set_t old, new;
	const char *cfg;
	int err;

	cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
	if (cfg) {
		nr = atoi(cfg);
		if (nr > MAX_NR_THREAD)
			nr = MAX_NR_THREAD;
	}
	cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
	if (cfg)
		loop = atoi(cfg);
	cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
	if (cfg)
		cpu = atoi(cfg);

	skel = read_bpf_task_storage_busy__open_and_load();
	err = libbpf_get_error(skel);
	CHECK(err, "open_and_load", "error %d\n", err);

	/* Only for a fully preemptible kernel */
	if (!skel->kconfig->CONFIG_PREEMPT)
		return;

	/* Save the old affinity setting */
	sched_getaffinity(getpid(), sizeof(old), &old);

	/* Pinned on a specific CPU */
	CPU_ZERO(&new);
	CPU_SET(cpu, &new);
	sched_setaffinity(getpid(), sizeof(new), &new);

	ctx.start = false;
	ctx.stop = false;
	ctx.pid_fd = sys_pidfd_open(getpid(), 0);
	ctx.map_fd = bpf_map__fd(skel->maps.task);
	ctx.loop = loop;
	for (i = 0; i < nr; i++) {
		err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
		if (err) {
			abort_lookup(&ctx, tids, i);
			CHECK(err, "pthread_create", "error %d\n", err);
			goto out;
		}
	}

	ctx.start = true;
	for (i = 0; i < nr; i++)
		pthread_join(tids[i], NULL);

	skel->bss->pid = getpid();
	err = read_bpf_task_storage_busy__attach(skel);
	CHECK(err, "attach", "error %d\n", err);

	/* Trigger program */
	syscall(SYS_gettid);
	skel->bss->pid = 0;

	CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
out:
	read_bpf_task_storage_busy__destroy(skel);

	/* Restore affinity setting */
	sched_setaffinity(getpid(), sizeof(old), &old);
}
@@ -9,18 +9,10 @@
 #include "bprm_opts.skel.h"
 #include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
+#include "task_local_storage_helpers.h"
 
 static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
 
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
-	return syscall(__NR_pidfd_open, pid, flags);
-}
-
 static int update_storage(int map_fd, int secureexec)
 {
 	int task_fd, ret = 0;
@@ -11,15 +11,7 @@
 #include "local_storage.skel.h"
 #include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
-
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
-	return syscall(__NR_pidfd_open, pid, flags);
-}
+#include "task_local_storage_helpers.h"
 
 static unsigned int duration;
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern bool CONFIG_PREEMPT __kconfig __weak;
extern const int bpf_task_storage_busy __ksym;

char _license[] SEC("license") = "GPL";

int pid = 0;
int busy = 0;

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, long);
} task SEC(".maps");

SEC("raw_tp/sys_enter")
int BPF_PROG(read_bpf_task_storage_busy)
{
	int *value;
	int key;

	if (!CONFIG_PREEMPT)
		return 0;

	if (bpf_get_current_pid_tgid() >> 32 != pid)
		return 0;

	value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
	if (value)
		busy = *value;

	return 0;
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __TASK_LOCAL_STORAGE_HELPER_H
#define __TASK_LOCAL_STORAGE_HELPER_H

#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434
#endif

static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	return syscall(__NR_pidfd_open, pid, flags);
}
#endif