Commit 8b8e3ad0 authored by David S. Miller's avatar David S. Miller

Merge branch 'sample-bpf-loader-fixes'

Jesper Dangaard Brouer says:

====================
Improve bpf ELF-loader under samples/bpf

This series improves and fixes bpf ELF loader and programs under
samples/bpf.  The bpf_load.c created some hard to debug issues when
the struct (bpf_map_def) used in the ELF maps section format changed
in commit fb30d4b7 ("bpf: Add tests for map-in-map").

This was hotfixed in commit 409526bea3c3 ("samples/bpf: bpf_load.c
detect and abort if ELF maps section size is wrong") by detecting the
issue and aborting the program.

In most situations the bpf-loader should be able to handle these kind
of changes to the struct size.  This patch series aim to do proper
backward and forward compabilility handling when loading ELF files.

This series also adjust the callback that was introduced in commit
9fd63d05 ("bpf: Allow bpf sample programs (*_user.c) to change
bpf_map_def") to use the new bpf_map_data structure, before more users
start to use this callback.

Hoping these changes can make the merge window, as above mentioned
commits have not been merged yet, and it would be good to avoid users
hitting these issues.
====================
Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 89c9fea3 9178b4c1
...@@ -39,6 +39,9 @@ int event_fd[MAX_PROGS]; ...@@ -39,6 +39,9 @@ int event_fd[MAX_PROGS];
int prog_cnt; int prog_cnt;
int prog_array_fd = -1; int prog_array_fd = -1;
struct bpf_map_data map_data[MAX_MAPS];
int map_data_count = 0;
static int populate_prog_array(const char *event, int prog_fd) static int populate_prog_array(const char *event, int prog_fd)
{ {
int ind = atoi(event), err; int ind = atoi(event), err;
...@@ -186,42 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ...@@ -186,42 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return 0; return 0;
} }
static int load_maps(struct bpf_map_def *maps, int nr_maps, static int load_maps(struct bpf_map_data *maps, int nr_maps,
const char **map_names, fixup_map_cb fixup_map) fixup_map_cb fixup_map)
{ {
int i; int i;
/*
* Warning: Using "maps" pointing to ELF data_maps->d_buf as
* an array of struct bpf_map_def is a wrong assumption about
* the ELF maps section format.
*/
for (i = 0; i < nr_maps; i++) { for (i = 0; i < nr_maps; i++) {
if (fixup_map) if (fixup_map) {
fixup_map(&maps[i], map_names[i], i); fixup_map(&maps[i], i);
/* Allow userspace to assign map FD prior to creation */
if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS || if (maps[i].fd != -1) {
maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) { map_fd[i] = maps[i].fd;
int inner_map_fd = map_fd[maps[i].inner_map_idx]; continue;
}
map_fd[i] = bpf_create_map_in_map(maps[i].type, }
maps[i].key_size,
inner_map_fd, if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
maps[i].max_entries, maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
maps[i].map_flags); int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
maps[i].def.key_size,
inner_map_fd,
maps[i].def.max_entries,
maps[i].def.map_flags);
} else { } else {
map_fd[i] = bpf_create_map(maps[i].type, map_fd[i] = bpf_create_map(maps[i].def.type,
maps[i].key_size, maps[i].def.key_size,
maps[i].value_size, maps[i].def.value_size,
maps[i].max_entries, maps[i].def.max_entries,
maps[i].map_flags); maps[i].def.map_flags);
} }
if (map_fd[i] < 0) { if (map_fd[i] < 0) {
printf("failed to create a map: %d %s\n", printf("failed to create a map: %d %s\n",
errno, strerror(errno)); errno, strerror(errno));
return 1; return 1;
} }
maps[i].fd = map_fd[i];
if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
prog_array_fd = map_fd[i]; prog_array_fd = map_fd[i];
} }
return 0; return 0;
...@@ -251,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, ...@@ -251,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
} }
static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
GElf_Shdr *shdr, struct bpf_insn *insn) GElf_Shdr *shdr, struct bpf_insn *insn,
struct bpf_map_data *maps, int nr_maps)
{ {
int i, nrels; int i, nrels;
...@@ -261,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, ...@@ -261,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
GElf_Sym sym; GElf_Sym sym;
GElf_Rel rel; GElf_Rel rel;
unsigned int insn_idx; unsigned int insn_idx;
bool match = false;
int j, map_idx;
gelf_getrel(data, i, &rel); gelf_getrel(data, i, &rel);
...@@ -274,11 +283,21 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, ...@@ -274,11 +283,21 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
return 1; return 1;
} }
insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
/*
* Warning: Using sizeof(struct bpf_map_def) here is a /* Match FD relocation against recorded map_data[] offset */
* wrong assumption about ELF maps section format for (map_idx = 0; map_idx < nr_maps; map_idx++) {
*/ if (maps[map_idx].elf_offset == sym.st_value) {
insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; match = true;
break;
}
}
if (match) {
insn[insn_idx].imm = maps[map_idx].fd;
} else {
printf("invalid relo for insn[%d] no map_data match\n",
insn_idx);
return 1;
}
} }
return 0; return 0;
...@@ -297,40 +316,112 @@ static int cmp_symbols(const void *l, const void *r) ...@@ -297,40 +316,112 @@ static int cmp_symbols(const void *l, const void *r)
return 0; return 0;
} }
static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx, static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
int strtabidx, char **map_names) Elf *elf, Elf_Data *symbols, int strtabidx)
{ {
GElf_Sym map_symbols[MAX_MAPS]; int map_sz_elf, map_sz_copy;
int i, nr_maps = 0; bool validate_zero = false;
Elf_Data *data_maps;
int i, nr_maps;
GElf_Sym *sym;
Elf_Scn *scn;
int copy_sz;
if (maps_shndx < 0)
return -EINVAL;
if (!symbols)
return -EINVAL;
/* Get data for maps section via elf index */
scn = elf_getscn(elf, maps_shndx);
if (scn)
data_maps = elf_getdata(scn, NULL);
if (!scn || !data_maps) {
printf("Failed to get Elf_Data from maps section %d\n",
maps_shndx);
return -EINVAL;
}
for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { /* For each map get corrosponding symbol table entry */
assert(nr_maps < MAX_MAPS); sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
if (!gelf_getsym(symbols, i, &map_symbols[nr_maps])) for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
assert(nr_maps < MAX_MAPS+1);
if (!gelf_getsym(symbols, i, &sym[nr_maps]))
continue; continue;
if (map_symbols[nr_maps].st_shndx != maps_shndx) if (sym[nr_maps].st_shndx != maps_shndx)
continue; continue;
/* Only increment iif maps section */
nr_maps++; nr_maps++;
} }
qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols); /* Align to map_fd[] order, via sort on offset in sym.st_value */
qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
/* Keeping compatible with ELF maps section changes
* ------------------------------------------------
* The program size of struct bpf_map_def is known by loader
* code, but struct stored in ELF file can be different.
*
* Unfortunately sym[i].st_size is zero. To calculate the
* struct size stored in the ELF file, assume all struct have
* the same size, and simply divide with number of map
* symbols.
*/
map_sz_elf = data_maps->d_size / nr_maps;
map_sz_copy = sizeof(struct bpf_map_def);
if (map_sz_elf < map_sz_copy) {
/*
* Backward compat, loading older ELF file with
* smaller struct, keeping remaining bytes zero.
*/
map_sz_copy = map_sz_elf;
} else if (map_sz_elf > map_sz_copy) {
/*
* Forward compat, loading newer ELF file with larger
* struct with unknown features. Assume zero means
* feature not used. Thus, validate rest of struct
* data is zero.
*/
validate_zero = true;
}
/* Memcpy relevant part of ELF maps data to loader maps */
for (i = 0; i < nr_maps; i++) { for (i = 0; i < nr_maps; i++) {
char *map_name; unsigned char *addr, *end;
struct bpf_map_def *def;
map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name); const char *map_name;
if (!map_name) { size_t offset;
printf("cannot get map symbol\n");
return -1; map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
} maps[i].name = strdup(map_name);
if (!maps[i].name) {
map_names[i] = strdup(map_name);
if (!map_names[i]) {
printf("strdup(%s): %s(%d)\n", map_name, printf("strdup(%s): %s(%d)\n", map_name,
strerror(errno), errno); strerror(errno), errno);
return -1; free(sym);
return -errno;
}
/* Symbol value is offset into ELF maps section data area */
offset = sym[i].st_value;
def = (struct bpf_map_def *)(data_maps->d_buf + offset);
maps[i].elf_offset = offset;
memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
memcpy(&maps[i].def, def, map_sz_copy);
/* Verify no newer features were requested */
if (validate_zero) {
addr = (unsigned char*) def + map_sz_copy;
end = (unsigned char*) def + map_sz_elf;
for (; addr < end; addr++) {
if (*addr != 0) {
free(sym);
return -EFBIG;
}
}
} }
} }
free(sym);
return nr_maps; return nr_maps;
} }
...@@ -341,7 +432,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ...@@ -341,7 +432,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
GElf_Ehdr ehdr; GElf_Ehdr ehdr;
GElf_Shdr shdr, shdr_prog; GElf_Shdr shdr, shdr_prog;
Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL }; char *shname, *shname_prog;
int nr_maps = 0;
/* reset global variables */ /* reset global variables */
kern_version = 0; kern_version = 0;
...@@ -389,8 +481,12 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ...@@ -389,8 +481,12 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
} }
memcpy(&kern_version, data->d_buf, sizeof(int)); memcpy(&kern_version, data->d_buf, sizeof(int));
} else if (strcmp(shname, "maps") == 0) { } else if (strcmp(shname, "maps") == 0) {
int j;
maps_shndx = i; maps_shndx = i;
data_maps = data; data_maps = data;
for (j = 0; j < MAX_MAPS; j++)
map_data[j].fd = -1;
} else if (shdr.sh_type == SHT_SYMTAB) { } else if (shdr.sh_type == SHT_SYMTAB) {
strtabidx = shdr.sh_link; strtabidx = shdr.sh_link;
symbols = data; symbols = data;
...@@ -405,27 +501,17 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ...@@ -405,27 +501,17 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
} }
if (data_maps) { if (data_maps) {
int nr_maps; nr_maps = load_elf_maps_section(map_data, maps_shndx,
int prog_elf_map_sz; elf, symbols, strtabidx);
if (nr_maps < 0) {
nr_maps = get_sorted_map_names(elf, symbols, maps_shndx, printf("Error: Failed loading ELF maps (errno:%d):%s\n",
strtabidx, map_names); nr_maps, strerror(-nr_maps));
if (nr_maps < 0)
goto done;
/* Deduce map struct size stored in ELF maps section */
prog_elf_map_sz = data_maps->d_size / nr_maps;
if (prog_elf_map_sz != sizeof(struct bpf_map_def)) {
printf("Error: ELF maps sec wrong size (%d/%lu),"
" old kern.o file?\n",
prog_elf_map_sz, sizeof(struct bpf_map_def));
ret = 1; ret = 1;
goto done; goto done;
} }
if (load_maps(map_data, nr_maps, fixup_map))
if (load_maps(data_maps->d_buf, nr_maps,
(const char **)map_names, fixup_map))
goto done; goto done;
map_data_count = nr_maps;
processed_sec[maps_shndx] = true; processed_sec[maps_shndx] = true;
} }
...@@ -453,7 +539,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ...@@ -453,7 +539,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
processed_sec[shdr.sh_info] = true; processed_sec[shdr.sh_info] = true;
processed_sec[i] = true; processed_sec[i] = true;
if (parse_relo_and_apply(data, symbols, &shdr, insns)) if (parse_relo_and_apply(data, symbols, &shdr, insns,
map_data, nr_maps))
continue; continue;
if (memcmp(shname_prog, "kprobe/", 7) == 0 || if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
...@@ -488,8 +575,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ...@@ -488,8 +575,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
ret = 0; ret = 0;
done: done:
for (i = 0; i < MAX_MAPS; i++)
free(map_names[i]);
close(fd); close(fd);
return ret; return ret;
} }
......
...@@ -15,15 +15,27 @@ struct bpf_map_def { ...@@ -15,15 +15,27 @@ struct bpf_map_def {
unsigned int inner_map_idx; unsigned int inner_map_idx;
}; };
typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name, struct bpf_map_data {
int idx); int fd;
char *name;
size_t elf_offset;
struct bpf_map_def def;
};
typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS]; extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS];
extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
extern int prog_cnt; extern int prog_cnt;
/* There is a one-to-one mapping between map_fd[] and map_data[].
* The map_data[] just contains more rich info on the given map.
*/
extern int map_fd[MAX_MAPS];
extern struct bpf_map_data map_data[MAX_MAPS];
extern int map_data_count;
/* parses elf file compiled by llvm .c->.o /* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall * . parses 'maps' section and creates maps via BPF syscall
* . parses 'license' section and passes it to syscall * . parses 'license' section and passes it to syscall
......
...@@ -320,21 +320,21 @@ static void fill_lpm_trie(void) ...@@ -320,21 +320,21 @@ static void fill_lpm_trie(void)
assert(!r); assert(!r);
} }
static void fixup_map(struct bpf_map_def *map, const char *name, int idx) static void fixup_map(struct bpf_map_data *map, int idx)
{ {
int i; int i;
if (!strcmp("inner_lru_hash_map", name)) { if (!strcmp("inner_lru_hash_map", map->name)) {
inner_lru_hash_idx = idx; inner_lru_hash_idx = idx;
inner_lru_hash_size = map->max_entries; inner_lru_hash_size = map->def.max_entries;
} }
if (!strcmp("array_of_lru_hashs", name)) { if (!strcmp("array_of_lru_hashs", map->name)) {
if (inner_lru_hash_idx == -1) { if (inner_lru_hash_idx == -1) {
printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n"); printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
exit(1); exit(1);
} }
map->inner_map_idx = inner_lru_hash_idx; map->def.inner_map_idx = inner_lru_hash_idx;
array_of_lru_hashs_idx = idx; array_of_lru_hashs_idx = idx;
} }
...@@ -345,9 +345,9 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx) ...@@ -345,9 +345,9 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
/* Only change the max_entries for the enabled test(s) */ /* Only change the max_entries for the enabled test(s) */
for (i = 0; i < NR_TESTS; i++) { for (i = 0; i < NR_TESTS; i++) {
if (!strcmp(test_map_names[i], name) && if (!strcmp(test_map_names[i], map->name) &&
(check_test_flags(i))) { (check_test_flags(i))) {
map->max_entries = num_map_entries; map->def.max_entries = num_map_entries;
} }
} }
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <signal.h> #include <signal.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <string.h> #include <string.h>
#include <sys/resource.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
...@@ -112,6 +113,7 @@ static void int_exit(int sig) ...@@ -112,6 +113,7 @@ static void int_exit(int sig)
int main(int ac, char **argv) int main(int ac, char **argv)
{ {
struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256]; char filename[256];
long key, next_key, value; long key, next_key, value;
FILE *f; FILE *f;
...@@ -119,6 +121,11 @@ int main(int ac, char **argv) ...@@ -119,6 +121,11 @@ int main(int ac, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
signal(SIGINT, int_exit); signal(SIGINT, int_exit);
/* start 'ping' in the background to have some kfree_skb events */ /* start 'ping' in the background to have some kfree_skb events */
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <sys/resource.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
...@@ -112,11 +113,17 @@ static void print_hist(int fd) ...@@ -112,11 +113,17 @@ static void print_hist(int fd)
int main(int ac, char **argv) int main(int ac, char **argv)
{ {
struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256]; char filename[256];
int i; int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
if (load_bpf_file(filename)) { if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf); printf("%s", bpf_log_buf);
return 1; return 1;
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <sys/resource.h>
#include "libbpf.h" #include "libbpf.h"
#include "bpf_load.h" #include "bpf_load.h"
...@@ -50,11 +52,17 @@ static void print_old_objects(int fd) ...@@ -50,11 +52,17 @@ static void print_old_objects(int fd)
int main(int ac, char **argv) int main(int ac, char **argv)
{ {
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256]; char filename[256];
int i; int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
return 1;
}
if (load_bpf_file(filename)) { if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf); printf("%s", bpf_log_buf);
return 1; return 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment