Commit c7d5ec26 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2019-05-16

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a use after free in __dev_map_entry_free(), from Eric.

2) Several sockmap-related bug fixes: a splat in strparser if
   it was never initialized, remove a duplicate ingress msg list
   purge which can race, fix msg->sg.size accounting upon skb
   to msg conversion, and last but not least fix a timeout bug
   in tcp_bpf_wait_data(), from John.

3) Fix LRU map so that lookups from the syscall side no longer
   disturb the eviction heuristics: a map walk from user space
   used to mark every element as recently referenced, so later
   updates would evict just-recently created entries, from Daniel.

4) Don't bail out when libbpf feature probing fails. Also
   various smaller fixes to flow_dissector test, from Stanislav.

5) Fix missing brackets for BTF_INT_OFFSET() in UAPI, from Gary.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 858f5017 5fa2ca7c
@@ -131,7 +131,7 @@ The following sections detail encoding of each kind.
 ``btf_type`` is followed by a ``u32`` with the following bits arrangement::

   #define BTF_INT_ENCODING(VAL)  (((VAL) & 0x0f000000) >> 24)
-  #define BTF_INT_OFFSET(VAL)    (((VAL & 0x00ff0000)) >> 16)
+  #define BTF_INT_OFFSET(VAL)    (((VAL) & 0x00ff0000) >> 16)
   #define BTF_INT_BITS(VAL)      ((VAL) & 0x000000ff)

 The ``BTF_INT_ENCODING`` has the following attributes::
...
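As an aside, here is a minimal user-space sketch (not part of the patch) of why the
extra parentheses matter once the macro argument is itself an expression; the
values below are made up purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* Old and fixed variants of the macro, reproduced here only to show the
 * operator-precedence problem; do not use the _OLD form.
 */
#define BTF_INT_OFFSET_OLD(VAL)	(((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_OFFSET_FIX(VAL)	(((VAL) & 0x00ff0000) >> 16)

int main(void)
{
	uint32_t enc = 0x01000000;	/* example encoding bits */
	uint32_t off = 0x00080000;	/* offset 8, stored in bits 16-23 */

	/* '&' binds tighter than '|', so without parentheses around VAL the
	 * encoding bits leak into the extracted offset.
	 */
	printf("old: %u\n", BTF_INT_OFFSET_OLD(enc | off));	/* prints 264 */
	printf("fix: %u\n", BTF_INT_OFFSET_FIX(enc | off));	/* prints 8 */
	return 0;
}
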
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 	void (*map_free)(struct bpf_map *map);
 	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
 	void (*map_release_uref)(struct bpf_map *map);
+	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);

 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
...
@@ -83,7 +83,7 @@ struct btf_type {
  * is the 32 bits arrangement:
  */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
-#define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x000000ff)

 /* Attributes stored in the BTF_INT_ENCODING */
...
@@ -164,6 +164,9 @@ static void dev_map_free(struct bpf_map *map)
 	bpf_clear_redirect_map(map);
 	synchronize_rcu();

+	/* Make sure prior __dev_map_entry_free() have completed. */
+	rcu_barrier();
+
 	/* To ensure all pending flush operations have completed wait for flush
 	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
 	 * Because the above synchronize_rcu() ensures the map is disconnected
...
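The ordering matters because synchronize_rcu() only waits for readers, not for
call_rcu() callbacks that are already queued. A hedged, generic sketch of the
teardown pattern relied on here (my_elem, my_map_free and friends are
illustrative names, not devmap code):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_elem {
	struct rcu_head rcu;
	void *payload;
};

/* Illustrative only: frees one element once no RCU reader can hold it. */
static void my_elem_free_rcu(struct rcu_head *head)
{
	struct my_elem *e = container_of(head, struct my_elem, rcu);

	kfree(e->payload);
	kfree(e);
}

static void my_elem_delete(struct my_elem *e)
{
	/* Defer the free until current RCU readers are done with e. */
	call_rcu(&e->rcu, my_elem_free_rcu);
}

static void my_map_free(void)
{
	/* After this, no new reader can find the map or its elements... */
	synchronize_rcu();

	/* ...but callbacks queued via call_rcu() may still be pending on
	 * some CPU, so wait for all of them before tearing down anything
	 * those callbacks still dereference.
	 */
	rcu_barrier();
}
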
@@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }

-static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
+							 void *key, const bool mark)
 {
 	struct htab_elem *l = __htab_map_lookup_elem(map, key);

 	if (l) {
-		bpf_lru_node_set_ref(&l->lru_node);
+		if (mark)
+			bpf_lru_node_set_ref(&l->lru_node);
 		return l->key + round_up(map->key_size, 8);
 	}

 	return NULL;
 }

+static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return __htab_lru_map_lookup_elem(map, key, true);
+}
+
+static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
+{
+	return __htab_lru_map_lookup_elem(map, key, false);
+}
+
 static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
@@ -1250,6 +1262,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
+	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
 	.map_update_elem = htab_lru_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
@@ -1281,7 +1294,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)

 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 {
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l;
 	void __percpu *pptr;
 	int ret = -ENOENT;
@@ -1297,8 +1309,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 	l = __htab_map_lookup_elem(map, key);
 	if (!l)
 		goto out;
-	if (htab_is_lru(htab))
-		bpf_lru_node_set_ref(&l->lru_node);
+	/* We do not mark LRU map element here in order to not mess up
+	 * eviction heuristics when user space does a map walk.
+	 */
 	pptr = htab_elem_get_ptr(l, map->key_size);
 	for_each_possible_cpu(cpu) {
 		bpf_long_memcpy(value + off,
...
@@ -808,7 +808,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = map->ops->map_peek_elem(map, value);
 	} else {
 		rcu_read_lock();
-		ptr = map->ops->map_lookup_elem(map, key);
+		if (map->ops->map_lookup_elem_sys_only)
+			ptr = map->ops->map_lookup_elem_sys_only(map, key);
+		else
+			ptr = map->ops->map_lookup_elem(map, key);
 		if (IS_ERR(ptr)) {
 			err = PTR_ERR(ptr);
 		} else if (!ptr) {
...
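With the sys-only hook wired up above, a plain user-space dump of an LRU hash
map no longer promotes every element it touches. A hedged sketch of such a
walk using libbpf's syscall wrappers (the map fd and the u32 key/value layout
are assumptions of the example):

#include <bpf/bpf.h>
#include <stdio.h>

static void dump_lru_map(int map_fd)
{
	__u32 cur, next, value;
	void *key = NULL;	/* NULL key starts the iteration */

	/* Each lookup below now goes through map_lookup_elem_sys_only for
	 * LRU maps, so dumping the map does not mark entries as referenced.
	 */
	while (bpf_map_get_next_key(map_fd, key, &next) == 0) {
		if (bpf_map_lookup_elem(map_fd, &next, &value) == 0)
			printf("key %u -> value %u\n", next, value);
		cur = next;
		key = &cur;
	}
}
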
@@ -1297,7 +1297,8 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 }

 #ifdef CONFIG_MODULES
-int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
+			    void *module)
 {
 	struct bpf_trace_module *btm, *tmp;
 	struct module *mod = module;
@@ -1336,7 +1337,7 @@ static struct notifier_block bpf_module_nb = {
 	.notifier_call = bpf_event_notify,
 };

-int __init bpf_event_init(void)
+static int __init bpf_event_init(void)
 {
 	register_module_notifier(&bpf_module_nb);
 	return 0;
...
@@ -411,6 +411,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	sk_mem_charge(sk, skb->len);
 	copied = skb->len;
 	msg->sg.start = 0;
+	msg->sg.size = copied;
 	msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
 	msg->skb = skb;
@@ -554,8 +555,10 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
 	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);

 	/* No sk_callback_lock since already detached. */
-	strp_stop(&psock->parser.strp);
-	strp_done(&psock->parser.strp);
+
+	/* Parser has been stopped */
+	if (psock->progs.skb_parser)
+		strp_done(&psock->parser.strp);

 	cancel_work_sync(&psock->work);
...
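The splat mentioned in the cover letter comes from running strparser teardown
on a parser that strp_init() never touched; sockmap only initializes it when
an skb parser program is attached. A hedged, generic sketch of the pairing
(my_psock and the helper names are illustrative, not kernel code):

#include <net/strparser.h>

struct my_psock {
	struct strparser strp;
	bool parser_enabled;	/* true only if strp_init() succeeded */
};

static int my_psock_start_parser(struct my_psock *p, struct sock *sk,
				 const struct strp_callbacks *cb)
{
	int err = strp_init(&p->strp, sk, cb);

	if (!err)
		p->parser_enabled = true;
	return err;
}

static void my_psock_destroy(struct my_psock *p)
{
	/* strp_done() on a never-initialized strparser crashes, so guard
	 * teardown with the same condition that guarded setup.
	 */
	if (p->parser_enabled)
		strp_done(&p->strp);
}
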
@@ -27,7 +27,10 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
 			     int flags, long timeo, int *err)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int ret;
+	int ret = 0;
+
+	if (!timeo)
+		return ret;

 	add_wait_queue(sk_sleep(sk), &wait);
 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -528,8 +531,6 @@ static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
 {
 	struct sk_psock_link *link;

-	sk_psock_cork_free(psock);
-	__sk_psock_purge_ingress_msg(psock);
 	while ((link = sk_psock_link_pop(psock))) {
 		sk_psock_unlink(sk, link);
 		sk_psock_free_link(link);
...
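For context on the tcp_bpf_wait_data() change above: a zero timeout corresponds
to a non-blocking read from user space, which with the early return now reports
"no data" cleanly instead of depending on an uninitialized ret. A hedged
user-space illustration (fd is assumed to be a TCP socket whose receive path is
handled by sockmap):

#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

/* Non-blocking read helper; returns 0 when no ingress data is queued yet. */
static ssize_t try_read(int fd, void *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, MSG_DONTWAIT);

	if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
		return 0;
	return n;
}
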
@@ -83,7 +83,7 @@ struct btf_type {
  * is the 32 bits arrangement:
  */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
-#define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL) & 0x000000ff)

 /* Attributes stored in the BTF_INT_ENCODING */
...
@@ -1696,7 +1696,7 @@ bpf_object__probe_caps(struct bpf_object *obj)
 	for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
 		ret = probe_fn[i](obj);
 		if (ret < 0)
-			return ret;
+			pr_debug("Probe #%d failed with %d.\n", i, ret);
 	}

 	return 0;
...
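A hedged usage sketch of what the libbpf change enables: loading an object on a
kernel where one of the feature probes fails should still succeed, with the
probe failure only visible at debug log level. "prog.o" is a placeholder path,
not a file from this series:

#include <bpf/libbpf.h>
#include <stdio.h>

static int load_object(const char *path)	/* e.g. "prog.o" (placeholder) */
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open(path);
	err = libbpf_get_error(obj);
	if (err) {
		fprintf(stderr, "open failed: %d\n", err);
		return err;
	}

	/* Feature probes run during load; with the fix a failing probe is
	 * logged via pr_debug() instead of aborting the whole load.
	 */
	err = bpf_object__load(obj);
	if (err)
		fprintf(stderr, "load failed: %d\n", err);

	bpf_object__close(obj);
	return err;
}
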
@@ -242,12 +242,12 @@ void test_flow_dissector(void)
 	 */
 	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
-	CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno);
+	CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);

 	tap_fd = create_tap("tap0");
-	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno);
+	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
 	err = ifup("tap0");
-	CHECK(err, "ifup", "err %d errno %d", err, errno);
+	CHECK(err, "ifup", "err %d errno %d\n", err, errno);

 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_flow_keys flow_keys = {};
@@ -255,7 +255,7 @@ void test_flow_dissector(void)
 		__u32 key = 0;

 		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
-		CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno);
+		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);

 		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
 		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
@@ -264,5 +264,6 @@ void test_flow_dissector(void)
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
 	}

+	bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
 	bpf_object__close(obj);
 }