Commit 174a79ff authored by John Fastabend, committed by David S. Miller

bpf: sockmap with sk redirect support

Recently we added a new map type called dev map used to forward XDP
packets between ports (6093ec2d). This patch introduces a
similar notion for sockets.

A sockmap allows users to add participating sockets to a map. When
sockets are added to the map enough context is stored with the
map entry to use the entry with a new helper

  bpf_sk_redirect_map(map, key, flags)

This helper (analogous to bpf_redirect_map in XDP) is given the map
and an entry in the map. When called from a sockmap program, discussed
below, the skb will be sent on the socket using skb_send_sock().

With the above we need a bpf program to call the helper from that will
then implement the send logic. The initial site implemented in this
series is the recv_sock hook. For this to work we implemented a map
attach command to add attributes to a map. In sockmap we add two
programs: a parse program and a verdict program. The parse program
uses strparser to build messages and pass them to the verdict program.
The parse programs use the normal strparser semantics. The verdict
program is of type SK_SKB.

The verdict program returns a verdict of SK_DROP or SK_REDIRECT for
now. Additional actions may be added later. When SK_REDIRECT is
returned, expected when bpf program uses bpf_sk_redirect_map(), the
sockmap logic will consult per cpu variables set by the helper routine
and pull the sock entry out of the sock map. This pattern follows the
existing redirect logic in cls and xdp programs.

This gives the flow,

 recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock
                                                     \
                                                      -> kfree_skb

As an example use case a message based load balancer may use specific
logic in the verdict program to select the sock to send on.

Sample programs are provided in future patches that hopefully illustrate
the user interfaces. Also selftests are in follow-on patches.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a6f6df69
......@@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h>
struct perf_event;
struct bpf_prog;
struct bpf_map;
/* map is generic key/value storage optionally accessible by eBPF programs */
......@@ -37,6 +38,8 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
int (*map_attach)(struct bpf_map *map,
struct bpf_prog *p1, struct bpf_prog *p2);
};
struct bpf_map {
......@@ -138,8 +141,6 @@ enum bpf_reg_type {
PTR_TO_PACKET_END, /* skb->data + headlen */
};
struct bpf_prog;
/* The information passed from prog-specific *_is_valid_access
* back to the verifier.
*/
......@@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
......@@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
......
......@@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
......@@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(void);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
......
......@@ -110,6 +110,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
};
enum bpf_prog_type {
......@@ -135,11 +136,15 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
BPF_CGROUP_SMAP_INGRESS,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
#define BPF_SOCKMAP_STRPARSER (1U << 0)
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
......@@ -211,6 +216,7 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
__u32 attach_bpf_fd2;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
......@@ -557,6 +563,23 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
* int bpf_sk_redirect_map(map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
* Return: SK_REDIRECT on success, SK_ABORTED if @flags is non-zero
*
* int bpf_sock_map_update(skops, map, key, flags, map_flags)
* @skops: pointer to bpf_sock_ops
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
* @map_flags: sock map specific flags
* bit 0: Enable strparser (BPF_SOCKMAP_STRPARSER)
* other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -610,7 +633,9 @@ union bpf_attr {
FN(set_hash), \
FN(setsockopt), \
FN(skb_adjust_room), \
FN(redirect_map),
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -747,6 +772,12 @@ struct xdp_md {
__u32 data_end;
};
/* Return codes used by socket-redirect programs and bpf_sk_redirect_map(). */
enum sk_action {
/* Returned by bpf_sk_redirect_map() when it is passed non-zero flags. */
SK_ABORTED = 0,
/* Verdict: do not forward the skb (the commit's flow frees it). */
SK_DROP,
/* Verdict: forward the skb to the sock selected via bpf_sk_redirect_map(). */
SK_REDIRECT,
};
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
......
......@@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += devmap.o sockmap.o
endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
......
This diff is collapsed.
......@@ -1087,7 +1087,50 @@ static int bpf_obj_get(const union bpf_attr *attr)
#ifdef CONFIG_CGROUP_BPF
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
#define BPF_PROG_ATTACH_LAST_FIELD attach_bpf_fd2
/*
 * sockmap_get_from_fd - attach a pair of programs to a map given by fd
 * @attr:  BPF_PROG_ATTACH attributes; target_fd names the map,
 *         attach_bpf_fd and attach_bpf_fd2 name the two programs
 * @ptype: program type both program fds are required to have
 *
 * Looks up the map behind @attr->target_fd, takes a reference on both
 * programs and hands them to the map's ->map_attach() callback.
 *
 * Return: 0 on success; -EOPNOTSUPP if the map has no ->map_attach()
 * callback; otherwise the error reported by the map lookup, the program
 * lookup or ->map_attach() itself.
 */
static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype)
{
	struct bpf_prog *prog1, *prog2;
	int ufd = attr->target_fd;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	/*
	 * NOTE(review): no fdput() on this path, as in the original;
	 * presumably __bpf_map_get() releases the fd on failure - confirm.
	 */
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!map->ops->map_attach) {
		err = -EOPNOTSUPP;
		goto out_fdput;
	}

	prog1 = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog1)) {
		err = PTR_ERR(prog1);
		goto out_fdput;
	}

	prog2 = bpf_prog_get_type(attr->attach_bpf_fd2, ptype);
	if (IS_ERR(prog2)) {
		err = PTR_ERR(prog2);
		goto out_put_prog1;
	}

	/*
	 * Propagate the callback's own error code. 'map' is a valid pointer
	 * at this point, so PTR_ERR(map) would hand the caller a truncated
	 * address instead of a meaningful errno.
	 */
	err = map->ops->map_attach(map, prog1, prog2);
	if (err)
		goto out_put_prog2;

	/* Success: the program references taken above are not dropped here. */
	fdput(f);
	return 0;

out_put_prog2:
	bpf_prog_put(prog2);
out_put_prog1:
	bpf_prog_put(prog1);
out_fdput:
	fdput(f);
	return err;
}
static int bpf_prog_attach(const union bpf_attr *attr)
{
......@@ -1116,10 +1159,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
case BPF_CGROUP_SMAP_INGRESS:
ptype = BPF_PROG_TYPE_SK_SKB;
break;
default:
return -EINVAL;
}
if (attr->attach_type == BPF_CGROUP_SMAP_INGRESS)
return sockmap_get_from_fd(attr, ptype);
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
......
......@@ -1522,6 +1522,12 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
case BPF_MAP_TYPE_HASH_OF_MAPS:
if (func_id != BPF_FUNC_map_lookup_elem)
goto error;
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
func_id != BPF_FUNC_map_delete_elem)
goto error;
break;
default:
break;
}
......@@ -1550,6 +1556,14 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_DEVMAP)
goto error;
break;
case BPF_FUNC_sk_redirect_map:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
goto error;
break;
case BPF_FUNC_sock_map_update:
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
goto error;
break;
default:
break;
}
......
......@@ -1858,6 +1858,45 @@ static const struct bpf_func_proto bpf_redirect_map_proto = {
.arg3_type = ARG_ANYTHING,
};
/*
 * bpf_sk_redirect_map - record a sock redirect target for the current CPU
 * @map:   map to look the target sock up in
 * @key:   index of the target sock within @map
 * @flags: must be zero
 *
 * Stores @map, @key and @flags in this CPU's redirect_info; the target is
 * resolved later by do_sk_redirect_map().
 *
 * Return: SK_REDIRECT, or SK_ABORTED if @flags is non-zero.
 */
BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
{
	struct redirect_info *info;

	/* Any non-zero flag value is rejected. */
	if (unlikely(flags != 0))
		return SK_ABORTED;

	info = this_cpu_ptr(&redirect_info);
	info->map = map;
	info->flags = flags;
	/* The ifindex slot carries the map key for sock redirects. */
	info->ifindex = key;

	return SK_REDIRECT;
}
/*
 * do_sk_redirect_map - resolve the pending sock redirect on this CPU
 *
 * Reads the map and key stored in this CPU's redirect_info, looks the sock
 * up with __sock_map_lookup_elem() and then clears the stored map and key.
 *
 * Return: the result of the lookup, or NULL when no map is recorded.
 */
struct sock *do_sk_redirect_map(void)
{
	struct redirect_info *info = this_cpu_ptr(&redirect_info);
	struct sock *target;

	/* Nothing pending: no redirect map was recorded. */
	if (!info->map)
		return NULL;

	target = __sock_map_lookup_elem(info->map, info->ifindex);

	/* Consume the request so it is not reused by a later call. */
	info->map = NULL;
	info->ifindex = 0;
	/* we do not clear flags for future lookup */

	return target;
}
/*
 * Helper prototype for bpf_sk_redirect_map(map, key, flags): arg1 is a
 * constant map pointer, arg2 (key) and arg3 (flags) are unconstrained, and
 * the helper returns an integer. The helper is not restricted to GPL
 * programs.
 */
static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
......@@ -3229,6 +3268,8 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
return bpf_base_func_proto(func_id);
}
......@@ -3243,6 +3284,8 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
case BPF_FUNC_sk_redirect_map:
return &bpf_sk_redirect_map_proto;
default:
return bpf_base_func_proto(func_id);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment