Commit 7cb164ef authored by David S. Miller

Merge branch 'net-SO_COOKIE'

Chenbo Feng says:

====================
New getsockopt option to retrieve socket cookie

In the current kernel socket cookie implementation, there is no simple
and direct way to retrieve the socket cookie based on a file descriptor. A
process may need to get it from a socket fd if it wants to correlate with
sock_diag output or use a bpf map with the new socket cookie function.

If userspace wants to receive the socket cookie for a given socket fd,
it must send a SOCK_DIAG_BY_FAMILY dump request and look for the 5-tuple.
This is slow and can be ambiguous in the case of sockets that have the
same 5-tuple (e.g., tproxy / transparent sockets, SO_REUSEPORT sockets,
etc.).

As shown in the example program, the xt_eBPF program uses the socket cookie
to record network traffic statistics, and with the socket cookie
retrieved by getsockopt, the program can directly access a specific
socket's data without scanning the whole bpf map.
====================
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c42cb98c 00f660ea
...@@ -103,4 +103,6 @@ ...@@ -103,4 +103,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -96,4 +96,6 @@ ...@@ -96,4 +96,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _UAPI__ASM_AVR32_SOCKET_H */ #endif /* _UAPI__ASM_AVR32_SOCKET_H */
...@@ -96,5 +96,7 @@ ...@@ -96,5 +96,7 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -105,4 +105,6 @@ ...@@ -105,4 +105,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_IA64_SOCKET_H */ #endif /* _ASM_IA64_SOCKET_H */
...@@ -96,4 +96,6 @@ ...@@ -96,4 +96,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_M32R_SOCKET_H */ #endif /* _ASM_M32R_SOCKET_H */
...@@ -114,4 +114,6 @@ ...@@ -114,4 +114,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -96,4 +96,6 @@ ...@@ -96,4 +96,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -95,4 +95,6 @@ ...@@ -95,4 +95,6 @@
#define SO_INCOMING_NAPI_ID 0x4031 #define SO_INCOMING_NAPI_ID 0x4031
#define SO_COOKIE 0x4032
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -103,4 +103,6 @@ ...@@ -103,4 +103,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_POWERPC_SOCKET_H */ #endif /* _ASM_POWERPC_SOCKET_H */
...@@ -102,4 +102,6 @@ ...@@ -102,4 +102,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -92,6 +92,8 @@ ...@@ -92,6 +92,8 @@
#define SO_INCOMING_NAPI_ID 0x003a #define SO_INCOMING_NAPI_ID 0x003a
#define SO_COOKIE 0x003b
/* Security levels - as per NRL IPv6 - don't actually do anything */ /* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
......
...@@ -107,4 +107,6 @@ ...@@ -107,4 +107,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* _XTENSA_SOCKET_H */ #endif /* _XTENSA_SOCKET_H */
...@@ -98,4 +98,6 @@ ...@@ -98,4 +98,6 @@
#define SO_INCOMING_NAPI_ID 56 #define SO_INCOMING_NAPI_ID 56
#define SO_COOKIE 57
#endif /* __ASM_GENERIC_SOCKET_H */ #endif /* __ASM_GENERIC_SOCKET_H */
...@@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union { union {
int val; int val;
u64 val64;
struct linger ling; struct linger ling;
struct timeval tm; struct timeval tm;
} v; } v;
...@@ -1340,6 +1341,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1340,6 +1341,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break; break;
#endif #endif
case SO_COOKIE:
lv = sizeof(u64);
if (len < lv)
return -EINVAL;
v.val64 = sock_gen_cookie(sk);
break;
default: default:
/* We implement the SO_SNDLOWAT etc to not be settable /* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7). * (1003.1g 7).
......
...@@ -4,10 +4,11 @@ ...@@ -4,10 +4,11 @@
* program into the xt_bpf match. * program into the xt_bpf match.
* *
* TEST: * TEST:
* ./run_cookie_uid_helper_example.sh * ./run_cookie_uid_helper_example.sh -option
* Then generate some traffic in variate ways. ping 0 -c 10 would work * option:
* but the cookie and uid in this case could both be 0. A sample output * -t: do traffic monitoring test, the program will continuously
* with some traffic generated by web browser is shown below: * print out network traffic happens after program started A sample
* output is shown below:
* *
* cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058
* cookie: 132, uid: 0x0, Pakcet Count: 2, Bytes Count: 286 * cookie: 132, uid: 0x0, Pakcet Count: 2, Bytes Count: 286
...@@ -18,6 +19,10 @@ ...@@ -18,6 +19,10 @@
* cookie: 0, uid: 0x0, Pakcet Count: 6, Bytes Count: 712 * cookie: 0, uid: 0x0, Pakcet Count: 6, Bytes Count: 712
* cookie: 880, uid: 0xfffe, Pakcet Count: 1, Bytes Count: 70 * cookie: 880, uid: 0xfffe, Pakcet Count: 1, Bytes Count: 70
* *
* -s: do getsockopt SO_COOKIE test, the program will set up a pair of
* UDP sockets and send packets between them. And read out the traffic data
* directly from the ebpf map based on the socket cookie.
*
* Clean up: if using shell script, the script file will delete the iptables * Clean up: if using shell script, the script file will delete the iptables
* rule and unmount the bpf program when exit. Else the iptables rule need * rule and unmount the bpf program when exit. Else the iptables rule need
* to be deleted by hand, see run_cookie_uid_helper_example.sh for detail. * to be deleted by hand, see run_cookie_uid_helper_example.sh for detail.
...@@ -34,6 +39,8 @@ ...@@ -34,6 +39,8 @@
#include <limits.h> #include <limits.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <net/if.h>
#include <signal.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
...@@ -46,6 +53,8 @@ ...@@ -46,6 +53,8 @@
#include <bpf/bpf.h> #include <bpf/bpf.h>
#include "libbpf.h" #include "libbpf.h"
#define PORT 8888
struct stats { struct stats {
uint32_t uid; uint32_t uid;
uint64_t packets; uint64_t packets;
...@@ -54,6 +63,8 @@ struct stats { ...@@ -54,6 +63,8 @@ struct stats {
static int map_fd, prog_fd; static int map_fd, prog_fd;
static bool test_finish;
static void maps_create(void) static void maps_create(void)
{ {
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t),
...@@ -164,7 +175,7 @@ static void prog_attach_iptables(char *file) ...@@ -164,7 +175,7 @@ static void prog_attach_iptables(char *file)
printf("file path too long: %s\n", file); printf("file path too long: %s\n", file);
exit(1); exit(1);
} }
sprintf(rules, "iptables -A INPUT -m bpf --object-pinned %s -j ACCEPT", sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
file); file);
ret = system(rules); ret = system(rules);
if (ret < 0) { if (ret < 0) {
...@@ -177,7 +188,8 @@ static void print_table(void) ...@@ -177,7 +188,8 @@ static void print_table(void)
{ {
struct stats curEntry; struct stats curEntry;
uint32_t curN = UINT32_MAX; uint32_t curN = UINT32_MAX;
uint32_t nextN, res; uint32_t nextN;
int res;
while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) { while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) {
curN = nextN; curN = nextN;
...@@ -193,25 +205,117 @@ static void print_table(void) ...@@ -193,25 +205,117 @@ static void print_table(void)
} }
} }
int main(int argc, char *argv[]) static void udp_client(void)
{ {
if (argc > 2) { struct sockaddr_in si_other = {0};
printf("Too many argument provided\n"); struct sockaddr_in si_me = {0};
return 1; struct stats dataEntry;
} else if (argc < 2) { int s_rcv, s_send, i, recv_len;
printf("Usage: %s bpfObjName\n", argv[0]); char message = 'a';
return 1; char buf;
uint64_t cookie;
int res;
socklen_t cookie_len = sizeof(cookie);
socklen_t slen = sizeof(si_other);
s_rcv = socket(PF_INET, SOCK_DGRAM, 0);
if (s_rcv < 0)
error(1, errno, "rcv socket creat failed!\n");
si_other.sin_family = AF_INET;
si_other.sin_port = htons(PORT);
if (inet_aton("127.0.0.1", &si_other.sin_addr) == 0)
error(1, errno, "inet_aton\n");
if (bind(s_rcv, (struct sockaddr *)&si_other, sizeof(si_other)) == -1)
error(1, errno, "bind\n");
s_send = socket(PF_INET, SOCK_DGRAM, 0);
if (s_send < 0)
error(1, errno, "send socket creat failed!\n");
res = getsockopt(s_send, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len);
if (res < 0)
printf("get cookie failed: %s\n", strerror(errno));
res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry);
if (res != -1)
error(1, errno, "socket stat found while flow not active\n");
for (i = 0; i < 10; i++) {
res = sendto(s_send, &message, sizeof(message), 0,
(struct sockaddr *)&si_other, slen);
if (res == -1)
error(1, errno, "send\n");
if (res != sizeof(message))
error(1, 0, "%uB != %luB\n", res, sizeof(message));
recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
(struct sockaddr *)&si_me, &slen);
if (recv_len < 0)
error(1, errno, "revieve\n");
res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
sizeof(si_me.sin_addr));
if (res != 0)
error(1, EFAULT, "sender addr error: %d\n", res);
printf("Message received: %c\n", buf);
res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry);
if (res < 0)
error(1, errno, "lookup sk stat failed, cookie: %lu\n",
cookie);
printf("cookie: %lu, uid: 0x%x, Packet Count: %lu,"
" Bytes Count: %lu\n\n", cookie, dataEntry.uid,
dataEntry.packets, dataEntry.bytes);
} }
close(s_send);
close(s_rcv);
}
static int usage(void)
{
printf("Usage: ./run_cookie_uid_helper_example.sh"
" bpfObjName -option\n"
" -t traffic monitor test\n"
" -s getsockopt cookie test\n");
return 1;
}
void finish(int ret)
{
test_finish = true;
}
int main(int argc, char *argv[])
{
int opt;
bool cfg_test_traffic = false;
bool cfg_test_cookie = false;
if (argc != 3)
return usage();
while ((opt = getopt(argc, argv, "ts")) != -1) {
switch (opt) {
case 't':
cfg_test_traffic = true;
break;
case 's':
cfg_test_cookie = true;
break;
default:
printf("unknown option %c\n", opt);
usage();
return -1;
}
}
maps_create(); maps_create();
prog_load(); prog_load();
prog_attach_iptables(argv[1]); prog_attach_iptables(argv[2]);
if (cfg_test_traffic) {
while (true) { if (signal(SIGINT, finish) == SIG_ERR)
error(1, errno, "register handler failed");
while (!test_finish) {
print_table(); print_table();
printf("\n"); printf("\n");
sleep(1); sleep(1);
}; };
} else if (cfg_test_cookie) {
udp_client();
}
close(prog_fd);
close(map_fd);
return 0; return 0;
} }
...@@ -4,11 +4,11 @@ root_dir=$local_dir/../.. ...@@ -4,11 +4,11 @@ root_dir=$local_dir/../..
mnt_dir=$(mktemp -d --tmp) mnt_dir=$(mktemp -d --tmp)
on_exit() { on_exit() {
iptables -D INPUT -m bpf --object-pinned ${mnt_dir}/bpf_prog -j ACCEPT iptables -D OUTPUT -m bpf --object-pinned ${mnt_dir}/bpf_prog -j ACCEPT
umount ${mnt_dir} umount ${mnt_dir}
rm -r ${mnt_dir} rm -r ${mnt_dir}
} }
trap on_exit EXIT trap on_exit EXIT
mount -t bpf bpf ${mnt_dir} mount -t bpf bpf ${mnt_dir}
./per_socket_stats_example ${mnt_dir}/bpf_prog ./per_socket_stats_example ${mnt_dir}/bpf_prog $1
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment