Commit c133acf3 authored by David S. Miller

Merge branch 'mptcp-socket-options'

Mat Martineau says:

====================
mptcp: Improve socket option handling

MPTCP sockets have previously had limited socket option support. The
architecture of MPTCP sockets (one userspace-facing MPTCP socket that
manages one or more in-kernel TCP subflow sockets) adds complexity for
passing options through to lower levels. This patch set adds MPTCP
support for socket options commonly used with TCP.

Patch 1 reverts an interim socket option fix (a socket option blocklist)
that was merged in the net tree for v5.12.

Patch 2 moves the socket option code to a separate file, with no
functional changes.

Patch 3 adds an allowlist for socket options that are known to function
with MPTCP. Later patches in this set add more allowed options.

Patches 4 and 5 add infrastructure for syncing MPTCP-level options with
the TCP subflows.

Patches 6-12 add support for specific socket options.

Patch 13 adds a socket option self test.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents a1150a04 dc65fe82
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o mib.o pm_netlink.o sockopt.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
......
...@@ -90,16 +90,6 @@ static bool mptcp_is_tcpsk(struct sock *sk) ...@@ -90,16 +90,6 @@ static bool mptcp_is_tcpsk(struct sock *sk)
return false; return false;
} }
/* Return msk->first when __mptcp_check_fallback() reports that the
 * connection is in fallback mode, NULL otherwise. sock_owned_by_me() is
 * invoked on the msk first, so the caller is expected to own the msk socket.
 */
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);

	/* fallback is the uncommon case */
	return unlikely(__mptcp_check_fallback(msk)) ? msk->first : NULL;
}
static int __mptcp_socket_create(struct mptcp_sock *msk) static int __mptcp_socket_create(struct mptcp_sock *msk)
{ {
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
...@@ -740,18 +730,47 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) ...@@ -740,18 +730,47 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk->sk_data_ready(sk); sk->sk_data_ready(sk);
} }
void __mptcp_flush_join_list(struct mptcp_sock *msk) static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
{ {
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
bool ret = false;
if (likely(list_empty(&msk->join_list))) if (likely(list_empty(&msk->join_list)))
return; return false;
spin_lock_bh(&msk->join_list_lock); spin_lock_bh(&msk->join_list_lock);
list_for_each_entry(subflow, &msk->join_list, node) list_for_each_entry(subflow, &msk->join_list, node) {
u32 sseq = READ_ONCE(subflow->setsockopt_seq);
mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow)); mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
if (READ_ONCE(msk->setsockopt_seq) != sseq)
ret = true;
}
list_splice_tail_init(&msk->join_list, &msk->conn_list); list_splice_tail_init(&msk->join_list, &msk->conn_list);
spin_unlock_bh(&msk->join_list_lock); spin_unlock_bh(&msk->join_list_lock);
return ret;
}
void __mptcp_flush_join_list(struct mptcp_sock *msk)
{
if (likely(!mptcp_do_flush_join_list(msk)))
return;
if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
mptcp_schedule_work((struct sock *)msk);
}
static void mptcp_flush_join_list(struct mptcp_sock *msk)
{
bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
might_sleep();
if (!mptcp_do_flush_join_list(msk) && !sync_needed)
return;
mptcp_sockopt_sync_all(msk);
} }
static bool mptcp_timer_pending(struct sock *sk) static bool mptcp_timer_pending(struct sock *sk)
...@@ -1467,7 +1486,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags) ...@@ -1467,7 +1486,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0; int ret = 0;
prev_ssk = ssk; prev_ssk = ssk;
__mptcp_flush_join_list(msk); mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk); ssk = mptcp_subflow_get_send(msk);
/* try to keep the subflow socket lock across /* try to keep the subflow socket lock across
...@@ -1893,7 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) ...@@ -1893,7 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
unsigned int moved = 0; unsigned int moved = 0;
bool ret, done; bool ret, done;
__mptcp_flush_join_list(msk); mptcp_flush_join_list(msk);
do { do {
struct sock *ssk = mptcp_subflow_recv_lookup(msk); struct sock *ssk = mptcp_subflow_recv_lookup(msk);
bool slowpath; bool slowpath;
...@@ -2317,7 +2336,7 @@ static void mptcp_worker(struct work_struct *work) ...@@ -2317,7 +2336,7 @@ static void mptcp_worker(struct work_struct *work)
goto unlock; goto unlock;
mptcp_check_data_fin_ack(sk); mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk); mptcp_flush_join_list(msk);
mptcp_check_fastclose(msk); mptcp_check_fastclose(msk);
...@@ -2380,6 +2399,9 @@ static int __mptcp_init_sock(struct sock *sk) ...@@ -2380,6 +2399,9 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */ /* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
tcp_assign_congestion_control(sk);
return 0; return 0;
} }
...@@ -2517,7 +2539,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk) ...@@ -2517,7 +2539,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
} }
} }
__mptcp_flush_join_list(msk); mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
...@@ -2573,6 +2595,8 @@ static void __mptcp_destroy_sock(struct sock *sk) ...@@ -2573,6 +2595,8 @@ static void __mptcp_destroy_sock(struct sock *sk)
WARN_ON_ONCE(msk->rmem_released); WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk); sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk); xfrm_sk_free_policy(sk);
tcp_cleanup_congestion_control(sk);
sk_refcnt_debug_release(sk); sk_refcnt_debug_release(sk);
mptcp_dispose_initial_subflow(msk); mptcp_dispose_initial_subflow(msk);
sock_put(sk); sock_put(sk);
...@@ -2654,7 +2678,8 @@ static int mptcp_disconnect(struct sock *sk, int flags) ...@@ -2654,7 +2678,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sock *msk = mptcp_sk(sk);
__mptcp_flush_join_list(msk); mptcp_do_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
...@@ -2703,6 +2728,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, ...@@ -2703,6 +2728,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->snd_nxt = msk->write_seq; msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq; msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
if (mp_opt->mp_capable) { if (mp_opt->mp_capable) {
msk->can_ack = true; msk->can_ack = true;
...@@ -2811,161 +2837,6 @@ static void mptcp_destroy(struct sock *sk) ...@@ -2811,161 +2837,6 @@ static void mptcp_destroy(struct sock *sk)
sk_sockets_allocated_dec(sk); sk_sockets_allocated_dec(sk);
} }
/* Handle a SOL_SOCKET option on an MPTCP socket.
 *
 * SO_REUSEPORT and SO_REUSEADDR are applied to the socket returned by
 * __mptcp_nmpc_socket() while the msk lock is held; on success the resulting
 * flag is mirrored onto the MPTCP socket. Any other option is handed to
 * sock_setsockopt() on the MPTCP socket itself.
 *
 * Returns the result of sock_setsockopt(), or -EINVAL when
 * __mptcp_nmpc_socket() yields no socket.
 */
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int err;

	if (optname != SO_REUSEPORT && optname != SO_REUSEADDR)
		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
				       optval, optlen);

	lock_sock(sk);

	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock) {
		err = -EINVAL;
		goto unlock;
	}

	err = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
	if (err)
		goto unlock;

	/* keep the msk-level flag in step with the value just applied */
	if (optname == SO_REUSEPORT)
		sk->sk_reuseport = ssock->sk->sk_reuseport;
	else
		sk->sk_reuse = ssock->sk->sk_reuse;

unlock:
	release_sock(sk);
	return err;
}
/* Handle a SOL_IPV6 option on an MPTCP socket.
 *
 * Only IPV6_V6ONLY is accepted: it is applied through tcp_setsockopt() to
 * the socket returned by __mptcp_nmpc_socket() and, on success, the
 * sk_ipv6only flag is copied back to the MPTCP socket.
 *
 * Returns -EOPNOTSUPP for any other option, -EINVAL when
 * __mptcp_nmpc_socket() yields no socket, otherwise the tcp_setsockopt()
 * result.
 */
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int err;

	if (optname != IPV6_V6ONLY)
		return -EOPNOTSUPP;

	lock_sock(sk);
	ssock = __mptcp_nmpc_socket(msk);
	if (ssock) {
		err = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
		if (!err)
			sk->sk_ipv6only = ssock->sk->sk_ipv6only;
	} else {
		err = -EINVAL;
	}
	release_sock(sk);

	return err;
}
/* Tell whether the (level, optname) pair is on the list of options that
 * mptcp_setsockopt() refuses outright: the multicast membership/source
 * filter options at SOL_IP, and IPV6_ADDRFORM plus the multicast/anycast
 * membership options at SOL_IPV6. Every other level is never rejected here.
 */
static bool mptcp_unsupported(int level, int optname)
{
	switch (level) {
	case SOL_IP:
		switch (optname) {
		case IP_ADD_MEMBERSHIP:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_MSFILTER:
			return true;
		default:
			return false;
		}
	case SOL_IPV6:
		switch (optname) {
		case IPV6_ADDRFORM:
		case IPV6_ADD_MEMBERSHIP:
		case IPV6_DROP_MEMBERSHIP:
		case IPV6_JOIN_ANYCAST:
		case IPV6_LEAVE_ANYCAST:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_MSFILTER:
			return true;
		default:
			return false;
		}
	default:
		return false;
	}
}
/* setsockopt() entry point for MPTCP sockets.
 *
 * Order of evaluation:
 *  1. options listed by mptcp_unsupported() fail with -ENOPROTOOPT;
 *  2. SOL_SOCKET options go to mptcp_setsockopt_sol_socket();
 *  3. if __mptcp_tcp_fallback() returns a subflow, the option is passed
 *     through to that subflow with tcp_setsockopt();
 *  4. SOL_IPV6 options go to mptcp_setsockopt_v6();
 *  5. anything else fails with -EOPNOTSUPP.
 *
 * The meaning of setsockopt() when the socket is connected and there are
 * multiple subflows is not yet defined, hence only the fallback case is
 * passed through to the one remaining subflow.
 */
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
			    sockptr_t optval, unsigned int optlen)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *fallback_ssk;

	pr_debug("msk=%p", msk);

	if (mptcp_unsupported(level, optname))
		return -ENOPROTOOPT;

	if (level == SOL_SOCKET)
		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);

	/* the fallback lookup is done under the msk socket lock */
	lock_sock(sk);
	fallback_ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);

	if (fallback_ssk)
		return tcp_setsockopt(fallback_ssk, level, optname, optval,
				      optlen);

	return level == SOL_IPV6 ?
	       mptcp_setsockopt_v6(msk, optname, optval, optlen) : -EOPNOTSUPP;
}
/* getsockopt() entry point for MPTCP sockets.
 *
 * The meaning of getsockopt() when the socket is connected and there are
 * multiple subflows is not yet defined. Only when __mptcp_tcp_fallback()
 * returns a subflow is the request passed through to it via
 * tcp_getsockopt(); otherwise -EOPNOTSUPP is returned.
 */
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *fallback_ssk;

	pr_debug("msk=%p", msk);

	/* the fallback lookup is done under the msk socket lock */
	lock_sock(sk);
	fallback_ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);

	if (!fallback_ssk)
		return -EOPNOTSUPP;

	return tcp_getsockopt(fallback_ssk, level, optname, optval, option);
}
void __mptcp_data_acked(struct sock *sk) void __mptcp_data_acked(struct sock *sk)
{ {
if (!sock_owned_by_user(sk)) if (!sock_owned_by_user(sk))
...@@ -3375,7 +3246,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, ...@@ -3375,7 +3246,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* set ssk->sk_socket of accept()ed flows to mptcp socket. /* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack. * This is needed so NOSPACE flag can be set from tcp stack.
*/ */
__mptcp_flush_join_list(msk); mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) { mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
......
...@@ -108,6 +108,7 @@ ...@@ -108,6 +108,7 @@
#define MPTCP_CLEAN_UNA 7 #define MPTCP_CLEAN_UNA 7
#define MPTCP_ERROR_REPORT 8 #define MPTCP_ERROR_REPORT 8
#define MPTCP_RETRANSMIT 9 #define MPTCP_RETRANSMIT 9
#define MPTCP_WORK_SYNC_SETSOCKOPT 10
static inline bool before64(__u64 seq1, __u64 seq2) static inline bool before64(__u64 seq1, __u64 seq2)
{ {
...@@ -255,6 +256,8 @@ struct mptcp_sock { ...@@ -255,6 +256,8 @@ struct mptcp_sock {
u64 time; /* start time of measurement window */ u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */ u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space; } rcvq_space;
u32 setsockopt_seq;
}; };
#define mptcp_lock_sock(___sk, cb) do { \ #define mptcp_lock_sock(___sk, cb) do { \
...@@ -413,6 +416,8 @@ struct mptcp_subflow_context { ...@@ -413,6 +416,8 @@ struct mptcp_subflow_context {
long delegated_status; long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */ struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq;
struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */ struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops; const struct inet_connection_sock_af_ops *icsk_af_ops;
...@@ -571,6 +576,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); ...@@ -571,6 +576,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk); bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk); bool mptcp_schedule_work(struct sock *sk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option);
void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk); void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk); void __mptcp_error_report(struct sock *sk);
...@@ -730,6 +740,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk); ...@@ -730,6 +740,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{ {
return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
......
This diff is collapsed.
...@@ -679,6 +679,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -679,6 +679,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
goto out; goto out;
} }
/* ssk inherits options of listener sk */
ctx->setsockopt_seq = listener->setsockopt_seq;
if (ctx->mp_capable) { if (ctx->mp_capable) {
/* this can't race with mptcp_close(), as the msk is /* this can't race with mptcp_close(), as the msk is
* not yet exposted to user-space * not yet exposted to user-space
...@@ -694,6 +697,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -694,6 +697,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* created mptcp socket * created mptcp socket
*/ */
new_msk->sk_destruct = mptcp_sock_destruct; new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk; ctx->conn = new_msk;
...@@ -1317,6 +1321,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, ...@@ -1317,6 +1321,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow); mptcp_add_pending_subflow(msk, subflow);
mptcp_sockopt_sync(msk, ssk);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS) if (err && err != -EINPROGRESS)
goto failed_unlink; goto failed_unlink;
......
...@@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1 ...@@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
simult_flows.sh simult_flows.sh mptcp_sockopt.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl TEST_GEN_FILES = mptcp_connect pm_nl_ctl
......
...@@ -57,6 +57,7 @@ static bool cfg_join; ...@@ -57,6 +57,7 @@ static bool cfg_join;
static bool cfg_remove; static bool cfg_remove;
static unsigned int cfg_do_w; static unsigned int cfg_do_w;
static int cfg_wait; static int cfg_wait;
static uint32_t cfg_mark;
static void die_usage(void) static void die_usage(void)
{ {
...@@ -69,6 +70,7 @@ static void die_usage(void) ...@@ -69,6 +70,7 @@ static void die_usage(void)
fprintf(stderr, "\t-p num -- use port num\n"); fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1); exit(1);
...@@ -140,6 +142,17 @@ static void set_sndbuf(int fd, unsigned int size) ...@@ -140,6 +142,17 @@ static void set_sndbuf(int fd, unsigned int size)
} }
} }
/* Set SO_MARK on @fd to @mark; print the error and exit(1) on failure. */
static void set_mark(int fd, uint32_t mark)
{
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) == 0)
		return;

	perror("set SO_MARK");
	exit(1);
}
static int sock_listen_mptcp(const char * const listenaddr, static int sock_listen_mptcp(const char * const listenaddr,
const char * const port) const char * const port)
{ {
...@@ -248,6 +261,9 @@ static int sock_connect_mptcp(const char * const remoteaddr, ...@@ -248,6 +261,9 @@ static int sock_connect_mptcp(const char * const remoteaddr,
continue; continue;
} }
if (cfg_mark)
set_mark(sock, cfg_mark);
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */ break; /* success */
...@@ -830,7 +846,7 @@ static void parse_opts(int argc, char **argv) ...@@ -830,7 +846,7 @@ static void parse_opts(int argc, char **argv)
{ {
int c; int c;
while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) { while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:")) != -1) {
switch (c) { switch (c) {
case 'j': case 'j':
cfg_join = true; cfg_join = true;
...@@ -880,6 +896,9 @@ static void parse_opts(int argc, char **argv) ...@@ -880,6 +896,9 @@ static void parse_opts(int argc, char **argv)
case 'w': case 'w':
cfg_wait = atoi(optarg)*1000000; cfg_wait = atoi(optarg)*1000000;
break; break;
case 'M':
cfg_mark = strtol(optarg, NULL, 0);
break;
} }
} }
...@@ -911,6 +930,8 @@ int main(int argc, char *argv[]) ...@@ -911,6 +930,8 @@ int main(int argc, char *argv[])
set_rcvbuf(fd, cfg_rcvbuf); set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf) if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf); set_sndbuf(fd, cfg_sndbuf);
if (cfg_mark)
set_mark(fd, cfg_mark);
return main_loop_s(fd); return main_loop_s(fd);
} }
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Overall exit status of the script; the checks set it to non-zero on failure.
ret=0
# Paths of the server/client input and output files; assigned with mktemp
# further down, before init is called.
sin=""
sout=""
cin=""
cout=""
# Exit code used when a prerequisite (tool, netns creation) is missing.
ksft_skip=4
# Passed to mptcp_connect via -t.
timeout_poll=30
# Upper bound handed to timeout(1) around each mptcp_connect run.
timeout_test=$((timeout_poll * 2 + 1))
# Command line prefix for the test program; set in do_transfer.
mptcp_connect=""
# NOTE(review): not referenced anywhere in the visible script.
do_all_tests=1
# $1: namespace name
# $2: packet mark that locally generated TCP traffic is expected to carry
#
# Appends, for both iptables and ip6tables, OUTPUT rules that accept TCP
# packets carrying mark $2 and drop TCP packets with mark 0 (unmarked RSTs
# excepted).
add_mark_rules()
{
	local target_ns=$1
	local mark=$2

	for t in iptables ip6tables; do
		local append="ip netns exec $target_ns $t -A OUTPUT -p tcp"

		# just to debug: check we have multiple subflows connection requests
		$append --syn -m mark --mark $mark -j ACCEPT

		# RST packets might be handled by an internal dummy socket
		$append --tcp-flags RST RST -m mark --mark 0 -j ACCEPT

		$append -m mark --mark $mark -j ACCEPT
		$append -m mark --mark 0 -j DROP
	done
}
# Create the two test namespaces ($ns1, $ns2), connect them with four veth
# pairs carrying IPv4 and IPv6 addresses, register every address as a
# "signal" endpoint with pm_nl_ctl, raise the path-manager limits and install
# the packet-mark filter rules (mark 1 in $ns1, mark 2 in $ns2).
#
# Sets the globals rndh, ns1 and ns2. Exits with $ksft_skip when a namespace
# cannot be created.
init()
{
	local sec

	# The namespace suffix mixes the current time with a random string.
	# "sec" was never assigned before, so the time part was always "0".
	sec=$(date +%s)
	rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)

	ns1="ns1-$rndh"
	ns2="ns2-$rndh"

	for netns in "$ns1" "$ns2";do
		ip netns add $netns || exit $ksft_skip
		ip -net $netns link set lo up
		ip netns exec $netns sysctl -q net.mptcp.enabled=1
		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
	done

	for i in $(seq 1 4); do
		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
		ip -net "$ns1" link set ns1eth$i up

		ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
		ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
		ip -net "$ns2" link set ns2eth$i up

		# let $ns2 reach any $ns1 address from any interface
		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i

		ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
		ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal

		ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
		ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
	done

	ip netns exec $ns1 ./pm_nl_ctl limits 8 8
	ip netns exec $ns2 ./pm_nl_ctl limits 8 8

	add_mark_rules $ns1 1
	add_mark_rules $ns2 2
}
# Delete both test namespaces and remove the four temporary data files.
cleanup()
{
	for netns in "$ns1" "$ns2"; do
		ip netns del $netns
	done

	rm -f "$cin" "$cout" "$sin" "$sout"
}
# Exit with $ksft_skip when one of the required tools cannot be run.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

if ! iptables -V > /dev/null 2>&1; then
	echo "SKIP: Could not run all tests without iptables tool"
	exit $ksft_skip
fi

if ! ip6tables -V > /dev/null 2>&1; then
	echo "SKIP: Could not run all tests without ip6tables tool"
	exit $ksft_skip
fi
# $1: namespace name
# $2: address family, 4 or 6 (6 selects ip6tables, anything else iptables)
#
# Verify that the DROP rule in the OUTPUT chain of the namespace never
# matched, i.e. that no unmarked TCP packet was sent.
#
# Returns 0 when both the packet and byte counters of the DROP rule are 0.
# Otherwise prints a FAIL message on stderr, sets the global ret to 1 (the
# same way check_transfer records a failure) and returns 1.
check_mark()
{
	local ns=$1
	local af=$2
	local tables=iptables
	local counters values v

	if [ $af -eq 6 ];then
		tables=ip6tables
	fi

	# -x prints exact counters; without it large values are abbreviated
	# (e.g. "12K"), which the numeric test below cannot evaluate and
	# would silently treat as a pass.
	counters=$(ip netns exec $ns $tables -v -x -L OUTPUT | grep DROP)
	# keep only the counter columns that precede the target name
	values=${counters%DROP*}

	for v in $values; do
		if [ $v -ne 0 ]; then
			echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
			ret=1
			return 1
		fi
	done

	return 0
}
# $1: file to describe
# Lists the file on stderr, then prints its last 27 bytes on stdout.
print_file_err()
{
	local path="$1"

	ls -l "$path" >&2

	echo "Trailing bytes are: "
	tail -c 27 "$path"
}
# $1: file that was sent
# $2: file that was received
# $3: description used in the failure message
#
# Returns 0 when both files are identical. Otherwise reports the mismatch,
# sets the global ret to 1 and returns 1.
check_transfer()
{
	in=$1
	out=$2
	what=$3

	if cmp "$in" "$out" > /dev/null 2>&1; then
		return 0
	fi

	echo "[ FAIL ] $what does not match (in, out):"
	print_file_err "$in"
	print_file_err "$out"
	ret=1

	return 1
}
# $1: IP address
# Succeeds when the argument contains a ':' (an empty argument also succeeds).
is_v6()
{
	case "$1" in
	""|*:*)
		return 0
		;;
	*)
		return 1
		;;
	esac
}
# $1: listener namespace
# $2: connector namespace
# $3: client protocol, passed to mptcp_connect -s
# $4: server protocol, passed to mptcp_connect -s
# $5: address the client connects to
#
# Start a listener using packet mark 1 and a client using packet mark 2,
# wait for both to finish, then check that no unmarked packet was dropped in
# either namespace and that the server received the client's file intact.
#
# Returns 0 only when both programs exited with 0, every packet-mark check
# passed and the transferred file matches; returns 1 otherwise.
do_transfer()
{
	listener_ns="$1"
	connector_ns="$2"
	cl_proto="$3"
	srv_proto="$4"
	connect_addr="$5"

	port=12001

	:> "$cout"
	:> "$sout"

	mptcp_connect="./mptcp_connect -r 20"

	local local_addr
	if is_v6 "${connect_addr}"; then
		local_addr="::"
	else
		local_addr="0.0.0.0"
	fi

	timeout ${timeout_test} \
		ip netns exec ${listener_ns} \
			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
				${local_addr} < "$sin" > "$sout" &
	spid=$!

	# give the listener time to start before connecting
	sleep 1

	timeout ${timeout_test} \
		ip netns exec ${connector_ns} \
			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
				$connect_addr < "$cin" > "$cout" &
	cpid=$!

	wait $cpid
	retc=$?
	wait $spid
	rets=$?

	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
		echo " client exit code $retc, server $rets" 1>&2
		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"

		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"

		ret=1
		return 1
	fi

	# A failed mark check must fail the transfer: its result used to be
	# discarded, so the test passed even when unmarked packets were seen.
	if [ "$local_addr" = "::" ];then
		check_mark $listener_ns 6 || retc=1
		check_mark $connector_ns 6 || retc=1
	else
		check_mark $listener_ns 4 || retc=1
		check_mark $connector_ns 4 || retc=1
	fi

	check_transfer "$cin" "$sout" "file received by server"
	rets=$?

	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
		return 0
	fi

	return 1
}
# $1: path of the file to create
# $2: label of the sender, used only in the log message
# $3: payload size in KB
#
# Fills the file with random data followed by an end marker line.
make_file()
{
	name=$1
	who=$2
	size=$3

	{
		dd if=/dev/urandom bs=1024 count=$size 2> /dev/null
		echo -e "\nMPTCP_TEST_FILE_END_MARKER"
	} > "$name"

	echo "Created $name (size $size KB) containing data sent by $who"
}
# $1: listener namespace
# $2: connector namespace
# $3: address to connect to
#
# Runs one MPTCP-to-MPTCP transfer and records a failure in the global ret.
run_tests()
{
	listener_ns="$1"
	connector_ns="$2"
	connect_addr="$3"

	lret=0
	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
	lret=$?

	# leave ret untouched on success so earlier failures are kept
	[ $lret -eq 0 ] || ret=$lret
}
# Temporary files: data sent by / received from the server and the client.
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)

# Register the exit handler before anything else can bail out: init exits
# early when a namespace cannot be created, and the temporary files (and any
# namespace already set up) would otherwise be left behind.
trap cleanup EXIT

init
make_file "$cin" "client" 1
make_file "$sin" "server" 1

# one IPv4 and one IPv6 transfer
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1

if [ $ret -eq 0 ];then
	echo "PASS: all packets had packet mark set"
fi

exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment