Commit ca1a6705 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-next'

Mat Martineau says:

====================
mptcp: New features and cleanup

These patches have been tested in the MPTCP tree for a longer than usual
time (thanks to holiday schedules), and are ready for the net-next
branch. Changes include feature updates, small fixes, refactoring, and
some selftest changes.

Patch 1 fixes an OUTQ ioctl issue with TCP fallback sockets.

Patches 2, 3, and 6 add support of the MPTCP fastclose option (quick
shutdown of the full MPTCP connection, similar to TCP RST in regular
TCP), and a related self test.

Patch 4 cleans up some accept and poll code that is no longer needed
after the fastclose changes.

Patch 5 add userspace disconnect using AF_UNSPEC, which is used when
testing fastclose and makes the MPTCP socket's handling of AF_UNSPEC in
connect() more TCP-like.

Patches 7-11 refactor subflow creation to make better use of multiple
local endpoints and to better handle individual connection failures when
creating multiple subflows. Includes self test updates.

Patch 12 cleans up the way subflows are added to the MPTCP connection
list, eliminating the need for calls throughout the MPTCP code that had
to check the intermediate "join list" for entries to shift over to the
main "connection list".

Patch 13 refactors the MPTCP release_cb flags to use separate storage
for values only accessed with the socket lock held (no atomic ops
needed), and for values that need atomic operations.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 26abf15c e9d09bac
......@@ -768,6 +768,28 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
return true;
}
static bool mptcp_established_options_fastclose(struct sock *sk,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
if (likely(!subflow->send_fastclose))
return false;
if (remaining < TCPOLEN_MPTCP_FASTCLOSE)
return false;
*size = TCPOLEN_MPTCP_FASTCLOSE;
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
opts->rcvr_key = msk->remote_key;
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
return true;
}
static bool mptcp_established_options_mp_fail(struct sock *sk,
unsigned int *size,
unsigned int remaining,
......@@ -806,10 +828,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
}
/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
......@@ -1251,17 +1275,8 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 2;
}
/* RST is mutually exclusive with everything else */
if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
*ptr++ = mptcp_option(MPTCPOPT_RST,
TCPOLEN_MPTCP_RST,
opts->reset_transient,
opts->reset_reason);
return;
}
/* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
* mptcp_established_options*()
/* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
* see mptcp_established_options*()
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
......@@ -1370,27 +1385,29 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
/* MPC is additionally mutually exclusive with MP_PRIO */
goto mp_capable_done;
} else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
opts->backup, opts->join_id);
put_unaligned_be32(opts->token, ptr);
ptr += 1;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYNACK,
opts->backup, opts->join_id);
put_unaligned_be64(opts->thmac, ptr);
ptr += 2;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
ptr += 5;
} else if (OPTIONS_MPTCP_MPJ & opts->suboptions) {
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
opts->backup, opts->join_id);
put_unaligned_be32(opts->token, ptr);
ptr += 1;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYNACK,
opts->backup, opts->join_id);
put_unaligned_be64(opts->thmac, ptr);
ptr += 2;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
} else {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
ptr += 5;
}
} else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
......@@ -1447,6 +1464,24 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 1;
}
}
} else if (unlikely(OPTION_MPTCP_FASTCLOSE & opts->suboptions)) {
/* FASTCLOSE is mutually exclusive with others except RST */
*ptr++ = mptcp_option(MPTCPOPT_MP_FASTCLOSE,
TCPOLEN_MPTCP_FASTCLOSE,
0, 0);
put_unaligned_be64(opts->rcvr_key, ptr);
ptr += 2;
if (OPTION_MPTCP_RST & opts->suboptions)
goto mp_rst;
return;
} else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
mp_rst:
*ptr++ = mptcp_option(MPTCPOPT_RST,
TCPOLEN_MPTCP_RST,
opts->reset_transient,
opts->reset_reason);
return;
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
......
......@@ -172,9 +172,28 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow)
{
pr_debug("msk=%p", msk);
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
update_subflows = (ssk->sk_state == TCP_CLOSE) &&
(subflow->request_join || subflow->mp_join);
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
spin_lock_bh(&pm->lock);
if (update_subflows)
pm->subflows--;
/* Even if this subflow is not really established, tell the PM to try
* to pick the next ones, if possible.
*/
if (mptcp_pm_nl_check_work_pending(msk))
mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
......@@ -356,7 +375,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
......@@ -370,11 +389,16 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
mptcp_pm_nl_data_init(msk);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
mptcp_pm_nl_data_init(msk);
mptcp_pm_data_reset(msk);
}
void __init mptcp_pm_init(void)
......
This diff is collapsed.
This diff is collapsed.
......@@ -110,19 +110,20 @@
/* MPTCP TCPRST flags */
#define MPTCP_RST_TRANSIENT BIT(0)
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_PUSH_PENDING 6
#define MPTCP_CLEAN_UNA 7
#define MPTCP_ERROR_REPORT 8
#define MPTCP_RETRANSMIT 9
#define MPTCP_WORK_SYNC_SETSOCKOPT 10
#define MPTCP_CONNECTED 11
/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING 1
#define MPTCP_CLEAN_UNA 2
#define MPTCP_ERROR_REPORT 3
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
static inline bool before64(__u64 seq1, __u64 seq2)
{
......@@ -174,16 +175,25 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED,
MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED,
MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
* accounted int id_avail_bitmap
*/
};
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO,
MPTCP_RM_ADDR_SIGNAL,
};
/* max value of mptcp_addr_info.id */
#define MPTCP_PM_MAX_ADDR_ID U8_MAX
struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
......@@ -202,6 +212,7 @@ struct mptcp_pm_data {
u8 local_addr_used;
u8 subflows;
u8 status;
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_rm_list rm_list_tx;
struct mptcp_rm_list rm_list_rx;
};
......@@ -241,6 +252,8 @@ struct mptcp_sock {
u32 token;
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
......@@ -252,7 +265,6 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
......@@ -395,6 +407,9 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
char reset_start[0];
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
u64 remote_key;
......@@ -423,6 +438,7 @@ struct mptcp_subflow_context {
backup : 1,
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
......@@ -441,6 +457,9 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
char reset_end[0];
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
u32 setsockopt_seq;
......@@ -472,6 +491,13 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
return subflow->tcp_sock;
}
static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
subflow->request_mptcp = 1;
}
static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{
......@@ -486,15 +512,6 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow)
{
sock_hold(mptcp_subflow_tcp_sock(subflow));
spin_lock_bh(&msk->join_list_lock);
list_add_tail(&subflow->node, &msk->join_list);
spin_unlock_bh(&msk->join_list_lock);
}
void mptcp_subflow_process_delegated(struct sock *ssk);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
......@@ -659,7 +676,6 @@ void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
void __mptcp_flush_join_list(struct mptcp_sock *msk);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
return READ_ONCE(msk->snd_data_fin_enable) &&
......@@ -712,6 +728,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
......@@ -719,7 +736,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
......@@ -816,7 +835,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
......
......@@ -1285,27 +1285,15 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
}
}
void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
u32 seq;
seq = sockopt_seq_reset(sk);
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
u32 sseq = READ_ONCE(subflow->setsockopt_seq);
msk_owned_by_me(msk);
if (sseq != msk->setsockopt_seq) {
__mptcp_sockopt_sync(msk, ssk);
WRITE_ONCE(subflow->setsockopt_seq, seq);
} else if (sseq != seq) {
WRITE_ONCE(subflow->setsockopt_seq, seq);
}
if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
sync_socket_options(msk, ssk);
cond_resched();
subflow->setsockopt_seq = msk->setsockopt_seq;
}
msk->setsockopt_seq = seq;
}
......@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_set_connected(sk);
else
set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
__set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
......@@ -1274,7 +1274,7 @@ static void subflow_error_report(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
__set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
......@@ -1293,7 +1293,6 @@ static void subflow_data_ready(struct sock *sk)
if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
return;
set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
......@@ -1442,7 +1441,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
......@@ -1453,9 +1453,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
return err;
failed_unlink:
spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
spin_unlock_bh(&msk->join_list_lock);
sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
......
......@@ -384,6 +384,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk)
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
WRITE_ONCE(msk->token, 0);
}
void __init mptcp_token_init(void)
......
......@@ -17,4 +17,5 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
......@@ -16,6 +16,7 @@
#include <unistd.h>
#include <time.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
......@@ -28,6 +29,7 @@
#include <linux/tcp.h>
#include <linux/time_types.h>
#include <linux/sockios.h>
extern int optind;
......@@ -68,6 +70,8 @@ static unsigned int cfg_time;
static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
static char *cfg_input;
static int cfg_repeat = 1;
struct cfg_cmsg_types {
unsigned int cmsg_enabled:1;
......@@ -91,22 +95,31 @@ static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
"[-l] [-w sec] [-t num] [-T num] connect_address\n");
fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] "
"[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] "
"[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
"incoming connections, in client mode, disconnect and reconnect to the server\n");
fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
"-- for MPJ tests\n");
fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
"-- for remove addr tests\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
......@@ -310,7 +323,8 @@ static int sock_listen_mptcp(const char * const listenaddr,
}
static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
const char * const port, int proto,
struct addrinfo **peer)
{
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
......@@ -334,8 +348,10 @@ static int sock_connect_mptcp(const char * const remoteaddr,
if (cfg_mark)
set_mark(sock, cfg_mark);
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a;
break; /* success */
}
perror("connect()");
close(sock);
......@@ -524,14 +540,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
return ret;
}
static void set_nonblock(int fd)
static void set_nonblock(int fd, bool nonblock)
{
int flags = fcntl(fd, F_GETFL);
if (flags == -1)
return;
fcntl(fd, F_SETFL, flags | O_NONBLOCK);
if (nonblock)
fcntl(fd, F_SETFL, flags | O_NONBLOCK);
else
fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
......@@ -543,7 +562,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
unsigned int woff = 0, wlen = 0;
char wbuf[8192];
set_nonblock(peerfd);
set_nonblock(peerfd, true);
for (;;) {
char rbuf[8192];
......@@ -638,7 +657,6 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
if (cfg_remove)
usleep(cfg_wait);
close(peerfd);
return 0;
}
......@@ -780,7 +798,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
static int copyfd_io(int infd, int peerfd, int outfd)
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
{
bool in_closed_after_out = false;
struct timespec start, end;
......@@ -819,6 +837,9 @@ static int copyfd_io(int infd, int peerfd, int outfd)
if (ret)
return ret;
if (close_peerfd)
close(peerfd);
if (cfg_time) {
unsigned int delta_ms;
......@@ -930,7 +951,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
if (!(cfg_join || cfg_remove) && (r & 1))
if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
close(fd);
}
......@@ -940,7 +961,9 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
int fd = 0;
again:
polls.fd = listensock;
polls.events = POLLIN;
......@@ -961,14 +984,27 @@ int main_loop_s(int listensock)
check_sockaddr(pf, &ss, salen);
check_getpeername(remotesock, &ss, salen);
if (cfg_input) {
fd = open(cfg_input, O_RDONLY);
if (fd < 0)
xerror("can't open %s: %d", cfg_input, errno);
}
SOCK_TEST_TCPULP(remotesock, 0);
return copyfd_io(0, remotesock, 1);
copyfd_io(fd, remotesock, 1, true);
} else {
perror("accept");
return 1;
}
perror("accept");
if (--cfg_repeat > 0) {
if (cfg_input)
close(fd);
goto again;
}
return 1;
return 0;
}
static void init_rng(void)
......@@ -1057,15 +1093,47 @@ static void parse_setsock_options(const char *name)
exit(1);
}
void xdisconnect(int fd, int addrlen)
{
struct sockaddr_storage empty;
int msec_sleep = 10;
int queued = 1;
int i;
shutdown(fd, SHUT_WR);
/* while until the pending data is completely flushed, the later
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
if (ioctl(fd, SIOCOUTQ, &queued) < 0)
xerror("can't query out socket queue: %d", errno);
if (!queued)
break;
if (i > poll_timeout)
xerror("timeout while waiting for spool to complete");
usleep(msec_sleep * 1000);
}
memset(&empty, 0, sizeof(empty));
empty.ss_family = AF_UNSPEC;
if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
xerror("can't disconnect: %d", errno);
}
int main_loop(void)
{
int fd;
int fd, ret, fd_in = 0;
struct addrinfo *peer;
/* listener is ready. */
fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer);
if (fd < 0)
return 2;
again:
check_getpeername_connect(fd);
SOCK_TEST_TCPULP(fd, cfg_sock_proto);
......@@ -1077,7 +1145,31 @@ int main_loop(void)
if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types);
return copyfd_io(0, fd, 1);
if (cfg_input) {
fd_in = open(cfg_input, O_RDONLY);
if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
/* close the client socket open only if we are not going to reconnect */
ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1);
if (ret)
return ret;
if (--cfg_repeat > 0) {
xdisconnect(fd, peer->ai_addrlen);
/* the socket could be unblocking at this point, we need the
* connect to be blocking
*/
set_nonblock(fd, false);
if (connect(fd, peer->ai_addr, peer->ai_addrlen))
xerror("can't reconnect: %d", errno);
if (cfg_input)
close(fd_in);
goto again;
}
return 0;
}
int parse_proto(const char *proto)
......@@ -1162,7 +1254,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "6jr:lp:s:ht:T:m:S:R:w:M:P:c:o:")) != -1) {
while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
......@@ -1176,6 +1268,12 @@ static void parse_opts(int argc, char **argv)
if (cfg_do_w <= 0)
cfg_do_w = 50;
break;
case 'i':
cfg_input = optarg;
break;
case 'I':
cfg_repeat = atoi(optarg);
break;
case 'l':
listen_mode = true;
break;
......
......@@ -7,6 +7,7 @@ optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
cin_disconnect=""
cin=""
cout=""
ksft_skip=4
......@@ -24,6 +25,7 @@ options_log=true
do_tcp=0
checksum=false
filesize=0
connect_per_transfer=1
if [ $tc_loss -eq 100 ];then
tc_loss=1%
......@@ -127,6 +129,7 @@ TEST_COUNT=0
cleanup()
{
rm -f "$cin_disconnect" "$cout_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
......@@ -149,6 +152,8 @@ sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
cin_disconnect="$cin".disconnect
cout_disconnect="$cout".disconnect
trap cleanup EXIT
for i in "$ns1" "$ns2" "$ns3" "$ns4";do
......@@ -500,8 +505,8 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
expect_synrx=$((stat_synrx_last_l+1))
expect_ackrx=$((stat_ackrx_last_l+1))
expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
......@@ -738,6 +743,33 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
run_tests_disconnect()
{
local peekmode="$1"
local old_cin=$cin
local old_sin=$sin
cat $cin $cin $cin > "$cin".disconnect
# force do_transfer to cope with the multiple tranmissions
sin="$cin.disconnect"
sin_disconnect=$old_sin
cin="$cin.disconnect"
cin_disconnect="$old_cin"
connect_per_transfer=3
echo "INFO: disconnect"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
# restore previous status
cout=$old_cout
cout_disconnect="$cout".disconnect
cin=$old_cin
cin_disconnect="$cin".disconnect
connect_per_transfer=1
}
display_time()
{
time_end=$(date +%s)
......@@ -853,6 +885,9 @@ stop_if_error "Tests with peek mode have failed"
# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"
stop_if_error "Tests with tproxy have failed"
run_tests_disconnect
display_time
exit $ret
......@@ -937,6 +937,22 @@ chk_link_usage()
fi
}
wait_for_tw()
{
local timeout_ms=$((timeout_poll * 1000))
local time=0
local ns=$1
while [ $time -lt $timeout_ms ]; do
local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l)
[ "$cnt" = 1 ] && return 1
time=$((time + 100))
sleep 0.1
done
return 1
}
subflows_tests()
{
reset
......@@ -994,6 +1010,61 @@ subflows_tests()
chk_join_nr "single subflow, dev" 1 1 1
}
subflows_error_tests()
{
# If a single subflow is configured, and matches the MPC src
# address, no additional subflow should be created
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "no MPC reuse with single endpoint" 0 0 0
# multiple subflows, with subflow creation error
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "multi subflows, with failing subflow" 1 1 1
# multiple subflows, with subflow timeout on MPJ
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr "multi subflows, with subflow timeout" 1 1 1
# multiple subflows, check that the endpoint corresponding to
# closed subflow (due to reset) is not reused if additional
# subflows are added later
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
# updates in the child shell do not have any effect here, we
# need to bump the test counter for the above case
TEST_COUNT=$((TEST_COUNT+1))
# mpj subflow will be in TW after the reset
wait_for_tw $ns2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
wait
# additional subflow could be created only if the PM select
# the later endpoint, skipping the already used one
chk_join_nr "multi subflows, fair usage on close" 1 1 1
}
signal_address_tests()
{
# add_address, unused
......@@ -1071,7 +1142,10 @@ signal_address_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_add_nr 4 4
# the server will not signal the address terminating
# the MPC subflow
chk_add_nr 3 3
}
link_failure_tests()
......@@ -1802,6 +1876,7 @@ fullmesh_tests()
all_tests()
{
subflows_tests
subflows_error_tests
signal_address_tests
link_failure_tests
add_addr_timeout_tests
......@@ -1821,6 +1896,7 @@ usage()
{
echo "mptcp_join usage:"
echo " -f subflows_tests"
echo " -e subflows_error_tests"
echo " -s signal_address_tests"
echo " -l link_failure_tests"
echo " -t add_addr_timeout_tests"
......@@ -1869,11 +1945,14 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
while getopts 'fsltra64bpkdmchCS' opt; do
while getopts 'fesltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
;;
e)
subflows_error_tests
;;
s)
signal_address_tests
;;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment