Commit 7f0c940b authored by Jakub Kicinski

Merge branch 'mptcp-msg_fastopen-and-tfo-listener-side-support'

Matthieu Baerts says:

====================
mptcp: MSG_FASTOPEN and TFO listener side support

Before this series, only the initiator of a connection was able to combine
both TCP FastOpen and MPTCP when using TCP_FASTOPEN_CONNECT socket option.

These new patches here add (in theory) the full support of TFO with MPTCP,
which means:

 - MSG_FASTOPEN sendmsg flag support (patch 1/8)
 - TFO support for the listener side (patches 2-5/8)
 - TCP_FASTOPEN socket option (patch 6/8)
 - TCP_FASTOPEN_KEY socket option (patch 7/8)

To support TFO for the server side, a few preparation patches are needed
(patches 2 to 5/8). Some of them were inspired by a previous work from
Benjamin Hesmans.

Note that TFO support with MPTCP has been validated with selftests
(patch 8/8) but also with Packetdrill tests running with a modified
but still very WIP version supporting MPTCP. Both the modified tool
and the tests are available online:

  https://github.com/multipath-tcp/packetdrill/
====================

Link: https://lore.kernel.org/r/20221125222958.958636-1-matthieu.baerts@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents f2bb566f ca7ae891
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o sockopt.o pm_userspace.o mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
......
// SPDX-License-Identifier: GPL-2.0
/* MPTCP Fast Open Mechanism
*
* Copyright (c) 2021-2022, Dmytro SHYTYI
*/
#include "protocol.h"
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req)
{
struct sock *ssk = subflow->tcp_sock;
struct sock *sk = subflow->conn;
struct sk_buff *skb;
struct tcp_sock *tp;
tp = tcp_sk(ssk);
subflow->is_mptfo = 1;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
return;
/* dequeue the skb from sk receive queue */
__skb_unlink(skb, &ssk->sk_receive_queue);
skb_ext_reset(skb);
skb_orphan(skb);
/* We copy the fastopen data, but that don't belong to the mptcp sequence
* space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()
*/
tp->copied_seq += skb->len;
subflow->ssn_offset += skb->len;
/* initialize a dummy sequence number, we will update it at MPC
* completion, if needed
*/
MPTCP_SKB_CB(skb)->map_seq = -skb->len;
MPTCP_SKB_CB(skb)->end_seq = 0;
MPTCP_SKB_CB(skb)->offset = 0;
MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
mptcp_data_lock(sk);
mptcp_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
mptcp_data_unlock(sk);
}
void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
mptcp_data_unlock(sk);
}
...@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
{ {
u8 subtype = *ptr >> 4; u8 subtype = *ptr >> 4;
int expected_opsize; int expected_opsize;
u16 subopt;
u8 version; u8 version;
u8 flags; u8 flags;
u8 i; u8 i;
...@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA; expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
else else
expected_opsize = TCPOLEN_MPTCP_MPC_ACK; expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
subopt = OPTION_MPTCP_MPC_ACK;
} else { } else {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK; expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
else subopt = OPTION_MPTCP_MPC_SYNACK;
} else {
expected_opsize = TCPOLEN_MPTCP_MPC_SYN; expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
subopt = OPTION_MPTCP_MPC_SYN;
}
} }
/* Cfr RFC 8684 Section 3.3.0: /* Cfr RFC 8684 Section 3.3.0:
...@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0); mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
mp_opt->suboptions |= OPTIONS_MPTCP_MPC; mp_opt->suboptions |= subopt;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr); mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8; ptr += 8;
...@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, ...@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
!subflow->request_join) !subflow->request_join)
tcp_send_ack(ssk); tcp_send_ack(ssk);
goto fully_established; goto check_notify;
} }
/* we must process OoO packets before the first subflow is fully /* we must process OoO packets before the first subflow is fully
...@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, ...@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) { if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join) if (subflow->mp_join)
goto reset; goto reset;
if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
goto set_fully_established;
return subflow->mp_capable; return subflow->mp_capable;
} }
if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || if (subflow->remote_key_valid &&
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) { (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
/* subflows are fully established as soon as we get any /* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR. * additional ack, including ADD_ADDR.
*/ */
subflow->fully_established = 1; subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true); WRITE_ONCE(msk->fully_established, true);
goto fully_established; goto check_notify;
} }
/* If the first established packet does not contain MP_CAPABLE + data /* If the first established packet does not contain MP_CAPABLE + data
...@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, ...@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0) if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true); WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side))) if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk"); pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt); mptcp_subflow_fully_established(subflow, mp_opt);
fully_established: check_notify:
/* if the subflow is not already linked into the conn_list, we can't /* if the subflow is not already linked into the conn_list, we can't
* notify the PM: this subflow is still on the listener queue * notify the PM: this subflow is still on the listener queue
* and the PM possibly acquiring the subflow lock could race with * and the PM possibly acquiring the subflow lock could race with
......
...@@ -36,15 +36,6 @@ struct mptcp6_sock { ...@@ -36,15 +36,6 @@ struct mptcp6_sock {
}; };
#endif #endif
struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
u8 has_rxtstamp:1;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
enum { enum {
MPTCP_CMSG_TS = BIT(0), MPTCP_CMSG_TS = BIT(0),
MPTCP_CMSG_INQ = BIT(1), MPTCP_CMSG_INQ = BIT(1),
...@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb) ...@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
mptcp_rmem_uncharge(sk, len); mptcp_rmem_uncharge(sk, len);
} }
static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{ {
skb_orphan(skb); skb_orphan(skb);
skb->sk = sk; skb->sk = sk;
...@@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0; int ret = 0;
long timeo; long timeo;
/* we don't support FASTOPEN yet */
if (msg->msg_flags & MSG_FASTOPEN)
return -EOPNOTSUPP;
/* silently ignore everything else */ /* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
lock_sock(sk); lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk); ssock = __mptcp_nmpc_socket(msk);
if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
msg->msg_flags & MSG_FASTOPEN))) {
int copied_syn = 0; int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
...@@ -3048,7 +3036,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk, ...@@ -3048,7 +3036,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_sock *msk; struct mptcp_sock *msk;
u64 ack_seq;
if (!nsk) if (!nsk)
return NULL; return NULL;
...@@ -3074,15 +3061,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk, ...@@ -3074,15 +3061,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
msk->can_ack = true;
msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq);
atomic64_set(&msk->rcv_wnd_sent, ack_seq);
}
sock_reset_flag(nsk, SOCK_RCU_FREE); sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */ /* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV); inet_sk_state_store(nsk, TCP_SYN_RECV);
...@@ -3355,7 +3333,6 @@ void mptcp_finish_connect(struct sock *ssk) ...@@ -3355,7 +3333,6 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk; struct mptcp_sock *msk;
struct sock *sk; struct sock *sk;
u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk); subflow = mptcp_subflow_ctx(ssk);
sk = subflow->conn; sk = subflow->conn;
...@@ -3363,22 +3340,16 @@ void mptcp_finish_connect(struct sock *ssk) ...@@ -3363,22 +3340,16 @@ void mptcp_finish_connect(struct sock *ssk)
pr_debug("msk=%p, token=%u", sk, subflow->token); pr_debug("msk=%p, token=%u", sk, subflow->token);
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); subflow->map_seq = subflow->iasn;
ack_seq++;
subflow->map_seq = ack_seq;
subflow->map_subflow_seq = 1; subflow->map_subflow_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race /* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below * accessing the field below
*/ */
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key); WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq); WRITE_ONCE(msk->snd_una, msk->write_seq);
atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0); mptcp_pm_new_connection(msk, ssk, 0);
......
...@@ -126,6 +126,15 @@ ...@@ -126,6 +126,15 @@
#define MPTCP_CONNECTED 6 #define MPTCP_CONNECTED 6
#define MPTCP_RESET_SCHEDULER 7 #define MPTCP_RESET_SCHEDULER 7
/* Per-skb MPTCP state, accessed through MPTCP_SKB_CB(), which overlays this
 * struct on the skb control buffer (skb->cb).
 */
struct mptcp_skb_cb {
/* presumably the MPTCP-level sequence number of the first byte of the skb */
u64 map_seq;
/* presumably the MPTCP-level sequence number following the last byte */
u64 end_seq;
/* NOTE(review): looks like the count of already-consumed bytes - confirm */
u32 offset;
/* the fastopen path copies this from TCP_SKB_CB(skb)->has_rxtstamp */
u8 has_rxtstamp:1;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
static inline bool before64(__u64 seq1, __u64 seq2) static inline bool before64(__u64 seq1, __u64 seq2)
{ {
return (__s64)(seq1 - seq2) < 0; return (__s64)(seq1 - seq2) < 0;
...@@ -467,17 +476,22 @@ struct mptcp_subflow_context { ...@@ -467,17 +476,22 @@ struct mptcp_subflow_context {
send_fastclose : 1, send_fastclose : 1,
send_infinite_map : 1, send_infinite_map : 1,
rx_eof : 1, rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */ remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */ disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */ stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */ local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */ valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
__unused : 8;
enum mptcp_data_avail data_avail; enum mptcp_data_avail data_avail;
u32 remote_nonce; u32 remote_nonce;
u64 thmac; u64 thmac;
u32 local_nonce; u32 local_nonce;
u32 remote_token; u32 remote_token;
u8 hmac[MPTCPOPT_HMAC_LEN]; union {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
u8 local_id; u8 local_id;
u8 remote_id; u8 remote_id;
u8 reset_seen:1; u8 reset_seen:1;
...@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); ...@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net); int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt); const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk); bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags); void __mptcp_push_pending(struct sock *sk, unsigned int flags);
...@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent); ...@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout); bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk); void mptcp_cancel_work(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a, bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port); const struct mptcp_addr_info *b, bool use_port);
...@@ -826,6 +841,11 @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_ ...@@ -826,6 +841,11 @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{ {
return READ_ONCE(msk->pm.addr_signal) & return READ_ONCE(msk->pm.addr_signal) &
......
...@@ -559,7 +559,9 @@ static bool mptcp_supported_sockopt(int level, int optname) ...@@ -559,7 +559,9 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_NOTSENT_LOWAT: case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY: case TCP_TX_DELAY:
case TCP_INQ: case TCP_INQ:
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE: case TCP_FASTOPEN_NO_COOKIE:
return true; return true;
} }
...@@ -569,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname) ...@@ -569,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
* TCP_REPAIR_WINDOW are not supported, better avoid this mess * TCP_REPAIR_WINDOW are not supported, better avoid this mess
*/ */
/* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because
* fastopen for the listener side is currently unsupported
*/
} }
return false; return false;
} }
...@@ -801,7 +800,9 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, ...@@ -801,7 +800,9 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
/* See tcp.c: TCP_DEFER_ACCEPT does not fail */ /* See tcp.c: TCP_DEFER_ACCEPT does not fail */
mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
return 0; return 0;
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE: case TCP_FASTOPEN_NO_COOKIE:
return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen); optval, optlen);
...@@ -1166,7 +1167,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, ...@@ -1166,7 +1167,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_INFO: case TCP_INFO:
case TCP_CC_INFO: case TCP_CC_INFO:
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE: case TCP_FASTOPEN_NO_COOKIE:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen); optval, optlen);
......
...@@ -307,7 +307,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, ...@@ -307,7 +307,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
return NULL; return NULL;
} }
static void subflow_prep_synack(const struct sock *sk, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct inet_request_sock *ireq = inet_rsk(req);
/* clear tstamp_ok, as needed depending on cookie */
if (foc && foc->len > -1)
ireq->tstamp_ok = 0;
if (synack_type == TCP_SYNACK_FASTOPEN)
mptcp_fastopen_subflow_synack_set_params(subflow, req);
}
/* IPv4 send_synack hook for MPTCP subflow request socks.
 *
 * Runs subflow_prep_synack() first, then forwards every argument unchanged
 * to the send_synack op of tcp_request_sock_ipv4_ops and returns its result.
 */
static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
{
/* MPTCP-specific preparation must happen before the SYN-ACK is built */
subflow_prep_synack(sk, req, foc, synack_type);
return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc,
synack_type, syn_skb);
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* IPv6 send_synack hook for MPTCP subflow request socks.
 *
 * Runs subflow_prep_synack() first, then forwards every argument unchanged
 * to the send_synack op of tcp_request_sock_ipv6_ops and returns its result.
 */
static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
{
/* MPTCP-specific preparation must happen before the SYN-ACK is built */
subflow_prep_synack(sk, req, foc, synack_type);
return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc,
synack_type, syn_skb);
}
static struct dst_entry *subflow_v6_route_req(const struct sock *sk, static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
...@@ -392,11 +433,33 @@ static void mptcp_set_connected(struct sock *sk) ...@@ -392,11 +433,33 @@ static void mptcp_set_connected(struct sock *sk)
mptcp_data_unlock(sk); mptcp_data_unlock(sk);
} }
/* Record the peer key carried by @mp_opt on @subflow and publish the values
 * derived from it on @msk.
 *
 * @msk:     MPTCP socket receiving remote_key, ack_seq, can_ack and
 *           rcv_wnd_sent.
 * @subflow: subflow receiving remote_key and iasn.
 * @mp_opt:  parsed options; only sndr_key is read.
 *
 * Only the first call for a given subflow has any effect: later calls return
 * early because remote_key_valid is already set.
 */
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
/* active MPC subflow will reach here multiple times:
* at subflow_finish_connect() time and at 4th ack time
*/
if (subflow->remote_key_valid)
return;
subflow->remote_key_valid = 1;
subflow->remote_key = mp_opt->sndr_key;
/* iasn is the value produced by mptcp_crypto_key_sha() for the remote key,
 * plus one
 */
mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
subflow->iasn++;
/* publish the key and the initial ack sequence at the msk level */
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->iasn);
WRITE_ONCE(msk->can_ack, true);
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt; struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn; struct sock *parent = subflow->conn;
struct mptcp_sock *msk;
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
...@@ -404,6 +467,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -404,6 +467,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished) if (subflow->conn_finished)
return; return;
msk = mptcp_sk(parent);
mptcp_propagate_sndbuf(parent, sk); mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1; subflow->rel_write_seq = 1;
subflow->conn_finished = 1; subflow->conn_finished = 1;
...@@ -416,19 +480,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -416,19 +480,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk); mptcp_do_fallback(sk);
pr_fallback(mptcp_sk(subflow->conn)); pr_fallback(msk);
goto fallback; goto fallback;
} }
if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD) if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); WRITE_ONCE(msk->csum_enabled, true);
if (mp_opt.deny_join_id0) if (mp_opt.deny_join_id0)
WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1; subflow->mp_capable = 1;
subflow->can_ack = 1; subflow_set_remote_key(msk, subflow, &mp_opt);
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk); mptcp_finish_connect(sk);
mptcp_set_connected(parent); mptcp_set_connected(parent);
...@@ -466,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -466,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1; subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
if (subflow_use_different_dport(mptcp_sk(parent), sk)) { if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d", pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport)); ntohs(inet_sk(parent)->inet_dport));
...@@ -474,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -474,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} }
} else if (mptcp_check_fallback(sk)) { } else if (mptcp_check_fallback(sk)) {
fallback: fallback:
mptcp_rcv_space_init(mptcp_sk(parent), sk); mptcp_rcv_space_init(msk, sk);
mptcp_set_connected(parent); mptcp_set_connected(parent);
} }
return; return;
...@@ -637,14 +698,16 @@ static void subflow_drop_ctx(struct sock *ssk) ...@@ -637,14 +698,16 @@ static void subflow_drop_ctx(struct sock *ssk)
} }
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt) const struct mptcp_options_received *mp_opt)
{ {
struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_sock *msk = mptcp_sk(subflow->conn);
subflow->remote_key = mp_opt->sndr_key; subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1; subflow->fully_established = 1;
subflow->can_ack = 1;
WRITE_ONCE(msk->fully_established, true); WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
} }
static struct sock *subflow_syn_recv_sock(const struct sock *sk, static struct sock *subflow_syn_recv_sock(const struct sock *sk,
...@@ -760,7 +823,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -760,7 +823,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* with OoO packets we can reach here without ingress /* with OoO packets we can reach here without ingress
* mpc option * mpc option
*/ */
if (mp_opt.suboptions & OPTIONS_MPTCP_MPC) if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
mptcp_subflow_fully_established(ctx, &mp_opt); mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) { } else if (ctx->mp_join) {
struct mptcp_sock *owner; struct mptcp_sock *owner;
...@@ -1198,16 +1261,8 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1198,16 +1261,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
if (WARN_ON_ONCE(!skb)) if (WARN_ON_ONCE(!skb))
goto no_data; goto no_data;
/* if msk lacks the remote key, this subflow must provide an if (unlikely(!READ_ONCE(msk->can_ack)))
* MP_CAPABLE-based mapping
*/
if (unlikely(!READ_ONCE(msk->can_ack))) {
if (!subflow->mpc_map)
goto fallback; goto fallback;
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->map_seq);
WRITE_ONCE(msk->can_ack, true);
}
old_ack = READ_ONCE(msk->ack_seq); old_ack = READ_ONCE(msk->ack_seq);
ack_seq = mptcp_subflow_get_mapped_dsn(subflow); ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
...@@ -1480,6 +1535,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, ...@@ -1480,6 +1535,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex); &flags, &ifindex);
subflow->remote_key_valid = 1;
subflow->remote_key = msk->remote_key; subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key; subflow->local_key = msk->local_key;
subflow->token = msk->token; subflow->token = msk->token;
...@@ -1873,6 +1929,7 @@ static void subflow_ulp_clone(const struct request_sock *req, ...@@ -1873,6 +1929,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1; new_ctx->mp_join = 1;
new_ctx->fully_established = 1; new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup; new_ctx->backup = subflow_req->backup;
new_ctx->remote_id = subflow_req->remote_id; new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token; new_ctx->token = subflow_req->token;
...@@ -1929,6 +1986,7 @@ void __init mptcp_subflow_init(void) ...@@ -1929,6 +1986,7 @@ void __init mptcp_subflow_init(void)
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack;
subflow_specific = ipv4_specific; subflow_specific = ipv4_specific;
subflow_specific.conn_request = subflow_v4_conn_request; subflow_specific.conn_request = subflow_v4_conn_request;
...@@ -1942,6 +2000,7 @@ void __init mptcp_subflow_init(void) ...@@ -1942,6 +2000,7 @@ void __init mptcp_subflow_init(void)
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack;
subflow_v6_specific = ipv6_specific; subflow_v6_specific = ipv6_specific;
subflow_v6_specific.conn_request = subflow_v6_conn_request; subflow_v6_specific.conn_request = subflow_v6_conn_request;
......
...@@ -83,6 +83,7 @@ struct cfg_cmsg_types { ...@@ -83,6 +83,7 @@ struct cfg_cmsg_types {
struct cfg_sockopt_types { struct cfg_sockopt_types {
unsigned int transparent:1; unsigned int transparent:1;
unsigned int mptfo:1;
}; };
struct tcp_inq_state { struct tcp_inq_state {
...@@ -90,6 +91,13 @@ struct tcp_inq_state { ...@@ -90,6 +91,13 @@ struct tcp_inq_state {
bool expect_eof; bool expect_eof;
}; };
/* Write-side buffer state handed to sock_connect_mptcp(). */
struct wstate {
/* staging buffer; the MPTFO connect path fills it with read() from the input fd */
char buf[8192];
/* byte count passed to sendto() on the MPTFO connect path */
unsigned int len;
/* NOTE(review): presumably the offset of the next unsent byte in buf - confirm */
unsigned int off;
/* set together with len from the read() return value on the MPTFO connect path */
unsigned int total_len;
};
static struct tcp_inq_state tcp_inq; static struct tcp_inq_state tcp_inq;
static struct cfg_cmsg_types cfg_cmsg_types; static struct cfg_cmsg_types cfg_cmsg_types;
...@@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) ...@@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf)
} }
} }
/* Set the TCP_FASTOPEN socket option on @fd, using a value of 25.
 *
 * A failure is reported through perror() and otherwise ignored.
 * @pf is not used.
 */
static void set_mptfo(int fd, int pf)
{
	int tfo_qlen = 25;
	int rc;

	rc = setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &tfo_qlen, sizeof(tfo_qlen));
	if (rc == -1)
		perror("TCP_FASTOPEN");
}
static int do_ulp_so(int sock, const char *name) static int do_ulp_so(int sock, const char *name)
{ {
return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
...@@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, ...@@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr,
if (cfg_sockopt_types.transparent) if (cfg_sockopt_types.transparent)
set_transparent(sock, pf); set_transparent(sock, pf);
if (cfg_sockopt_types.mptfo)
set_mptfo(sock, pf);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */ break; /* success */
...@@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, ...@@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr,
static int sock_connect_mptcp(const char * const remoteaddr, static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto, const char * const port, int proto,
struct addrinfo **peer) struct addrinfo **peer,
int infd, struct wstate *winfo)
{ {
struct addrinfo hints = { struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP, .ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM, .ai_socktype = SOCK_STREAM,
}; };
struct addrinfo *a, *addr; struct addrinfo *a, *addr;
int syn_copied = 0;
int sock = -1; int sock = -1;
hints.ai_family = pf; hints.ai_family = pf;
...@@ -354,15 +375,35 @@ static int sock_connect_mptcp(const char * const remoteaddr, ...@@ -354,15 +375,35 @@ static int sock_connect_mptcp(const char * const remoteaddr,
if (cfg_mark) if (cfg_mark)
set_mark(sock, cfg_mark); set_mark(sock, cfg_mark);
if (cfg_sockopt_types.mptfo) {
if (!winfo->total_len)
winfo->total_len = winfo->len = read(infd, winfo->buf,
sizeof(winfo->buf));
syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
a->ai_addr, a->ai_addrlen);
if (syn_copied >= 0) {
winfo->off = syn_copied;
winfo->len -= syn_copied;
*peer = a;
break; /* success */
}
} else {
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a; *peer = a;
break; /* success */ break; /* success */
} }
}
if (cfg_sockopt_types.mptfo) {
perror("sendto()");
close(sock);
sock = -1;
} else {
perror("connect()"); perror("connect()");
close(sock); close(sock);
sock = -1; sock = -1;
} }
}
freeaddrinfo(addr); freeaddrinfo(addr);
if (sock != -1) if (sock != -1)
...@@ -571,14 +612,14 @@ static void shut_wr(int fd) ...@@ -571,14 +612,14 @@ static void shut_wr(int fd)
shutdown(fd, SHUT_WR); shutdown(fd, SHUT_WR);
} }
static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) static int copyfd_io_poll(int infd, int peerfd, int outfd,
bool *in_closed_after_out, struct wstate *winfo)
{ {
struct pollfd fds = { struct pollfd fds = {
.fd = peerfd, .fd = peerfd,
.events = POLLIN | POLLOUT, .events = POLLIN | POLLOUT,
}; };
unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; unsigned int total_wlen = 0, total_rlen = 0;
char wbuf[8192];
set_nonblock(peerfd, true); set_nonblock(peerfd, true);
...@@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after ...@@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
} }
if (fds.revents & POLLOUT) { if (fds.revents & POLLOUT) {
if (wlen == 0) { if (winfo->len == 0) {
woff = 0; winfo->off = 0;
wlen = read(infd, wbuf, sizeof(wbuf)); winfo->len = read(infd, winfo->buf, sizeof(winfo->buf));
} }
if (wlen > 0) { if (winfo->len > 0) {
ssize_t bw; ssize_t bw;
/* limit the total amount of written data to the trunc value */ /* limit the total amount of written data to the trunc value */
if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
wlen = cfg_truncate - total_wlen; winfo->len = cfg_truncate - total_wlen;
bw = do_rnd_write(peerfd, wbuf + woff, wlen); bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) { if (bw < 0) {
if (cfg_rcv_trunc) if (cfg_rcv_trunc)
return 0; return 0;
...@@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after ...@@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
return 111; return 111;
} }
woff += bw; winfo->off += bw;
wlen -= bw; winfo->len -= bw;
total_wlen += bw; total_wlen += bw;
} else if (wlen == 0) { } else if (winfo->len == 0) {
/* We have no more data to send. */ /* We have no more data to send. */
fds.events &= ~POLLOUT; fds.events &= ~POLLOUT;
...@@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) ...@@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd)
return (int)r; return (int)r;
} }
/* spool_buf() - write out whatever is still pending in winfo->buf.
 *
 * Loops on write(fd, ...) starting at winfo->buf + winfo->off until
 * winfo->len reaches zero, advancing off and shrinking len by the number
 * of bytes each write() accepted.
 *
 * Returns 0 once the buffer is drained, or 4 after perror("write") when
 * write() fails.
 *
 * NOTE(review): the failure value is positive, while the visible callers
 * (do_mmap, do_sendfile) test "ret < 0" - as written they would not notice
 * a failure reported here; confirm which side should change.
 */
static int do_mmap(int infd, int outfd, unsigned int size) static int spool_buf(int fd, struct wstate *winfo)
{
while (winfo->len) {
/* a short write is fine: the loop retries with the remainder */
int ret = write(fd, winfo->buf + winfo->off, winfo->len);
if (ret < 0) {
perror("write");
return 4;
}
winfo->off += ret;
winfo->len -= ret;
}
return 0;
}
static int do_mmap(int infd, int outfd, unsigned int size,
struct wstate *winfo)
{ {
char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
ssize_t ret = 0, off = 0; ssize_t ret = 0, off = winfo->total_len;
size_t rem; size_t rem;
if (inbuf == MAP_FAILED) { if (inbuf == MAP_FAILED) {
...@@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) ...@@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size)
return 1; return 1;
} }
rem = size; ret = spool_buf(outfd, winfo);
if (ret < 0)
return ret;
rem = size - winfo->total_len;
while (rem > 0) { while (rem > 0) {
ret = write(outfd, inbuf + off, rem); ret = write(outfd, inbuf + off, rem);
...@@ -772,8 +833,16 @@ static int get_infd_size(int fd) ...@@ -772,8 +833,16 @@ static int get_infd_size(int fd)
return (int)count; return (int)count;
} }
static int do_sendfile(int infd, int outfd, unsigned int count) static int do_sendfile(int infd, int outfd, unsigned int count,
struct wstate *winfo)
{ {
int ret = spool_buf(outfd, winfo);
if (ret < 0)
return ret;
count -= winfo->total_len;
while (count > 0) { while (count > 0) {
ssize_t r; ssize_t r;
...@@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) ...@@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
} }
static int copyfd_io_mmap(int infd, int peerfd, int outfd, static int copyfd_io_mmap(int infd, int peerfd, int outfd,
unsigned int size, bool *in_closed_after_out) unsigned int size, bool *in_closed_after_out,
struct wstate *winfo)
{ {
int err; int err;
...@@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, ...@@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
if (err) if (err)
return err; return err;
err = do_mmap(infd, peerfd, size); err = do_mmap(infd, peerfd, size, winfo);
} else { } else {
err = do_mmap(infd, peerfd, size); err = do_mmap(infd, peerfd, size, winfo);
if (err) if (err)
return err; return err;
...@@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, ...@@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
} }
static int copyfd_io_sendfile(int infd, int peerfd, int outfd, static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
unsigned int size, bool *in_closed_after_out) unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
{ {
int err; int err;
...@@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, ...@@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
if (err) if (err)
return err; return err;
err = do_sendfile(infd, peerfd, size); err = do_sendfile(infd, peerfd, size, winfo);
} else { } else {
err = do_sendfile(infd, peerfd, size); err = do_sendfile(infd, peerfd, size, winfo);
if (err) if (err)
return err; return err;
...@@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, ...@@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err; return err;
} }
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{ {
bool in_closed_after_out = false; bool in_closed_after_out = false;
struct timespec start, end; struct timespec start, end;
...@@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) ...@@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd)
switch (cfg_mode) { switch (cfg_mode) {
case CFG_MODE_POLL: case CFG_MODE_POLL:
ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
winfo);
break; break;
case CFG_MODE_MMAP: case CFG_MODE_MMAP:
file_size = get_infd_size(infd); file_size = get_infd_size(infd);
if (file_size < 0) if (file_size < 0)
return file_size; return file_size;
ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
&in_closed_after_out, winfo);
break; break;
case CFG_MODE_SENDFILE: case CFG_MODE_SENDFILE:
file_size = get_infd_size(infd); file_size = get_infd_size(infd);
if (file_size < 0) if (file_size < 0)
return file_size; return file_size;
ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
&in_closed_after_out, winfo);
break; break;
default: default:
...@@ -999,6 +1072,7 @@ static void maybe_close(int fd) ...@@ -999,6 +1072,7 @@ static void maybe_close(int fd)
int main_loop_s(int listensock) int main_loop_s(int listensock)
{ {
struct sockaddr_storage ss; struct sockaddr_storage ss;
struct wstate winfo;
struct pollfd polls; struct pollfd polls;
socklen_t salen; socklen_t salen;
int remotesock; int remotesock;
...@@ -1033,7 +1107,8 @@ int main_loop_s(int listensock) ...@@ -1033,7 +1107,8 @@ int main_loop_s(int listensock)
SOCK_TEST_TCPULP(remotesock, 0); SOCK_TEST_TCPULP(remotesock, 0);
copyfd_io(fd, remotesock, 1, true); memset(&winfo, 0, sizeof(winfo));
copyfd_io(fd, remotesock, 1, true, &winfo);
} else { } else {
perror("accept"); perror("accept");
return 1; return 1;
...@@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) ...@@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name)
return; return;
} }
if (strncmp(name, "MPTFO", len) == 0) {
cfg_sockopt_types.mptfo = 1;
return;
}
fprintf(stderr, "Unrecognized setsockopt option %s\n", name); fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
exit(1); exit(1);
} }
...@@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) ...@@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen)
int main_loop(void) int main_loop(void)
{ {
int fd, ret, fd_in = 0; int fd = 0, ret, fd_in = 0;
struct addrinfo *peer; struct addrinfo *peer;
struct wstate winfo;
/* listener is ready. */ if (cfg_input && cfg_sockopt_types.mptfo) {
fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); fd_in = open(cfg_input, O_RDONLY);
if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
memset(&winfo, 0, sizeof(winfo));
fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo);
if (fd < 0) if (fd < 0)
return 2; return 2;
...@@ -1186,14 +1273,13 @@ int main_loop(void) ...@@ -1186,14 +1273,13 @@ int main_loop(void)
if (cfg_cmsg_types.cmsg_enabled) if (cfg_cmsg_types.cmsg_enabled)
apply_cmsg_types(fd, &cfg_cmsg_types); apply_cmsg_types(fd, &cfg_cmsg_types);
if (cfg_input) { if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY); fd_in = open(cfg_input, O_RDONLY);
if (fd < 0) if (fd < 0)
xerror("can't open %s:%d", cfg_input, errno); xerror("can't open %s:%d", cfg_input, errno);
} }
/* close the client socket open only if we are not going to reconnect */ ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
ret = copyfd_io(fd_in, fd, 1, 0);
if (ret) if (ret)
return ret; return ret;
...@@ -1210,6 +1296,7 @@ int main_loop(void) ...@@ -1210,6 +1296,7 @@ int main_loop(void)
xerror("can't reconnect: %d", errno); xerror("can't reconnect: %d", errno);
if (cfg_input) if (cfg_input)
close(fd_in); close(fd_in);
memset(&winfo, 0, sizeof(winfo));
goto again; goto again;
} else { } else {
close(fd); close(fd);
......
...@@ -762,6 +762,23 @@ run_tests_peekmode() ...@@ -762,6 +762,23 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
} }
# Run the loopback-style connection tests with the "-o MPTFO" option between
# $ns1 and $ns2, once over IPv4 and once over IPv6.
run_tests_mptfo()
{
	local mptfo_addr

	echo "INFO: with MPTFO start"

	# Values 2 and 1 are presumably the server-side and client-side enable
	# bits of tcp_fastopen - confirm against the kernel ip-sysctl docs.
	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1

	# Each address is run twice back to back (presumably so the second
	# connection can use a cookie learned by the first - confirm).
	for mptfo_addr in 10.0.1.1 dead:beef:1::1; do
		run_tests_lo "$ns1" "$ns2" "$mptfo_addr" 0 "-o MPTFO"
		run_tests_lo "$ns1" "$ns2" "$mptfo_addr" 0 "-o MPTFO"
	done

	# Set tcp_fastopen to 0 in both namespaces before returning.
	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0

	echo "INFO: with MPTFO end"
}
run_tests_disconnect() run_tests_disconnect()
{ {
local peekmode="$1" local peekmode="$1"
...@@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek" ...@@ -901,6 +918,10 @@ run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek" run_tests_peekmode "saveAfterPeek"
stop_if_error "Tests with peek mode have failed" stop_if_error "Tests with peek mode have failed"
# MPTFO (MultiPath TCP Fastopen tests)
run_tests_mptfo
stop_if_error "Tests with MPTFO have failed"
# connect to ns4 ip address, ns2 should intercept/proxy # connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6" run_test_transparent dead:beef:3::1 "tproxy ipv6"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment