Commit a778e93d authored by David S. Miller

Merge branch 'mptcp-dss-checksums'

Mat Martineau says:

====================
mptcp: DSS checksum support

RFC 8684 defines a DSS checksum feature that allows MPTCP to detect
middlebox interference with the MPTCP DSS header and the portion of the
data stream associated with that header. So far, the MPTCP
implementation in the Linux kernel has not supported this feature.

This patch series adds DSS checksum support. By default, the kernel will
not request checksums when sending SYN or SYN/ACK packets for MPTCP
connections. Outgoing checksum requests can be enabled with a
per-namespace net.mptcp.checksum_enabled sysctl. MPTCP connections will
now proceed with DSS checksums when the peer requests them, whether the
sysctl is enabled or not.

Patches 1-5 add checksum bits to the outgoing SYN, SYN/ACK, and data
packet headers. This includes calculating the checksum using a range of
data and the MPTCP DSS mapping for that data.

Patches 6-10 handle the checksum request in the SYN or SYN/ACK, and
receiving and verifying the DSS checksum on data packets.

Patch 11 adjusts the MPTCP-level retransmission process for checksum
compatibility.

Patches 12-14 add checksum-related MIBs, the net.mptcp.checksum_enabled
sysctl, and a checksum field to debug trace output.

Patches 15 & 16 add selftests.

The series is slightly longer than the preferred 15-patch limit that
patchwork warns about. I do try to stay below that whenever possible -
this series does implement one feature and is, I think, cohesive enough
to justify keeping it together. If it's at all problematic please let me
know!

A trivial merge conflict with net/master is introduced in patch 15: a
commit in net/master removes a couple of nearby lines of code.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e7f3863c af66d3e1
...@@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds) ...@@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds)
sysctl. sysctl.
Default: 120 Default: 120
checksum_enabled - BOOLEAN
Control whether DSS checksum can be enabled.
DSS checksum can be enabled if the value is nonzero. This is a
per-namespace sysctl.
Default: 0
...@@ -23,6 +23,7 @@ struct mptcp_ext { ...@@ -23,6 +23,7 @@ struct mptcp_ext {
u64 data_seq; u64 data_seq;
u32 subflow_seq; u32 subflow_seq;
u16 data_len; u16 data_len;
__sum16 csum;
u8 use_map:1, u8 use_map:1,
dsn64:1, dsn64:1,
data_fin:1, data_fin:1,
...@@ -31,7 +32,8 @@ struct mptcp_ext { ...@@ -31,7 +32,8 @@ struct mptcp_ext {
mpc_map:1, mpc_map:1,
frozen:1, frozen:1,
reset_transient:1; reset_transient:1;
u8 reset_reason:4; u8 reset_reason:4,
csum_reqd:1;
}; };
#define MPTCP_RM_IDS_MAX 8 #define MPTCP_RM_IDS_MAX 8
...@@ -63,8 +65,9 @@ struct mptcp_out_options { ...@@ -63,8 +65,9 @@ struct mptcp_out_options {
struct mptcp_rm_list rm_list; struct mptcp_rm_list rm_list;
u8 join_id; u8 join_id;
u8 backup; u8 backup;
u8 reset_reason:4; u8 reset_reason:4,
u8 reset_transient:1; reset_transient:1,
csum_reqd:1;
u32 nonce; u32 nonce;
u64 thmac; u64 thmac;
u32 token; u32 token;
......
...@@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, ...@@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u64, data_seq) __field(u64, data_seq)
__field(u32, subflow_seq) __field(u32, subflow_seq)
__field(u16, data_len) __field(u16, data_len)
__field(u16, csum)
__field(u8, use_map) __field(u8, use_map)
__field(u8, dsn64) __field(u8, dsn64)
__field(u8, data_fin) __field(u8, data_fin)
...@@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, ...@@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u8, frozen) __field(u8, frozen)
__field(u8, reset_transient) __field(u8, reset_transient)
__field(u8, reset_reason) __field(u8, reset_reason)
__field(u8, csum_reqd)
), ),
TP_fast_assign( TP_fast_assign(
...@@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, ...@@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->data_seq = mpext->data_seq; __entry->data_seq = mpext->data_seq;
__entry->subflow_seq = mpext->subflow_seq; __entry->subflow_seq = mpext->subflow_seq;
__entry->data_len = mpext->data_len; __entry->data_len = mpext->data_len;
__entry->csum = (__force u16)mpext->csum;
__entry->use_map = mpext->use_map; __entry->use_map = mpext->use_map;
__entry->dsn64 = mpext->dsn64; __entry->dsn64 = mpext->dsn64;
__entry->data_fin = mpext->data_fin; __entry->data_fin = mpext->data_fin;
...@@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext, ...@@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->frozen = mpext->frozen; __entry->frozen = mpext->frozen;
__entry->reset_transient = mpext->reset_transient; __entry->reset_transient = mpext->reset_transient;
__entry->reset_reason = mpext->reset_reason; __entry->reset_reason = mpext->reset_reason;
__entry->csum_reqd = mpext->csum_reqd;
), ),
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u", TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
__entry->data_ack, __entry->data_seq, __entry->data_ack, __entry->data_seq,
__entry->subflow_seq, __entry->data_len, __entry->subflow_seq, __entry->data_len,
__entry->use_map, __entry->dsn64, __entry->csum, __entry->use_map,
__entry->data_fin, __entry->use_ack, __entry->dsn64, __entry->data_fin,
__entry->ack64, __entry->mpc_map, __entry->use_ack, __entry->ack64,
__entry->frozen, __entry->reset_transient, __entry->mpc_map, __entry->frozen,
__entry->reset_reason) __entry->reset_transient, __entry->reset_reason,
__entry->csum_reqd)
); );
DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,
......
...@@ -105,6 +105,7 @@ struct mptcp_info { ...@@ -105,6 +105,7 @@ struct mptcp_info {
__u64 mptcpi_rcv_nxt; __u64 mptcpi_rcv_nxt;
__u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max; __u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
}; };
/* /*
......
...@@ -23,6 +23,7 @@ struct mptcp_pernet { ...@@ -23,6 +23,7 @@ struct mptcp_pernet {
u8 mptcp_enabled; u8 mptcp_enabled;
unsigned int add_addr_timeout; unsigned int add_addr_timeout;
u8 checksum_enabled;
}; };
static struct mptcp_pernet *mptcp_get_pernet(struct net *net) static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
...@@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net) ...@@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
return mptcp_get_pernet(net)->add_addr_timeout; return mptcp_get_pernet(net)->add_addr_timeout;
} }
/* Report the checksum_enabled setting held in the per-net MPTCP data
* (struct mptcp_pernet) looked up for @net.
*/
int mptcp_is_checksum_enabled(struct net *net)
{
return mptcp_get_pernet(net)->checksum_enabled;
}
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{ {
pernet->mptcp_enabled = 1; pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX; pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0;
} }
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
...@@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = { ...@@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
{
.procname = "checksum_enabled",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{} {}
}; };
...@@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) ...@@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled; table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout; table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr) if (!hdr)
......
...@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { ...@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
......
...@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field { ...@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */ MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
......
...@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, ...@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_write_seq = READ_ONCE(msk->write_seq); info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow); unlock_sock_fast(sk, slow);
} }
......
...@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN; expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
} }
if (opsize != expected_opsize)
/* Cfr RFC 8684 Section 3.3.0:
* If a checksum is present but its use had
* not been negotiated in the MP_CAPABLE handshake, the receiver MUST
* close the subflow with a RST, as it is not behaving as negotiated.
* If a checksum is not present when its use has been negotiated, the
* receiver MUST close the subflow with a RST, as it is considered
* broken
* We parse even options with a mismatching csum presence, so that
* later in subflow_data_ready we can trigger the reset.
*/
if (opsize != expected_opsize &&
(expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break; break;
/* try to be gentle vs future versions on the initial syn */ /* try to be gentle vs future versions on the initial syn */
...@@ -66,16 +79,9 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -66,16 +79,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used. * host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used * In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0." * is if both hosts in their SYNs set A=0."
*
* Section 3.3.0:
* "If a checksum is not present when its use has been
* negotiated, the receiver MUST close the subflow with a RST as
* it is considered broken."
*
* We don't implement DSS checksum - fall back to TCP.
*/ */
if (flags & MPTCP_CAP_CHECKSUM_REQD) if (flags & MPTCP_CAP_CHECKSUM_REQD)
break; mp_opt->csum_reqd = 1;
mp_opt->mp_capable = 1; mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
...@@ -86,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -86,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr); mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8; ptr += 8;
} }
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.: /* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically * "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used * equivalent to those in a DSS option and can be used
...@@ -98,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -98,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr); mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2; ptr += 2;
} }
pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
mp_opt->csum_reqd = 1;
ptr += 2;
}
pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key, version, flags, opsize, mp_opt->sndr_key,
mp_opt->rcvr_key, mp_opt->data_len); mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break; break;
case MPTCPOPT_MP_JOIN: case MPTCPOPT_MP_JOIN:
...@@ -171,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -171,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
} }
/* RFC 6824, Section 3.3: /* Always parse any csum presence combination, we will enforce
* If a checksum is present, but its use had * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
* not been negotiated in the MP_CAPABLE handshake,
* the checksum field MUST be ignored.
*/ */
if (opsize != expected_opsize && if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
...@@ -209,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -209,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr); mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2; ptr += 2;
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
mp_opt->csum_reqd = 1;
mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
ptr += 2;
}
pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq, mp_opt->data_seq, mp_opt->subflow_seq,
mp_opt->data_len); mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
} }
break; break;
...@@ -323,9 +338,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, ...@@ -323,9 +338,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
} }
} }
void mptcp_get_options(const struct sk_buff *skb, void mptcp_get_options(const struct sock *sk,
const struct sk_buff *skb,
struct mptcp_options_received *mp_opt) struct mptcp_options_received *mp_opt)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr; const unsigned char *ptr;
int length; int length;
...@@ -341,6 +359,7 @@ void mptcp_get_options(const struct sk_buff *skb, ...@@ -341,6 +359,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0; mp_opt->dss = 0;
mp_opt->mp_prio = 0; mp_opt->mp_prio = 0;
mp_opt->reset = 0; mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
length = (th->doff * 4) - sizeof(struct tcphdr); length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1); ptr = (const unsigned char *)(th + 1);
...@@ -380,6 +399,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, ...@@ -380,6 +399,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) { if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN; opts->suboptions = OPTION_MPTCP_MPC_SYN;
opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN; *size = TCPOLEN_MPTCP_MPC_SYN;
return true; return true;
} else if (subflow->request_join) { } else if (subflow->request_join) {
...@@ -435,8 +455,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, ...@@ -435,8 +455,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts) struct mptcp_out_options *opts)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext; struct mptcp_ext *mpext;
unsigned int data_len; unsigned int data_len;
u8 len;
/* When skb is not available, we better over-estimate the emitted /* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than * options len. A full DSS option (28 bytes) is longer than
...@@ -465,16 +487,26 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, ...@@ -465,16 +487,26 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key; opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key; opts->rcvr_key = subflow->remote_key;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
/* Section 3.1. /* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection, * packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data * as well as the first packet that carries data
*/ */
if (data_len > 0) if (data_len > 0) {
*size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); len = TCPOLEN_MPTCP_MPC_ACK_DATA;
else if (opts->csum_reqd) {
/* we need to propagate more info to csum the pseudo hdr */
opts->ext_copy.data_seq = mpext->data_seq;
opts->ext_copy.subflow_seq = mpext->subflow_seq;
opts->ext_copy.csum = mpext->csum;
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*size = ALIGN(len, 4);
} else {
*size = TCPOLEN_MPTCP_MPC_ACK; *size = TCPOLEN_MPTCP_MPC_ACK;
}
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key, subflow, subflow->local_key, subflow->remote_key,
...@@ -535,18 +567,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, ...@@ -535,18 +567,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false; bool ret = false;
u64 ack_seq; u64 ack_seq;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL; mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size; unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; if (mpext) {
if (opts->csum_reqd)
map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
remaining -= map_size;
dss_size = map_size;
if (mpext)
opts->ext_copy = *mpext; opts->ext_copy = *mpext;
}
remaining -= map_size;
dss_size = map_size;
if (skb && snd_data_fin_enable) if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy); mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true; ret = true;
...@@ -789,6 +824,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, ...@@ -789,6 +824,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) { if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key; opts->sndr_key = subflow_req->local_key;
opts->csum_reqd = subflow_req->csum_reqd;
*size = TCPOLEN_MPTCP_MPC_SYNACK; *size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu", pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key); subflow_req, subflow_req->local_key);
...@@ -1007,7 +1043,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1007,7 +1043,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return; return;
} }
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk, skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return; return;
...@@ -1099,6 +1135,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) ...@@ -1099,6 +1135,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
} }
mpext->data_len = mp_opt.data_len; mpext->data_len = mp_opt.data_len;
mpext->use_map = 1; mpext->use_map = 1;
mpext->csum_reqd = mp_opt.csum_reqd;
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
} }
} }
...@@ -1118,25 +1158,50 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) ...@@ -1118,25 +1158,50 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
} }
/* Compute the 16-bit checksum value for the mapping described by @mpext.
*
* A pseudo-header is filled from @mpext's data_seq, subflow_seq and data_len
* (each converted to big endian) with its own csum field zeroed. It is summed
* with csum_partial(), seeded with the complement of the unfolded
* mpext->csum, and the folded result is returned as a plain u16.
*/
static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
struct csum_pseudo_header header;
__wsum csum;
/* cfr RFC 8684 3.3.1.:
* the data sequence number used in the pseudo-header is
* always the 64-bit value, irrespective of what length is used in the
* DSS option itself.
*/
header.data_seq = cpu_to_be64(mpext->data_seq);
header.subflow_seq = htonl(mpext->subflow_seq);
header.data_len = htons(mpext->data_len);
header.csum = 0;
/* NOTE(review): presumably mpext->csum already holds the folded checksum
* of the mapped payload at this point - confirm against the transmit path.
*/
csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
return (__force u16)csum_fold(csum);
}
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts) struct mptcp_out_options *opts)
{ {
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) { OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
u8 len; u8 len, flag = MPTCP_CAP_HMAC_SHA256;
if (OPTION_MPTCP_MPC_SYN & opts->suboptions) if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN; len = TCPOLEN_MPTCP_MPC_SYN;
else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK; len = TCPOLEN_MPTCP_MPC_SYNACK;
else if (opts->ext_copy.data_len) } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA; len = TCPOLEN_MPTCP_MPC_ACK_DATA;
else if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
} else {
len = TCPOLEN_MPTCP_MPC_ACK; len = TCPOLEN_MPTCP_MPC_ACK;
}
if (opts->csum_reqd)
flag |= MPTCP_CAP_CHECKSUM_REQD;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION, MPTCP_SUPPORTED_VERSION,
MPTCP_CAP_HMAC_SHA256); flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions)) opts->suboptions))
...@@ -1152,8 +1217,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1152,8 +1217,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len) if (!opts->ext_copy.data_len)
goto mp_capable_done; goto mp_capable_done;
put_unaligned_be32(opts->ext_copy.data_len << 16 | if (opts->csum_reqd) {
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); put_unaligned_be32(opts->ext_copy.data_len << 16 |
mptcp_make_csum(&opts->ext_copy), ptr);
} else {
put_unaligned_be32(opts->ext_copy.data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1; ptr += 1;
} }
...@@ -1305,6 +1375,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1305,6 +1375,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin) if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN; flags |= MPTCP_DSS_DATA_FIN;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
} }
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
...@@ -1324,8 +1397,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, ...@@ -1324,8 +1397,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
ptr += 2; ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr); put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1; ptr += 1;
put_unaligned_be32(mpext->data_len << 16 | if (opts->csum_reqd) {
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); put_unaligned_be32(mpext->data_len << 16 |
mptcp_make_csum(mpext), ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
} }
} }
......
...@@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) ...@@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
} }
/* Fold the checksum of @added bytes of @skb, starting at offset
* skb->len - @added, into the running checksum kept in the skb's MPTCP
* extension (mpext->csum).
*
* note: this always recomputes the csum on the whole skb, even
* if we just appended a single frag. More status info needed
*
* NOTE(review): the skb_checksum() call below is given only the trailing
* @added bytes, so the note above may be stale - confirm.
*/
static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
{
struct mptcp_ext *mpext = mptcp_get_ext(skb);
/* unfold and complement the stored 16-bit value to resume the running sum */
__wsum csum = ~csum_unfold(mpext->csum);
/* byte offset within the skb at which the @added bytes begin */
int offset = skb->len - added;
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag, struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info) struct mptcp_sendmsg_info *info)
...@@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, ...@@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) { if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1; mpext->frozen = 1;
ret = 0; if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk); tcp_push_pending_frames(ssk);
return 0;
} }
out: out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret; return ret;
} }
...@@ -2359,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk) ...@@ -2359,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */ /* limit retransmission to the bytes already sent on some subflows */
info.sent = 0; info.sent = 0;
info.limit = dfrag->already_sent; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
while (info.sent < dfrag->already_sent) { while (info.sent < info.limit) {
if (!mptcp_alloc_tx_skb(sk, ssk)) if (!mptcp_alloc_tx_skb(sk, ssk))
break; break;
...@@ -2372,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk) ...@@ -2372,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret; copied += ret;
info.sent += ret; info.sent += ret;
} }
if (copied) if (copied) {
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal); info.size_goal);
}
mptcp_set_timeout(sk, ssk); mptcp_set_timeout(sk, ssk);
release_sock(ssk); release_sock(ssk);
...@@ -2453,6 +2471,7 @@ static int __mptcp_init_sock(struct sock *sk) ...@@ -2453,6 +2471,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->ack_hint = NULL; msk->ack_hint = NULL;
msk->first = NULL; msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk); mptcp_pm_data_init(msk);
...@@ -2793,6 +2812,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, ...@@ -2793,6 +2812,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token; msk->token = subflow_req->token;
msk->subflow = NULL; msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false); WRITE_ONCE(msk->fully_established, false);
if (mp_opt->csum_reqd)
WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1; msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq; msk->snd_nxt = msk->write_seq;
......
...@@ -68,6 +68,8 @@ ...@@ -68,6 +68,8 @@
#define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4 #define TCPOLEN_MPTCP_RST 4
#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
/* MPTCP MP_JOIN flags */ /* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_HMAC_LEN 20 #define MPTCPOPT_HMAC_LEN 20
...@@ -124,6 +126,7 @@ struct mptcp_options_received { ...@@ -124,6 +126,7 @@ struct mptcp_options_received {
u64 data_seq; u64 data_seq;
u32 subflow_seq; u32 subflow_seq;
u16 data_len; u16 data_len;
__sum16 csum;
u16 mp_capable : 1, u16 mp_capable : 1,
mp_join : 1, mp_join : 1,
fastclose : 1, fastclose : 1,
...@@ -133,6 +136,7 @@ struct mptcp_options_received { ...@@ -133,6 +136,7 @@ struct mptcp_options_received {
rm_addr : 1, rm_addr : 1,
mp_prio : 1, mp_prio : 1,
echo : 1, echo : 1,
csum_reqd : 1,
backup : 1; backup : 1;
u32 token; u32 token;
u32 nonce; u32 nonce;
...@@ -234,6 +238,7 @@ struct mptcp_sock { ...@@ -234,6 +238,7 @@ struct mptcp_sock {
bool snd_data_fin_enable; bool snd_data_fin_enable;
bool rcv_fastclose; bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
spinlock_t join_list_lock; spinlock_t join_list_lock;
struct sock *ack_hint; struct sock *ack_hint;
struct work_struct work; struct work_struct work;
...@@ -335,11 +340,19 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) ...@@ -335,11 +340,19 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
} }
/* Pseudo-header that mptcp_make_csum() fills in and passes to
* csum_partial(); the sequence and length fields are stored big endian.
*/
struct csum_pseudo_header {
__be64 data_seq; /* mpext->data_seq via cpu_to_be64() */
__be32 subflow_seq; /* mpext->subflow_seq via htonl() */
__be16 data_len; /* mpext->data_len via htons() */
__sum16 csum; /* set to 0 by mptcp_make_csum() before summing */
};
struct mptcp_subflow_request_sock { struct mptcp_subflow_request_sock {
struct tcp_request_sock sk; struct tcp_request_sock sk;
u16 mp_capable : 1, u16 mp_capable : 1,
mp_join : 1, mp_join : 1,
backup : 1; backup : 1,
csum_reqd : 1;
u8 local_id; u8 local_id;
u8 remote_id; u8 remote_id;
u64 local_key; u64 local_key;
...@@ -387,6 +400,8 @@ struct mptcp_subflow_context { ...@@ -387,6 +400,8 @@ struct mptcp_subflow_context {
u32 map_subflow_seq; u32 map_subflow_seq;
u32 ssn_offset; u32 ssn_offset;
u32 map_data_len; u32 map_data_len;
__wsum map_data_csum;
u32 map_csum_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */ u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */ request_join : 1, /* send MP_JOIN */
request_bkup : 1, request_bkup : 1,
...@@ -396,6 +411,8 @@ struct mptcp_subflow_context { ...@@ -396,6 +411,8 @@ struct mptcp_subflow_context {
pm_notified : 1, /* PM hook called for established status */ pm_notified : 1, /* PM hook called for established status */
conn_finished : 1, conn_finished : 1,
map_valid : 1, map_valid : 1,
map_csum_reqd : 1,
map_data_fin : 1,
mpc_map : 1, mpc_map : 1,
backup : 1, backup : 1,
send_mp_prio : 1, send_mp_prio : 1,
...@@ -525,6 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su ...@@ -525,6 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net); int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net);
int mptcp_is_checksum_enabled(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt); struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk); bool mptcp_subflow_data_available(struct sock *sk);
...@@ -576,7 +594,8 @@ int __init mptcp_proto_v6_init(void); ...@@ -576,7 +594,8 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk, struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt, const struct mptcp_options_received *mp_opt,
struct request_sock *req); struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb, void mptcp_get_options(const struct sock *sk,
const struct sk_buff *skb,
struct mptcp_options_received *mp_opt); struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk); void mptcp_finish_connect(struct sock *sk);
......
...@@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis ...@@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0; subflow_req->mp_capable = 0;
subflow_req->mp_join = 0; subflow_req->mp_join = 0;
subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
subflow_req->msk = NULL; subflow_req->msk = NULL;
mptcp_token_init_request(req); mptcp_token_init_request(req);
} }
...@@ -150,7 +151,7 @@ static int subflow_check_req(struct request_sock *req, ...@@ -150,7 +151,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL; return -EINVAL;
#endif #endif
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) { if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
...@@ -247,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, ...@@ -247,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err; int err;
subflow_init_req(req, sk_listener); subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join) if (mp_opt.mp_capable && mp_opt.mp_join)
return -EINVAL; return -EINVAL;
...@@ -394,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -394,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq; subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) { if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) { if (!mp_opt.mp_capable) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_INC_STATS(sock_net(sk),
...@@ -404,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) ...@@ -404,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback; goto fallback;
} }
if (mp_opt.csum_reqd)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
subflow->mp_capable = 1; subflow->mp_capable = 1;
subflow->can_ack = 1; subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key; subflow->remote_key = mp_opt.sndr_key;
...@@ -638,7 +641,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -638,7 +641,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other * reordered MPC will cause fallback, but we don't have other
* options. * options.
*/ */
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) { if (!mp_opt.mp_capable) {
fallback = true; fallback = true;
goto create_child; goto create_child;
...@@ -648,7 +651,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -648,7 +651,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk) if (!new_msk)
fallback = true; fallback = true;
} else if (subflow_req->mp_join) { } else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt); mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) { !mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
...@@ -824,10 +827,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) ...@@ -824,10 +827,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
return true; return true;
} }
/* Validate the DSS checksum for the mapping currently installed on @ssk.
 *
 * @ssk: the subflow socket whose receive queue is traversed
 * @skb: the skb in @ssk's receive queue to start the traversal from
 * @csum_reqd: whether checksum validation is required at all
 *
 * The data checksum is accumulated in subflow->map_data_csum, and the
 * number of bytes already accounted in subflow->map_csum_len, so that
 * repeated calls for the same mapping never checksum the same bytes twice.
 *
 * Returns:
 *  MAPPING_OK      - checksums not required, the mapping was already
 *                    validated, or the checksum is correct
 *  MAPPING_EMPTY   - the receive queue does not yet hold the full mapping
 *  MAPPING_INVALID - checksum failure on an MP_JOIN subflow
 *  MAPPING_DUMMY   - checksum failure on any other subflow
 */
static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
					      bool csum_reqd)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct csum_pseudo_header header;
	u32 offset, seq, delta;
	__wsum csum;
	int len;

	if (!csum_reqd)
		return MAPPING_OK;

	/* mapping already validated on previous traversal */
	if (subflow->map_csum_len == subflow->map_data_len)
		return MAPPING_OK;

	/* traverse the receive queue, ensuring it contains a full
	 * DSS mapping and accumulating the related csum.
	 * Preserve the accumulated csum across multiple calls, to compute
	 * the csum only once
	 */
	delta = subflow->map_data_len - subflow->map_csum_len;
	for (;;) {
		seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
		offset = seq - TCP_SKB_CB(skb)->seq;

		/* if the current skb has not been accounted yet, csum its contents
		 * up to the amount covered by the current DSS
		 */
		if (offset < skb->len) {
			len = min(skb->len - offset, delta);
			/* reuse the function-scope 'csum' as scratch: it is
			 * unconditionally overwritten after the loop
			 */
			csum = skb_checksum(skb, offset, len, 0);
			subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
								subflow->map_csum_len);

			delta -= len;
			subflow->map_csum_len += len;
		}
		if (delta == 0)
			break;

		if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
			/* if this subflow is closed, the partial mapping
			 * will be never completed; flush the pending skbs, so
			 * that subflow_sched_work_if_closed() can kick in
			 */
			if (unlikely(ssk->sk_state == TCP_CLOSE)) {
				while ((skb = skb_peek(&ssk->sk_receive_queue)))
					sk_eat_skb(ssk, skb);
			}

			/* not enough data to validate the csum */
			return MAPPING_EMPTY;
		}

		/* the DSS mapping for next skbs will be validated later,
		 * when a get_mapping_status call will process such skb
		 */
		skb = skb->next;
	}

	/* note that 'map_data_len' accounts only for the carried data, does
	 * not include the eventual seq increment due to the data fin,
	 * while the pseudo header requires the original DSS data len,
	 * including that
	 */
	header.data_seq = cpu_to_be64(subflow->map_seq);
	header.subflow_seq = htonl(subflow->map_subflow_seq);
	header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
	header.csum = 0;

	/* a non-zero folded sum over pseudo header + data means corruption */
	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
	if (unlikely(csum_fold(csum))) {
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
	}

	return MAPPING_OK;
}
static enum mapping_status get_mapping_status(struct sock *ssk, static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk) struct mptcp_sock *msk)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext; struct mptcp_ext *mpext;
struct sk_buff *skb; struct sk_buff *skb;
u16 data_len; u16 data_len;
...@@ -920,9 +1005,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, ...@@ -920,9 +1005,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* Allow replacing only with an identical map */ /* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq && if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq && subflow->map_subflow_seq == mpext->subflow_seq &&
subflow->map_data_len == data_len) { subflow->map_data_len == data_len &&
subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP); skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK; goto validate_csum;
} }
/* If this skb data are fully covered by the current mapping, /* If this skb data are fully covered by the current mapping,
...@@ -934,17 +1020,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk, ...@@ -934,17 +1020,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
} }
/* will validate the next map after consuming the current one */ /* will validate the next map after consuming the current one */
return MAPPING_OK; goto validate_csum;
} }
subflow->map_seq = map_seq; subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len; subflow->map_data_len = data_len;
subflow->map_valid = 1; subflow->map_valid = 1;
subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map; subflow->mpc_map = mpext->mpc_map;
pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", subflow->map_csum_reqd = mpext->csum_reqd;
subflow->map_csum_len = 0;
subflow->map_data_csum = csum_unfold(mpext->csum);
/* Cfr RFC 8684 Section 3.3.0 */
if (unlikely(subflow->map_csum_reqd != csum_reqd))
return MAPPING_INVALID;
pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq, subflow->map_seq, subflow->map_subflow_seq,
subflow->map_data_len); subflow->map_data_len, subflow->map_csum_reqd,
subflow->map_data_csum);
validate_seq: validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure /* we revalidate valid mapping on new skb, because we must ensure
...@@ -954,7 +1050,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, ...@@ -954,7 +1050,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_INVALID; return MAPPING_INVALID;
skb_ext_del(skb, SKB_EXT_MPTCP); skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
validate_csum:
return validate_data_csum(ssk, skb, csum_reqd);
} }
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
time_start=$(date +%s) time_start=$(date +%s)
optstring="S:R:d:e:l:r:h4cm:f:t" optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0 ret=0
sin="" sin=""
sout="" sout=""
...@@ -22,6 +22,7 @@ sndbuf=0 ...@@ -22,6 +22,7 @@ sndbuf=0
rcvbuf=0 rcvbuf=0
options_log=true options_log=true
do_tcp=0 do_tcp=0
checksum=false
filesize=0 filesize=0
if [ $tc_loss -eq 100 ];then if [ $tc_loss -eq 100 ];then
...@@ -47,6 +48,7 @@ usage() { ...@@ -47,6 +48,7 @@ usage() {
echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-m: test mode (poll, sendfile; default: poll)"
echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
echo -e "\t-C: enable the MPTCP data checksum"
} }
while getopts "$optstring" option;do while getopts "$optstring" option;do
...@@ -104,6 +106,9 @@ while getopts "$optstring" option;do ...@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
"t") "t")
do_tcp=$((do_tcp+1)) do_tcp=$((do_tcp+1))
;; ;;
"C")
checksum=true
;;
"?") "?")
usage $0 usage $0
exit 1 exit 1
...@@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2 ...@@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2
# use TCP syn cookies, even if no flooding was detected. # use TCP syn cookies, even if no flooding was detected.
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
if $checksum; then
for i in "$ns1" "$ns2" "$ns3" "$ns4";do
ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
done
fi
set_ethtool_flags() { set_ethtool_flags() {
local ns="$1" local ns="$1"
local dev="$2" local dev="$2"
......
...@@ -12,6 +12,7 @@ timeout_poll=30 ...@@ -12,6 +12,7 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1)) timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect="" mptcp_connect=""
capture=0 capture=0
checksum=0
do_all_tests=1 do_all_tests=1
TEST_COUNT=0 TEST_COUNT=0
...@@ -49,6 +50,9 @@ init() ...@@ -49,6 +50,9 @@ init()
ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
if [ $checksum -eq 1 ]; then
ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
fi
done done
# ns1 ns2 # ns1 ns2
...@@ -124,6 +128,17 @@ reset_with_add_addr_timeout() ...@@ -124,6 +128,17 @@ reset_with_add_addr_timeout()
-j DROP -j DROP
} }
# Reset the test environment, then set net.mptcp.checksum_enabled
# independently in each of the two namespaces.
#   $1: value written in $ns1
#   $2: value written in $ns2
reset_with_checksum()
{
	local csum_ns1=$1
	local csum_ns2=$2

	reset

	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$csum_ns1
	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$csum_ns2
}
ip -Version > /dev/null 2>&1 ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool" echo "SKIP: Could not run test without ip tool"
...@@ -476,6 +491,45 @@ run_tests() ...@@ -476,6 +491,45 @@ run_tests()
fi fi
} }
# Check that neither namespace recorded MPTCP data checksum errors.
#   $1: optional test description; when given, the line is prefixed
#       with the current $TEST_COUNT, otherwise with a blank pad.
# On any non-zero MPTcpExtDataCsumErr counter, sets ret=1 and dumps the
# MPTcp counters of both namespaces.
chk_csum_nr()
{
	local msg=${1:-""}
	local errs
	local show_stats

	if [ -n "$msg" ]; then
		printf "%02u" "$TEST_COUNT"
	else
		echo -n " "
	fi
	printf " %-36s %s" "$msg" "sum"

	# first namespace: result stays on the same output line
	errs=$(ip netns exec $ns1 nstat -as | awk '/MPTcpExtDataCsumErr/ {print $2}')
	errs=${errs:-0}
	if [ "$errs" = 0 ]; then
		echo -n "[ ok ]"
	else
		echo "[fail] got $errs data checksum error[s] expected 0"
		ret=1
		show_stats=1
	fi

	# second namespace: result terminates the output line
	echo -n " - csum "
	errs=$(ip netns exec $ns2 nstat -as | awk '/MPTcpExtDataCsumErr/ {print $2}')
	errs=${errs:-0}
	if [ "$errs" = 0 ]; then
		echo "[ ok ]"
	else
		echo "[fail] got $errs data checksum error[s] expected 0"
		ret=1
		show_stats=1
	fi

	if [ "${show_stats}" = 1 ]; then
		echo Server ns stats
		ip netns exec $ns1 nstat -as | grep MPTcp
		echo Client ns stats
		ip netns exec $ns2 nstat -as | grep MPTcp
	fi
}
chk_join_nr() chk_join_nr()
{ {
local msg="$1" local msg="$1"
...@@ -523,6 +577,9 @@ chk_join_nr() ...@@ -523,6 +577,9 @@ chk_join_nr()
echo Client ns stats echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp ip netns exec $ns2 nstat -as | grep MPTcp
fi fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
fi
} }
chk_add_nr() chk_add_nr()
...@@ -1374,6 +1431,37 @@ syncookies_tests() ...@@ -1374,6 +1431,37 @@ syncookies_tests()
chk_add_nr 1 1 chk_add_nr 1 1
} }
# Run one transfer for every combination of the checksum_enabled sysctl
# on the two namespaces and verify no data checksum error is reported.
checksum_tests()
{
	local csum_cfg

	# each entry is "<ns1 setting> <ns2 setting>"; it is expanded
	# unquoted below so that it splits into two arguments
	for csum_cfg in "0 0" "1 1" "0 1" "1 0"; do
		reset_with_checksum $csum_cfg
		ip netns exec $ns1 ./pm_nl_ctl limits 0 1
		ip netns exec $ns2 ./pm_nl_ctl limits 0 1
		run_tests $ns1 $ns2 10.0.1.1
		chk_csum_nr "checksum test $csum_cfg"
	done
}
all_tests() all_tests()
{ {
subflows_tests subflows_tests
...@@ -1387,6 +1475,7 @@ all_tests() ...@@ -1387,6 +1475,7 @@ all_tests()
backup_tests backup_tests
add_addr_ports_tests add_addr_ports_tests
syncookies_tests syncookies_tests
checksum_tests
} }
usage() usage()
...@@ -1403,7 +1492,9 @@ usage() ...@@ -1403,7 +1492,9 @@ usage()
echo " -b backup_tests" echo " -b backup_tests"
echo " -p add_addr_ports_tests" echo " -p add_addr_ports_tests"
echo " -k syncookies_tests" echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -c capture pcap files" echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help" echo " -h help"
} }
...@@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1 ...@@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1
trap cleanup EXIT trap cleanup EXIT
for arg in "$@"; do for arg in "$@"; do
# check for "capture" arg before launching tests # check for "capture/checksum" args before launching tests
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
capture=1 capture=1
fi fi
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
checksum=1
fi
# exception for the capture option, the rest means: a part of the tests # exception for the capture/checksum options, the rest means: a part of the tests
if [ "${arg}" != "-c" ]; then if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
do_all_tests=0 do_all_tests=0
fi fi
done done
...@@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then ...@@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret exit $ret
fi fi
while getopts 'fsltra64bpkch' opt; do while getopts 'fsltra64bpkchCS' opt; do
case $opt in case $opt in
f) f)
subflows_tests subflows_tests
...@@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do ...@@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do
k) k)
syncookies_tests syncookies_tests
;; ;;
S)
checksum_tests
;;
c) c)
;; ;;
C)
;;
h | *) h | *)
usage usage
;; ;;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment