Commit 6850ec97 authored by Jakub Kicinski

Merge branch 'mptcp-fixes-for-5-13'

Mat Martineau says:

====================
mptcp: Fixes for 5.13

These patches address two issues in MPTCP.

Patch 1 fixes a locking issue affecting MPTCP-level retransmissions.

Patches 2-4 improve handling of out-of-order packet arrival early
in a connection, so it falls back to TCP rather than forcing a
reset. Includes a selftest.
====================

Link: https://lore.kernel.org/r/20210527233140.182728-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 44991d61 69ca3d29
...@@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk) ...@@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
{ {
struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sock *msk = mptcp_sk(sk);
#ifdef CONFIG_LOCKDEP
WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
#endif
if (!msk->wmem_reserved) if (!msk->wmem_reserved)
return; return;
...@@ -1085,10 +1089,20 @@ static void __mptcp_clean_una(struct sock *sk) ...@@ -1085,10 +1089,20 @@ static void __mptcp_clean_una(struct sock *sk)
static void __mptcp_clean_una_wakeup(struct sock *sk) static void __mptcp_clean_una_wakeup(struct sock *sk)
{ {
#ifdef CONFIG_LOCKDEP
WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
#endif
__mptcp_clean_una(sk); __mptcp_clean_una(sk);
mptcp_write_space(sk); mptcp_write_space(sk);
} }
/* Locked wrapper around __mptcp_clean_una_wakeup(): takes the msk data
 * lock around the cleanup so it can be called from contexts that do not
 * already hold it (used by __mptcp_retrans() in this change, replacing
 * a direct, unlocked call to __mptcp_clean_una_wakeup()).
 */
static void mptcp_clean_una_wakeup(struct sock *sk)
{
mptcp_data_lock(sk);
__mptcp_clean_una_wakeup(sk);
mptcp_data_unlock(sk);
}
static void mptcp_enter_memory_pressure(struct sock *sk) static void mptcp_enter_memory_pressure(struct sock *sk)
{ {
struct mptcp_subflow_context *subflow; struct mptcp_subflow_context *subflow;
...@@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk) ...@@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk)
struct sock *ssk; struct sock *ssk;
int ret; int ret;
__mptcp_clean_una_wakeup(sk); mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk); dfrag = mptcp_rtx_head(sk);
if (!dfrag) { if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) { if (mptcp_data_fin_enabled(msk)) {
......
...@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ...@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* if the sk is MP_CAPABLE, we try to fetch the client key */ /* if the sk is MP_CAPABLE, we try to fetch the client key */
if (subflow_req->mp_capable) { if (subflow_req->mp_capable) {
if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { /* we can receive and accept an in-window, out-of-order pkt,
/* here we can receive and accept an in-window, * which may not carry the MP_CAPABLE opt even on mptcp enabled
* out-of-order pkt, which will not carry the MP_CAPABLE * paths: always try to extract the peer key, and fallback
* opt even on mptcp enabled paths * for packets missing it.
* Even OoO DSS packets coming legitly after dropped or
* reordered MPC will cause fallback, but we don't have other
* options.
*/ */
goto create_msk;
}
mptcp_get_options(skb, &mp_opt); mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_capable) { if (!mp_opt.mp_capable) {
fallback = true; fallback = true;
goto create_child; goto create_child;
} }
create_msk:
new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk) if (!new_msk)
fallback = true; fallback = true;
...@@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk); status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
if (status == MAPPING_INVALID) { if (unlikely(status == MAPPING_INVALID))
ssk->sk_err = EBADMSG; goto fallback;
goto fatal;
} if (unlikely(status == MAPPING_DUMMY))
if (status == MAPPING_DUMMY) { goto fallback;
__mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
subflow->ssn_offset;
subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
return true;
}
if (status != MAPPING_OK) if (status != MAPPING_OK)
goto no_data; goto no_data;
...@@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
* MP_CAPABLE-based mapping * MP_CAPABLE-based mapping
*/ */
if (unlikely(!READ_ONCE(msk->can_ack))) { if (unlikely(!READ_ONCE(msk->can_ack))) {
if (!subflow->mpc_map) { if (!subflow->mpc_map)
ssk->sk_err = EBADMSG; goto fallback;
goto fatal;
}
WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->map_seq); WRITE_ONCE(msk->ack_seq, subflow->map_seq);
WRITE_ONCE(msk->can_ack, true); WRITE_ONCE(msk->can_ack, true);
...@@ -1070,10 +1057,14 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1070,10 +1057,14 @@ static bool subflow_check_data_avail(struct sock *ssk)
no_data: no_data:
subflow_sched_work_if_closed(msk, ssk); subflow_sched_work_if_closed(msk, ssk);
return false; return false;
fatal:
/* fatal protocol error, close the socket */ fallback:
/* This barrier is coupled with smp_rmb() in tcp_poll() */ /* RFC 8684 section 3.7. */
smp_wmb(); if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
ssk->sk_err = EBADMSG;
ssk->sk_error_report(ssk); ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE); tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0; subflow->reset_transient = 0;
...@@ -1081,6 +1072,16 @@ static bool subflow_check_data_avail(struct sock *ssk) ...@@ -1081,6 +1072,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_send_active_reset(ssk, GFP_ATOMIC);
subflow->data_avail = 0; subflow->data_avail = 0;
return false; return false;
}
__mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
return true;
} }
bool mptcp_subflow_data_available(struct sock *sk) bool mptcp_subflow_data_available(struct sock *sk)
......
...@@ -501,6 +501,7 @@ do_transfer() ...@@ -501,6 +501,7 @@ do_transfer()
local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l)) expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l)) expect_ackrx=$((stat_ackrx_last_l))
...@@ -518,10 +519,14 @@ do_transfer() ...@@ -518,10 +519,14 @@ do_transfer()
"${stat_synrx_now_l}" "${expect_synrx}" 1>&2 "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
retc=1 retc=1
fi fi
if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
rets=1 rets=1
else
printf "[ Note ] fallback due to TCP OoO"
fi
fi fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment