Commit bcd97734 authored by Paolo Abeni, committed by David S. Miller

mptcp: use delegate action to schedule 3rd ack retrans

Scheduling a delack in mptcp_established_options_mp() is
not a good idea: such function is called by tcp_send_ack() and
the pending delayed ack will be cleared shortly after by the
tcp_event_ack_sent() call in __tcp_transmit_skb().

Instead use the mptcp delegated action infrastructure to
schedule the delayed ack after the current bh processing completes.

Additionally moves the schedule_3rdack_retransmission() helper
into protocol.c to avoid making it visible in a different compilation
unit.

Fixes: ec3edaa7 ("mptcp: Add handling of outgoing MP_JOIN requests")
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ee50e67b
...@@ -422,29 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, ...@@ -422,29 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false; return false;
} }
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 * TCP can't schedule delack timer before the subflow is fully established.
 * MPTCP uses the delack timer to do 3rd ack retransmissions
 */
static void schedule_3rdack_retransmission(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long timeout;

	/* reschedule with a timeout above RTT, as we must look only for drop.
	 * NOTE(review): srtt_us appears to be the scaled smoothed RTT (TCP
	 * stores it << 3), so ">> (3 - 1)" yields ~2x the RTT — confirm the
	 * scaling against include/linux/tcp.h
	 */
	if (tp->srtt_us)
		timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
	else
		timeout = TCP_TIMEOUT_INIT;	/* no RTT sample yet: use the initial TCP timeout */
	timeout += jiffies;	/* convert the relative delay to an absolute deadline */

	/* reuse the TCP delack timer for the 3rd ack retransmit: mark an ack
	 * as scheduled/timed and arm the timer; the delack timer is expected
	 * to be idle at this point
	 */
	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
static void clear_3rdack_retransmission(struct sock *sk) static void clear_3rdack_retransmission(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
...@@ -527,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, ...@@ -527,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
*size = TCPOLEN_MPTCP_MPJ_ACK; *size = TCPOLEN_MPTCP_MPJ_ACK;
pr_debug("subflow=%p", subflow); pr_debug("subflow=%p", subflow);
schedule_3rdack_retransmission(sk); /* we can use the full delegate action helper only from BH context
* If we are in process context - sk is flushing the backlog at
* socket lock release time - just set the appropriate flag, will
* be handled by the release callback
*/
if (sock_owned_by_user(sk))
set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
else
mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
return true; return true;
} }
return false; return false;
......
...@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) ...@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
if (!xmit_ssk) if (!xmit_ssk)
goto out; goto out;
if (xmit_ssk != ssk) { if (xmit_ssk != ssk) {
mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
MPTCP_DELEGATE_SEND);
goto out; goto out;
} }
...@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) ...@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (xmit_ssk == ssk) if (xmit_ssk == ssk)
__mptcp_subflow_push_pending(sk, ssk); __mptcp_subflow_push_pending(sk, ssk);
else if (xmit_ssk) else if (xmit_ssk)
mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
} else { } else {
set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
} }
...@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk) ...@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_update_rmem(sk); __mptcp_update_rmem(sk);
} }
/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 * TCP can't schedule delack timer before the subflow is fully established.
 * MPTCP uses the delack timer to do 3rd ack retransmissions
 */
static void schedule_3rdack_retransmission(struct sock *ssk)
{
	struct inet_connection_sock *icsk = inet_csk(ssk);
	struct tcp_sock *tp = tcp_sk(ssk);
	unsigned long expires;

	/* nothing to retransmit once the subflow is fully established */
	if (mptcp_subflow_ctx(ssk)->fully_established)
		return;

	/* reschedule with a timeout above RTT, as we must look only for drop;
	 * with no RTT sample yet, fall back to the initial TCP timeout
	 */
	expires = tp->srtt_us ? usecs_to_jiffies(tp->srtt_us >> (3 - 1)) :
				TCP_TIMEOUT_INIT;
	expires += jiffies;

	/* arm the (expected idle) delack timer to carry the 3rd ack retransmit */
	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = expires;
	sk_reset_timer(ssk, &icsk->icsk_delack_timer, expires);
}
void mptcp_subflow_process_delegated(struct sock *ssk) void mptcp_subflow_process_delegated(struct sock *ssk)
{ {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn; struct sock *sk = subflow->conn;
if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
mptcp_data_lock(sk); mptcp_data_lock(sk);
if (!sock_owned_by_user(sk)) if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk); __mptcp_subflow_push_pending(sk, ssk);
else else
set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
mptcp_data_unlock(sk); mptcp_data_unlock(sk);
mptcp_subflow_delegated_done(subflow); mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
schedule_3rdack_retransmission(ssk);
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
}
} }
static int mptcp_hash(struct sock *sk) static int mptcp_hash(struct sock *sk)
......
...@@ -387,6 +387,7 @@ struct mptcp_delegated_action { ...@@ -387,6 +387,7 @@ struct mptcp_delegated_action {
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
#define MPTCP_DELEGATE_SEND 0 #define MPTCP_DELEGATE_SEND 0
#define MPTCP_DELEGATE_ACK 1
/* MPTCP subflow context */ /* MPTCP subflow context */
struct mptcp_subflow_context { struct mptcp_subflow_context {
...@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, ...@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
void mptcp_subflow_process_delegated(struct sock *ssk); void mptcp_subflow_process_delegated(struct sock *ssk);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{ {
struct mptcp_delegated_action *delegated; struct mptcp_delegated_action *delegated;
bool schedule; bool schedule;
/* the caller held the subflow bh socket lock */
lockdep_assert_in_softirq();
/* The implied barrier pairs with mptcp_subflow_delegated_done(), and /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
* ensures the below list check sees list updates done prior to status * ensures the below list check sees list updates done prior to status
* bit changes * bit changes
*/ */
if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) { if (!test_and_set_bit(action, &subflow->delegated_status)) {
/* still on delegated list from previous scheduling */ /* still on delegated list from previous scheduling */
if (!list_empty(&subflow->delegated_node)) if (!list_empty(&subflow->delegated_node))
return; return;
/* the caller held the subflow bh socket lock */
lockdep_assert_in_softirq();
delegated = this_cpu_ptr(&mptcp_delegated_actions); delegated = this_cpu_ptr(&mptcp_delegated_actions);
schedule = list_empty(&delegated->head); schedule = list_empty(&delegated->head);
list_add_tail(&subflow->delegated_node, &delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head);
...@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) ...@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
{ {
return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); return !!READ_ONCE(subflow->delegated_status);
} }
static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
{ {
/* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
* touching the status bit * touching the status bit
*/ */
smp_wmb(); smp_wmb();
clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); clear_bit(action, &subflow->delegated_status);
} }
int mptcp_is_enabled(const struct net *net); int mptcp_is_enabled(const struct net *net);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment