Commit 6719331c authored by Paolo Abeni, committed by David S. Miller

mptcp: trigger msk processing even for OoO data

This is a prerequisite to allow receiving data from multiple
subflows without re-injection.

Instead of dropping the OoO ("future") data in
subflow_check_data_avail(), call into __mptcp_move_skbs()
and let the msk drop it.
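
For orientation, the subflow-level decision after this change boils down to a three-way comparison of the mapping's data sequence number against the msk-level ack sequence. A minimal, self-contained sketch (illustrative only; the helper and parameter names here are invented, the real logic lives in subflow_check_data_avail(), see the subflow.c hunks below):

#include <stdint.h>

enum data_avail { NODATA, DATA_AVAIL, OOO_DATA };

/* mirrors the kernel's after64(): true if seq1 is after seq2 */
static int after64(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)(seq1 - seq2) > 0;
}

/* msk_ack: MPTCP-level ack sequence; map_seq: DSN of the next data on
 * this subflow. In-sequence data is readable, "future" data is flagged
 * as out-of-order and left to the msk, old data (a spurious
 * retransmission) is discarded by the subflow and reported as no data.
 */
static enum data_avail classify_mapping(uint64_t msk_ack, uint64_t map_seq)
{
	if (map_seq == msk_ack)
		return DATA_AVAIL;
	if (after64(map_seq, msk_ack))
		return OOO_DATA;
	return NODATA;
}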

To avoid code duplication, factor out the
mptcp_subflow_discard_data() helper.
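
The helper ends up with two callers, condensed here from the hunks below: the subflow drops old, already-acked data itself, while the msk-level code drops the whole current map when it finds out-of-order data on a subflow:

	/* in subflow_check_data_avail(): old data, spurious retransmission */
	if (mptcp_subflow_discard_data(ssk, old_ack - ack_seq))
		goto fatal;
	return false;

	/* in __mptcp_move_skbs_from_subflow(): OoO data, drop the whole map */
	if (subflow->data_avail == MPTCP_SUBFLOW_OOO_DATA) {
		mptcp_subflow_discard_data(ssk, subflow->map_data_len);
		return false;
	}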

Note that __mptcp_move_skbs() can now find multiple subflows
with data available (including to-be-discarded data), so it
must update the byte counter incrementally.
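
As an illustration, consider a hypothetical caller loop (move_from_all_subflows() is a made-up name, not the actual __mptcp_move_skbs() body; locking and stop conditions are omitted). With several subflows feeding the same counter, each call must add to *bytes rather than overwrite it, or bytes moved from earlier subflows would be lost:

static unsigned int move_from_all_subflows(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	unsigned int total_bytes = 0;

	/* total_bytes is shared across subflows, so the callee must do
	 * "*bytes += moved" rather than "*bytes = moved".
	 */
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		__mptcp_move_skbs_from_subflow(msk, ssk, &total_bytes);
	}

	return total_bytes;
}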

v1 -> v2:
 - fix checkpatch issues (unsigned -> unsigned int)
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 47bebdf3
net/mptcp/protocol.c
@@ -167,7 +167,8 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
 		return true;
 
 	subflow->data_avail = 0;
-	return mptcp_subflow_data_available(ssk);
+	mptcp_subflow_data_available(ssk);
+	return subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
 }
 
 static void mptcp_check_data_fin_ack(struct sock *sk)
@@ -313,11 +314,18 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 	struct tcp_sock *tp;
 	bool done = false;
 
-	if (!mptcp_subflow_dsn_valid(msk, ssk)) {
-		*bytes = 0;
+	pr_debug("msk=%p ssk=%p data avail=%d valid=%d empty=%d",
+		 msk, ssk, subflow->data_avail,
+		 mptcp_subflow_dsn_valid(msk, ssk),
+		 !skb_peek(&ssk->sk_receive_queue));
+	if (subflow->data_avail == MPTCP_SUBFLOW_OOO_DATA) {
+		mptcp_subflow_discard_data(ssk, subflow->map_data_len);
 		return false;
 	}
 
+	if (!mptcp_subflow_dsn_valid(msk, ssk))
+		return false;
+
 	tp = tcp_sk(ssk);
 	do {
 		u32 map_remaining, offset;
@@ -376,7 +384,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 		}
 	} while (more_data_avail);
 
-	*bytes = moved;
+	*bytes += moved;
 
 	/* If the moves have caught up with the DATA_FIN sequence number
 	 * it's time to ack the DATA_FIN and change socket state, but
@@ -415,9 +423,17 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 {
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	bool wake;
 
-	set_bit(MPTCP_DATA_READY, &msk->flags);
+	/* move_skbs_to_msk below can legitimately clear the data_avail
+	 * flag, but we will need it later to properly wake the reader,
+	 * so cache its value.
+	 */
+	wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
+	if (wake)
+		set_bit(MPTCP_DATA_READY, &msk->flags);
 
 	if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) &&
 	    move_skbs_to_msk(msk, ssk))
@@ -438,7 +454,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 		move_skbs_to_msk(msk, ssk);
 	}
 wake:
-	sk->sk_data_ready(sk);
+	if (wake)
+		sk->sk_data_ready(sk);
 }
 
 static void __mptcp_flush_join_list(struct mptcp_sock *msk)
@@ -1281,6 +1298,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			set_bit(MPTCP_DATA_READY, &msk->flags);
 	}
 out_err:
+	pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
+		 msk, test_bit(MPTCP_DATA_READY, &msk->flags),
+		 skb_queue_empty(&sk->sk_receive_queue), copied);
 	mptcp_rcv_space_adjust(msk, copied);
 
 	release_sock(sk);
@@ -2308,6 +2328,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	sock_poll_wait(file, sock, wait);
 
 	state = inet_sk_state_load(sk);
+	pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
 	if (state == TCP_LISTEN)
 		return mptcp_check_readable(msk);
net/mptcp/protocol.h
@@ -268,6 +268,12 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
 	return (struct mptcp_subflow_request_sock *)rsk;
 }
 
+enum mptcp_data_avail {
+	MPTCP_SUBFLOW_NODATA,
+	MPTCP_SUBFLOW_DATA_AVAIL,
+	MPTCP_SUBFLOW_OOO_DATA
+};
+
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
 	struct	list_head node;/* conn_list of subflows */
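
The enum replaces the one-bit data_avail flag in struct mptcp_subflow_context (next hunk): a single bit can only say whether data is present, while the receive path now also needs to know whether that data is in sequence at the MPTCP level or out of order. Code that used to test the flag as a boolean now compares it against MPTCP_SUBFLOW_DATA_AVAIL, as in the mptcp_subflow_dsn_valid() hunk above.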
@@ -292,10 +298,10 @@ struct mptcp_subflow_context {
 		map_valid : 1,
 		mpc_map : 1,
 		backup : 1,
-		data_avail : 1,
 		rx_eof : 1,
 		use_64bit_ack : 1,  /* Set when we received a 64-bit DSN */
 		can_ack : 1;	    /* only after processing the remote a key */
+	enum mptcp_data_avail data_avail;
 	u32	remote_nonce;
 	u64	thmac;
 	u32	local_nonce;
@@ -347,6 +353,7 @@ int mptcp_is_enabled(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool mptcp_subflow_data_available(struct sock *sk);
+int mptcp_subflow_discard_data(struct sock *sk, unsigned int limit);
 void __init mptcp_subflow_init(void);
 
 /* called with sk socket lock held */
net/mptcp/subflow.c
@@ -816,6 +816,40 @@ static int subflow_read_actor(read_descriptor_t *desc,
 	return copy_len;
 }
 
+int mptcp_subflow_discard_data(struct sock *ssk, unsigned int limit)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	u32 map_remaining;
+	size_t delta;
+
+	map_remaining = subflow->map_data_len -
+			mptcp_subflow_get_map_offset(subflow);
+	delta = min_t(size_t, limit, map_remaining);
+
+	/* discard mapped data */
+	pr_debug("discarding %zu bytes, current map len=%d", delta,
+		 map_remaining);
+	if (delta) {
+		read_descriptor_t desc = {
+			.count = delta,
+		};
+		int ret;
+
+		ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
+		if (ret < 0) {
+			ssk->sk_err = -ret;
+			return ret;
+		}
+		if (ret < delta)
+			return 0;
+		if (delta == map_remaining) {
+			subflow->data_avail = 0;
+			subflow->map_valid = 0;
+		}
+	}
+	return 0;
+}
+
 static bool subflow_check_data_avail(struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
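
A note on the helper's return convention, as visible in the hunk above: it returns a negative error only when tcp_read_sock() fails (the subflow caller treats that as fatal) and 0 otherwise, including the partial-read case, where the mapping is left valid so the remaining bytes can be discarded on a later pass. Unlike the inline code it replaces, it also clears data_avail once the whole map has been consumed, which matters now that the msk side calls it for out-of-order data.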
@@ -832,8 +866,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
 	msk = mptcp_sk(subflow->conn);
 	for (;;) {
-		u32 map_remaining;
-		size_t delta;
 		u64 ack_seq;
 		u64 old_ack;
 
@@ -851,7 +883,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 			subflow->map_data_len = skb->len;
 			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
 						   subflow->ssn_offset;
-			subflow->data_avail = 1;
+			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
 			return true;
 		}
 
@@ -880,43 +912,19 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
 			 ack_seq);
 		if (ack_seq == old_ack) {
-			subflow->data_avail = 1;
+			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+			break;
+		} else if (after64(ack_seq, old_ack)) {
+			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
 			break;
 		}
 
 		/* only accept in-sequence mapping. Old values are spurious
-		 * retransmission; we can hit "future" values on active backup
-		 * subflow switch, we relay on retransmissions to get
-		 * in-sequence data.
-		 * Cuncurrent subflows support will require subflow data
-		 * reordering
+		 * retransmission
 		 */
-		map_remaining = subflow->map_data_len -
-				mptcp_subflow_get_map_offset(subflow);
-		if (before64(ack_seq, old_ack))
-			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
-		else
-			delta = min_t(size_t, ack_seq - old_ack, map_remaining);
-
-		/* discard mapped data */
-		pr_debug("discarding %zu bytes, current map len=%d", delta,
-			 map_remaining);
-		if (delta) {
-			read_descriptor_t desc = {
-				.count = delta,
-			};
-			int ret;
-
-			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
-			if (ret < 0) {
-				ssk->sk_err = -ret;
-				goto fatal;
-			}
-			if (ret < delta)
-				return false;
-			if (delta == map_remaining)
-				subflow->map_valid = 0;
-		}
+		if (mptcp_subflow_discard_data(ssk, old_ack - ack_seq))
+			goto fatal;
+		return false;
 	}
 
 	return true;