Commit ed7f9c01 authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-fixes'

Matthieu Baerts says:

====================
mptcp: fixes around listening sockets and the MPTCP worker

Christoph Paasch reported a couple of issues found by syzkaller and
linked to operations done by the MPTCP worker on (un)accepted sockets.

Fixing these issues was not obvious and rather complex but Paolo Abeni
nicely managed to propose these excellent patches that seem to satisfy
syzkaller.

Patch 1 partially reverts a recent fix but while still providing a
solution for the previous issue, it also prevents the MPTCP worker from
running concurrently with inet_csk_listen_stop(). A warning is then
avoided. The partially reverted patch has been introduced in v6.3-rc3,
backported up to v6.1 and fixing an issue visible from v5.18.

Patch 2 prevents the MPTCP worker to race with mptcp_accept() causing a
UaF when a fallback to TCP is done while in parallel, the socket is
being accepted by the userspace. This is also a fix of a previous fix
introduced in v6.3-rc3, backported up to v6.1 but here fixing an issue
that is in theory there from v5.7. There is no need to backport it up
to here as it looks like it is only visible later, around v5.18, see the
previous cover-letter linked to this original fix.
====================
Signed-off-by: default avatarMatthieu Baerts <matthieu.baerts@tessares.net>
parents 4e006c7a 63740448
......@@ -2315,7 +2315,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
bool need_push, dispose_it;
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
* to an incoming reset, mptcp either:
* - if either the subflow or the msk are dead, destroy the context
* (the subflow socket is deleted by inet_child_forget) and the msk
* - otherwise do nothing at the moment and take action at accept and/or
* listener shutdown - user-space must be able to accept() the closed
* socket.
*/
if (msk->in_accept_queue && msk->first == ssk) {
if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
return;
/* ensure later check in mptcp_worker() will dispose the msk */
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
}
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
......@@ -2351,28 +2370,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
} else if (msk->in_accept_queue && msk->first == ssk) {
/* if the first subflow moved to a close state, e.g. due to
* incoming reset and we reach here before inet_child_forget()
* the TCP stack could later try to close it via
* inet_csk_listen_stop(), or deliver it to the user space via
* accept().
* We can't delete the subflow - or risk a double free - nor let
* the msk survive - or will be leaked in the non accept scenario:
* fallback and let TCP cope with the subflow cleanup.
*/
WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN)
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
out_release:
release_sock(ssk);
sock_put(ssk);
......@@ -2427,21 +2440,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
/* if the MPC subflow has been closed before the msk is accepted,
* msk will never be accept-ed, close it now
*/
if (!msk->first && msk->in_accept_queue) {
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
}
}
static bool mptcp_check_close_timeout(const struct sock *sk)
static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
if (delta >= TCP_TIMEWAIT_LEN)
if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
......@@ -2649,7 +2655,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_check_close_timeout(sk)) {
if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
......@@ -2895,6 +2901,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
......@@ -3733,6 +3747,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
if (msk->first &&
unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_empty(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
release_sock(newsk);
}
......
......@@ -629,10 +629,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
......
......@@ -723,9 +723,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
if (!ctx)
return;
list_del(&mptcp_subflow_ctx(ssk)->node);
if (inet_csk(ssk)->icsk_ulp_ops) {
subflow_ulp_fallback(ssk, ctx);
if (ctx->conn)
sock_put(ctx->conn);
}
kfree_rcu(ctx, rcu);
}
......@@ -1819,6 +1822,77 @@ static void subflow_state_change(struct sock *sk)
}
}
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
struct request_sock *req;
struct sock *sk;
/* build a list of all unaccepted mptcp sockets */
spin_lock_bh(&queue->rskq_lock);
for (req = queue->rskq_accept_head; req; req = req->dl_next) {
struct mptcp_subflow_context *subflow;
struct sock *ssk = req->sk;
if (!sk_is_mptcp(ssk))
continue;
subflow = mptcp_subflow_ctx(ssk);
if (!subflow || !subflow->conn)
continue;
/* skip if already in list */
sk = subflow->conn;
msk = mptcp_sk(sk);
if (msk->dl_next || msk == head)
continue;
sock_hold(sk);
msk->dl_next = head;
head = msk;
}
spin_unlock_bh(&queue->rskq_lock);
if (!head)
return;
/* can't acquire the msk socket lock under the subflow one,
* or will cause ABBA deadlock
*/
release_sock(listener_ssk);
for (msk = head; msk; msk = next) {
sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
msk->dl_next = NULL;
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
/* lockdep will report a false positive ABBA deadlock
* between cancel_work_sync and the listener socket.
* The involved locks belong to different sockets WRT
* the existing AB chain.
* Using a per socket key is problematic as key
* deregistration requires process context and must be
* performed at socket disposal time, in atomic
* context.
* Just tell lockdep to consider the listener socket
* released here.
*/
mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
sock_put(sk);
}
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
}
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment