Commit 0d76fc7e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'mptcp-pm-fix-ids-not-being-reusable'

Matthieu Baerts says:

====================
mptcp: pm: fix IDs not being reusable

Here are more fixes for the MPTCP in-kernel path-manager. In this
series, the fixes are around the endpoint IDs not being reusable for
on-going connections when re-creating endpoints with previously used IDs.

- Patch 1 fixes this case for endpoints being used to send ADD_ADDR.
  Patch 2 validates this fix. The issue is present since v5.10.

- Patch 3 fixes this case for endpoints being used to establish new
  subflows. Patch 4 validates this fix. The issue is present since v5.10.

- Patch 5 fixes this case when all endpoints are flushed. Patch 6
  validates this fix. The issue is present since v5.13.

- Patch 7 removes a helper that is confusing, and introduced in v5.10.
  It helps simplifying the next patches.

- Patch 8 makes sure a 'subflow' counter is only decremented when
  removing a 'subflow' endpoint. Can be backported up to v5.13.

- Patch 9 is similar, but for a 'signal' counter. Can be backported up
  to v5.10.

- Patch 10 checks the last max accepted ADD_ADDR limit before accepting
  new ADD_ADDR. For v5.10 as well.

- Patch 11 removes a wrong restriction for the userspace PM, added
  during a refactoring in v6.5.

- Patch 12 makes sure the fullmesh mode sets the ID 0 when a new subflow
  using the source address of the initial subflow is created. Patch 13
  covers this case. This issue is present since v5.15.

- Patch 14 avoid possible UaF when selecting an address from the
  endpoints list.
====================

Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-0-38035d40de5b@kernel.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents c07ff859 48e50dcb
......@@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
return 0;
}
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, rm_list);
spin_unlock_bh(&msk->pm.lock);
return 0;
}
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
......@@ -444,9 +434,6 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id
*flags = 0;
*ifindex = 0;
if (!id)
return 0;
if (mptcp_pm_is_userspace(msk))
return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
......
......@@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
return false;
}
static struct mptcp_pm_addr_entry *
static bool
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk)
const struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *new_entry)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
struct mptcp_pm_addr_entry *entry;
bool found = false;
msk_owned_by_me(msk);
......@@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
ret = entry;
*new_entry = *entry;
found = true;
break;
}
rcu_read_unlock();
return ret;
return found;
}
static struct mptcp_pm_addr_entry *
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *new_entry)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
struct mptcp_pm_addr_entry *entry;
bool found = false;
rcu_read_lock();
/* do not keep any additional per socket state, just signal
......@@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
ret = entry;
*new_entry = *entry;
found = true;
break;
}
rcu_read_unlock();
return ret;
return found;
}
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
......@@ -512,9 +520,10 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry local;
unsigned int add_addr_signal_max;
bool signal_and_subflow = false;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
......@@ -565,23 +574,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
return;
local = select_signal_address(pernet, msk);
if (!local)
if (!select_signal_address(pernet, msk, &local))
goto subflow;
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
return;
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_announce_addr(msk, &local.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
signal_and_subflow = local;
if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
signal_and_subflow = true;
}
subflow:
......@@ -592,26 +600,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
bool fullmesh;
int i, nr;
if (signal_and_subflow) {
local = signal_and_subflow;
signal_and_subflow = NULL;
} else {
local = select_local_address(pernet, msk);
if (!local)
if (signal_and_subflow)
signal_and_subflow = false;
else if (!select_local_address(pernet, msk, &local))
break;
}
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
msk->pm.local_addr_used++;
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs);
if (nr == 0)
continue;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
__mptcp_subflow_connect(sk, &local.addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
......@@ -636,6 +640,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
struct mptcp_addr_info mpc_addr;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
......@@ -643,6 +648,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
mptcp_local_address((struct sock_common *)msk, &mpc_addr);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
......@@ -653,7 +660,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
addrs[i++] = entry->addr;
addrs[i] = entry->addr;
/* Special case for ID0: set the correct ID */
if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port))
addrs[i].id = 0;
i++;
}
}
rcu_read_unlock();
......@@ -829,25 +842,27 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
removed = true;
removed |= subflow->request_join;
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
else if (rm_type == MPTCP_MIB_RMADDR)
if (rm_type == MPTCP_MIB_RMADDR)
__MPTCP_INC_STATS(sock_net(sk), rm_type);
if (!removed)
continue;
if (!mptcp_pm_is_kernel(msk))
continue;
if (rm_type == MPTCP_MIB_RMADDR) {
msk->pm.add_addr_accepted--;
if (rm_type == MPTCP_MIB_RMADDR && rm_id &&
!WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
/* Note: if the subflow has been closed before, this
* add_addr_accepted counter will not be decremented.
*/
if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk))
WRITE_ONCE(msk->pm.accept_addr, true);
} else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
msk->pm.local_addr_used--;
}
}
}
......@@ -857,7 +872,7 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
}
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list)
{
mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
......@@ -1393,6 +1408,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
/* No entries with ID 0 */
if (id == 0)
return 0;
rcu_read_lock();
entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
......@@ -1431,13 +1450,24 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
ret = remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_signaled -= ret;
if (ret) {
__set_bit(addr->id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled--;
}
mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
return ret;
}
static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
{
/* If it was marked as used, and not ID 0, decrement local_addr_used */
if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
msk->pm.local_addr_used--;
}
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
const struct mptcp_pm_addr_entry *entry)
{
......@@ -1466,8 +1496,19 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
!(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
if (remove_subflow)
mptcp_pm_remove_subflow(msk, &list);
if (remove_subflow) {
spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
spin_lock_bh(&msk->pm.lock);
__mark_subflow_endp_available(msk, list.ids[0]);
spin_unlock_bh(&msk->pm.lock);
}
release_sock(sk);
next:
......@@ -1502,6 +1543,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
spin_lock_bh(&msk->pm.lock);
mptcp_pm_remove_addr(msk, &list);
mptcp_pm_nl_rm_subflow_received(msk, &list);
__mark_subflow_endp_available(msk, 0);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
......@@ -1605,14 +1647,17 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
alist.ids[alist.nr++] = entry->addr.id;
}
if (alist.nr) {
spin_lock_bh(&msk->pm.lock);
if (alist.nr) {
msk->pm.add_addr_signaled -= alist.nr;
mptcp_pm_remove_addr(msk, &alist);
spin_unlock_bh(&msk->pm.lock);
}
if (slist.nr)
mptcp_pm_remove_subflow(msk, &slist);
mptcp_pm_nl_rm_subflow_received(msk, &slist);
/* Reset counters: maybe some subflows have been removed before */
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
msk->pm.local_addr_used = 0;
spin_unlock_bh(&msk->pm.lock);
}
static void mptcp_nl_remove_addrs_list(struct net *net,
......@@ -1900,6 +1945,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
__mark_subflow_endp_available(msk, list.ids[0]);
mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
}
......
......@@ -1026,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
......@@ -1133,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
......
......@@ -436,9 +436,10 @@ reset_with_tcp_filter()
local ns="${!1}"
local src="${2}"
local target="${3}"
local chain="${4:-INPUT}"
if ! ip netns exec "${ns}" ${iptables} \
-A INPUT \
-A "${chain}" \
-s "${src}" \
-p tcp \
-j "${target}"; then
......@@ -3058,6 +3059,7 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
......@@ -3571,10 +3573,10 @@ endpoint_tests()
mptcp_lib_kill_wait $tests_pid
fi
if reset "delete and re-add" &&
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
test_linkfail=4 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 &
......@@ -3591,19 +3593,37 @@ endpoint_tests()
chk_subflow_nr "after delete" 1
chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
wait_mpj $ns2
chk_subflow_nr "after re-add" 2
chk_mptcp_info subflows 1 subflows 1
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
wait_attempt_fail $ns2
chk_subflow_nr "after new reject" 2
chk_mptcp_info subflows 1 subflows 1
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_del_endpoint $ns2 3 10.0.3.2
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
wait_mpj $ns2
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
mptcp_lib_kill_wait $tests_pid
chk_join_nr 3 3 3
chk_rm_nr 1 1
fi
# remove and re-add
if reset "delete re-add signal" &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
test_linkfail=4 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
......@@ -3615,17 +3635,53 @@ endpoint_tests()
chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns1 1 10.0.2.1
pm_nl_del_endpoint $ns1 2 224.0.0.1
sleep 0.5
chk_subflow_nr "after delete" 1
chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-add" 2
chk_mptcp_info subflows 1 subflows 1
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
mptcp_lib_kill_wait $tests_pid
chk_join_nr 3 3 3
chk_add_nr 4 4
chk_rm_nr 2 1 invert
fi
# flush and re-add
if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 1 2
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
test_linkfail=4 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_attempt_fail $ns2
chk_subflow_nr "before flush" 1
chk_mptcp_info subflows 0 subflows 0
pm_nl_flush_endpoint $ns2
pm_nl_flush_endpoint $ns1
wait_rm_addr $ns2 0
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
wait_mpj $ns2
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
mptcp_lib_kill_wait $tests_pid
chk_join_nr 2 2 2
chk_add_nr 2 2
chk_rm_nr 1 0 invert
fi
}
# [$1: error message]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment