Commit 86e39e04 authored by Paolo Abeni, committed by David S. Miller

mptcp: keep track of local endpoint still available for each msk

Include into the path manager status a bitmap tracking the list
of local endpoints still available - not yet used - for the
relevant mptcp socket.

Keep such map updated at endpoint creation/deletion time, so
that we can easily skip already used endpoints at local address
selection time.

The endpoint used by the initial subflow is lazily accounted at
subflow creation time: the usage bitmap is up to date before
endpoint selection and we avoid such unneeded task in some relevant
scenarios - e.g. busy servers accepting incoming subflows but
not creating any additional ones nor announcing additional addresses.

Overall this allows for fair local endpoints usage in case of
subflow failure.

As a side effect, this patch also enforces that each endpoint
is used at most once for each mptcp connection.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 71b077e4
...@@ -370,6 +370,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk) ...@@ -370,6 +370,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.accept_subflow, false); WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false); WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0; msk->pm.status = 0;
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
mptcp_pm_nl_data_init(msk); mptcp_pm_nl_data_init(msk);
} }
......
...@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry { ...@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
u8 retrans_times; u8 retrans_times;
}; };
/* max value of mptcp_addr_info.id */
#define MAX_ADDR_ID U8_MAX
#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
struct pm_nl_pernet { struct pm_nl_pernet {
/* protects pernet updates */ /* protects pernet updates */
spinlock_t lock; spinlock_t lock;
...@@ -53,14 +49,14 @@ struct pm_nl_pernet { ...@@ -53,14 +49,14 @@ struct pm_nl_pernet {
unsigned int local_addr_max; unsigned int local_addr_max;
unsigned int subflows_max; unsigned int subflows_max;
unsigned int next_id; unsigned int next_id;
unsigned long id_bitmap[BITMAP_SZ]; DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}; };
#define MPTCP_PM_ADDR_MAX 8 #define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3 #define ADD_ADDR_RETRANS_MAX 3
static bool addresses_equal(const struct mptcp_addr_info *a, static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port) const struct mptcp_addr_info *b, bool use_port)
{ {
bool addr_equals = false; bool addr_equals = false;
...@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet, ...@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue; continue;
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
if (entry->addr.family != sk->sk_family) { if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET && if ((entry->addr.family == AF_INET &&
...@@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet, ...@@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
continue; continue;
} }
/* avoid any address already in use by subflows and
* pending join
*/
if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
ret = entry; ret = entry;
break; break;
} }
}
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
} }
static struct mptcp_pm_addr_entry * static struct mptcp_pm_addr_entry *
select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
{ {
struct mptcp_pm_addr_entry *entry, *ret = NULL; struct mptcp_pm_addr_entry *entry, *ret = NULL;
int i = 0;
rcu_read_lock(); rcu_read_lock();
/* do not keep any additional per socket state, just signal /* do not keep any additional per socket state, just signal
...@@ -209,13 +202,15 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) ...@@ -209,13 +202,15 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced. * can lead to additional addresses not being announced.
*/ */
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue; continue;
if (i++ == pos) {
ret = entry; ret = entry;
break; break;
} }
}
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
} }
...@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); ...@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
static void check_work_pending(struct mptcp_sock *msk) static void check_work_pending(struct mptcp_sock *msk)
{ {
if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) && struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
(msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
msk->pm.subflows == mptcp_pm_get_subflows_max(msk))) if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1))
WRITE_ONCE(msk->pm.work_pending, false); WRITE_ONCE(msk->pm.work_pending, false);
} }
...@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm ...@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
return i; return i;
} }
static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (entry->addr.id == id)
return entry;
}
return NULL;
}
/* Find the id of the local endpoint matching @addr.
 *
 * The per-netns local address list is walked inside an RCU read-side
 * critical section, so the RCU-aware iterator is used for the traversal.
 * The port takes part in the comparison only when the endpoint itself has
 * a non-zero port (it is passed as the 'use_port' argument).
 *
 * Returns the endpoint id on match, -1 when no endpoint matches.
 */
static int
lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_addr_entry *entry;
	int ret = -1;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
			ret = entry->addr.id;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{ {
struct sock *sk = (struct sock *)msk; struct sock *sk = (struct sock *)msk;
...@@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) ...@@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
local_addr_max = mptcp_pm_get_local_addr_max(msk); local_addr_max = mptcp_pm_get_local_addr_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk);
/* do lazy endpoint usage accounting for the MPC subflows */
if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
	/* named mpc_addr so it does not shadow the function-level 'local'
	 * endpoint entry pointer
	 */
	struct mptcp_addr_info mpc_addr;
	int mpc_id;

	local_address((struct sock_common *)msk->first, &mpc_addr);
	mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
	/* lookup_id_by_addr() yields -1 when no endpoint matches: only a
	 * valid, non-negative id may be used as a bitmap index
	 */
	if (mpc_id >= 0)
		__clear_bit(mpc_id, msk->pm.id_avail_bitmap);

	/* persistent flag: this block is skipped on later invocations */
	msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
}
pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
msk->pm.local_addr_used, local_addr_max, msk->pm.local_addr_used, local_addr_max,
msk->pm.add_addr_signaled, add_addr_signal_max, msk->pm.add_addr_signaled, add_addr_signal_max,
...@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) ...@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */ /* check first for announce */
if (msk->pm.add_addr_signaled < add_addr_signal_max) { if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, local = select_signal_address(pernet, msk);
msk->pm.add_addr_signaled);
if (local) { if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) { if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++; msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false); mptcp_pm_announce_addr(msk, &local->addr, false);
mptcp_pm_nl_addr_send_ack(msk); mptcp_pm_nl_addr_send_ack(msk);
} }
} else {
/* pick failed, avoid fourther attempts later */
msk->pm.local_addr_used = add_addr_signal_max;
} }
check_work_pending(msk);
} }
/* check if should create a new subflow */ /* check if should create a new subflow */
...@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) ...@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
int i, nr; int i, nr;
msk->pm.local_addr_used++; msk->pm.local_addr_used++;
check_work_pending(msk);
nr = fill_remote_addresses_vec(msk, fullmesh, addrs); nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
if (nr)
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
spin_unlock_bh(&msk->pm.lock); spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++) for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]); __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock); spin_lock_bh(&msk->pm.lock);
return;
} }
/* lookup failed, avoid fourther attempts later */
msk->pm.local_addr_used = local_addr_max;
check_work_pending(msk);
} }
check_work_pending(msk);
} }
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
...@@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, ...@@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
msk->pm.subflows--; msk->pm.subflows--;
__MPTCP_INC_STATS(sock_net(sk), rm_type); __MPTCP_INC_STATS(sock_net(sk), rm_type);
} }
/* mark the id handled by this pass as available again; a constant index
 * would release the same slot on every pass.
 * NOTE(review): the enclosing loop is outside this excerpt - 'i' is assumed
 * to be its index over rm_list->ids[], confirm against the full function.
 */
__set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed) if (!removed)
continue; continue;
...@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) ...@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
msk_owned_by_me(msk); msk_owned_by_me(msk);
if (!(pm->status & MPTCP_PM_WORK_MASK))
return;
spin_lock_bh(&msk->pm.lock); spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status); pr_debug("msk=%p status=%x", msk, pm->status);
...@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, ...@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* to keep the code simple, don't do IDR-like allocation for address ID, /* to keep the code simple, don't do IDR-like allocation for address ID,
* just bail when we exceed limits * just bail when we exceed limits
*/ */
if (pernet->next_id == MAX_ADDR_ID) if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
pernet->next_id = 1; pernet->next_id = 1;
if (pernet->addrs >= MPTCP_PM_ADDR_MAX) if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
goto out; goto out;
...@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, ...@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
if (!entry->addr.id) { if (!entry->addr.id) {
find_next: find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap, entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MAX_ADDR_ID + 1, MPTCP_PM_MAX_ADDR_ID + 1,
pernet->next_id); pernet->next_id);
if (!entry->addr.id && pernet->next_id != 1) { if (!entry->addr.id && pernet->next_id != 1) {
pernet->next_id = 1; pernet->next_id = 1;
...@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) ...@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0; return 0;
} }
static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (entry->addr.id == id)
return entry;
}
return NULL;
}
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
u8 *flags, int *ifindex) u8 *flags, int *ifindex)
{ {
...@@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) ...@@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
list_splice_init(&pernet->local_addr_list, &free_list); list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet); __reset_counters(pernet);
pernet->next_id = 1; pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock); spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu(); synchronize_rcu();
...@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, ...@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
pernet = net_generic(net, pm_nl_pernet_id); pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock); spin_lock_bh(&pernet->lock);
for (i = id; i < MAX_ADDR_ID + 1; i++) { for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) { if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i); entry = __lookup_addr_by_id(pernet, i);
if (!entry) if (!entry)
......
...@@ -2488,7 +2488,6 @@ static void mptcp_worker(struct work_struct *work) ...@@ -2488,7 +2488,6 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk); mptcp_check_fastclose(msk);
if (msk->pm.status)
mptcp_pm_nl_work(msk); mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
......
...@@ -173,16 +173,25 @@ enum mptcp_pm_status { ...@@ -173,16 +173,25 @@ enum mptcp_pm_status {
MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_ADD_ADDR_SEND_ACK,
MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_RM_ADDR_RECEIVED,
MPTCP_PM_ESTABLISHED, MPTCP_PM_ESTABLISHED,
MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_SUBFLOW_ESTABLISHED, MPTCP_PM_SUBFLOW_ESTABLISHED,
MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
* accounted in id_avail_bitmap
*/
}; };
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
enum mptcp_addr_signal_status { enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO, MPTCP_ADD_ADDR_ECHO,
MPTCP_RM_ADDR_SIGNAL, MPTCP_RM_ADDR_SIGNAL,
}; };
/* max value of mptcp_addr_info.id */
#define MPTCP_PM_MAX_ADDR_ID U8_MAX
struct mptcp_pm_data { struct mptcp_pm_data {
struct mptcp_addr_info local; struct mptcp_addr_info local;
struct mptcp_addr_info remote; struct mptcp_addr_info remote;
...@@ -201,6 +210,7 @@ struct mptcp_pm_data { ...@@ -201,6 +210,7 @@ struct mptcp_pm_data {
u8 local_addr_used; u8 local_addr_used;
u8 subflows; u8 subflows;
u8 status; u8 status;
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_tx;
struct mptcp_rm_list rm_list_rx; struct mptcp_rm_list rm_list_rx;
}; };
......
...@@ -1071,7 +1071,10 @@ signal_address_tests() ...@@ -1071,7 +1071,10 @@ signal_address_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 10.0.1.1
chk_add_nr 4 4
# the server will not signal the address terminating
# the MPC subflow
chk_add_nr 3 3
} }
link_failure_tests() link_failure_tests()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment