Commit e782f5ba authored by David S. Miller's avatar David S. Miller

Merge branch 'mptcp-next'

Mat Martineau says:

====================
mptcp: Miscellaneous changes for 5.19

Four separate groups of patches here:

Patch 1 optimizes flag checking when releasing mptcp socket locks.

Patches 2 and 3 update the packet scheduler when subflow priorities
change.

Patch 4 adds some pernet helper functions for MPTCP.

Patches 5-8 add diag support for MPTCP listeners, including a selftest.

====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4696ad36 f2ae0fa6
......@@ -66,20 +66,103 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
return err;
}
struct mptcp_diag_ctx {
long s_slot;
long s_num;
unsigned int l_slot;
unsigned int l_num;
};
static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
bool net_admin)
{
struct inet_diag_dump_data *cb_data = cb->data;
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct nlattr *bc = cb_data->inet_diag_nla_bc;
struct net *net = sock_net(skb->sk);
int i;
for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) {
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
struct sock *sk;
int num = 0;
ilb = &tcp_hashinfo.listening_hash[i];
rcu_read_lock();
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
struct inet_sock *inet = inet_sk(sk);
int ret;
if (num < diag_ctx->l_num)
goto next_listen;
if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
goto next_listen;
sk = ctx->conn;
if (!sk || !net_eq(sock_net(sk), net))
goto next_listen;
if (r->sdiag_family != AF_UNSPEC &&
sk->sk_family != r->sdiag_family)
goto next_listen;
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_listen;
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto next_listen;
ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
sock_put(sk);
if (ret < 0) {
spin_unlock(&ilb->lock);
rcu_read_unlock();
diag_ctx->l_slot = i;
diag_ctx->l_num = num;
return;
}
diag_ctx->l_num = num + 1;
num = 0;
next_listen:
++num;
}
spin_unlock(&ilb->lock);
rcu_read_unlock();
cond_resched();
diag_ctx->l_num = 0;
}
diag_ctx->l_num = 0;
diag_ctx->l_slot = i;
}
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
struct nlattr *bc;
BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
cb_data = cb->data;
bc = cb_data->inet_diag_nla_bc;
while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
NULL) {
while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
&diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
struct sock *sk = (struct sock *)msk;
int ret = 0;
......@@ -101,11 +184,14 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
sock_put(sk);
if (ret < 0) {
/* will retry on the same position */
cb->args[1]--;
diag_ctx->s_num--;
break;
}
cond_resched();
}
if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
......@@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
if (inet_sk_state_load(sk) == TCP_LISTEN) {
struct sock *lsk = READ_ONCE(msk->first);
if (lsk) {
/* override with settings from tcp listener,
* so Send-Q will show accept queue.
*/
r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
}
}
if (!info)
return;
......
......@@ -262,14 +262,25 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
struct mptcp_sock *msk;
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
msk = mptcp_sk(sk);
if (subflow->backup != bkup) {
subflow->backup = bkup;
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
msk->last_snd = NULL;
else
__set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
mptcp_data_unlock(sk);
}
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
......
......@@ -55,6 +55,17 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
{
return net_generic(net, pm_nl_pernet_id);
}
static struct pm_nl_pernet *
pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
{
return pm_nl_get_pernet(sock_net((struct sock *)msk));
}
static bool addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port)
{
......@@ -206,43 +217,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
const struct pm_nl_pernet *pernet;
const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet;
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
......@@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
pernet = pm_nl_get_pernet(sock_net(sk));
add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
local_addr_max = mptcp_pm_get_local_addr_max(msk);
......@@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
unsigned int subflows_max;
int i = 0;
pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
......@@ -727,6 +734,8 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
if (!addresses_equal(&local, addr, addr->port))
continue;
if (subflow->backup != bkup)
msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
......@@ -1021,7 +1030,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (addresses_equal(&msk_local, &skc_local, false))
return 0;
pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
......@@ -1212,7 +1221,7 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
return net_generic(genl_info_net(info), pm_nl_pernet_id);
return pm_nl_get_pernet(genl_info_net(info));
}
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
......@@ -1306,7 +1315,7 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
if (id) {
rcu_read_lock();
entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
*flags = entry->flags;
*ifindex = entry->ifindex;
......@@ -1653,7 +1662,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
void *hdr;
int i;
pernet = net_generic(net, pm_nl_pernet_id);
pernet = pm_nl_get_pernet(net);
spin_lock_bh(&pernet->lock);
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
......@@ -2165,7 +2174,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = {
static int __net_init pm_nl_init_net(struct net *net)
{
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
......@@ -2187,7 +2196,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;
list_for_each_entry(net, net_list, exit_list) {
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
/* net is removed from namespace list, can't race with
* other modifiers, also netns core already waited for a
......
......@@ -3092,15 +3092,19 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
if (unlikely(&msk->cb_flags)) {
/* be sure to set the current sk state before tacking actions
* depending on sk_state
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
*/
if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
msk->last_snd = NULL;
}
__mptcp_update_rmem(sk);
}
......
......@@ -124,6 +124,7 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
#define MPTCP_RESET_SCHEDULER 7
static inline bool before64(__u64 seq1, __u64 seq2)
{
......
......@@ -853,15 +853,11 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
struct sock *sk = &msk->sk.icsk_inet.sk;
u32 flags = 0;
bool slow;
u8 val;
memset(info, 0, sizeof(*info));
slow = lock_sock_fast(sk);
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
......@@ -882,8 +878,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
......
......@@ -71,6 +71,43 @@ chk_msk_remote_key_nr()
__chk_nr "grep -c remote_key" $*
}
__chk_listen()
{
local filter="$1"
local expected=$2
shift 2
msg=$*
nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
printf "%-50s" "$msg"
if [ $nr != $expected ]; then
echo "[ fail ] expected $expected found $nr"
ret=$test_cnt
else
echo "[ ok ]"
fi
}
chk_msk_listen()
{
lport=$1
local msg="check for listen socket"
# destination port search should always return empty list
__chk_listen "dport $lport" 0 "listen match for dport $lport"
# should return 'our' mptcp listen socket
__chk_listen "sport $lport" 1 "listen match for sport $lport"
__chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
__chk_listen "" 1 "all listen sockets"
nr=$(ss -Ml $filter | wc -l)
}
# $1: ns, $2: port
wait_local_port_listen()
{
......@@ -113,6 +150,7 @@ echo "a" | \
0.0.0.0 >/dev/null &
wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
chk_msk_listen 10000
echo "b" | \
timeout ${timeout_test} \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment