Commit 2303f994, authored by Peter Krystad, committed by David S. Miller

mptcp: Associate MPTCP context with TCP socket

Use ULP to associate a subflow_context structure with each TCP subflow
socket. Creating these sockets requires new bind and connect functions
to make sure ULP is set up immediately when the subflow sockets are
created.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Co-developed-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent eda7acdd
...@@ -397,6 +397,9 @@ struct tcp_sock { ...@@ -397,6 +397,9 @@ struct tcp_sock {
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
* while socket was owned by user. * while socket was owned by user.
*/ */
#if IS_ENABLED(CONFIG_MPTCP)
bool is_mptcp;
#endif
#ifdef CONFIG_TCP_MD5SIG #ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */ /* TCP AF-Specific parts; only used by MD5 Signature support so far */
......
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MPTCP) += mptcp.o obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o options.o mptcp-y := protocol.o subflow.o options.o
...@@ -17,6 +17,53 @@ ...@@ -17,6 +17,53 @@
#include <net/mptcp.h> #include <net/mptcp.h>
#include "protocol.h" #include "protocol.h"
#define MPTCP_SAME_STATE TCP_MAX_STATES
/* Return the initial subflow socket recorded in @msk, or NULL when the
 * msk has none. The intent is to hand it out only while the MP_CAPABLE
 * handshake has not completed (or has failed); at this point no handshake
 * state is inspected, only msk->subflow.
 */
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
{
	struct socket *ssock = msk->subflow;

	return ssock ? ssock : NULL;
}
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
{
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
}
/* Return the initial subflow socket of @msk, creating it on first use.
 *
 * @msk:   MPTCP socket owning the subflow
 * @state: state to store on the MPTCP socket once a subflow socket is
 *         available, or MPTCP_SAME_STATE to leave the state untouched
 *
 * Returns the subflow socket, ERR_PTR(-EINVAL) when no subflow exists and
 * the msk is not in a state that allows creating one, or the ERR_PTR()
 * encoded error from mptcp_subflow_create_socket().
 */
static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock = __mptcp_nmpc_socket(msk);

	if (!ssock) {
		int err;

		if (!__mptcp_can_create_subflow(msk))
			return ERR_PTR(-EINVAL);

		err = mptcp_subflow_create_socket(sk, &ssock);
		if (err)
			return ERR_PTR(err);

		/* remember the new subflow and ask it to send MP_CAPABLE */
		msk->subflow = ssock;
		mptcp_subflow_ctx(ssock->sk)->request_mptcp = 1;
	}

	if (state != MPTCP_SAME_STATE)
		inet_sk_state_store(sk, state);

	return ssock;
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{ {
struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sock *msk = mptcp_sk(sk);
...@@ -48,12 +95,14 @@ static int mptcp_init_sock(struct sock *sk) ...@@ -48,12 +95,14 @@ static int mptcp_init_sock(struct sock *sk)
static void mptcp_close(struct sock *sk, long timeout) static void mptcp_close(struct sock *sk, long timeout)
{ {
struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
inet_sk_state_store(sk, TCP_CLOSE); inet_sk_state_store(sk, TCP_CLOSE);
if (msk->subflow) { ssock = __mptcp_nmpc_socket(msk);
pr_debug("subflow=%p", msk->subflow->sk); if (ssock) {
sock_release(msk->subflow); pr_debug("subflow=%p", mptcp_subflow_ctx(ssock->sk));
sock_release(ssock);
} }
sock_orphan(sk); sock_orphan(sk);
...@@ -67,7 +116,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len) ...@@ -67,7 +116,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
saddr->sa_family = AF_INET; saddr->sa_family = AF_INET;
pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk); pr_debug("msk=%p, subflow=%p", msk,
mptcp_subflow_ctx(msk->subflow->sk));
err = kernel_connect(msk->subflow, saddr, len, 0); err = kernel_connect(msk->subflow, saddr, len, 0);
...@@ -93,15 +143,79 @@ static struct proto mptcp_prot = { ...@@ -93,15 +143,79 @@ static struct proto mptcp_prot = {
.no_autobind = true, .no_autobind = true,
}; };
/* bind() handler for MPTCP sockets: forwards the request to the initial
 * subflow socket, creating that socket first if needed.
 *
 * @sock:     the MPTCP socket
 * @uaddr:    address to bind to; only AF_INET is accepted for now
 * @addr_len: number of valid bytes in @uaddr
 *
 * Returns 0 on success or a negative errno: -EINVAL when @addr_len does not
 * even cover sa_family, -EOPNOTSUPP for a non-IPv4 address family, the
 * error from __mptcp_socket_create(), or whatever the subflow's bind
 * operation returns.
 */
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct mptcp_sock *msk = mptcp_sk(sock->sk);
	struct socket *ssock;
	int err;

	/* do not look at sa_family unless the caller supplied that much */
	if (addr_len < (int)sizeof(uaddr->sa_family))
		return -EINVAL;

	/* Allow only IPv4 for now. ENOTSUPP is a kernel-internal code that
	 * userspace cannot decode, so report EOPNOTSUPP instead.
	 */
	if (uaddr->sa_family != AF_INET)
		return -EOPNOTSUPP;

	lock_sock(sock->sk);
	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
	if (IS_ERR(ssock)) {
		err = PTR_ERR(ssock);
		goto unlock;
	}

	err = ssock->ops->bind(ssock, uaddr, addr_len);

unlock:
	release_sock(sock->sk);
	return err;
}
/* connect() handler for MPTCP sockets.
 *
 * Obtains (creating if necessary) the initial subflow socket while moving
 * the MPTCP socket to TCP_SYN_SENT, runs the subflow's connect operation,
 * then copies the resulting subflow state onto the MPTCP socket.
 *
 * Returns the error from __mptcp_socket_create() if no subflow socket could
 * be obtained, otherwise the return value of the subflow connect call.
 */
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
				int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	struct socket *ssock;
	int err;

	lock_sock(sk);

	ssock = __mptcp_socket_create(mptcp_sk(sk), TCP_SYN_SENT);
	if (IS_ERR(ssock)) {
		err = PTR_ERR(ssock);
	} else {
		err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
		/* mirror the subflow state whatever connect returned */
		inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
	}

	release_sock(sk);

	return err;
}
/* poll() handler for MPTCP sockets: placeholder that never reports any
 * event; all arguments are currently unused.
 */
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
			   struct poll_table_struct *wait)
{
	return 0;
}
static struct proto_ops mptcp_stream_ops;
static struct inet_protosw mptcp_protosw = { static struct inet_protosw mptcp_protosw = {
.type = SOCK_STREAM, .type = SOCK_STREAM,
.protocol = IPPROTO_MPTCP, .protocol = IPPROTO_MPTCP,
.prot = &mptcp_prot, .prot = &mptcp_prot,
.ops = &inet_stream_ops, .ops = &mptcp_stream_ops,
.flags = INET_PROTOSW_ICSK,
}; };
void __init mptcp_init(void) void __init mptcp_init(void)
{ {
mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
mptcp_stream_ops = inet_stream_ops;
mptcp_stream_ops.bind = mptcp_bind;
mptcp_stream_ops.connect = mptcp_stream_connect;
mptcp_stream_ops.poll = mptcp_poll;
mptcp_subflow_init();
if (proto_register(&mptcp_prot, 1) != 0) if (proto_register(&mptcp_prot, 1) != 0)
panic("Failed to register MPTCP proto.\n"); panic("Failed to register MPTCP proto.\n");
...@@ -109,13 +223,14 @@ void __init mptcp_init(void) ...@@ -109,13 +223,14 @@ void __init mptcp_init(void)
} }
#if IS_ENABLED(CONFIG_MPTCP_IPV6) #if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct proto_ops mptcp_v6_stream_ops;
static struct proto mptcp_v6_prot; static struct proto mptcp_v6_prot;
static struct inet_protosw mptcp_v6_protosw = { static struct inet_protosw mptcp_v6_protosw = {
.type = SOCK_STREAM, .type = SOCK_STREAM,
.protocol = IPPROTO_MPTCP, .protocol = IPPROTO_MPTCP,
.prot = &mptcp_v6_prot, .prot = &mptcp_v6_prot,
.ops = &inet6_stream_ops, .ops = &mptcp_v6_stream_ops,
.flags = INET_PROTOSW_ICSK, .flags = INET_PROTOSW_ICSK,
}; };
...@@ -133,6 +248,11 @@ int mptcpv6_init(void) ...@@ -133,6 +248,11 @@ int mptcpv6_init(void)
if (err) if (err)
return err; return err;
mptcp_v6_stream_ops = inet6_stream_ops;
mptcp_v6_stream_ops.bind = mptcp_bind;
mptcp_v6_stream_ops.connect = mptcp_stream_connect;
mptcp_v6_stream_ops.poll = mptcp_poll;
err = inet6_register_protosw(&mptcp_v6_protosw); err = inet6_register_protosw(&mptcp_v6_protosw);
if (err) if (err)
proto_unregister(&mptcp_v6_prot); proto_unregister(&mptcp_v6_prot);
......
...@@ -48,4 +48,30 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) ...@@ -48,4 +48,30 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk; return (struct mptcp_sock *)sk;
} }
/* MPTCP subflow context
 *
 * Per-subflow state attached to a TCP socket through icsk_ulp_data by the
 * "mptcp" ULP; retrieve it with mptcp_subflow_ctx().
 */
struct mptcp_subflow_context {
u32 request_mptcp : 1; /* send MP_CAPABLE; set when the msk creates its initial subflow */
struct sock *tcp_sock; /* tcp sk backpointer, set when the context is allocated */
struct sock *conn; /* parent mptcp_sock, set by mptcp_subflow_create_socket() */
struct rcu_head rcu; /* lets the ULP release hook free the context via kfree_rcu() */
};
static inline struct mptcp_subflow_context *
mptcp_subflow_ctx(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/* Use RCU on icsk_ulp_data only for sock diag code */
return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
}
/* Return the TCP socket that @subflow points back to. */
static inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = subflow->tcp_sock;

	return ssk;
}
void mptcp_subflow_init(void);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
#endif /* __MPTCP_PROTOCOL_H */ #endif /* __MPTCP_PROTOCOL_H */
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
*
* Copyright (c) 2017 - 2019, Intel Corporation.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
/* Create a kernel TCP socket to act as a subflow of MPTCP socket @sk and
 * attach the "mptcp" ULP to it.
 *
 * @sk:       parent MPTCP socket; recorded in the new subflow context
 * @new_sock: on success, receives the new subflow socket; left untouched
 *            on failure
 *
 * Returns 0 on success, or the negative errno from sock_create_kern() or
 * tcp_set_ulp(). On failure no socket is left allocated.
 */
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	err = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
	this_cpu_add(*net->core.sock_inuse, 1);
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		/* the socket is not handed to the caller on this path, so it
		 * must be released here or it is leaked
		 */
		sock_release(sf);
		return err;
	}

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	subflow->conn = sk;

	return 0;
}
/* Allocate a zeroed subflow context with allocation flags @priority, store
 * it in the ULP data slot of @sk and point it back at @sk.
 *
 * Returns the new context, or NULL if the allocation failed.
 */
static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							 gfp_t priority)
{
	struct mptcp_subflow_context *subflow_ctx;

	subflow_ctx = kzalloc(sizeof(*subflow_ctx), priority);
	if (subflow_ctx) {
		rcu_assign_pointer(inet_csk(sk)->icsk_ulp_data, subflow_ctx);
		pr_debug("subflow=%p", subflow_ctx);
		subflow_ctx->tcp_sock = sk;
	}

	return subflow_ctx;
}
static int subflow_ulp_init(struct sock *sk)
{
struct mptcp_subflow_context *ctx;
struct tcp_sock *tp = tcp_sk(sk);
int err = 0;
/* disallow attaching ULP to a socket unless it has been
* created with sock_create_kern()
*/
if (!sk->sk_kern_sock) {
err = -EOPNOTSUPP;
goto out;
}
ctx = subflow_create_ctx(sk, GFP_KERNEL);
if (!ctx) {
err = -ENOMEM;
goto out;
}
pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
tp->is_mptcp = 1;
out:
return err;
}
/* ULP release hook: free the subflow context attached to @sk, if any,
 * through kfree_rcu().
 */
static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (ctx)
		kfree_rcu(ctx, rcu);
}
/* ULP descriptor for MPTCP subflows. The name must match the string that
 * mptcp_subflow_create_socket() passes to tcp_set_ulp(); it is registered
 * by mptcp_subflow_init().
 */
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
.name = "mptcp",
.owner = THIS_MODULE,
.init = subflow_ulp_init, /* allocates the per-subflow context */
.release = subflow_ulp_release, /* frees the per-subflow context */
};
/* Register the "mptcp" ULP used for subflow sockets; panics if the
 * registration fails.
 */
void mptcp_subflow_init(void)
{
	int err = tcp_register_ulp(&subflow_ulp_ops);

	if (err)
		panic("MPTCP: failed to register subflows to ULP\n");
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment