Commit f859a448, authored by Willem de Bruijn, committed by David S. Miller

tcp: allow zerocopy with fastopen

Accept MSG_ZEROCOPY in all the TCP states that allow sendmsg. Remove
the explicit check for ESTABLISHED and CLOSE_WAIT states.

This requires correctly handling zerocopy state (uarg, sk_zckey) in
all paths reachable from other TCP states, such as the EPIPE case
in sk_stream_wait_connect, which a sendmsg() in an incorrect state
will now hit. Most paths are already safe.

The only extension needed is for TCP Fastopen active open. This can build
an skb with data in tcp_send_syn_data. Pass the uarg along with other
fastopen state, so that this skb also generates a zerocopy
notification on release.

Tested with active and passive TCP Fastopen packetdrill scripts at
https://github.com/wdebruij/packetdrill/commit/1747eef03d25a2404e8132817d0f1244fd6f129d

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 84239b44
...@@ -1608,6 +1608,7 @@ struct tcp_fastopen_request { ...@@ -1608,6 +1608,7 @@ struct tcp_fastopen_request {
struct msghdr *data; /* data in MSG_FASTOPEN */ struct msghdr *data; /* data in MSG_FASTOPEN */
size_t size; size_t size;
int copied; /* queued in tcp_connect() */ int copied; /* queued in tcp_connect() */
struct ubuf_info *uarg;
}; };
void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_destroy_cipher(struct sock *sk);
......
...@@ -1127,7 +1127,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) ...@@ -1127,7 +1127,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
} }
static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
int *copied, size_t size) int *copied, size_t size,
struct ubuf_info *uarg)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
...@@ -1147,6 +1148,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, ...@@ -1147,6 +1148,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return -ENOBUFS; return -ENOBUFS;
tp->fastopen_req->data = msg; tp->fastopen_req->data = msg;
tp->fastopen_req->size = size; tp->fastopen_req->size = size;
tp->fastopen_req->uarg = uarg;
if (inet->defer_connect) { if (inet->defer_connect) {
err = tcp_connect(sk); err = tcp_connect(sk);
...@@ -1186,11 +1188,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -1186,11 +1188,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
flags = msg->msg_flags; flags = msg->msg_flags;
if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
err = -EINVAL;
goto out_err;
}
skb = tcp_write_queue_tail(sk); skb = tcp_write_queue_tail(sk);
uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
if (!uarg) { if (!uarg) {
...@@ -1205,7 +1202,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -1205,7 +1202,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) && if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
!tp->repair) { !tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
if (err == -EINPROGRESS && copied_syn > 0) if (err == -EINPROGRESS && copied_syn > 0)
goto out; goto out;
else if (err) else if (err)
......
...@@ -3455,6 +3455,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3455,6 +3455,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
skb_trim(syn_data, copied); skb_trim(syn_data, copied);
space = copied; space = copied;
} }
skb_zcopy_set(syn_data, fo->uarg, NULL);
} }
/* No more data pending in inet_wait_for_connect() */ /* No more data pending in inet_wait_for_connect() */
if (space == fo->size) if (space == fo->size)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment