Commit 57be5bda authored by Al Viro

ip: convert tcp_sendmsg() to iov_iter primitives

patch is actually smaller than it seems to be - most of it is unindenting
the inner loop body in tcp_sendmsg() itself...

the bit in tcp_input.c is going to get reverted very soon - that's what
memcpy_from_msg() will become, but not in this commit; let's keep it
reasonably contained...

There's one potentially subtle change here: in case of short copy from
userland, mainline tcp_send_syn_data() discards the skb it has allocated
and falls back to normal path, where we'll send as much as possible after
rereading the same data again.  This patch trims SYN+data skb instead -
that way we don't need to copy from the same place twice.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent cacdc7d2
...@@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) ...@@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
} }
static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
char __user *from, char *to, struct iov_iter *from, char *to,
int copy, int offset) int copy, int offset)
{ {
if (skb->ip_summed == CHECKSUM_NONE) { if (skb->ip_summed == CHECKSUM_NONE) {
int err = 0; __wsum csum = 0;
__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
if (err) return -EFAULT;
return err;
skb->csum = csum_block_add(skb->csum, csum, offset); skb->csum = csum_block_add(skb->csum, csum, offset);
} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
if (!access_ok(VERIFY_READ, from, copy) || if (copy_from_iter_nocache(to, copy, from) != copy)
__copy_from_user_nocache(to, from, copy))
return -EFAULT; return -EFAULT;
} else if (copy_from_user(to, from, copy)) } else if (copy_from_iter(to, copy, from) != copy)
return -EFAULT; return -EFAULT;
return 0; return 0;
} }
static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
char __user *from, int copy) struct iov_iter *from, int copy)
{ {
int err, offset = skb->len; int err, offset = skb->len;
...@@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, ...@@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
return err; return err;
} }
static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
struct sk_buff *skb, struct sk_buff *skb,
struct page *page, struct page *page,
int off, int copy) int off, int copy)
......
...@@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, ...@@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size) size_t size)
{ {
const struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb; struct sk_buff *skb;
int iovlen, flags, err, copied = 0; int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0, offset = 0; int mss_now = 0, size_goal, copied_syn = 0;
bool sg; bool sg;
long timeo; long timeo;
...@@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto out; goto out;
else if (err) else if (err)
goto out_err; goto out_err;
offset = copied_syn;
} }
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
...@@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
mss_now = tcp_send_mss(sk, &size_goal, flags); mss_now = tcp_send_mss(sk, &size_goal, flags);
/* Ok commence sending. */ /* Ok commence sending. */
iovlen = msg->msg_iter.nr_segs;
iov = msg->msg_iter.iov;
copied = 0; copied = 0;
err = -EPIPE; err = -EPIPE;
...@@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sg = !!(sk->sk_route_caps & NETIF_F_SG); sg = !!(sk->sk_route_caps & NETIF_F_SG);
while (--iovlen >= 0) { while (iov_iter_count(&msg->msg_iter)) {
size_t seglen = iov->iov_len; int copy = 0;
unsigned char __user *from = iov->iov_base; int max = size_goal;
iov++; skb = tcp_write_queue_tail(sk);
if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ if (tcp_send_head(sk)) {
if (offset >= seglen) { if (skb->ip_summed == CHECKSUM_NONE)
offset -= seglen; max = mss_now;
continue; copy = max - skb->len;
}
seglen -= offset;
from += offset;
offset = 0;
} }
while (seglen > 0) { if (copy <= 0) {
int copy = 0;
int max = size_goal;
skb = tcp_write_queue_tail(sk);
if (tcp_send_head(sk)) {
if (skb->ip_summed == CHECKSUM_NONE)
max = mss_now;
copy = max - skb->len;
}
if (copy <= 0) {
new_segment: new_segment:
/* Allocate new segment. If the interface is SG, /* Allocate new segment. If the interface is SG,
* allocate skb fitting to single page. * allocate skb fitting to single page.
*/ */
if (!sk_stream_memory_free(sk)) if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf; goto wait_for_sndbuf;
skb = sk_stream_alloc_skb(sk, skb = sk_stream_alloc_skb(sk,
select_size(sk, sg), select_size(sk, sg),
sk->sk_allocation); sk->sk_allocation);
if (!skb) if (!skb)
goto wait_for_memory; goto wait_for_memory;
/* /*
* Check whether we can use HW checksum. * Check whether we can use HW checksum.
*/ */
if (sk->sk_route_caps & NETIF_F_ALL_CSUM) if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
skb->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb); skb_entail(sk, skb);
copy = size_goal; copy = size_goal;
max = size_goal; max = size_goal;
/* All packets are restored as if they have /* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to * already been sent. skb_mstamp isn't set to
* avoid wrong rtt estimation. * avoid wrong rtt estimation.
*/ */
if (tp->repair) if (tp->repair)
TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
} }
/* Try to append data to the end of skb. */ /* Try to append data to the end of skb. */
if (copy > seglen) if (copy > iov_iter_count(&msg->msg_iter))
copy = seglen; copy = iov_iter_count(&msg->msg_iter);
/* Where to copy to? */ /* Where to copy to? */
if (skb_availroom(skb) > 0) { if (skb_availroom(skb) > 0) {
/* We have some space in skb head. Superb! */ /* We have some space in skb head. Superb! */
copy = min_t(int, copy, skb_availroom(skb)); copy = min_t(int, copy, skb_availroom(skb));
err = skb_add_data_nocache(sk, skb, from, copy); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
if (err) if (err)
goto do_fault; goto do_fault;
} else { } else {
bool merge = true; bool merge = true;
int i = skb_shinfo(skb)->nr_frags; int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk); struct page_frag *pfrag = sk_page_frag(sk);
if (!sk_page_frag_refill(sk, pfrag)) if (!sk_page_frag_refill(sk, pfrag))
goto wait_for_memory; goto wait_for_memory;
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
if (i == MAX_SKB_FRAGS || !sg) {
tcp_mark_push(tp, skb);
goto new_segment;
}
merge = false;
}
copy = min_t(int, copy, pfrag->size - pfrag->offset); if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
if (!sk_wmem_schedule(sk, copy)) if (i == MAX_SKB_FRAGS || !sg) {
goto wait_for_memory; tcp_mark_push(tp, skb);
goto new_segment;
err = skb_copy_to_page_nocache(sk, from, skb,
pfrag->page,
pfrag->offset,
copy);
if (err)
goto do_error;
/* Update the skb. */
if (merge) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
get_page(pfrag->page);
} }
pfrag->offset += copy; merge = false;
} }
if (!copied) copy = min_t(int, copy, pfrag->size - pfrag->offset);
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
tp->write_seq += copy; if (!sk_wmem_schedule(sk, copy))
TCP_SKB_CB(skb)->end_seq += copy; goto wait_for_memory;
tcp_skb_pcount_set(skb, 0);
from += copy; err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
copied += copy; pfrag->page,
if ((seglen -= copy) == 0 && iovlen == 0) { pfrag->offset,
tcp_tx_timestamp(sk, skb); copy);
goto out; if (err)
goto do_error;
/* Update the skb. */
if (merge) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, pfrag->page,
pfrag->offset, copy);
get_page(pfrag->page);
} }
pfrag->offset += copy;
}
if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) if (!copied)
continue; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
copied += copy;
if (!iov_iter_count(&msg->msg_iter)) {
tcp_tx_timestamp(sk, skb);
goto out;
}
if (forced_push(tp)) { if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
tcp_mark_push(tp, skb);
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
} else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue; continue;
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
} else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
wait_for_sndbuf: wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory: wait_for_memory:
if (copied) if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal); TCP_NAGLE_PUSH, size_goal);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error; goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags); mss_now = tcp_send_mss(sk, &size_goal, flags);
}
} }
out: out:
......
...@@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free; goto err_free;
if (memcpy_from_msg(skb_put(skb, size), msg, size)) if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size)
goto err_free; goto err_free;
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
......
...@@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req; struct tcp_fastopen_request *fo = tp->fastopen_req;
int syn_loss = 0, space, err = 0; int syn_loss = 0, space, err = 0, copied;
unsigned long last_syn_loss = 0; unsigned long last_syn_loss = 0;
struct sk_buff *syn_data; struct sk_buff *syn_data;
...@@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto fallback; goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL; syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), copied = copy_from_iter(skb_put(syn_data, space), space,
fo->data->msg_iter.iov, 0, space))) { &fo->data->msg_iter);
if (unlikely(!copied)) {
kfree_skb(syn_data); kfree_skb(syn_data);
goto fallback; goto fallback;
} }
if (copied != space) {
skb_trim(syn_data, copied);
space = copied;
}
/* No more data pending in inet_wait_for_connect() */ /* No more data pending in inet_wait_for_connect() */
if (space == fo->size) if (space == fo->size)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment