Commit e8f117f0 authored by Eric Dumazet, committed by Sasha Levin

tcp: make connect() mem charging friendly

[ Upstream commit 355a901e ]

While working on sk_forward_alloc problems reported by Denys
Fedoryshchenko, we found that tcp connect() (and fastopen) do not call
sk_wmem_schedule() for SYN packet (and/or SYN/DATA packet), so
sk_forward_alloc is negative while connect is in progress.

We can fix this by calling the regular sk_stream_alloc_skb() both for the
SYN packet (in tcp_connect()) and for the syn_data packet in
tcp_send_syn_data().

Then, tcp_send_syn_data() can avoid copying syn_data, as we can simply
manipulate syn_data->cb[] to remove the SYN flag (and increment seq).

Instead of open coding memcpy_fromiovecend(), simply use this helper.

This leaves clean fast-clone skbs in the socket write queue.

This was tested against our fastopen packetdrill tests.
Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
parent 34ca18c8
...@@ -2994,9 +2994,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -2994,9 +2994,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req; struct tcp_fastopen_request *fo = tp->fastopen_req;
int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; int syn_loss = 0, space, err = 0;
struct sk_buff *syn_data = NULL, *data;
unsigned long last_syn_loss = 0; unsigned long last_syn_loss = 0;
struct sk_buff *syn_data;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
...@@ -3027,48 +3027,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3027,48 +3027,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
/* limit to order-0 allocations */ /* limit to order-0 allocations */
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
sk->sk_allocation); if (!syn_data)
if (syn_data == NULL)
goto fallback; goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
fo->data->msg_iov, 0, space))) {
kfree_skb(syn_data);
goto fallback;
}
for (i = 0; i < iovlen && syn_data->len < space; ++i) { /* No more data pending in inet_wait_for_connect() */
struct iovec *iov = &fo->data->msg_iov[i]; if (space == fo->size)
unsigned char __user *from = iov->iov_base; fo->data = NULL;
int len = iov->iov_len; fo->copied = space;
if (syn_data->len + len > space) tcp_connect_queue_skb(sk, syn_data);
len = space - syn_data->len;
else if (i + 1 == iovlen)
/* No more data pending in inet_wait_for_connect() */
fo->data = NULL;
if (skb_add_data(syn_data, from, len)) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
goto fallback;
}
/* Queue a data-only packet after the regular SYN for retransmission */ syn->skb_mstamp = syn_data->skb_mstamp;
data = pskb_copy(syn_data, sk->sk_allocation);
if (data == NULL)
goto fallback;
TCP_SKB_CB(data)->seq++;
TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
/* syn_data is about to be sent, we need to take current time stamps
* for the packets that are in write queue : SYN packet and DATA
*/
skb_mstamp_get(&syn->skb_mstamp);
data->skb_mstamp = syn->skb_mstamp;
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { /* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
* we keep in write queue in case of a retransmit, as we
* also have the SYN packet (with no data) in the same queue.
*/
TCP_SKB_CB(syn_data)->seq++;
TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
if (!err) {
tp->syn_data = (fo->copied > 0); tp->syn_data = (fo->copied > 0);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done; goto done;
} }
syn_data = NULL;
fallback: fallback:
/* Send a regular SYN with Fast Open cookie request option */ /* Send a regular SYN with Fast Open cookie request option */
...@@ -3077,7 +3069,6 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) ...@@ -3077,7 +3069,6 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
if (err) if (err)
tp->syn_fastopen = 0; tp->syn_fastopen = 0;
kfree_skb(syn_data);
done: done:
fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
return err; return err;
...@@ -3097,13 +3088,10 @@ int tcp_connect(struct sock *sk) ...@@ -3097,13 +3088,10 @@ int tcp_connect(struct sock *sk)
return 0; return 0;
} }
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
if (unlikely(buff == NULL)) if (unlikely(!buff))
return -ENOBUFS; return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tp->retrans_stamp = tcp_time_stamp; tp->retrans_stamp = tcp_time_stamp;
tcp_connect_queue_skb(sk, buff); tcp_connect_queue_skb(sk, buff);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment