Commit ca822141 authored by David S. Miller

Merge branch 'tcp-rbtree-retransmit-queue'

Eric Dumazet says:

====================
tcp: implement rb-tree based retransmit queue

This patch series implements an RB-tree based retransmit queue for TCP,
to better match modern BDPs.

Tested:

 On receiver :
 netem on ingress : delay 150ms 200us loss 1
 GRO disabled to force stress and SACK storms.

for f in `seq 1 10`
do
 ./netperf -H lpaa6 -l30 -- -K bbr -o THROUGHPUT|tail -1
done | awk '{print $0} {sum += $0} END {printf "%7u\n",sum}'

Before patch :

323.87  351.48  339.59  338.62  306.72
204.07  304.93  291.88  202.47  176.88
->   2840

After patch:

1700.83 2207.98 2070.17 1544.26 2114.76
2124.89 1693.14 1080.91 2216.82 1299.94
->  18053

Average of 1805 Mbits instead of 284 Mbits.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f5333f80 75c119af
@@ -3158,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 	return __skb_grow(skb, len);
 }
 
+#define rb_to_skb(rb)	rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root)  rb_to_skb(rb_last(root))
+#define skb_rb_next(skb)   rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb)   rb_to_skb(rb_prev(&(skb)->rbnode))
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -3172,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
 		for (; skb != (struct sk_buff *)(queue);			\
 		     skb = skb->next)
 
+#define skb_rbtree_walk(skb, root)						\
+		for (skb = skb_rb_first(root); skb != NULL;			\
+		     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb)						\
+		for (; skb != NULL;						\
+		     skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp)					\
+		for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL);	\
+		     skb = tmp)
+
 #define skb_queue_walk_from_safe(queue, skb, tmp)				\
 		for (tmp = skb->next;						\
 		     skb != (struct sk_buff *)(queue);				\
...
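The rb_to_skb()/skb_rb_*() helpers above generalize what net/sched/sch_netem.c used to open-code (see the last file in this series). A minimal usage sketch, assuming some rb_root of skbs keyed the same way as netem's t_root or TCP's new rtx queue (the function name is invented for illustration):

#include <linux/rbtree.h>
#include <linux/skbuff.h>

/* Sketch only: drain every skb from an rb-tree using the new walk helpers.
 * skb_rbtree_walk_from_safe() fetches the next node before the loop body
 * runs, so erasing and freeing the current skb inside the body is safe.
 */
static void example_skb_rbtree_drain(struct rb_root *root)
{
	struct sk_buff *skb = skb_rb_first(root);
	struct sk_buff *tmp;

	skb_rbtree_walk_from_safe(skb, tmp) {
		rb_erase(&skb->rbnode, root);
		kfree_skb(skb);
	}
}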
@@ -60,7 +60,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
@@ -397,7 +397,10 @@ struct sock {
 	int			sk_wmem_queued;
 	refcount_t		sk_wmem_alloc;
 	unsigned long		sk_tsq_flags;
-	struct sk_buff		*sk_send_head;
+	union {
+		struct sk_buff	*sk_send_head;
+		struct rb_root	tcp_rtx_queue;
+	};
 	struct sk_buff_head	sk_write_queue;
 	__s32			sk_peek_off;
 	int			sk_write_pending;
...
@@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+enum tcp_queue {
+	TCP_FRAG_IN_WRITE_QUEUE,
+	TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+		 struct sk_buff *skb, u32 len,
+		 unsigned int mss_now, gfp_t gfp);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
@@ -1606,19 +1612,11 @@ static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
 		skb->_skb_refdst = _save;		\
 	}
 
-/* write queue abstraction */
-static inline void tcp_write_queue_purge(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		tcp_skb_tsorted_anchor_cleanup(skb);
-		sk_wmem_free_skb(sk, skb);
-	}
-	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
-	sk_mem_reclaim(sk);
-	tcp_clear_all_retrans_hints(tcp_sk(sk));
+void tcp_write_queue_purge(struct sock *sk);
+
+static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
+{
+	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
@@ -1643,18 +1641,12 @@ static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
 	return skb_queue_prev(&sk->sk_write_queue, skb);
 }
 
-#define tcp_for_write_queue(skb, sk)					\
-	skb_queue_walk(&(sk)->sk_write_queue, skb)
-
-#define tcp_for_write_queue_from(skb, sk)				\
-	skb_queue_walk_from(&(sk)->sk_write_queue, skb)
-
 #define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
 	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
 static inline struct sk_buff *tcp_send_head(const struct sock *sk)
 {
-	return sk->sk_send_head;
+	return skb_peek(&sk->sk_write_queue);
 }
 
 static inline bool tcp_skb_is_last(const struct sock *sk,
@@ -1663,29 +1655,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
 	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }
 
-static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
+static inline bool tcp_write_queue_empty(const struct sock *sk)
 {
-	if (tcp_skb_is_last(sk, skb))
-		sk->sk_send_head = NULL;
-	else
-		sk->sk_send_head = tcp_write_queue_next(sk, skb);
+	return skb_queue_empty(&sk->sk_write_queue);
+}
+
+static inline bool tcp_rtx_queue_empty(const struct sock *sk)
+{
+	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
+}
+
+static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
+{
+	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }
 
 static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
 {
-	if (sk->sk_send_head == skb_unlinked) {
-		sk->sk_send_head = NULL;
+	if (tcp_write_queue_empty(sk))
 		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-	}
+
 	if (tcp_sk(sk)->highest_sack == skb_unlinked)
 		tcp_sk(sk)->highest_sack = NULL;
 }
 
-static inline void tcp_init_send_head(struct sock *sk)
-{
-	sk->sk_send_head = NULL;
-}
-
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_tail(&sk->sk_write_queue, skb);
@@ -1696,8 +1689,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
 	__tcp_add_write_queue_tail(sk, skb);
 
 	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL) {
-		sk->sk_send_head = skb;
+	if (sk->sk_write_queue.next == skb) {
 		tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
 		if (tcp_sk(sk)->highest_sack == NULL)
@@ -1710,35 +1702,32 @@ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *s
 	__skb_queue_head(&sk->sk_write_queue, skb);
 }
 
-/* Insert buff after skb on the write queue of sk. */
-static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
-						struct sk_buff *buff,
-						struct sock *sk)
-{
-	__skb_queue_after(&sk->sk_write_queue, skb, buff);
-}
-
 /* Insert new before skb on the write queue of sk. */
 static inline void tcp_insert_write_queue_before(struct sk_buff *new,
 						  struct sk_buff *skb,
 						  struct sock *sk)
 {
 	__skb_queue_before(&sk->sk_write_queue, skb, new);
-
-	if (sk->sk_send_head == skb)
-		sk->sk_send_head = new;
 }
 
 static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
 {
-	list_del(&skb->tcp_tsorted_anchor);
-	tcp_skb_tsorted_anchor_cleanup(skb);
 	__skb_unlink(skb, &sk->sk_write_queue);
 }
 
-static inline bool tcp_write_queue_empty(struct sock *sk)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);
+
+static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
 {
-	return skb_queue_empty(&sk->sk_write_queue);
+	tcp_skb_tsorted_anchor_cleanup(skb);
+	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
+}
+
+static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
+{
+	list_del(&skb->tcp_tsorted_anchor);
+	tcp_rtx_queue_unlink(skb, sk);
+	sk_wmem_free_skb(sk, skb);
 }
 
 static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1767,8 +1756,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
 static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
 {
-	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
-						tcp_write_queue_next(sk, skb);
+	struct sk_buff *next = skb_rb_next(skb);
+
+	tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk);
 }
 
 static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
@@ -1778,7 +1768,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
 static inline void tcp_highest_sack_reset(struct sock *sk)
 {
-	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+	struct sk_buff *skb = tcp_rtx_queue_head(sk);
+
+	tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk);
 }
 
 /* Called when old skb is about to be deleted (to be combined with new skb) */
@@ -1948,7 +1940,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
 /* At how many usecs into the future should the RTO fire? */
 static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
-	const struct sk_buff *skb = tcp_write_queue_head(sk);
+	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
 	u32 rto = inet_csk(sk)->icsk_rto;
 	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
...
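tcp_rbtree_insert(), declared in the hunk above, is defined in the collapsed net/ipv4/tcp_output.c part of this series. A minimal sketch of such an insert, ordering skbs by their start sequence number with tcp.h's before() helper so SACK and retransmit processing get O(log n) lookups, might look like this (treat it as illustrative, not the authoritative version):

/* Sketch of an rb-tree insert keyed by TCP_SKB_CB(skb)->seq; the real
 * implementation lives in the collapsed tcp_output.c diff of this series.
 */
void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sk_buff *skb1;

	while (*p) {
		parent = *p;
		skb1 = rb_to_skb(parent);
		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, root);
}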
@@ -413,6 +413,7 @@ void tcp_init_sock(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->out_of_order_queue = RB_ROOT;
+	sk->tcp_rtx_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
 	INIT_LIST_HEAD(&tp->tsq_node);
 	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
@@ -469,8 +470,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
 	tcp_init_buffer_space(sk);
 }
 
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 {
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
 	if (tsflags && skb) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -699,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!tcp_send_head(sk))
-		return;
-
 	skb = tcp_write_queue_tail(sk);
+	if (!skb)
+		return;
 	if (!(flags & MSG_MORE) || forced_push(tp))
 		tcp_mark_push(tp, skb);
@@ -962,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		int copy, i;
 		bool can_coalesce;
 
-		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+		if (!skb || (copy = size_goal - skb->len) <= 0 ||
 		    !tcp_skb_can_collapse_to(skb)) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
 			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-					skb_queue_empty(&sk->sk_write_queue));
+					tcp_rtx_and_write_queues_empty(sk));
 			if (!skb)
 				goto wait_for_memory;
@@ -1041,7 +1043,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sk->sk_tsflags);
 		if (!(flags & MSG_SENDPAGE_NOTLAST))
 			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
@@ -1197,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+		skb = tcp_write_queue_tail(sk);
 		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 		if (!uarg) {
 			err = -ENOBUFS;
@@ -1273,7 +1275,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			int max = size_goal;
 
 			skb = tcp_write_queue_tail(sk);
-			if (tcp_send_head(sk)) {
+			if (skb) {
 				if (skb->ip_summed == CHECKSUM_NONE)
 					max = mss_now;
 				copy = max - skb->len;
@@ -1293,7 +1295,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				process_backlog = false;
 				goto restart;
 			}
-			first_skb = skb_queue_empty(&sk->sk_write_queue);
+			first_skb = tcp_rtx_and_write_queues_empty(sk);
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg, first_skb),
 						  sk->sk_allocation,
@@ -1418,7 +1420,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sockc.tsflags);
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
@@ -1519,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
 
 	/* XXX -- need to support SO_PEEK_OFF */
 
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+		if (err)
+			return err;
+		copied += skb->len;
+	}
+
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
@@ -2318,6 +2327,37 @@ static inline bool tcp_need_reset(int state)
 	       TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
 }
 
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+		/* Since we are deleting whole queue, no need to
+		 * list_del(&skb->tcp_tsorted_anchor)
+		 */
+		tcp_rtx_queue_unlink(skb, sk);
+		sk_wmem_free_skb(sk, skb);
+	}
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		tcp_skb_tsorted_anchor_cleanup(skb);
+		sk_wmem_free_skb(sk, skb);
+	}
+	tcp_rtx_queue_purge(sk);
+	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+	sk_mem_reclaim(sk);
+	tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
 int tcp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -2376,7 +2416,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	 * issue in __tcp_select_window()
 	 */
 	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
-	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 	dst_release(sk->sk_rx_dst);
...
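tcp_fragment() call sites live in the collapsed tcp_input.c and tcp_output.c diffs; with the signature introduced in include/net/tcp.h above they must say which queue the skb currently sits on. A hypothetical retransmit-path caller (helper name invented for illustration) might look like:

/* Hypothetical helper, not part of this diff: split an over-sized skb that is
 * already on the rtx rb-tree before retransmitting its first cur_mss bytes.
 */
static int example_split_for_retransmit(struct sock *sk, struct sk_buff *skb,
					unsigned int cur_mss)
{
	if (skb->len <= cur_mss)
		return 0;

	return tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
			    cur_mss, cur_mss, GFP_ATOMIC);
}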
@@ -465,17 +465,15 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node *p;
-	struct sk_buff *skb;
 	struct dst_entry *dst;
+	struct sk_buff *skb;
 
 	if (!tp->syn_fastopen)
 		return;
 
 	if (!tp->data_segs_in) {
-		p = rb_first(&tp->out_of_order_queue);
-		if (p && !rb_next(p)) {
-			skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = skb_rb_first(&tp->out_of_order_queue);
+		if (skb && !skb_rb_next(skb)) {
 			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
 				tcp_fastopen_active_disable(sk);
 				return;
...
This diff is collapsed.
@@ -480,7 +480,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		BUG_ON(!skb);
 
 		tcp_mstamp_refresh(tp);
...
This diff is collapsed.
@@ -156,8 +156,13 @@ static bool retransmits_timed_out(struct sock *sk,
 		return false;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (unlikely(!start_ts))
-		start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+	if (unlikely(!start_ts)) {
+		struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+		if (!head)
+			return false;
+		start_ts = tcp_skb_timestamp(head);
+	}
 
 	if (likely(timeout == 0)) {
 		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -304,11 +309,12 @@ static void tcp_delack_timer(unsigned long data)
 static void tcp_probe_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb = tcp_send_head(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 	u32 start_ts;
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || !skb) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -321,9 +327,9 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+		skb->skb_mstamp = tp->tcp_mstamp;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) >
 		 jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +414,7 @@ void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	WARN_ON(tcp_write_queue_empty(sk));
+	WARN_ON(tcp_rtx_queue_empty(sk));
 
 	tp->tlp_high_seq = 0;
@@ -441,7 +447,7 @@ void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk);
-		tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+		tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -473,7 +479,7 @@ void tcp_retransmit_timer(struct sock *sk)
 	tcp_enter_loss(sk);
 
-	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -647,7 +653,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || tcp_send_head(sk))
+	if (tp->packets_out || !tcp_write_queue_empty(sk))
 		goto resched;
 
 	elapsed = keepalive_time_elapsed(tp);
...
@@ -148,12 +148,6 @@ struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
 
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
-	return rb_entry(rb, struct sk_buff, rbnode);
-}
-
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -364,7 +358,7 @@ static void tfifo_reset(struct Qdisc *sch)
 	struct rb_node *p = rb_first(&q->t_root);
 
 	while (p) {
-		struct sk_buff *skb = netem_rb_to_skb(p);
+		struct sk_buff *skb = rb_to_skb(p);
 
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &q->t_root);
@@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		struct sk_buff *skb;
 
 		parent = *p;
-		skb = netem_rb_to_skb(parent);
+		skb = rb_to_skb(parent);
 		if (tnext >= netem_skb_cb(skb)->time_to_send)
 			p = &parent->rb_right;
 		else
@@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			struct sk_buff *t_skb;
 			struct netem_skb_cb *t_last;
 
-			t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+			t_skb = skb_rb_last(&q->t_root);
 			t_last = netem_skb_cb(t_skb);
 			if (!last ||
 			    t_last->time_to_send > last->time_to_send) {
@@ -617,7 +611,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (p) {
 		psched_time_t time_to_send;
 
-		skb = netem_rb_to_skb(p);
+		skb = rb_to_skb(p);
 
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
...