Commit b1394967 authored by David S. Miller

Merge branch 'tcp-second-round-for-EDT-conversion'

Eric Dumazet says:

====================
tcp: second round for EDT conversion

The first round of EDT patches left the TCP stack in a non-optimal state.

- High-speed flows suffered a loss of performance, addressed
  by the first patch of this series.

- Second patch brings pacing to the current state of networking,
  since we now reach ~100 Gbit on a single TCP flow.

- Third patch implements a mitigation for scheduling delays,
  like the one we did in sch_fq in the past.

- Fourth patch removes one special case in sch_fq for ACK packets.

- Fifth patch removes a serious performance cost for TCP internal
  pacing. We should set up the high resolution timer only if
  really needed.

- Sixth patch fixes a typo in BBR.

- Last patch is one minor change in cdg congestion control.

Neal Cardwell also has a patch series fixing BBR after
EDT adoption.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1a3aea25 825e1c52
@@ -249,6 +249,7 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */

 	/* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
...
@@ -422,8 +422,8 @@ struct sock {
 	struct timer_list	sk_timer;
 	__u32			sk_priority;
 	__u32			sk_mark;
-	u32			sk_pacing_rate; /* bytes per second */
-	u32			sk_max_pacing_rate;
+	unsigned long		sk_pacing_rate; /* bytes per second */
+	unsigned long		sk_max_pacing_rate;
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
...
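For context on the widening above: sk_pacing_rate and sk_max_pacing_rate hold a rate in bytes per second, and a u32 tops out near 4.29 GB/s, i.e. roughly 34 Gbit/s, well below the ~100 Gbit/s a single TCP flow can now reach. The standalone userspace sketch below (illustrative only, not part of the patch) shows the overflow that motivates switching to unsigned long on 64-bit kernels.

/* Illustrative userspace sketch (not kernel code): a u32 pacing rate in
 * bytes per second saturates near 34 Gbit/s, while the unsigned long used
 * after this change comfortably holds ~100 Gbit/s on 64-bit kernels.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned long long gbit = 1000ULL * 1000 * 1000;	/* bits per second */
	unsigned long long rate_100g = 100 * gbit / 8;		/* bytes per second */

	printf("UINT_MAX as a rate : %u B/s (~%.1f Gbit/s)\n",
	       UINT_MAX, UINT_MAX * 8.0 / 1e9);
	printf("100 Gbit/s in bytes: %llu B/s (fits in u32: %s)\n",
	       rate_100g, rate_100g > UINT_MAX ? "no" : "yes");
	return 0;
}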
@@ -3927,8 +3927,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
 			break;
-		case SO_MAX_PACING_RATE:
-			sk->sk_max_pacing_rate = val;
+		case SO_MAX_PACING_RATE: /* 32bit version */
+			sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
 			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 						 sk->sk_max_pacing_rate);
 			break;
...
@@ -998,7 +998,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
+		sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 					 sk->sk_max_pacing_rate);
 		break;
@@ -1336,7 +1336,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 #endif

 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		/* 32bit version */
+		v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
 		break;

 	case SO_INCOMING_CPU:
@@ -2810,8 +2811,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif

-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
...
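The SO_MAX_PACING_RATE socket option still carries a 32-bit value, so the patch maps the legacy "unlimited" value ~0U to ~0UL on set and clamps the unsigned long back into 32 bits on get. Below is a minimal userspace sketch of that round-trip, assuming an LP64 system; set_max_pacing_rate32() and get_max_pacing_rate32() are hypothetical helpers written for illustration, not kernel functions.

/* Illustrative sketch (not kernel code) of the 32-bit SO_MAX_PACING_RATE
 * round-trip after sk_max_pacing_rate became an unsigned long:
 * set: the legacy "unlimited" value ~0U is promoted to ~0UL;
 * get: anything larger than 32 bits is reported back as ~0U.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long set_max_pacing_rate32(unsigned int val)
{
	return (val == ~0U) ? ~0UL : val;
}

static unsigned int get_max_pacing_rate32(unsigned long rate)
{
	return (rate > ~0U) ? ~0U : (unsigned int)rate;
}

int main(void)
{
	assert(set_max_pacing_rate32(~0U) == ~0UL);          /* unlimited stays unlimited */
	assert(get_max_pacing_rate32(~0UL) == ~0U);          /* and reads back as ~0U */
	assert(get_max_pacing_rate32(12500000000UL) == ~0U); /* >4G B/s clamps in the 32-bit getter */
	printf("32-bit SO_MAX_PACING_RATE round-trip ok\n");
	return 0;
}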
@@ -3111,10 +3111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	unsigned long rate;
 	u32 now;
 	u64 rate64;
 	bool slow;
-	u32 rate;

 	memset(info, 0, sizeof(*info));
 	if (sk->sk_type != SOCK_STREAM)
@@ -3124,11 +3124,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)

 	/* Report meaningful fields for all TCP states, including listeners */
 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_pacing_rate = rate64;

 	rate = READ_ONCE(sk->sk_max_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	info->tcpi_max_pacing_rate = rate64;

 	info->tcpi_reordering = tp->reordering;
@@ -3254,8 +3254,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
 	struct tcp_info info;
+	unsigned long rate;
 	u64 rate64;
-	u32 rate;

 	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
 	if (!stats)
@@ -3274,7 +3274,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  tp->total_retrans, TCP_NLA_PAD);

 	rate = READ_ONCE(sk->sk_pacing_rate);
-	rate64 = rate != ~0U ? rate : ~0ULL;
+	rate64 = (rate != ~0UL) ? rate : ~0ULL;
 	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);

 	rate64 = tcp_compute_delivery_rate(tp);
...
@@ -129,7 +129,7 @@ static const u32 bbr_probe_rtt_mode_ms = 200;
 static const int bbr_min_tso_rate = 1200000;

 /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
-static const int bbr_pacing_marging_percent = 1;
+static const int bbr_pacing_margin_percent = 1;

 /* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
  * that will allow a smoothly increasing pacing rate that will double each RTT
@@ -214,12 +214,12 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 	rate *= mss;
 	rate *= gain;
 	rate >>= BBR_SCALE;
-	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_marging_percent);
+	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
 	return rate >> BW_SCALE;
 }

 /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
-static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	u64 rate = bw;
@@ -258,7 +258,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+	unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain);

 	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
 		bbr_init_pacing_rate_from_rtt(sk);
@@ -280,7 +280,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
 	/* Sort of tcp_tso_autosize() but ignoring
 	 * driver provided sk_gso_max_size.
 	 */
-	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+	bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
...
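For reference, bbr_rate_bytes_per_sec() above takes a bandwidth sample expressed in packets per microsecond left-shifted by BW_SCALE (24) and a gain left-shifted by BBR_SCALE (8), and applies the renamed ~1% pacing margin. The userspace sketch below is illustrative only: it mirrors the constants and arithmetic of net/ipv4/tcp_bbr.c, but the input numbers are made up.

/* Illustrative sketch (not kernel code) of the pacing-rate math in
 * bbr_rate_bytes_per_sec() after the typo fix. bw is in packets per usec,
 * shifted by BW_SCALE; gain is shifted by BBR_SCALE; the result is paced
 * ~1% below the estimated bandwidth.
 */
#include <stdio.h>
#include <stdint.h>

#define BW_SCALE	24
#define BBR_SCALE	8
#define USEC_PER_SEC	1000000ULL

static const int bbr_pacing_margin_percent = 1;

static uint64_t rate_bytes_per_sec(uint64_t bw, uint32_t mss, int gain)
{
	uint64_t rate = bw;

	rate *= mss;				/* packets -> bytes */
	rate *= gain;				/* apply gain (BBR_SCALE-shifted) */
	rate >>= BBR_SCALE;
	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
	return rate >> BW_SCALE;		/* bytes per second */
}

int main(void)
{
	/* 10 packets/usec (~116 Gbit/s at mss=1448), unity gain (1 << BBR_SCALE) */
	uint64_t bw = 10ULL << BW_SCALE;

	printf("pacing rate: %llu bytes/sec\n",
	       (unsigned long long)rate_bytes_per_sec(bw, 1448, 1 << BBR_SCALE));
	return 0;
}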
@@ -146,7 +146,7 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 		return;

 	if (hystart_detect & HYSTART_ACK_TRAIN) {
-		u32 now_us = div_u64(local_clock(), NSEC_PER_USEC);
+		u32 now_us = tp->tcp_mstamp;

 		if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) {
 			ca->last_ack = now_us;
...
@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
 {
 	u64 val = tcp_clock_ns();

-	/* departure time for next data packet */
-	if (val > tp->tcp_wstamp_ns)
-		tp->tcp_wstamp_ns = val;
+	if (val > tp->tcp_clock_cache)
+		tp->tcp_clock_cache = val;

 	val = div_u64(val, NSEC_PER_USEC);
 	if (val > tp->tcp_mstamp)
@@ -976,32 +975,26 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }

-static void tcp_internal_pacing(struct sock *sk)
-{
-	if (!tcp_needs_internal_pacing(sk))
-		return;
-	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-		      ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
-		      HRTIMER_MODE_ABS_PINNED_SOFT);
-	sock_hold(sk);
-}
-
-static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
+				      u64 prior_wstamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);

 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (sk->sk_pacing_status != SK_PACING_NONE) {
-		u32 rate = sk->sk_pacing_rate;
+		unsigned long rate = sk->sk_pacing_rate;

 		/* Original sch_fq does not pace first 10 MSS
 		 * Note that tp->data_segs_out overflows after 2^32 packets,
 		 * this is a minor annoyance.
 		 */
-		if (rate != ~0U && rate && tp->data_segs_out >= 10) {
-			tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate);
-
-			tcp_internal_pacing(sk);
+		if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
+			u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
+			u64 credit = tp->tcp_wstamp_ns - prior_wstamp;
+
+			/* take into account OS jitter */
+			len_ns -= min_t(u64, len_ns / 2, credit);
+			tp->tcp_wstamp_ns += len_ns;
 		}
 	}
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
@@ -1030,6 +1023,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
+	u64 prior_wstamp;
 	int err;

 	BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -1050,6 +1044,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+
+	prior_wstamp = tp->tcp_wstamp_ns;
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;

 	inet = inet_sk(sk);
@@ -1166,7 +1164,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		err = net_xmit_eval(err);
 	}
 	if (!err && oskb) {
-		tcp_update_skb_after_send(sk, oskb);
+		tcp_update_skb_after_send(sk, oskb, prior_wstamp);
 		tcp_rate_skb_sent(sk, oskb);
 	}
 	return err;
@@ -1701,7 +1699,8 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 {
 	u32 bytes, segs;

-	bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
+	bytes = min_t(unsigned long,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);

 	/* Goal is to send at least one packet per ms,
@@ -2175,10 +2174,23 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }

-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_pacing_check(struct sock *sk)
 {
-	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tcp_needs_internal_pacing(sk))
+		return false;
+
+	if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
+		return false;
+
+	if (!hrtimer_is_queued(&tp->pacing_timer)) {
+		hrtimer_start(&tp->pacing_timer,
+			      ns_to_ktime(tp->tcp_wstamp_ns),
+			      HRTIMER_MODE_ABS_PINNED_SOFT);
+		sock_hold(sk);
+	}
+	return true;
 }

 /* TCP Small Queues :
@@ -2195,10 +2207,12 @@ static bool tcp_pacing_check(const struct sock *sk)
 static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 				  unsigned int factor)
 {
-	unsigned int limit;
+	unsigned long limit;

-	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
-	limit = min_t(u32, limit,
+	limit = max_t(unsigned long,
+		      2 * skb->truesize,
+		      sk->sk_pacing_rate >> sk->sk_pacing_shift);
+	limit = min_t(unsigned long, limit,
 		      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
@@ -2315,7 +2329,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
 			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			tcp_update_skb_after_send(sk, skb);
+			tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
 			goto repair; /* Skip network transmission */
 		}
@@ -2890,7 +2904,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		     } tcp_skb_tsorted_restore(skb);

 		if (!err) {
-			tcp_update_skb_after_send(sk, skb);
+			tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
 			tcp_rate_skb_sent(sk, skb);
 		}
 	} else {
...
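The tcp_update_skb_after_send() change above is the scheduling-delay mitigation from the cover letter: when a transmit happens later than the previously scheduled departure time, up to half of the packet's pacing interval is credited back, so a flow hit by timer or softirq jitter does not drift below its pacing rate (and tcp_pacing_check() now arms the pacing hrtimer only when the next departure really lies in the future). Below is a simplified userspace sketch of the credit computation; it is illustrative only, not kernel code.

/* Illustrative sketch (not kernel code) of the jitter credit applied in
 * tcp_update_skb_after_send(): wstamp_ns is the actual (possibly late)
 * departure time, prior_wstamp_ns the previously scheduled one; at most
 * half of the packet's pacing interval is refunded.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t next_departure(uint64_t wstamp_ns, uint64_t prior_wstamp_ns,
			       uint32_t skb_len, uint64_t rate_bytes_per_sec)
{
	uint64_t len_ns = (uint64_t)skb_len * NSEC_PER_SEC / rate_bytes_per_sec;
	/* how late we were versus the previously scheduled departure */
	uint64_t credit = wstamp_ns - prior_wstamp_ns;

	/* take into account OS jitter, but never refund more than half */
	len_ns -= credit < len_ns / 2 ? credit : len_ns / 2;
	return wstamp_ns + len_ns;
}

int main(void)
{
	/* 1448-byte packet at 1.25e9 B/s (~10 Gbit/s) => ~1158 ns per packet */
	uint64_t on_time  = next_departure(1000000, 1000000, 1448, 1250000000ULL);
	uint64_t ran_late = next_departure(1000400, 1000000, 1448, 1250000000ULL);

	printf("on time   : next departure at %llu ns\n", (unsigned long long)on_time);
	printf("400ns late: next departure at %llu ns\n", (unsigned long long)ran_late);
	return 0;
}

Running this, both cases schedule the next departure at the same absolute time, which is exactly the point of the mitigation: a late wakeup does not push the whole schedule back.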
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
 	 */
 	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+		skb->skb_mstamp_ns = tp->tcp_clock_cache;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
 		goto abort;
...
@@ -92,8 +92,8 @@ struct fq_sched_data {
 	u32		quantum;
 	u32		initial_quantum;
 	u32		flow_refill_delay;
-	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
+	unsigned long	flow_max_rate;	/* optional max rate per flow */
 	u32		orphan_mask;	/* mask for orphaned skb */
 	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
@@ -416,7 +416,8 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
-	u32 rate, plen;
+	unsigned long rate;
+	u32 plen;

 	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
@@ -443,7 +444,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	}

 	skb = f->head;
-	if (skb && !skb_is_tcp_pure_ack(skb)) {
+	if (skb) {
 		u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp),
 					     f->time_next_packet);
@@ -485,11 +486,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 		if (f->credit > 0)
 			goto out;
 	}
-	if (rate != ~0U) {
+	if (rate != ~0UL) {
 		u64 len = (u64)plen * NSEC_PER_SEC;

 		if (likely(rate))
-			do_div(len, rate);
+			len = div64_ul(len, rate);
 		/* Since socket rate can change later,
 		 * clamp the delay to 1 second.
 		 * Really, providers of too big packets should be fixed !
@@ -701,9 +702,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
 				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));

-	if (tb[TCA_FQ_FLOW_MAX_RATE])
-		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
+		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+	}
 	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
 		q->low_rate_threshold =
 			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
@@ -766,7 +769,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
-	q->flow_max_rate	= ~0U;
+	q->flow_max_rate	= ~0UL;
 	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
@@ -802,7 +805,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
+			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
...
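In the sch_fq changes above, dropping the skb_is_tcp_pure_ack() test means ACKs of a paced flow go through the same per-packet delay computation as data packets, and the now unsigned long rate is divided with div64_ul(). Below is a simplified userspace sketch of that delay computation on an LP64 system; fq_packet_delay_ns() is a hypothetical stand-in for the inline logic in fq_dequeue(), illustrative only.

/* Illustrative sketch (not kernel code) of sch_fq's per-packet pacing delay
 * after the rate became an unsigned long:
 *   delay = plen * NSEC_PER_SEC / rate, clamped to one second.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t fq_packet_delay_ns(uint32_t plen, unsigned long rate)
{
	uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

	if (rate == ~0UL)		/* ~0UL means "no rate limit" */
		return 0;
	if (rate)
		len /= rate;
	if (len > NSEC_PER_SEC)		/* clamp the delay to 1 second */
		len = NSEC_PER_SEC;
	return len;
}

int main(void)
{
	printf("64KB GSO packet at 12.5 GB/s   : %llu ns\n",
	       (unsigned long long)fq_packet_delay_ns(65536, 12500000000UL));
	printf("1500B packet at 1 kB/s (clamped): %llu ns\n",
	       (unsigned long long)fq_packet_delay_ns(1500, 1000));
	return 0;
}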