Commit cc106e44 authored by Yang Yingliang's avatar Yang Yingliang Committed by David S. Miller

net: sched: tbf: fix the calculation of max_size

Current max_size is caluated from rate table. Now, the rate table
has been replaced and it's wrong to caculate max_size based on this
rate table. It can lead wrong calculation of max_size.

The burst in kernel may be lower than user asked, because burst may gets
some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s")
and it seems we cannot avoid this loss. Burst's value(max_size) based on
rate table may be equal user asked. If a packet's length is max_size, this
packet will be stalled in tbf_dequeue() because its length is above the
burst in kernel so that it cannot get enough tokens. The max_size guards
against enqueuing packet sizes above q->buffer "time" in tbf_enqueue().

To make consistent with the calculation of tokens, this patch add a helper
psched_ns_t2l() to calculate burst(max_size) directly to fix this problem.

After this fix, we can support to using 64bit rates to calculate burst as well.
Signed-off-by: default avatarYang Yingliang <yangyingliang@huawei.com>
Acked-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 4bd7b512
...@@ -118,6 +118,30 @@ struct tbf_sched_data { ...@@ -118,6 +118,30 @@ struct tbf_sched_data {
}; };
/* Time to Length, convert time in ns to length in bytes
* to determinate how many bytes can be sent in given time.
*/
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
u64 time_in_ns)
{
/* The formula is :
* len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
*/
u64 len = time_in_ns * r->rate_bytes_ps;
do_div(len, NSEC_PER_SEC);
if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
len = (len / 53) * 48;
if (len > r->overhead)
len -= r->overhead;
else
len = 0;
return len;
}
/* /*
* Return length of individual segments of a gso packet, * Return length of individual segments of a gso packet,
* including all headers (MAC, IP, TCP/UDP) * including all headers (MAC, IP, TCP/UDP)
...@@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
struct tbf_sched_data *q = qdisc_priv(sch); struct tbf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_TBF_MAX + 1]; struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt; struct tc_tbf_qopt *qopt;
struct qdisc_rate_table *rtab = NULL;
struct qdisc_rate_table *ptab = NULL;
struct Qdisc *child = NULL; struct Qdisc *child = NULL;
int max_size, n; struct psched_ratecfg rate;
struct psched_ratecfg peak;
u64 max_size;
s64 buffer, mtu;
u64 rate64 = 0, prate64 = 0; u64 rate64 = 0, prate64 = 0;
err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
...@@ -304,50 +329,58 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -304,50 +329,58 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done; goto done;
qopt = nla_data(tb[TCA_TBF_PARMS]); qopt = nla_data(tb[TCA_TBF_PARMS]);
rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
if (rtab == NULL) qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
tb[TCA_TBF_RTAB]));
if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
tb[TCA_TBF_PTAB]));
if (q->qdisc != &noop_qdisc) {
err = fifo_set_limit(q->qdisc, qopt->limit);
if (err)
goto done;
} else if (qopt->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
if (IS_ERR(child)) {
err = PTR_ERR(child);
goto done; goto done;
}
}
buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
if (tb[TCA_TBF_RATE64])
rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
if (qopt->peakrate.rate) { if (qopt->peakrate.rate) {
if (qopt->peakrate.rate > qopt->rate.rate) if (tb[TCA_TBF_PRATE64])
ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
if (ptab == NULL) psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
peak.rate_bytes_ps, rate.rate_bytes_ps);
err = -EINVAL;
goto done; goto done;
} }
for (n = 0; n < 256; n++) max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
if (rtab->data[n] > qopt->buffer)
break;
max_size = (n << qopt->rate.cell_log) - 1;
if (ptab) {
int size;
for (n = 0; n < 256; n++)
if (ptab->data[n] > qopt->mtu)
break;
size = (n << qopt->peakrate.cell_log) - 1;
if (size < max_size)
max_size = size;
} }
if (max_size < 0)
goto done;
if (max_size < psched_mtu(qdisc_dev(sch))) if (max_size < psched_mtu(qdisc_dev(sch)))
pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
max_size, qdisc_dev(sch)->name, max_size, qdisc_dev(sch)->name,
psched_mtu(qdisc_dev(sch))); psched_mtu(qdisc_dev(sch)));
if (q->qdisc != &noop_qdisc) { if (!max_size) {
err = fifo_set_limit(q->qdisc, qopt->limit); err = -EINVAL;
if (err)
goto done;
} else if (qopt->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
if (IS_ERR(child)) {
err = PTR_ERR(child);
goto done; goto done;
} }
}
sch_tree_lock(sch); sch_tree_lock(sch);
if (child) { if (child) {
...@@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->tokens = q->buffer; q->tokens = q->buffer;
q->ptokens = q->mtu; q->ptokens = q->mtu;
if (tb[TCA_TBF_RATE64]) memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); if (qopt->peakrate.rate) {
psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
if (ptab) {
if (tb[TCA_TBF_PRATE64])
prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
q->peak_present = true; q->peak_present = true;
} else { } else {
q->peak_present = false; q->peak_present = false;
...@@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_unlock(sch); sch_tree_unlock(sch);
err = 0; err = 0;
done: done:
if (rtab)
qdisc_put_rtab(rtab);
if (ptab)
qdisc_put_rtab(ptab);
return err; return err;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment