Commit 50612537 authored by Eric Dumazet, committed by David S. Miller

netem: fix classful handling

Commit 10f6dfcf (Revert "sch_netem: Remove classful functionality")
reintroduced classful functionality to netem, but broke basic netem
behavior :

netem uses a t(ime)fifo queue and stores timestamps in skb->cb[].

If the qdisc is changed, time constraints are not respected, and other
qdiscs can destroy skb->cb[] and block netem at dequeue time.

Fix this by always using the internal tfifo, and optionally attaching a
child qdisc (or a tree of qdiscs) to netem.

Example of use :

DEV=eth3
tc qdisc del dev $DEV root
tc qdisc add dev $DEV root handle 30: est 1sec 8sec netem delay 20ms 10ms
tc qdisc add dev $DEV handle 40:0 parent 30:0 tbf \
	burst 20480 limit 20480 mtu 1514 rate 32000bps

qdisc netem 30: root refcnt 18 limit 1000 delay 20.0ms  10.0ms
 Sent 190792 bytes 413 pkt (dropped 0, overlimits 0 requeues 0)
 rate 18416bit 3pps backlog 0b 0p requeues 0
qdisc tbf 40: parent 30: rate 256000bit burst 20Kb/8 mpu 0b lat 0us
 Sent 190792 bytes 413 pkt (dropped 6, overlimits 10 requeues 0)
 backlog 0b 5p requeues 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 32b293a5
...@@ -67,7 +67,11 @@ ...@@ -67,7 +67,11 @@
*/ */
struct netem_sched_data { struct netem_sched_data {
/* internal t(ime)fifo qdisc uses sch->q and sch->limit */
/* optional qdisc for classful handling (NULL at netem init) */
struct Qdisc *qdisc; struct Qdisc *qdisc;
struct qdisc_watchdog watchdog; struct qdisc_watchdog watchdog;
psched_tdiff_t latency; psched_tdiff_t latency;
...@@ -117,7 +121,9 @@ struct netem_sched_data { ...@@ -117,7 +121,9 @@ struct netem_sched_data {
}; };
/* Time stamp put into socket buffer control block */ /* Time stamp put into socket buffer control block
* Only valid when skbs are in our internal t(ime)fifo queue.
*/
struct netem_skb_cb { struct netem_skb_cb {
psched_time_t time_to_send; psched_time_t time_to_send;
}; };
...@@ -324,6 +330,31 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche ...@@ -324,6 +330,31 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
return PSCHED_NS2TICKS(ticks); return PSCHED_NS2TICKS(ticks);
} }
/*
 * Insert nskb into netem's own queue (sch->q), positioned according to its
 * time_to_send stamp. While the queue holds fewer than sch->limit packets
 * the skb is queued; otherwise it is handed to qdisc_reshape_fail() and
 * that function's result is returned.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct sk_buff_head *list = &sch->q;
psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < sch->limit)) {
skb = skb_peek_tail(list);
/* Optimize for add at tail */
if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
return qdisc_enqueue_tail(nskb, sch);
/* Walk from the tail towards the head until an skb that is not due later than nskb is found. */
skb_queue_reverse_walk(list, skb) {
if (tnext >= netem_skb_cb(skb)->time_to_send)
break;
}
/* Link nskb right after the position where the walk stopped. */
__skb_queue_after(list, skb, nskb);
/* qdisc_enqueue_tail() is not used on this path, so the backlog is updated by hand. */
sch->qstats.backlog += qdisc_pkt_len(nskb);
return NET_XMIT_SUCCESS;
}
return qdisc_reshape_fail(nskb, sch);
}
/* /*
* Insert one skb into qdisc. * Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length. * Note: parent depends on return value to account for queue length.
...@@ -399,7 +430,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -399,7 +430,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
now = psched_get_time(); now = psched_get_time();
if (q->rate) { if (q->rate) {
struct sk_buff_head *list = &q->qdisc->q; struct sk_buff_head *list = &sch->q;
delay += packet_len_2_sched_time(skb->len, q); delay += packet_len_2_sched_time(skb->len, q);
...@@ -417,7 +448,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -417,7 +448,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cb->time_to_send = now + delay; cb->time_to_send = now + delay;
++q->counter; ++q->counter;
ret = qdisc_enqueue(skb, q->qdisc); ret = tfifo_enqueue(skb, sch);
} else { } else {
/* /*
* Do re-ordering by putting one out of N packets at the front * Do re-ordering by putting one out of N packets at the front
...@@ -426,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -426,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cb->time_to_send = psched_get_time(); cb->time_to_send = psched_get_time();
q->counter = 0; q->counter = 0;
__skb_queue_head(&q->qdisc->q, skb); __skb_queue_head(&sch->q, skb);
q->qdisc->qstats.backlog += qdisc_pkt_len(skb); q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
q->qdisc->qstats.requeues++; q->qdisc->qstats.requeues++;
ret = NET_XMIT_SUCCESS; ret = NET_XMIT_SUCCESS;
...@@ -439,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -439,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
} }
} }
sch->q.qlen++;
return NET_XMIT_SUCCESS; return NET_XMIT_SUCCESS;
} }
static unsigned int netem_drop(struct Qdisc *sch) static unsigned int netem_drop(struct Qdisc *sch)
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
unsigned int len = 0; unsigned int len;
if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { len = qdisc_queue_drop(sch);
sch->q.qlen--; if (!len && q->qdisc && q->qdisc->ops->drop)
len = q->qdisc->ops->drop(q->qdisc);
if (len)
sch->qstats.drops++; sch->qstats.drops++;
}
return len; return len;
} }
...@@ -463,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) ...@@ -463,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
if (qdisc_is_throttled(sch)) if (qdisc_is_throttled(sch))
return NULL; return NULL;
skb = q->qdisc->ops->peek(q->qdisc); tfifo_dequeue:
skb = qdisc_peek_head(sch);
if (skb) { if (skb) {
const struct netem_skb_cb *cb = netem_skb_cb(skb); const struct netem_skb_cb *cb = netem_skb_cb(skb);
psched_time_t now = psched_get_time();
/* if more time remaining? */ /* if more time remaining? */
if (cb->time_to_send <= now) { if (cb->time_to_send <= psched_get_time()) {
skb = qdisc_dequeue_peeked(q->qdisc); skb = qdisc_dequeue_tail(sch);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; goto qdisc_dequeue;
#ifdef CONFIG_NET_CLS_ACT #ifdef CONFIG_NET_CLS_ACT
/* /*
...@@ -483,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) ...@@ -483,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
skb->tstamp.tv64 = 0; skb->tstamp.tv64 = 0;
#endif #endif
sch->q.qlen--; if (q->qdisc) {
int err = qdisc_enqueue(skb, q->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
sch->qstats.drops++;
qdisc_tree_decrease_qlen(sch, 1);
}
}
goto tfifo_dequeue;
}
deliver:
qdisc_unthrottled(sch); qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb); qdisc_bstats_update(sch, skb);
return skb; return skb;
} }
if (q->qdisc) {
skb = q->qdisc->ops->dequeue(q->qdisc);
if (skb)
goto deliver;
}
qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
} }
qdisc_dequeue:
if (q->qdisc) {
skb = q->qdisc->ops->dequeue(q->qdisc);
if (skb)
goto deliver;
}
return NULL; return NULL;
} }
...@@ -499,8 +553,9 @@ static void netem_reset(struct Qdisc *sch) ...@@ -499,8 +553,9 @@ static void netem_reset(struct Qdisc *sch)
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset_queue(sch);
if (q->qdisc)
qdisc_reset(q->qdisc); qdisc_reset(q->qdisc);
sch->q.qlen = 0;
qdisc_watchdog_cancel(&q->watchdog); qdisc_watchdog_cancel(&q->watchdog);
} }
...@@ -690,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -690,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = fifo_set_limit(q->qdisc, qopt->limit); sch->limit = qopt->limit;
if (ret) {
pr_info("netem: can't set fifo limit\n");
return ret;
}
q->latency = qopt->latency; q->latency = qopt->latency;
q->jitter = qopt->jitter; q->jitter = qopt->jitter;
...@@ -735,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -735,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
return ret; return ret;
} }
/*
* Special case version of FIFO queue for use by netem.
* It queues in order based on timestamps in skb's
*/
/* Private state of the standalone tfifo qdisc. */
struct fifo_sched_data {
/* Maximum queue length; tfifo_enqueue() compares skb_queue_len() against it. */
u32 limit;
/* time_to_send of the skb most recently appended at the tail by tfifo_enqueue(). */
psched_time_t oldest;
};
/*
 * Enqueue nskb on the tfifo qdisc's list, positioned according to its
 * time_to_send stamp. While the list holds fewer than q->limit packets the
 * skb is queued; otherwise it is handed to qdisc_reshape_fail() and that
 * function's result is returned.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct sk_buff_head *list = &sch->q;
psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < q->limit)) {
/* Optimize for add at tail */
if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
/* Remember the stamp of the new tail for the next fast-path check. */
q->oldest = tnext;
return qdisc_enqueue_tail(nskb, sch);
}
/* Walk from the tail towards the head until an skb that is not due later than nskb is found. */
skb_queue_reverse_walk(list, skb) {
const struct netem_skb_cb *cb = netem_skb_cb(skb);
if (tnext >= cb->time_to_send)
break;
}
/* Link nskb right after the position where the walk stopped. */
__skb_queue_after(list, skb, nskb);
/* qdisc_enqueue_tail() is not used on this path, so the backlog is updated by hand. */
sch->qstats.backlog += qdisc_pkt_len(nskb);
return NET_XMIT_SUCCESS;
}
return qdisc_reshape_fail(nskb, sch);
}
/*
 * Set up (or reconfigure; this function is also installed as .change) the
 * tfifo qdisc. With an option attribute the limit comes from its
 * struct tc_fifo_qopt payload; without one it defaults to the device's
 * tx_queue_len, but never less than 1.
 * Returns 0 on success, or -EINVAL when the attribute is too short.
 */
static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
struct fifo_sched_data *q = qdisc_priv(sch);
if (opt) {
struct tc_fifo_qopt *ctl = nla_data(opt);
/* Reject a payload too small to hold a tc_fifo_qopt. */
if (nla_len(opt) < sizeof(*ctl))
return -EINVAL;
q->limit = ctl->limit;
} else
q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
/* Reset the tail stamp used by tfifo_enqueue()'s fast path. */
q->oldest = PSCHED_PASTPERFECT;
return 0;
}
/*
 * Dump the tfifo configuration: emits the current limit as a
 * struct tc_fifo_qopt under TCA_OPTIONS.
 * Returns skb->len on success, or -1 via the nla_put_failure label.
 */
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct tc_fifo_qopt opt = { .limit = q->limit };
/* NOTE(review): NLA_PUT presumably jumps to nla_put_failure when the attribute cannot be added - confirm. */
NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
return skb->len;
nla_put_failure:
return -1;
}
/*
 * Operations table for the "tfifo" qdisc: time-ordered enqueue via
 * tfifo_enqueue(), generic head-of-queue helpers for dequeue/peek/drop/reset,
 * and tfifo_init() shared between .init and .change.
 */
static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
.id = "tfifo",
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = tfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.drop = qdisc_queue_drop,
.init = tfifo_init,
.reset = qdisc_reset_queue,
.change = tfifo_init,
.dump = tfifo_dump,
};
static int netem_init(struct Qdisc *sch, struct nlattr *opt) static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
...@@ -828,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -828,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch); qdisc_watchdog_init(&q->watchdog, sch);
q->loss_model = CLG_RANDOM; q->loss_model = CLG_RANDOM;
q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
if (!q->qdisc) {
pr_notice("netem: qdisc create tfifo qdisc failed\n");
return -ENOMEM;
}
ret = netem_change(sch, opt); ret = netem_change(sch, opt);
if (ret) { if (ret)
pr_info("netem: change failed\n"); pr_info("netem: change failed\n");
qdisc_destroy(q->qdisc);
}
return ret; return ret;
} }
...@@ -848,6 +808,7 @@ static void netem_destroy(struct Qdisc *sch) ...@@ -848,6 +808,7 @@ static void netem_destroy(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog); qdisc_watchdog_cancel(&q->watchdog);
if (q->qdisc)
qdisc_destroy(q->qdisc); qdisc_destroy(q->qdisc);
dist_free(q->delay_dist); dist_free(q->delay_dist);
} }
...@@ -952,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl, ...@@ -952,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
if (cl != 1) /* only one class */ if (cl != 1 || !q->qdisc) /* only one class */
return -ENOENT; return -ENOENT;
tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_handle |= TC_H_MIN(1);
...@@ -966,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, ...@@ -966,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
if (new == NULL)
new = &noop_qdisc;
sch_tree_lock(sch); sch_tree_lock(sch);
*old = q->qdisc; *old = q->qdisc;
q->qdisc = new; q->qdisc = new;
if (*old) {
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old); qdisc_reset(*old);
}
sch_tree_unlock(sch); sch_tree_unlock(sch);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment