Commit 029ac211 authored by David S. Miller's avatar David S. Miller

Merge branch 'net-sched-singly-linked-list'

Florian Westphal says:

====================
sched: convert queues to single-linked list

During Netfilter Workshop 2016 Eric Dumazet pointed out that qdisc
schedulers use doubly-linked lists, even though a singly-linked list
would be enough.

The double-linked skb lists incur one extra write on enqueue/dequeue
operations (to change ->prev pointer of next list elem).

This series converts qdiscs to a singly-linked version; the list head
maintains pointers to the first skb (for dequeue) and the last skb (for enqueue).

Most qdiscs don't queue at all and instead use a leaf qdisc (typically
pfifo_fast) so only a few schedulers needed changes.

I briefly tested netem and htb and they seemed fine.

UDP_STREAM netperf with 64 byte packets via veth+pfifo_fast shows
a small (~2%) improvement.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 106323b9 48da34b7
...@@ -36,6 +36,14 @@ struct qdisc_size_table { ...@@ -36,6 +36,14 @@ struct qdisc_size_table {
u16 data[]; u16 data[];
}; };
/* similar to sk_buff_head, but skb->prev pointer is undefined. */
struct qdisc_skb_head {
struct sk_buff *head;	/* first skb; NULL when the queue is empty */
struct sk_buff *tail;	/* last skb, for O(1) enqueue; NULL when empty */
__u32 qlen;		/* number of queued skbs */
spinlock_t lock;	/* NOTE(review): initialized separately in qdisc_alloc(), not by qdisc_skb_head_init() */
};
struct Qdisc { struct Qdisc {
int (*enqueue)(struct sk_buff *skb, int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch, struct Qdisc *sch,
...@@ -76,7 +84,7 @@ struct Qdisc { ...@@ -76,7 +84,7 @@ struct Qdisc {
* For performance sake on SMP, we put highly modified fields at the end * For performance sake on SMP, we put highly modified fields at the end
*/ */
struct sk_buff *gso_skb ____cacheline_aligned_in_smp; struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
struct sk_buff_head q; struct qdisc_skb_head q;
struct gnet_stats_basic_packed bstats; struct gnet_stats_basic_packed bstats;
seqcount_t running; seqcount_t running;
struct gnet_stats_queue qstats; struct gnet_stats_queue qstats;
...@@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) ...@@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
sch->qstats.overlimits++; sch->qstats.overlimits++;
} }
/* Reset @qh to the empty state (no skbs, zero length).
 * Deliberately leaves qh->lock untouched: the lock is set up once by
 * the owner (e.g. spin_lock_init() in qdisc_alloc()).
 */
static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
{
	qh->qlen = 0;
	qh->tail = NULL;
	qh->head = NULL;
}
static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff_head *list) struct qdisc_skb_head *qh)
{ {
__skb_queue_tail(list, skb); struct sk_buff *last = qh->tail;
if (last) {
skb->next = NULL;
last->next = skb;
qh->tail = skb;
} else {
qh->tail = skb;
qh->head = skb;
}
qh->qlen++;
qdisc_qstats_backlog_inc(sch, skb); qdisc_qstats_backlog_inc(sch, skb);
return NET_XMIT_SUCCESS; return NET_XMIT_SUCCESS;
...@@ -614,14 +639,16 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) ...@@ -614,14 +639,16 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
return __qdisc_enqueue_tail(skb, sch, &sch->q); return __qdisc_enqueue_tail(skb, sch, &sch->q);
} }
static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
struct sk_buff_head *list)
{ {
struct sk_buff *skb = __skb_dequeue(list); struct sk_buff *skb = qh->head;
if (likely(skb != NULL)) { if (likely(skb != NULL)) {
qdisc_qstats_backlog_dec(sch, skb); qh->head = skb->next;
qdisc_bstats_update(sch, skb); qh->qlen--;
if (qh->head == NULL)
qh->tail = NULL;
skb->next = NULL;
} }
return skb; return skb;
...@@ -629,7 +656,14 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, ...@@ -629,7 +656,14 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{ {
return __qdisc_dequeue_head(sch, &sch->q); struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
if (likely(skb != NULL)) {
qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
}
return skb;
} }
/* Instead of calling kfree_skb() while root qdisc lock is held, /* Instead of calling kfree_skb() while root qdisc lock is held,
...@@ -642,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) ...@@ -642,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
} }
static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
struct sk_buff_head *list, struct qdisc_skb_head *qh,
struct sk_buff **to_free) struct sk_buff **to_free)
{ {
struct sk_buff *skb = __skb_dequeue(list); struct sk_buff *skb = __qdisc_dequeue_head(qh);
if (likely(skb != NULL)) { if (likely(skb != NULL)) {
unsigned int len = qdisc_pkt_len(skb); unsigned int len = qdisc_pkt_len(skb);
...@@ -666,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, ...@@ -666,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{ {
return skb_peek(&sch->q); const struct qdisc_skb_head *qh = &sch->q;
return qh->head;
} }
/* generic pseudo peek method for non-work-conserving qdisc */ /* generic pseudo peek method for non-work-conserving qdisc */
...@@ -701,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) ...@@ -701,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
return skb; return skb;
} }
static inline void __qdisc_reset_queue(struct sk_buff_head *list) static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
{ {
/* /*
* We do not know the backlog in bytes of this list, it * We do not know the backlog in bytes of this list, it
* is up to the caller to correct it * is up to the caller to correct it
*/ */
if (!skb_queue_empty(list)) { ASSERT_RTNL();
rtnl_kfree_skbs(list->next, list->prev); if (qh->qlen) {
__skb_queue_head_init(list); rtnl_kfree_skbs(qh->head, qh->tail);
qh->head = NULL;
qh->tail = NULL;
qh->qlen = 0;
} }
} }
......
...@@ -69,7 +69,7 @@ struct codel_sched_data { ...@@ -69,7 +69,7 @@ struct codel_sched_data {
static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
{ {
struct Qdisc *sch = ctx; struct Qdisc *sch = ctx;
struct sk_buff *skb = __skb_dequeue(&sch->q); struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
if (skb) if (skb)
sch->qstats.backlog -= qdisc_pkt_len(skb); sch->qstats.backlog -= qdisc_pkt_len(skb);
...@@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen; qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) { while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q); struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
dropped += qdisc_pkt_len(skb); dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
......
...@@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free) struct sk_buff **to_free)
{ {
if (likely(skb_queue_len(&sch->q) < sch->limit)) if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch); return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free); return qdisc_drop(skb, sch, to_free);
...@@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{ {
unsigned int prev_backlog; unsigned int prev_backlog;
if (likely(skb_queue_len(&sch->q) < sch->limit)) if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch); return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog; prev_backlog = sch->qstats.backlog;
......
...@@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { ...@@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = {
*/ */
struct pfifo_fast_priv { struct pfifo_fast_priv {
u32 bitmap; u32 bitmap;
struct sk_buff_head q[PFIFO_FAST_BANDS]; struct qdisc_skb_head q[PFIFO_FAST_BANDS];
}; };
/* /*
...@@ -477,7 +477,7 @@ struct pfifo_fast_priv { ...@@ -477,7 +477,7 @@ struct pfifo_fast_priv {
*/ */
static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
int band) int band)
{ {
return priv->q + band; return priv->q + band;
...@@ -486,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, ...@@ -486,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free) struct sk_buff **to_free)
{ {
if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
int band = prio2band[skb->priority & TC_PRIO_MAX]; int band = prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff_head *list = band2list(priv, band); struct qdisc_skb_head *list = band2list(priv, band);
priv->bitmap |= (1 << band); priv->bitmap |= (1 << band);
qdisc->q.qlen++; qdisc->q.qlen++;
...@@ -505,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) ...@@ -505,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
int band = bitmap2band[priv->bitmap]; int band = bitmap2band[priv->bitmap];
if (likely(band >= 0)) { if (likely(band >= 0)) {
struct sk_buff_head *list = band2list(priv, band); struct qdisc_skb_head *qh = band2list(priv, band);
struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); struct sk_buff *skb = __qdisc_dequeue_head(qh);
if (likely(skb != NULL)) {
qdisc_qstats_backlog_dec(qdisc, skb);
qdisc_bstats_update(qdisc, skb);
}
qdisc->q.qlen--; qdisc->q.qlen--;
if (skb_queue_empty(list)) if (qh->qlen == 0)
priv->bitmap &= ~(1 << band); priv->bitmap &= ~(1 << band);
return skb; return skb;
...@@ -524,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) ...@@ -524,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
int band = bitmap2band[priv->bitmap]; int band = bitmap2band[priv->bitmap];
if (band >= 0) { if (band >= 0) {
struct sk_buff_head *list = band2list(priv, band); struct qdisc_skb_head *qh = band2list(priv, band);
return skb_peek(list); return qh->head;
} }
return NULL; return NULL;
...@@ -564,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) ...@@ -564,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
__skb_queue_head_init(band2list(priv, prio)); qdisc_skb_head_init(band2list(priv, prio));
/* Can by-pass the queue discipline */ /* Can by-pass the queue discipline */
qdisc->flags |= TCQ_F_CAN_BYPASS; qdisc->flags |= TCQ_F_CAN_BYPASS;
...@@ -612,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, ...@@ -612,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
sch->padded = (char *) sch - (char *) p; sch->padded = (char *) sch - (char *) p;
} }
skb_queue_head_init(&sch->q); qdisc_skb_head_init(&sch->q);
spin_lock_init(&sch->q.lock);
spin_lock_init(&sch->busylock); spin_lock_init(&sch->busylock);
lockdep_set_class(&sch->busylock, lockdep_set_class(&sch->busylock,
......
...@@ -162,7 +162,7 @@ struct htb_sched { ...@@ -162,7 +162,7 @@ struct htb_sched {
struct work_struct work; struct work_struct work;
/* non shaped skbs; let them go directly thru */ /* non shaped skbs; let them go directly thru */
struct sk_buff_head direct_queue; struct qdisc_skb_head direct_queue;
long direct_pkts; long direct_pkts;
struct qdisc_watchdog watchdog; struct qdisc_watchdog watchdog;
...@@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) ...@@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
list_del_init(&cl->un.leaf.drop_list); list_del_init(&cl->un.leaf.drop_list);
} }
/* Append @skb to the singly-linked FIFO @qh (HTB's direct queue).
 * @sch is unused; kept so the signature matches the call site.
 */
static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
			     struct qdisc_skb_head *qh)
{
	struct sk_buff *last = qh->tail;

	/* Clear ->next unconditionally: the original cleared it only on the
	 * non-empty path, so enqueueing into an empty queue could leave a
	 * stale ->next dangling off the new tail, which a later dequeue
	 * would follow.  Skbs normally arrive with ->next == NULL, but
	 * clearing here makes the list invariant explicit and robust.
	 */
	skb->next = NULL;
	if (last)
		last->next = skb;
	else
		qh->head = skb;
	qh->tail = skb;
	qh->qlen++;
}
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free) struct sk_buff **to_free)
{ {
...@@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (cl == HTB_DIRECT) { if (cl == HTB_DIRECT) {
/* enqueue to helper queue */ /* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen) { if (q->direct_queue.qlen < q->direct_qlen) {
__skb_queue_tail(&q->direct_queue, skb); htb_enqueue_tail(skb, sch, &q->direct_queue);
q->direct_pkts++; q->direct_pkts++;
} else { } else {
return qdisc_drop(skb, sch, to_free); return qdisc_drop(skb, sch, to_free);
...@@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) ...@@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
unsigned long start_at; unsigned long start_at;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */ /* try to dequeue direct packets as high prio (!) to minimize cpu work */
skb = __skb_dequeue(&q->direct_queue); skb = __qdisc_dequeue_head(&q->direct_queue);
if (skb != NULL) { if (skb != NULL) {
ok: ok:
qdisc_bstats_update(sch, skb); qdisc_bstats_update(sch, skb);
...@@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch); qdisc_watchdog_init(&q->watchdog, sch);
INIT_WORK(&q->work, htb_work_func); INIT_WORK(&q->work, htb_work_func);
__skb_queue_head_init(&q->direct_queue); qdisc_skb_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN]) if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
......
...@@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, ...@@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
return segs; return segs;
} }
static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
{
skb->next = qh->head;
if (!qh->head)
qh->tail = skb;
qh->head = skb;
qh->qlen++;
}
/* /*
* Insert one skb into qdisc. * Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length. * Note: parent depends on return value to account for queue length.
...@@ -502,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -502,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1<<(prandom_u32() % 8); 1<<(prandom_u32() % 8);
} }
if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free); return qdisc_drop(skb, sch, to_free);
qdisc_qstats_backlog_inc(sch, skb); qdisc_qstats_backlog_inc(sch, skb);
...@@ -522,8 +532,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -522,8 +532,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (q->rate) { if (q->rate) {
struct sk_buff *last; struct sk_buff *last;
if (!skb_queue_empty(&sch->q)) if (sch->q.qlen)
last = skb_peek_tail(&sch->q); last = sch->q.tail;
else else
last = netem_rb_to_skb(rb_last(&q->t_root)); last = netem_rb_to_skb(rb_last(&q->t_root));
if (last) { if (last) {
...@@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, ...@@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb->time_to_send = psched_get_time(); cb->time_to_send = psched_get_time();
q->counter = 0; q->counter = 0;
__skb_queue_head(&sch->q, skb); netem_enqueue_skb_head(&sch->q, skb);
sch->qstats.requeues++; sch->qstats.requeues++;
} }
...@@ -587,7 +597,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) ...@@ -587,7 +597,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
struct rb_node *p; struct rb_node *p;
tfifo_dequeue: tfifo_dequeue:
skb = __skb_dequeue(&sch->q); skb = __qdisc_dequeue_head(&sch->q);
if (skb) { if (skb) {
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
deliver: deliver:
......
...@@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
/* Drop excess packets if new limit is lower */ /* Drop excess packets if new limit is lower */
qlen = sch->q.qlen; qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) { while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q); struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
dropped += qdisc_pkt_len(skb); dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
...@@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) ...@@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{ {
struct sk_buff *skb; struct sk_buff *skb;
skb = __qdisc_dequeue_head(sch, &sch->q); skb = qdisc_dequeue_head(sch);
if (!skb) if (!skb)
return NULL; return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment