Commit 88da48f4 authored by David S. Miller

Merge branch 'sched_skb_free_defer'

Eric Dumazet says:

====================
net_sched: defer skb freeing while changing qdiscs

qdiscs/classes are changed under RTNL protection and often
while blocking BH and root qdisc spinlock.

When lots of skbs need to be dropped, we free
them under these locks causing TX/RX freezes,
and more generally latency spikes.

I saw spikes of 50+ ms on quite fast hardware...

This patch series adds a simple queue protected by RTNL
where skbs can be placed until RTNL is released.

Note that this might also serve in the future for optional
reinjection of packets when a qdisc is replaced.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 35c55c98 fea02478
...@@ -89,8 +89,9 @@ void net_inc_egress_queue(void); ...@@ -89,8 +89,9 @@ void net_inc_egress_queue(void);
void net_dec_egress_queue(void); void net_dec_egress_queue(void);
#endif #endif
extern void rtnetlink_init(void); void rtnetlink_init(void);
extern void __rtnl_unlock(void); void __rtnl_unlock(void);
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail);
#define ASSERT_RTNL() do { \ #define ASSERT_RTNL() do { \
if (unlikely(!rtnl_is_locked())) { \ if (unlikely(!rtnl_is_locked())) { \
......
...@@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) ...@@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
return skb; return skb;
} }
static inline void __qdisc_reset_queue(struct Qdisc *sch, static inline void __qdisc_reset_queue(struct sk_buff_head *list)
struct sk_buff_head *list)
{ {
/* /*
* We do not know the backlog in bytes of this list, it * We do not know the backlog in bytes of this list, it
* is up to the caller to correct it * is up to the caller to correct it
*/ */
__skb_queue_purge(list); if (!skb_queue_empty(list)) {
rtnl_kfree_skbs(list->next, list->prev);
__skb_queue_head_init(list);
}
} }
static inline void qdisc_reset_queue(struct Qdisc *sch) static inline void qdisc_reset_queue(struct Qdisc *sch)
{ {
__qdisc_reset_queue(sch, &sch->q); __qdisc_reset_queue(&sch->q);
sch->qstats.backlog = 0; sch->qstats.backlog = 0;
} }
...@@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, ...@@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
return old; return old;
} }
/*
 * Drop @skb on behalf of @sch without freeing it on the spot: the skb is
 * handed to rtnl_kfree_skbs() as a one-element chain (head == tail), and
 * qdisc_qstats_drop() is then called for @sch.
 * NOTE(review): the series description says the deferred queue is
 * protected by RTNL, so callers presumably must hold RTNL - confirm.
 */
static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
rtnl_kfree_skbs(skb, skb);
qdisc_qstats_drop(sch);
}
static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{ {
kfree_skb(skb); kfree_skb(skb);
......
...@@ -71,9 +71,31 @@ void rtnl_lock(void) ...@@ -71,9 +71,31 @@ void rtnl_lock(void)
} }
EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_lock);
/*
 * Head of the chain of skbs whose freeing has been deferred. Entries are
 * linked through skb->next; __rtnl_unlock() detaches the chain and frees
 * each entry with kfree_skb() after dropping rtnl_mutex.
 */
static struct sk_buff *defer_kfree_skb_list;
/*
 * rtnl_kfree_skbs - queue a chain of skbs for deferred freeing
 * @head: first skb of the chain
 * @tail: last skb of the chain (equal to @head for a single skb)
 *
 * Splices the chain in front of defer_kfree_skb_list: @tail->next is
 * overwritten to point at the previous list head, and @head becomes the
 * new list head. Does nothing when either pointer is NULL.
 *
 * The skbs between @head and @tail are not touched here, so they are
 * expected to be linked through ->next already.
 * NOTE(review): this function takes no lock itself; the series description
 * says the queue is protected by RTNL - confirm that all callers hold it.
 */
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
if (head && tail) {
/* Prepend: the old list hangs off the tail of the new chain. */
tail->next = defer_kfree_skb_list;
defer_kfree_skb_list = head;
}
}
EXPORT_SYMBOL(rtnl_kfree_skbs);
void __rtnl_unlock(void) void __rtnl_unlock(void)
{ {
struct sk_buff *head = defer_kfree_skb_list;
defer_kfree_skb_list = NULL;
mutex_unlock(&rtnl_mutex); mutex_unlock(&rtnl_mutex);
while (head) {
struct sk_buff *next = head->next;
kfree_skb(head);
cond_resched();
head = next;
}
} }
void rtnl_unlock(void) void rtnl_unlock(void)
......
...@@ -375,11 +375,11 @@ static void choke_reset(struct Qdisc *sch) ...@@ -375,11 +375,11 @@ static void choke_reset(struct Qdisc *sch)
q->head = (q->head + 1) & q->tab_mask; q->head = (q->head + 1) & q->tab_mask;
if (!skb) if (!skb)
continue; continue;
qdisc_qstats_backlog_dec(sch, skb); rtnl_qdisc_drop(skb, sch);
--sch->q.qlen;
qdisc_drop(skb, sch);
} }
sch->q.qlen = 0;
sch->qstats.backlog = 0;
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0; q->head = q->tail = 0;
red_restart(&q->vars); red_restart(&q->vars);
...@@ -455,7 +455,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -455,7 +455,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb); dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen; --sch->q.qlen;
qdisc_drop(skb, sch); rtnl_qdisc_drop(skb, sch);
} }
qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0; q->head = 0;
......
...@@ -174,7 +174,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -174,7 +174,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb); dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch); rtnl_qdisc_drop(skb, sch);
} }
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
......
...@@ -514,17 +514,25 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) ...@@ -514,17 +514,25 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
return skb; return skb;
} }
/*
 * Empty @flow by passing its head..tail skb chain to rtnl_kfree_skbs()
 * in a single call, then mark the flow empty (head cleared, qlen zeroed).
 * flow->tail is left as-is; only flow->head and flow->qlen are reset.
 */
static void fq_flow_purge(struct fq_flow *flow)
{
rtnl_kfree_skbs(flow->head, flow->tail);
flow->head = NULL;
flow->qlen = 0;
}
static void fq_reset(struct Qdisc *sch) static void fq_reset(struct Qdisc *sch)
{ {
struct fq_sched_data *q = qdisc_priv(sch); struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *root; struct rb_root *root;
struct sk_buff *skb;
struct rb_node *p; struct rb_node *p;
struct fq_flow *f; struct fq_flow *f;
unsigned int idx; unsigned int idx;
while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) sch->q.qlen = 0;
kfree_skb(skb); sch->qstats.backlog = 0;
fq_flow_purge(&q->internal);
if (!q->fq_root) if (!q->fq_root)
return; return;
...@@ -535,8 +543,7 @@ static void fq_reset(struct Qdisc *sch) ...@@ -535,8 +543,7 @@ static void fq_reset(struct Qdisc *sch)
f = container_of(p, struct fq_flow, fq_node); f = container_of(p, struct fq_flow, fq_node);
rb_erase(p, root); rb_erase(p, root);
while ((skb = fq_dequeue_head(sch, f)) != NULL) fq_flow_purge(f);
kfree_skb(skb);
kmem_cache_free(fq_flow_cachep, f); kmem_cache_free(fq_flow_cachep, f);
} }
...@@ -737,7 +744,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -737,7 +744,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb) if (!skb)
break; break;
drop_len += qdisc_pkt_len(skb); drop_len += qdisc_pkt_len(skb);
kfree_skb(skb); rtnl_kfree_skbs(skb, skb);
drop_count++; drop_count++;
} }
qdisc_tree_reduce_backlog(sch, drop_count, drop_len); qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
......
...@@ -336,6 +336,12 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) ...@@ -336,6 +336,12 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
return skb; return skb;
} }
/*
 * Empty @flow by passing its head..tail skb chain to rtnl_kfree_skbs()
 * in a single call, then clear flow->head. flow->tail is left as-is.
 */
static void fq_codel_flow_purge(struct fq_codel_flow *flow)
{
rtnl_kfree_skbs(flow->head, flow->tail);
flow->head = NULL;
}
static void fq_codel_reset(struct Qdisc *sch) static void fq_codel_reset(struct Qdisc *sch)
{ {
struct fq_codel_sched_data *q = qdisc_priv(sch); struct fq_codel_sched_data *q = qdisc_priv(sch);
...@@ -346,18 +352,13 @@ static void fq_codel_reset(struct Qdisc *sch) ...@@ -346,18 +352,13 @@ static void fq_codel_reset(struct Qdisc *sch)
for (i = 0; i < q->flows_cnt; i++) { for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i; struct fq_codel_flow *flow = q->flows + i;
while (flow->head) { fq_codel_flow_purge(flow);
struct sk_buff *skb = dequeue_head(flow);
qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
}
INIT_LIST_HEAD(&flow->flowchain); INIT_LIST_HEAD(&flow->flowchain);
codel_vars_init(&flow->cvars); codel_vars_init(&flow->cvars);
} }
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
sch->q.qlen = 0; sch->q.qlen = 0;
sch->qstats.backlog = 0;
q->memory_usage = 0; q->memory_usage = 0;
} }
...@@ -433,7 +434,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -433,7 +434,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
struct sk_buff *skb = fq_codel_dequeue(sch); struct sk_buff *skb = fq_codel_dequeue(sch);
q->cstats.drop_len += qdisc_pkt_len(skb); q->cstats.drop_len += qdisc_pkt_len(skb);
kfree_skb(skb); rtnl_kfree_skbs(skb, skb);
q->cstats.drop_count++; q->cstats.drop_count++;
} }
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
......
...@@ -493,7 +493,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) ...@@ -493,7 +493,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
__qdisc_reset_queue(qdisc, band2list(priv, prio)); __qdisc_reset_queue(band2list(priv, prio));
priv->bitmap = 0; priv->bitmap = 0;
qdisc->qstats.backlog = 0; qdisc->qstats.backlog = 0;
......
...@@ -464,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch) ...@@ -464,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch)
struct sk_buff *skb; struct sk_buff *skb;
while ((skb = hhf_dequeue(sch)) != NULL) while ((skb = hhf_dequeue(sch)) != NULL)
kfree_skb(skb); rtnl_kfree_skbs(skb, skb);
} }
static void *hhf_zalloc(size_t sz) static void *hhf_zalloc(size_t sz)
...@@ -574,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -574,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) { while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch); struct sk_buff *skb = hhf_dequeue(sch);
kfree_skb(skb); rtnl_kfree_skbs(skb, skb);
} }
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
prev_backlog - sch->qstats.backlog); prev_backlog - sch->qstats.backlog);
......
...@@ -957,7 +957,7 @@ static void htb_reset(struct Qdisc *sch) ...@@ -957,7 +957,7 @@ static void htb_reset(struct Qdisc *sch)
} }
} }
qdisc_watchdog_cancel(&q->watchdog); qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue); __qdisc_reset_queue(&q->direct_queue);
sch->q.qlen = 0; sch->q.qlen = 0;
sch->qstats.backlog = 0; sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->hlevel, 0, sizeof(q->hlevel));
...@@ -1231,7 +1231,7 @@ static void htb_destroy(struct Qdisc *sch) ...@@ -1231,7 +1231,7 @@ static void htb_destroy(struct Qdisc *sch)
htb_destroy_class(sch, cl); htb_destroy_class(sch, cl);
} }
qdisc_class_hash_destroy(&q->clhash); qdisc_class_hash_destroy(&q->clhash);
__skb_queue_purge(&q->direct_queue); __qdisc_reset_queue(&q->direct_queue);
} }
static int htb_delete(struct Qdisc *sch, unsigned long arg) static int htb_delete(struct Qdisc *sch, unsigned long arg)
......
...@@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch) ...@@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch)
struct sk_buff *skb = netem_rb_to_skb(p); struct sk_buff *skb = netem_rb_to_skb(p);
rb_erase(p, &q->t_root); rb_erase(p, &q->t_root);
skb->next = NULL; rtnl_kfree_skbs(skb, skb);
skb->prev = NULL;
kfree_skb(skb);
} }
} }
......
...@@ -234,7 +234,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -234,7 +234,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
dropped += qdisc_pkt_len(skb); dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb); qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch); rtnl_qdisc_drop(skb, sch);
} }
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
......
...@@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch) ...@@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch)
struct sk_buff *skb; struct sk_buff *skb;
while ((skb = sfq_dequeue(sch)) != NULL) while ((skb = sfq_dequeue(sch)) != NULL)
kfree_skb(skb); rtnl_kfree_skbs(skb, skb);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment