Commit 5889a62b authored by David S. Miller

Merge branch 'sch_fq-optimizations'

Eric Dumazet says:

====================
net_sched: sch_fq: round of optimizations

This series is focused on a better layout of struct fq_flow to
reduce the number of cache line misses in fq_enqueue() and dequeue operations.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ee1bd483 348e289b
@@ -66,22 +66,27 @@ static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
  * in linear list (head,tail), otherwise are placed in a rbtree (t_root).
  */
 struct fq_flow {
+/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
 	struct rb_root	t_root;
 	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
 	union {
 		struct sk_buff *tail;	/* last skb in the list */
-		unsigned long  age;	/* jiffies when flow was emptied, for gc */
+		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
 	};
 	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
 	struct sock	*sk;
+	u32		socket_hash;	/* sk_hash */
 	int		qlen;		/* number of packets in flow queue */
+
+/* Second cache line, used in fq_dequeue() */
 	int		credit;
-	u32		socket_hash;	/* sk_hash */
-	struct fq_flow *next;		/* next pointer in RR lists, or &detached */
+	/* 32bit hole on 64bit arches */
+
+	struct fq_flow *next;		/* next pointer in RR lists */
 
 	struct rb_node  rate_node;	/* anchor in q->delayed tree */
 	u64		time_next_packet;
-};
+} ____cacheline_aligned_in_smp;
 
 struct fq_flow_head {
 	struct fq_flow *first;
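For illustration only, not part of the patch: a sketch of how the intended cache-line split could be checked at build time. It assumes a 64-bit build with 64-byte cache lines, where the fields used by fq_gc()/fq_enqueue() (t_root through qlen) end at offset 64 and the dequeue-only fields start there; offsetofend() and static_assert() are the kernel helpers, to the best of my recollection.

	/* Hypothetical build-time checks, assuming 64-bit pointers and
	 * L1_CACHE_BYTES == 64: the enqueue/gc fields must fit in the
	 * first cache line, and the dequeue-only fields must follow it.
	 */
	static_assert(offsetofend(struct fq_flow, qlen) <= 64);
	static_assert(offsetof(struct fq_flow, credit) >= 64);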
@@ -126,20 +131,25 @@ struct fq_sched_data {
 	struct qdisc_watchdog watchdog;
 };
 
-/* special value to mark a detached flow (not on old/new list) */
-static struct fq_flow detached, throttled;
-
+/*
+ * f->tail and f->age share the same location.
+ * We can use the low order bit to differentiate if this location points
+ * to a sk_buff or contains a jiffies value, if we force this value to be odd.
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
+ */
 static void fq_flow_set_detached(struct fq_flow *f)
 {
-	f->next = &detached;
-	f->age = jiffies;
+	f->age = jiffies | 1UL;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
 {
-	return f->next == &detached;
+	return !!(f->age & 1UL);
 }
 
+/* special value to mark a throttled flow (not on old/new list) */
+static struct fq_flow throttled;
+
 static bool fq_flow_is_throttled(const struct fq_flow *f)
 {
 	return f->next == &throttled;
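A standalone user-space sketch, not kernel code, of the tagging trick the comment above describes: a sufficiently aligned pointer always has its low bit clear, so forcing the low bit on when storing a jiffies-style timestamp makes the two meanings of the shared word distinguishable. All names below (fake_skb, slot, set_detached, ...) are invented stand-ins; the only properties borrowed from the kernel are alignof(struct sk_buff) >= 2 and sizeof(unsigned long) == sizeof(void *).

	#include <assert.h>
	#include <stdio.h>

	struct fake_skb { long payload; };	/* stand-in; alignment >= 2 is all that matters */

	union slot {
		struct fake_skb *tail;	/* even value: pointer to the last skb */
		unsigned long age;	/* odd value: (jiffies | 1UL) when the flow went idle */
	};

	static void set_detached(union slot *s, unsigned long now_jiffies)
	{
		s->age = now_jiffies | 1UL;	/* force the low bit: cannot be a valid pointer */
	}

	static int is_detached(const union slot *s)
	{
		return s->age & 1UL;		/* odd => timestamp, even => pointer */
	}

	int main(void)
	{
		struct fake_skb skb;
		union slot s;

		s.tail = &skb;			/* aligned pointer, low bit is 0 */
		assert(!is_detached(&s));

		set_detached(&s, 123456UL);
		assert(is_detached(&s));
		printf("stored age: %lu\n", s.age);
		return 0;
	}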
@@ -204,9 +214,10 @@ static void fq_gc(struct fq_sched_data *q,
 		  struct rb_root *root,
 		  struct sock *sk)
 {
-	struct fq_flow *f, *tofree[FQ_GC_MAX];
 	struct rb_node **p, *parent;
-	int fcnt = 0;
+	void *tofree[FQ_GC_MAX];
+	struct fq_flow *f;
+	int i, fcnt = 0;
 
 	p = &root->rb_node;
 	parent = NULL;
@@ -229,15 +240,18 @@ static void fq_gc(struct fq_sched_data *q,
 			p = &parent->rb_left;
 	}
 
+	if (!fcnt)
+		return;
+
+	for (i = fcnt; i > 0; ) {
+		f = tofree[--i];
+		rb_erase(&f->fq_node, root);
+	}
 	q->flows -= fcnt;
 	q->inactive_flows -= fcnt;
 	q->stat_gc_flows += fcnt;
-	while (fcnt) {
-		struct fq_flow *f = tofree[--fcnt];
 
-		rb_erase(&f->fq_node, root);
-		kmem_cache_free(fq_flow_cachep, f);
-	}
+	kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
 }
 
 static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
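With this hunk the gc path unlinks all victims from the rbtree first, then releases them with a single kmem_cache_free_bulk() call; as far as I recall that helper takes a void ** array, which is why tofree[] became an array of void * (no cast needed at the call site). For illustration only, a user-space sketch of the same gather-then-batch-free shape; free_bulk() is an invented stand-in for the slab helper.

	#include <stdio.h>
	#include <stdlib.h>

	#define GC_MAX 8

	/* Invented stand-in for kmem_cache_free_bulk(): release a whole batch at once. */
	static void free_bulk(size_t nr, void **p)
	{
		while (nr)
			free(p[--nr]);
	}

	int main(void)
	{
		void *tofree[GC_MAX];	/* void * so the array can be handed over directly */
		size_t fcnt = 0;

		/* Gather phase: collect victims while scanning (here, just allocate a few). */
		for (int i = 0; i < 5; i++)
			tofree[fcnt++] = malloc(64);

		if (!fcnt)
			return 0;

		/* Release phase: one batched call instead of one free per object. */
		free_bulk(fcnt, tofree);
		printf("freed %zu objects in one batch\n", fcnt);
		return 0;
	}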
@@ -370,19 +384,17 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
 	}
 }
 
-/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
+/* Remove one skb from flow queue.
+ * This skb must be the return value of prior fq_peek().
+ */
+static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
+			   struct sk_buff *skb)
 {
-	struct sk_buff *skb = fq_peek(flow);
-
-	if (skb) {
-		fq_erase_head(sch, flow, skb);
-		skb_mark_not_on_list(skb);
-		flow->qlen--;
-		qdisc_qstats_backlog_dec(sch, skb);
-		sch->q.qlen--;
-	}
-	return skb;
+	fq_erase_head(sch, flow, skb);
+	skb_mark_not_on_list(skb);
+	flow->qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	sch->q.qlen--;
 }
 
 static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
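The new helper's contract, "this skb must be the return value of prior fq_peek()", lets fq_dequeue() look at the head packet, decide whether to throttle the flow, ECN-mark or send it, and only then pay for the removal, instead of dequeuing unconditionally. A standalone sketch of that peek-then-commit shape; every name here is invented and this is not kernel code.

	#include <stdio.h>

	struct node { struct node *next; int len; };
	struct queue { struct node *head; int qlen; };

	static struct node *q_peek(struct queue *q)
	{
		return q->head;			/* inspect the head without removing it */
	}

	static void q_dequeue_node(struct queue *q, struct node *n)
	{
		/* n must be the value returned by the prior q_peek() */
		q->head = n->next;
		n->next = NULL;
		q->qlen--;
	}

	int main(void)
	{
		struct node a = { .next = NULL, .len = 100 };
		struct queue q = { .head = &a, .qlen = 1 };
		struct node *n = q_peek(&q);

		if (n && n->len <= 1500) {	/* decide based on the head packet... */
			q_dequeue_node(&q, n);	/* ...then commit to removing it */
			printf("dequeued len %d, qlen now %d\n", n->len, q.qlen);
		}
		return 0;
	}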
@@ -494,9 +506,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	if (!sch->q.qlen)
 		return NULL;
 
-	skb = fq_dequeue_head(sch, &q->internal);
-	if (skb)
+	skb = fq_peek(&q->internal);
+	if (unlikely(skb)) {
+		fq_dequeue_skb(sch, &q->internal, skb);
 		goto out;
+	}
 
 	now = ktime_get_ns();
 	fq_check_throttled(q, now);
@@ -532,14 +546,13 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 			fq_flow_set_throttled(q, f);
 			goto begin;
 		}
+		prefetch(&skb->end);
 		if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
 			INET_ECN_set_ce(skb);
 			q->stat_ce_mark++;
 		}
-	}
-
-	skb = fq_dequeue_head(sch, f);
-	if (!skb) {
+		fq_dequeue_skb(sch, f, skb);
+	} else {
 		head->first = f->next;
 		/* force a pass through old_flows to prevent starvation */
 		if ((head == &q->new_flows) && q->old_flows.first) {
@@ -550,7 +563,6 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 		}
 		goto begin;
 	}
-	prefetch(&skb->end);
 	plen = qdisc_pkt_len(skb);
 	f->credit -= plen;
 
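With the two hunks above, prefetch(&skb->end) is issued as soon as the packet is known to be transmitted rather than just before the length/credit computation, giving the load more time to complete before the tail of the sk_buff is actually read. A standalone sketch of the underlying idea using __builtin_prefetch(), which the generic kernel prefetch() maps to on many architectures; the struct and sizes are illustrative only.

	#include <stdio.h>

	struct big {
		char hot[64];		/* touched immediately */
		char cold[64];		/* touched a little later */
	};

	static long work(struct big *b)
	{
		long sum = 0;
		int i;

		__builtin_prefetch(b->cold);	/* start loading the cold line early... */

		for (i = 0; i < 64; i++)	/* ...do useful work on the hot part... */
			sum += b->hot[i];

		for (i = 0; i < 64; i++)	/* ...then read the (hopefully resident) cold part */
			sum += b->cold[i];
		return sum;
	}

	int main(void)
	{
		struct big b = { .hot = { 1 }, .cold = { 2 } };

		printf("%ld\n", work(&b));
		return 0;
	}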