Commit eda83e3b authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

net_sched: sch_sfq: better struct layouts

Here is a respin of patch.

I'll send a short patch to make SFQ more fair in presence of large
packets as well.

Thanks

[PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts

This patch shrinks sizeof(struct sfq_sched_data)
from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and
reduce text size as well.

   text    data     bss     dec     hex filename
   4821     152       0    4973    136d old/net/sched/sch_sfq.o
   4627     136       0    4763    129b new/net/sched/sch_sfq.o

All data for a slot/flow is now grouped in a compact and cache friendly
structure, instead of being spreaded in many different points.

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */
};
Signed-off-by: default avatarEric Dumazet <eric.dumazet@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent d9993be6
...@@ -67,27 +67,42 @@ ...@@ -67,27 +67,42 @@
IMPLEMENTATION: IMPLEMENTATION:
This implementation limits maximal queue length to 128; This implementation limits maximal queue length to 128;
maximal mtu to 2^15-1; number of hash buckets to 1024. maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
The only goal of this restrictions was that all data The only goal of this restrictions was that all data
fit into one 4K page :-). Struct sfq_sched_data is fit into one 4K page on 32bit arches.
organized in anti-cache manner: all the data for a bucket
are scattered over different locations. This is not good,
but it allowed me to put it into 4K.
It is easy to increase these values, but not in flight. */ It is easy to increase these values, but not in flight. */
#define SFQ_DEPTH 128 #define SFQ_DEPTH 128 /* max number of packets per flow */
#define SFQ_SLOTS 128 /* max number of flows */
#define SFQ_EMPTY_SLOT 255
#define SFQ_HASH_DIVISOR 1024 #define SFQ_HASH_DIVISOR 1024
/* This type should contain at least SFQ_DEPTH*2 values */ /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
typedef unsigned char sfq_index; typedef unsigned char sfq_index;
/*
* We dont use pointers to save space.
* Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
* while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
* are 'pointers' to dep[] array
*/
struct sfq_head struct sfq_head
{ {
sfq_index next; sfq_index next;
sfq_index prev; sfq_index prev;
}; };
struct sfq_slot {
struct sk_buff *skblist_next;
struct sk_buff *skblist_prev;
sfq_index qlen; /* number of skbs in skblist */
sfq_index next; /* next slot in sfq chain */
struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */
short allot; /* credit for this slot */
};
struct sfq_sched_data struct sfq_sched_data
{ {
/* Parameters */ /* Parameters */
...@@ -99,17 +114,24 @@ struct sfq_sched_data ...@@ -99,17 +114,24 @@ struct sfq_sched_data
struct tcf_proto *filter_list; struct tcf_proto *filter_list;
struct timer_list perturb_timer; struct timer_list perturb_timer;
u32 perturbation; u32 perturbation;
sfq_index tail; /* Index of current slot in round */ sfq_index cur_depth; /* depth of longest slot */
sfq_index max_depth; /* Maximal depth */
struct sfq_slot *tail; /* current slot in round */
sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
sfq_index next[SFQ_DEPTH]; /* Active slots link */ struct sfq_slot slots[SFQ_SLOTS];
short allot[SFQ_DEPTH]; /* Current allotment per slot */ struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
}; };
/*
* sfq_head are either in a sfq_slot or in dep[] array
*/
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
if (val < SFQ_SLOTS)
return &q->slots[val].dep;
return &q->dep[val - SFQ_SLOTS];
}
static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
{ {
return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
...@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, ...@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return 0; return 0;
} }
/*
* x : slot number [0 .. SFQ_SLOTS - 1]
*/
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{ {
sfq_index p, n; sfq_index p, n;
int d = q->qs[x].qlen + SFQ_DEPTH; int qlen = q->slots[x].qlen;
p = qlen + SFQ_SLOTS;
n = q->dep[qlen].next;
p = d; q->slots[x].dep.next = n;
n = q->dep[d].next; q->slots[x].dep.prev = p;
q->dep[x].next = n;
q->dep[x].prev = p; q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
q->dep[p].next = q->dep[n].prev = x; sfq_dep_head(q, n)->prev = x;
} }
#define sfq_unlink(q, x, n, p) \
n = q->slots[x].dep.next; \
p = q->slots[x].dep.prev; \
sfq_dep_head(q, p)->next = n; \
sfq_dep_head(q, n)->prev = p
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{ {
sfq_index p, n; sfq_index p, n;
int d;
n = q->dep[x].next; sfq_unlink(q, x, n, p);
p = q->dep[x].prev;
q->dep[p].next = n;
q->dep[n].prev = p;
if (n == p && q->max_depth == q->qs[x].qlen + 1)
q->max_depth--;
d = q->slots[x].qlen--;
if (n == p && q->cur_depth == d)
q->cur_depth--;
sfq_link(q, x); sfq_link(q, x);
} }
...@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) ...@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
sfq_index p, n; sfq_index p, n;
int d; int d;
n = q->dep[x].next; sfq_unlink(q, x, n, p);
p = q->dep[x].prev;
q->dep[p].next = n;
q->dep[n].prev = p;
d = q->qs[x].qlen;
if (q->max_depth < d)
q->max_depth = d;
d = ++q->slots[x].qlen;
if (q->cur_depth < d)
q->cur_depth = d;
sfq_link(q, x); sfq_link(q, x);
} }
/* helper functions : might be changed when/if skb use a standard list_head */
/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
struct sk_buff *skb = slot->skblist_prev;
slot->skblist_prev = skb->prev;
skb->next = skb->prev = NULL;
return skb;
}
/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
struct sk_buff *skb = slot->skblist_next;
slot->skblist_next = skb->next;
skb->next = skb->prev = NULL;
return skb;
}
static inline void slot_queue_init(struct sfq_slot *slot)
{
slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}
/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
skb->prev = slot->skblist_prev;
skb->next = (struct sk_buff *)slot;
slot->skblist_prev->next = skb;
slot->skblist_prev = skb;
}
#define slot_queue_walk(slot, skb) \
for (skb = slot->skblist_next; \
skb != (struct sk_buff *)slot; \
skb = skb->next)
static unsigned int sfq_drop(struct Qdisc *sch) static unsigned int sfq_drop(struct Qdisc *sch)
{ {
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
sfq_index d = q->max_depth; sfq_index x, d = q->cur_depth;
struct sk_buff *skb; struct sk_buff *skb;
unsigned int len; unsigned int len;
struct sfq_slot *slot;
/* Queue is full! Find the longest slot and /* Queue is full! Find the longest slot and drop tail packet from it */
drop a packet from it */
if (d > 1) { if (d > 1) {
sfq_index x = q->dep[d + SFQ_DEPTH].next; x = q->dep[d].next;
skb = q->qs[x].prev; slot = &q->slots[x];
drop:
skb = slot_dequeue_tail(slot);
len = qdisc_pkt_len(skb); len = qdisc_pkt_len(skb);
__skb_unlink(skb, &q->qs[x]);
kfree_skb(skb);
sfq_dec(q, x); sfq_dec(q, x);
kfree_skb(skb);
sch->q.qlen--; sch->q.qlen--;
sch->qstats.drops++; sch->qstats.drops++;
sch->qstats.backlog -= len; sch->qstats.backlog -= len;
...@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch) ...@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
if (d == 1) { if (d == 1) {
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
d = q->next[q->tail]; x = q->tail->next;
q->next[q->tail] = q->next[d]; slot = &q->slots[x];
skb = q->qs[d].prev; q->tail->next = slot->next;
len = qdisc_pkt_len(skb); q->ht[slot->hash] = SFQ_EMPTY_SLOT;
__skb_unlink(skb, &q->qs[d]); goto drop;
kfree_skb(skb);
sfq_dec(q, d);
sch->q.qlen--;
q->ht[q->hash[d]] = SFQ_DEPTH;
sch->qstats.drops++;
sch->qstats.backlog -= len;
return len;
} }
return 0; return 0;
...@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
unsigned int hash; unsigned int hash;
sfq_index x; sfq_index x;
struct sfq_slot *slot;
int uninitialized_var(ret); int uninitialized_var(ret);
hash = sfq_classify(skb, sch, &ret); hash = sfq_classify(skb, sch, &ret);
...@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
hash--; hash--;
x = q->ht[hash]; x = q->ht[hash];
if (x == SFQ_DEPTH) { slot = &q->slots[x];
q->ht[hash] = x = q->dep[SFQ_DEPTH].next; if (x == SFQ_EMPTY_SLOT) {
q->hash[x] = hash; x = q->dep[0].next; /* get a free slot */
q->ht[hash] = x;
slot = &q->slots[x];
slot->hash = hash;
slot_queue_init(slot);
} }
/* If selected queue has length q->limit, this means that /* If selected queue has length q->limit, do simple tail drop,
* all another queues are empty and that we do simple tail drop,
* i.e. drop _this_ packet. * i.e. drop _this_ packet.
*/ */
if (q->qs[x].qlen >= q->limit) if (slot->qlen >= q->limit)
return qdisc_drop(skb, sch); return qdisc_drop(skb, sch);
sch->qstats.backlog += qdisc_pkt_len(skb); sch->qstats.backlog += qdisc_pkt_len(skb);
__skb_queue_tail(&q->qs[x], skb); slot_queue_add(slot, skb);
sfq_inc(q, x); sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */ if (slot->qlen == 1) { /* The flow is new */
if (q->tail == SFQ_DEPTH) { /* It is the first flow */ if (q->tail == NULL) { /* It is the first flow */
q->next[x] = x; slot->next = x;
} else { } else {
q->next[x] = q->next[q->tail]; slot->next = q->tail->next;
q->next[q->tail] = x; q->tail->next = x;
} }
q->tail = x; q->tail = slot;
q->allot[x] = q->quantum; slot->allot = q->quantum;
} }
if (++sch->q.qlen <= q->limit) { if (++sch->q.qlen <= q->limit) {
sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.bytes += qdisc_pkt_len(skb);
...@@ -342,14 +410,12 @@ static struct sk_buff * ...@@ -342,14 +410,12 @@ static struct sk_buff *
sfq_peek(struct Qdisc *sch) sfq_peek(struct Qdisc *sch)
{ {
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
sfq_index a;
/* No active slots */ /* No active slots */
if (q->tail == SFQ_DEPTH) if (q->tail == NULL)
return NULL; return NULL;
a = q->next[q->tail]; return q->slots[q->tail->next].skblist_next;
return skb_peek(&q->qs[a]);
} }
static struct sk_buff * static struct sk_buff *
...@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch) ...@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb; struct sk_buff *skb;
sfq_index a, next_a; sfq_index a, next_a;
struct sfq_slot *slot;
/* No active slots */ /* No active slots */
if (q->tail == SFQ_DEPTH) if (q->tail == NULL)
return NULL; return NULL;
a = q->next[q->tail]; a = q->tail->next;
slot = &q->slots[a];
/* Grab packet */ skb = slot_dequeue_head(slot);
skb = __skb_dequeue(&q->qs[a]);
sfq_dec(q, a); sfq_dec(q, a);
sch->q.qlen--; sch->q.qlen--;
sch->qstats.backlog -= qdisc_pkt_len(skb); sch->qstats.backlog -= qdisc_pkt_len(skb);
/* Is the slot empty? */ /* Is the slot empty? */
if (q->qs[a].qlen == 0) { if (slot->qlen == 0) {
q->ht[q->hash[a]] = SFQ_DEPTH; q->ht[slot->hash] = SFQ_EMPTY_SLOT;
next_a = q->next[a]; next_a = slot->next;
if (a == next_a) { if (a == next_a) {
q->tail = SFQ_DEPTH; q->tail = NULL; /* no more active slots */
return skb; return skb;
} }
q->next[q->tail] = next_a; q->tail->next = next_a;
} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { } else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
q->allot[a] += q->quantum; q->tail = slot;
q->tail = a; slot->allot += q->quantum;
} }
return skb; return skb;
} }
...@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
init_timer_deferrable(&q->perturb_timer); init_timer_deferrable(&q->perturb_timer);
for (i = 0; i < SFQ_HASH_DIVISOR; i++) for (i = 0; i < SFQ_HASH_DIVISOR; i++)
q->ht[i] = SFQ_DEPTH; q->ht[i] = SFQ_EMPTY_SLOT;
for (i = 0; i < SFQ_DEPTH; i++) { for (i = 0; i < SFQ_DEPTH; i++) {
skb_queue_head_init(&q->qs[i]); q->dep[i].next = i + SFQ_SLOTS;
q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; q->dep[i].prev = i + SFQ_SLOTS;
q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
} }
q->limit = SFQ_DEPTH - 1; q->limit = SFQ_DEPTH - 1;
q->max_depth = 0; q->cur_depth = 0;
q->tail = SFQ_DEPTH; q->tail = NULL;
if (opt == NULL) { if (opt == NULL) {
q->quantum = psched_mtu(qdisc_dev(sch)); q->quantum = psched_mtu(qdisc_dev(sch));
q->perturb_period = 0; q->perturb_period = 0;
...@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
return err; return err;
} }
for (i = 0; i < SFQ_DEPTH; i++) for (i = 0; i < SFQ_SLOTS; i++)
sfq_link(q, i); sfq_link(q, i);
return 0; return 0;
} }
...@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, ...@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct gnet_dump *d) struct gnet_dump *d)
{ {
struct sfq_sched_data *q = qdisc_priv(sch); struct sfq_sched_data *q = qdisc_priv(sch);
sfq_index idx = q->ht[cl-1]; const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
struct sk_buff_head *list = &q->qs[idx]; struct gnet_stats_queue qs = { .qlen = slot->qlen };
struct gnet_stats_queue qs = { .qlen = list->qlen }; struct tc_sfq_xstats xstats = { .allot = slot->allot };
struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
struct sk_buff *skb; struct sk_buff *skb;
skb_queue_walk(list, skb) slot_queue_walk(slot, skb)
qs.backlog += qdisc_pkt_len(skb); qs.backlog += qdisc_pkt_len(skb);
if (gnet_stats_copy_queue(d, &qs) < 0) if (gnet_stats_copy_queue(d, &qs) < 0)
...@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) ...@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return; return;
for (i = 0; i < SFQ_HASH_DIVISOR; i++) { for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
if (q->ht[i] == SFQ_DEPTH || if (q->ht[i] == SFQ_EMPTY_SLOT ||
arg->count < arg->skip) { arg->count < arg->skip) {
arg->count++; arg->count++;
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment