Commit 07d78363 authored by David Miller, committed by David S. Miller

net: Convert NAPI gro list into a small hash table.

Improve the performance of GRO receive by splitting flows into
multiple hash chains.
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d4546c25
...@@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str); ...@@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str);
/* /*
* Structure for NAPI scheduling similar to tasklet but with weighting * Structure for NAPI scheduling similar to tasklet but with weighting
*/ */
#define GRO_HASH_BUCKETS 8
struct napi_struct { struct napi_struct {
/* The poll_list must only be managed by the entity which /* The poll_list must only be managed by the entity which
* changes the state of the NAPI_STATE_SCHED bit. This means * changes the state of the NAPI_STATE_SCHED bit. This means
...@@ -322,7 +323,7 @@ struct napi_struct { ...@@ -322,7 +323,7 @@ struct napi_struct {
int poll_owner; int poll_owner;
#endif #endif
struct net_device *dev; struct net_device *dev;
struct list_head gro_list; struct list_head gro_hash[GRO_HASH_BUCKETS];
struct sk_buff *skb; struct sk_buff *skb;
struct hrtimer timer; struct hrtimer timer;
struct list_head dev_list; struct list_head dev_list;
......
...@@ -4875,15 +4875,12 @@ static int napi_gro_complete(struct sk_buff *skb) ...@@ -4875,15 +4875,12 @@ static int napi_gro_complete(struct sk_buff *skb)
return netif_receive_skb_internal(skb); return netif_receive_skb_internal(skb);
} }
/* napi->gro_list contains packets ordered by age. static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
* youngest packets at the head of it. bool flush_old)
* Complete skbs in reverse order to reduce latencies.
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{ {
struct sk_buff *skb, *p; struct sk_buff *skb, *p;
list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) { list_for_each_entry_safe_reverse(skb, p, head, list) {
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return; return;
list_del_init(&skb->list); list_del_init(&skb->list);
...@@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) ...@@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
napi->gro_count--; napi->gro_count--;
} }
} }
/* Flush every chain of napi->gro_hash.
 * Each chain holds packets ordered by age, youngest at the head, and
 * the per-chain helper completes skbs in reverse order to reduce
 * latencies.  When @flush_old is true, packets received on the current
 * jiffy are kept.
 */
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
	struct list_head *bucket = napi->gro_hash;
	int idx;

	for (idx = 0; idx < GRO_HASH_BUCKETS; idx++, bucket++)
		__napi_gro_flush_chain(napi, bucket, flush_old);
}
EXPORT_SYMBOL(napi_gro_flush); EXPORT_SYMBOL(napi_gro_flush);
static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) static struct list_head *gro_list_prepare(struct napi_struct *napi,
struct sk_buff *skb)
{ {
unsigned int maclen = skb->dev->hard_header_len; unsigned int maclen = skb->dev->hard_header_len;
u32 hash = skb_get_hash_raw(skb); u32 hash = skb_get_hash_raw(skb);
struct list_head *head;
struct sk_buff *p; struct sk_buff *p;
list_for_each_entry(p, &napi->gro_list, list) { head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
list_for_each_entry(p, head, list) {
unsigned long diffs; unsigned long diffs;
NAPI_GRO_CB(p)->flush = 0; NAPI_GRO_CB(p)->flush = 0;
...@@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) ...@@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
maclen); maclen);
NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->same_flow = !diffs;
} }
return head;
} }
static void skb_gro_reset_offset(struct sk_buff *skb) static void skb_gro_reset_offset(struct sk_buff *skb)
...@@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) ...@@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
} }
} }
/* Find the oldest held GRO skb across all hash buckets and complete it,
 * making room for the new skb the caller is about to insert.  Only the
 * tail of each bucket is examined, since chains are kept ordered by age
 * (youngest at the head).
 */
static void gro_flush_oldest(struct napi_struct *napi)
{
struct sk_buff *oldest = NULL;
unsigned long age = jiffies;
int i;
for (i = 0; i < GRO_HASH_BUCKETS; i++) {
struct list_head *head = &napi->gro_hash[i];
struct sk_buff *skb;
if (list_empty(head))
continue;
/* Tail entry is the oldest packet in this bucket. */
skb = list_last_entry(head, struct sk_buff, list);
/* Keep the candidate whose age is furthest in the past;
 * time_before() handles jiffies wraparound.
 */
if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
oldest = skb;
age = NAPI_GRO_CB(skb)->age;
}
}
/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
 * impossible.
 */
if (WARN_ON_ONCE(!oldest))
return;
/* Do not adjust napi->gro_count, caller is adding a new SKB to
 * the chain.
 */
list_del(&oldest->list);
napi_gro_complete(oldest);
}
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{ {
struct list_head *head = &offload_base; struct list_head *head = &offload_base;
struct packet_offload *ptype; struct packet_offload *ptype;
__be16 type = skb->protocol; __be16 type = skb->protocol;
struct list_head *gro_head;
struct sk_buff *pp = NULL; struct sk_buff *pp = NULL;
enum gro_result ret; enum gro_result ret;
int same_flow; int same_flow;
...@@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ...@@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (netif_elide_gro(skb->dev)) if (netif_elide_gro(skb->dev))
goto normal; goto normal;
gro_list_prepare(napi, skb); gro_head = gro_list_prepare(napi, skb);
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) { list_for_each_entry_rcu(ptype, head, list) {
...@@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ...@@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0; NAPI_GRO_CB(skb)->csum_valid = 0;
} }
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); pp = ptype->callbacks.gro_receive(gro_head, skb);
break; break;
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ...@@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto normal; goto normal;
if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
struct sk_buff *nskb; gro_flush_oldest(napi);
nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
list_del(&nskb->list);
napi_gro_complete(nskb);
} else { } else {
napi->gro_count++; napi->gro_count++;
} }
...@@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff ...@@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->age = jiffies;
NAPI_GRO_CB(skb)->last = skb; NAPI_GRO_CB(skb)->last = skb;
skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb_shinfo(skb)->gso_size = skb_gro_len(skb);
list_add(&skb->list, &napi->gro_list); list_add(&skb->list, gro_head);
ret = GRO_HELD; ret = GRO_HELD;
pull: pull:
...@@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) ...@@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL))) NAPIF_STATE_IN_BUSY_POLL)))
return false; return false;
if (!list_empty(&n->gro_list)) { if (n->gro_count) {
unsigned long timeout = 0; unsigned long timeout = 0;
if (work_done) if (work_done)
...@@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) ...@@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
/* Note : we use a relaxed variant of napi_schedule_prep() not setting /* Note : we use a relaxed variant of napi_schedule_prep() not setting
* NAPI_STATE_MISSED, since we do not react to a device IRQ. * NAPI_STATE_MISSED, since we do not react to a device IRQ.
*/ */
if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) && if (napi->gro_count && !napi_disable_pending(napi) &&
!test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
__napi_schedule_irqoff(napi); __napi_schedule_irqoff(napi);
...@@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) ...@@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight) int (*poll)(struct napi_struct *, int), int weight)
{ {
int i;
INIT_LIST_HEAD(&napi->poll_list); INIT_LIST_HEAD(&napi->poll_list);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog; napi->timer.function = napi_watchdog;
napi->gro_count = 0; napi->gro_count = 0;
INIT_LIST_HEAD(&napi->gro_list); for (i = 0; i < GRO_HASH_BUCKETS; i++)
INIT_LIST_HEAD(&napi->gro_hash[i]);
napi->skb = NULL; napi->skb = NULL;
napi->poll = poll; napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT) if (weight > NAPI_POLL_WEIGHT)
...@@ -5714,12 +5764,16 @@ void napi_disable(struct napi_struct *n) ...@@ -5714,12 +5764,16 @@ void napi_disable(struct napi_struct *n)
} }
EXPORT_SYMBOL(napi_disable); EXPORT_SYMBOL(napi_disable);
static void gro_list_free(struct list_head *head) static void flush_gro_hash(struct napi_struct *napi)
{ {
struct sk_buff *skb, *p; int i;
list_for_each_entry_safe(skb, p, head, list) for (i = 0; i < GRO_HASH_BUCKETS; i++) {
kfree_skb(skb); struct sk_buff *skb, *n;
list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
kfree_skb(skb);
}
} }
/* Must be called in process context */ /* Must be called in process context */
...@@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi) ...@@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi)
list_del_init(&napi->dev_list); list_del_init(&napi->dev_list);
napi_free_frags(napi); napi_free_frags(napi);
gro_list_free(&napi->gro_list); flush_gro_hash(napi);
INIT_LIST_HEAD(&napi->gro_list);
napi->gro_count = 0; napi->gro_count = 0;
} }
EXPORT_SYMBOL(netif_napi_del); EXPORT_SYMBOL(netif_napi_del);
...@@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) ...@@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock; goto out_unlock;
} }
if (!list_empty(&n->gro_list)) { if (n->gro_count) {
/* flush too old packets /* flush too old packets
* If HZ < 1000, flush all packets. * If HZ < 1000, flush all packets.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment