Commit 323ebb61 authored by Edward Cree, committed by David S. Miller

net: use listified RX for handling GRO_NORMAL skbs

When GRO decides not to coalesce a packet, in napi_frags_finish(), instead
 of passing it to the stack immediately, place it on a list in the napi
 struct.  Then, at flush time (napi_complete_done(), napi_poll(), or
 napi_busy_loop()), call netif_receive_skb_list_internal() on the list.
We'd like to do that in napi_gro_flush(), but it's not called if
 !napi->gro_bitmask, so we have to do it in the callers instead.  (There are
 a handful of drivers that call napi_gro_flush() themselves, but it's not
 clear why, or whether this will affect them.)
Because a full 64 packets is an inefficiently large batch, also consume the
 list whenever it exceeds gro_normal_batch, a new net/core sysctl that
 defaults to 8.
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 67270136
...@@ -332,6 +332,8 @@ struct napi_struct { ...@@ -332,6 +332,8 @@ struct napi_struct {
struct net_device *dev; struct net_device *dev;
struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct gro_list gro_hash[GRO_HASH_BUCKETS];
struct sk_buff *skb; struct sk_buff *skb;
struct list_head rx_list; /* Pending GRO_NORMAL skbs */
int rx_count; /* length of rx_list */
struct hrtimer timer; struct hrtimer timer;
struct list_head dev_list; struct list_head dev_list;
struct hlist_node napi_hash_node; struct hlist_node napi_hash_node;
...@@ -4239,6 +4241,7 @@ extern int dev_weight_rx_bias; ...@@ -4239,6 +4241,7 @@ extern int dev_weight_rx_bias;
extern int dev_weight_tx_bias; extern int dev_weight_tx_bias;
extern int dev_rx_weight; extern int dev_rx_weight;
extern int dev_tx_weight; extern int dev_tx_weight;
extern int gro_normal_batch;
bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
......
...@@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ ...@@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
int dev_rx_weight __read_mostly = 64; int dev_rx_weight __read_mostly = 64;
int dev_tx_weight __read_mostly = 64; int dev_tx_weight __read_mostly = 64;
/* Maximum number of GRO_NORMAL skbs to batch up for list-RX.
 * gro_normal_one() flushes a napi's rx_list as soon as its rx_count
 * reaches this value.  Tunable at runtime through the "gro_normal_batch"
 * entry in net_core_table, which enforces a minimum of 1; defaults to 8.
 */
int gro_normal_batch __read_mostly = 8;
/* Called with irq disabled */ /* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd, static inline void ____napi_schedule(struct softnet_data *sd,
...@@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) ...@@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
} }
EXPORT_SYMBOL(napi_get_frags); EXPORT_SYMBOL(napi_get_frags);
/* Flush the GRO_NORMAL skbs batched on @napi up to the stack.
 *
 * If anything is pending (rx_count != 0), the whole rx_list is handed to
 * netif_receive_skb_list_internal() in one call; afterwards the list head
 * is re-initialised and the counter reset so the napi starts a new batch.
 * Does nothing when no skbs are queued.
 */
static void gro_normal_list(struct napi_struct *napi)
{
	if (napi->rx_count) {
		netif_receive_skb_list_internal(&napi->rx_list);
		INIT_LIST_HEAD(&napi->rx_list);
		napi->rx_count = 0;
	}
}
/* Append one GRO_NORMAL skb to @napi's pending rx_list.
 *
 * The skb is linked at the tail of the list and rx_count is bumped.  Once
 * the count reaches gro_normal_batch, the whole batch is passed up to the
 * stack via gro_normal_list(); otherwise the skb just stays queued.
 */
static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
{
	list_add_tail(&skb->list, &napi->rx_list);
	napi->rx_count++;

	/* Batch not full yet: leave it for a later flush. */
	if (napi->rx_count < gro_normal_batch)
		return;

	gro_normal_list(napi);
}
static gro_result_t napi_frags_finish(struct napi_struct *napi, static gro_result_t napi_frags_finish(struct napi_struct *napi,
struct sk_buff *skb, struct sk_buff *skb,
gro_result_t ret) gro_result_t ret)
...@@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, ...@@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
case GRO_HELD: case GRO_HELD:
__skb_push(skb, ETH_HLEN); __skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, skb->dev); skb->protocol = eth_type_trans(skb, skb->dev);
if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) if (ret == GRO_NORMAL)
ret = GRO_DROP; gro_normal_one(napi, skb);
break; break;
case GRO_DROP: case GRO_DROP:
...@@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) ...@@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL))) NAPIF_STATE_IN_BUSY_POLL)))
return false; return false;
gro_normal_list(n);
if (n->gro_bitmask) { if (n->gro_bitmask) {
unsigned long timeout = 0; unsigned long timeout = 0;
...@@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) ...@@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
* Ideally, a new ndo_busy_poll_stop() could avoid another round. * Ideally, a new ndo_busy_poll_stop() could avoid another round.
*/ */
rc = napi->poll(napi, BUSY_POLL_BUDGET); rc = napi->poll(napi, BUSY_POLL_BUDGET);
/* We can't gro_normal_list() here, because napi->poll() might have
* rearmed the napi (napi_complete_done()) in which case it could
* already be running on another CPU.
*/
trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
netpoll_poll_unlock(have_poll_lock); netpoll_poll_unlock(have_poll_lock);
if (rc == BUSY_POLL_BUDGET) if (rc == BUSY_POLL_BUDGET) {
/* As the whole budget was spent, we still own the napi so can
* safely handle the rx_list.
*/
gro_normal_list(napi);
__napi_schedule(napi); __napi_schedule(napi);
}
local_bh_enable(); local_bh_enable();
} }
...@@ -6167,6 +6200,7 @@ void napi_busy_loop(unsigned int napi_id, ...@@ -6167,6 +6200,7 @@ void napi_busy_loop(unsigned int napi_id,
} }
work = napi_poll(napi, BUSY_POLL_BUDGET); work = napi_poll(napi, BUSY_POLL_BUDGET);
trace_napi_poll(napi, work, BUSY_POLL_BUDGET); trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
gro_normal_list(napi);
count: count:
if (work > 0) if (work > 0)
__NET_ADD_STATS(dev_net(napi->dev), __NET_ADD_STATS(dev_net(napi->dev),
...@@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, ...@@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->timer.function = napi_watchdog; napi->timer.function = napi_watchdog;
init_gro_hash(napi); init_gro_hash(napi);
napi->skb = NULL; napi->skb = NULL;
INIT_LIST_HEAD(&napi->rx_list);
napi->rx_count = 0;
napi->poll = poll; napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT) if (weight > NAPI_POLL_WEIGHT)
netdev_err_once(dev, "%s() called with weight %d\n", __func__, netdev_err_once(dev, "%s() called with weight %d\n", __func__,
...@@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) ...@@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock; goto out_unlock;
} }
gro_normal_list(n);
if (n->gro_bitmask) { if (n->gro_bitmask) {
/* flush too old packets /* flush too old packets
* If HZ < 1000, flush all packets. * If HZ < 1000, flush all packets.
......
...@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = { ...@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_do_static_key, .proc_handler = proc_do_static_key,
}, },
{
.procname = "gro_normal_batch",
.data = &gro_normal_batch,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
{ } { }
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment