Commit cf84eb0b authored by David S. Miller

Merge branch 'virtio_rx_merging'

Michael Dalton says:

====================
virtio-net: mergeable rx buffer size auto-tuning

The virtio-net device currently uses aligned MTU-sized mergeable receive
packet buffers. Network throughput for workloads with large average
packet size can be improved by posting larger receive packet buffers.
However, due to SKB truesize effects, posting large (e.g., PAGE_SIZE)
buffers reduces the throughput of workloads that do not benefit from GRO
and have no large inbound packets.

This patchset introduces virtio-net mergeable buffer size auto-tuning,
with buffer sizes ranging from aligned MTU-size to PAGE_SIZE. Packet
buffer size is chosen based on a per-receive queue EWMA of incoming
packet size.

To unify mergeable receive buffer memory allocation and improve
SKB frag coalescing, all mergeable buffer memory allocation is
migrated to per-receive queue page frag allocators.

The per-receive queue mergeable packet buffer size is exported via
sysfs, and the network device sysfs layer has been extended to add
support for device-specific per-receive queue sysfs attribute groups.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 722e47d7 fbf28d78
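For orientation before the diff, here is a minimal userspace sketch of the buffer-size auto-tuning idea described in the cover letter: a slow-moving fixed-point EWMA of observed packet sizes, clamped between an MTU-sized minimum and PAGE_SIZE, then rounded up to the buffer alignment. The constants mirror the driver, but the EWMA update, the HDR_LEN value, and the helper names here are simplified stand-ins for illustration only (not the kernel's struct ewma or the actual get_mergeable_buf_len()).

/* Hypothetical userspace illustration -- not the driver code itself. */
#include <stdio.h>

#define GOOD_PACKET_LEN         1518U   /* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */
#define PAGE_SIZE               4096U
#define HDR_LEN                 12U     /* assumed sizeof(struct virtio_net_hdr_mrg_rxbuf) */
#define MERGEABLE_BUFFER_ALIGN  256U
#define RECEIVE_AVG_WEIGHT      64U     /* large weight => slow-moving average */

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define CLAMP(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

/* Integer EWMA with weight w: avg = avg - avg/w + sample/w (simplified). */
static unsigned long ewma_update(unsigned long avg, unsigned long sample)
{
        if (!avg)
                return sample;
        return avg - avg / RECEIVE_AVG_WEIGHT + sample / RECEIVE_AVG_WEIGHT;
}

/* Rough analogue of get_mergeable_buf_len(): header + clamped average, aligned. */
static unsigned int mergeable_buf_len(unsigned long avg_pkt_len)
{
        unsigned int len;

        len = HDR_LEN + CLAMP(avg_pkt_len, GOOD_PACKET_LEN, PAGE_SIZE - HDR_LEN);
        return ALIGN_UP(len, MERGEABLE_BUFFER_ALIGN);
}

int main(void)
{
        unsigned long sizes[] = { 64, 64, 1500, 9000, 9000, 9000, 128 };
        unsigned long avg = 0;
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                avg = ewma_update(avg, sizes[i]);
                printf("pkt %-5lu avg %-5lu -> post %u-byte buffers\n",
                       sizes[i], avg, mergeable_buf_len(avg));
        }
        return 0;
}

Because the weight is large, a handful of jumbo or tiny packets barely moves the average, which is exactly the "insensitive to short-term, transient changes" behaviour the series aims for when an entire RX ring is refilled at once.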
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/average.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -36,11 +37,18 @@ module_param(gso, bool, 0444);
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
-#define MERGE_BUFFER_LEN (ALIGN(GOOD_PACKET_LEN + \
-                                sizeof(struct virtio_net_hdr_mrg_rxbuf), \
-                                L1_CACHE_BYTES))
 #define GOOD_COPY_LEN 128
 
+/* Weight used for the RX packet size EWMA. The average packet size is used to
+ * determine the packet buffer size when refilling RX rings. As the entire RX
+ * ring may be refilled at once, the weight is chosen so that the EWMA will be
+ * insensitive to short-term, transient changes in packet size.
+ */
+#define RECEIVE_AVG_WEIGHT 64
+
+/* Minimum alignment for mergeable packet buffers. */
+#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
@@ -75,6 +83,12 @@ struct receive_queue {
        /* Chain pages by the private ptr. */
        struct page *pages;
 
+       /* Average packet length for mergeable receive buffers. */
+       struct ewma mrg_avg_pkt_len;
+
+       /* Page frag for packet buffer allocation. */
+       struct page_frag alloc_frag;
+
        /* RX: fragments + linear part + virtio header */
        struct scatterlist sg[MAX_SKB_FRAGS + 2];
@@ -123,11 +137,6 @@ struct virtnet_info {
        /* Lock for config space updates */
        struct mutex config_lock;
 
-       /* Page_frag for GFP_KERNEL packet buffer allocation when we run
-        * low on memory.
-        */
-       struct page_frag alloc_frag;
-
        /* Does the affinity hint is set for virtqueues? */
        bool affinity_hint_set;
@@ -218,6 +227,24 @@ static void skb_xmit_done(struct virtqueue *vq)
        netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
+static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
+{
+       unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
+
+       return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
+}
+
+static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
+{
+       return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);
+}
+
+static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
+{
+       unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
+
+       return (unsigned long)buf | (size - 1);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
                                    struct page *page, unsigned int offset,
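The helpers added above pack a buffer's truesize into the low bits of its address: buffers are MERGEABLE_BUFFER_ALIGN-aligned, so (truesize / MERGEABLE_BUFFER_ALIGN - 1) fits in the otherwise-zero low bits, and a single unsigned long can serve as the virtqueue token. The sketch below is a hedged, standalone userspace illustration of that encoding, assuming a fixed 256-byte alignment in place of max(L1_CACHE_BYTES, 256); it mirrors the helpers but is not the driver code.

/* Hypothetical illustration of packing a buffer pointer and its truesize
 * into one unsigned long, in the style of the helpers added above. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define MERGEABLE_BUFFER_ALIGN 256UL    /* must be a power of two */

static unsigned long buf_to_ctx(void *buf, unsigned int truesize)
{
        /* buf is align-aligned and truesize a multiple of the alignment,
         * so (truesize / align - 1) fits in the pointer's low zero bits. */
        unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;

        return (unsigned long)buf | (size - 1);
}

static void *ctx_to_buf_address(unsigned long ctx)
{
        return (void *)(ctx & ~(MERGEABLE_BUFFER_ALIGN - 1));
}

static unsigned int ctx_to_buf_truesize(unsigned long ctx)
{
        unsigned int truesize = ctx & (MERGEABLE_BUFFER_ALIGN - 1);

        return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
}

int main(void)
{
        void *buf;
        unsigned long ctx;

        /* Grab an aligned buffer and pack a 1536-byte truesize with it. */
        buf = aligned_alloc(MERGEABLE_BUFFER_ALIGN, 4096);
        if (!buf)
                return 1;
        ctx = buf_to_ctx(buf, 1536);

        assert(ctx_to_buf_address(ctx) == buf);
        assert(ctx_to_buf_truesize(ctx) == 1536);
        printf("ctx=%#lx -> buf=%p truesize=%u\n",
               ctx, ctx_to_buf_address(ctx), ctx_to_buf_truesize(ctx));

        free(buf);
        return 0;
}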
@@ -326,36 +353,33 @@ static struct sk_buff *receive_big(struct net_device *dev,
 static struct sk_buff *receive_mergeable(struct net_device *dev,
                                          struct receive_queue *rq,
-                                         void *buf,
+                                         unsigned long ctx,
                                          unsigned int len)
 {
+       void *buf = mergeable_ctx_to_buf_address(ctx);
        struct skb_vnet_hdr *hdr = buf;
        int num_buf = hdr->mhdr.num_buffers;
        struct page *page = virt_to_head_page(buf);
        int offset = buf - page_address(page);
-       struct sk_buff *head_skb = page_to_skb(rq, page, offset, len,
-                                              MERGE_BUFFER_LEN);
+       unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+       struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, truesize);
        struct sk_buff *curr_skb = head_skb;
 
        if (unlikely(!curr_skb))
                goto err_skb;
        while (--num_buf) {
                int num_skb_frags;
 
-               buf = virtqueue_get_buf(rq->vq, &len);
-               if (unlikely(!buf)) {
+               ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!ctx)) {
                        pr_debug("%s: rx error: %d buffers out of %d missing\n",
                                 dev->name, num_buf, hdr->mhdr.num_buffers);
                        dev->stats.rx_length_errors++;
                        goto err_buf;
                }
-               if (unlikely(len > MERGE_BUFFER_LEN)) {
-                       pr_debug("%s: rx error: merge buffer too long\n",
-                                dev->name);
-                       len = MERGE_BUFFER_LEN;
-               }
 
+               buf = mergeable_ctx_to_buf_address(ctx);
                page = virt_to_head_page(buf);
 
                num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
@@ -372,35 +396,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        head_skb->truesize += nskb->truesize;
                        num_skb_frags = 0;
                }
+               truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
                if (curr_skb != head_skb) {
                        head_skb->data_len += len;
                        head_skb->len += len;
-                       head_skb->truesize += MERGE_BUFFER_LEN;
+                       head_skb->truesize += truesize;
                }
                offset = buf - page_address(page);
                if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
                        put_page(page);
                        skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
-                                            len, MERGE_BUFFER_LEN);
+                                            len, truesize);
                } else {
                        skb_add_rx_frag(curr_skb, num_skb_frags, page,
-                                       offset, len, MERGE_BUFFER_LEN);
+                                       offset, len, truesize);
                }
        }
 
+       ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
        return head_skb;
 
 err_skb:
        put_page(page);
        while (--num_buf) {
-               buf = virtqueue_get_buf(rq->vq, &len);
-               if (unlikely(!buf)) {
+               ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!ctx)) {
                        pr_debug("%s: rx error: %d buffers missing\n",
                                 dev->name, num_buf);
                        dev->stats.rx_length_errors++;
                        break;
                }
-               page = virt_to_head_page(buf);
+               page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
                put_page(page);
        }
 err_buf:
@@ -420,17 +446,20 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
        if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                pr_debug("%s: short packet %i\n", dev->name, len);
                dev->stats.rx_length_errors++;
-               if (vi->mergeable_rx_bufs)
-                       put_page(virt_to_head_page(buf));
-               else if (vi->big_packets)
+               if (vi->mergeable_rx_bufs) {
+                       unsigned long ctx = (unsigned long)buf;
+                       void *base = mergeable_ctx_to_buf_address(ctx);
+                       put_page(virt_to_head_page(base));
+               } else if (vi->big_packets) {
                        give_pages(rq, buf);
-               else
+               } else {
                        dev_kfree_skb(buf);
+               }
                return;
        }
 
        if (vi->mergeable_rx_bufs)
-               skb = receive_mergeable(dev, rq, buf, len);
+               skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
        else if (vi->big_packets)
                skb = receive_big(dev, rq, buf, len);
        else
@@ -571,28 +600,45 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
        return err;
 }
 
+static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
+{
+       const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+       unsigned int len;
+
+       len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
+                       GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+       return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
+}
+
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-       struct virtnet_info *vi = rq->vq->vdev->priv;
-       char *buf = NULL;
+       struct page_frag *alloc_frag = &rq->alloc_frag;
+       char *buf;
+       unsigned long ctx;
        int err;
+       unsigned int len, hole;
 
-       if (gfp & __GFP_WAIT) {
-               if (skb_page_frag_refill(MERGE_BUFFER_LEN, &vi->alloc_frag,
-                                        gfp)) {
-                       buf = (char *)page_address(vi->alloc_frag.page) +
-                             vi->alloc_frag.offset;
-                       get_page(vi->alloc_frag.page);
-                       vi->alloc_frag.offset += MERGE_BUFFER_LEN;
-               }
-       } else {
-               buf = netdev_alloc_frag(MERGE_BUFFER_LEN);
-       }
-       if (!buf)
+       len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
+       if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
                return -ENOMEM;
 
-       sg_init_one(rq->sg, buf, MERGE_BUFFER_LEN);
-       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+       buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+       ctx = mergeable_buf_to_ctx(buf, len);
+       get_page(alloc_frag->page);
+       alloc_frag->offset += len;
+       hole = alloc_frag->size - alloc_frag->offset;
+       if (hole < len) {
+               /* To avoid internal fragmentation, if there is very likely not
+                * enough space for another buffer, add the remaining space to
+                * the current buffer. This extra space is not included in
+                * the truesize stored in ctx.
+                */
+               len += hole;
+               alloc_frag->offset += hole;
+       }
+
+       sg_init_one(rq->sg, buf, len);
+       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));
@@ -612,6 +658,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
        int err;
        bool oom;
 
+       gfp |= __GFP_COLD;
        do {
                if (vi->mergeable_rx_bufs)
                        err = add_recvbuf_mergeable(rq, gfp);
@@ -1368,6 +1415,14 @@ static void free_receive_bufs(struct virtnet_info *vi)
        }
 }
 
+static void free_receive_page_frags(struct virtnet_info *vi)
+{
+       int i;
+       for (i = 0; i < vi->max_queue_pairs; i++)
+               if (vi->rq[i].alloc_frag.page)
+                       put_page(vi->rq[i].alloc_frag.page);
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
        void *buf;
@@ -1383,12 +1438,15 @@ static void free_unused_bufs(struct virtnet_info *vi)
                struct virtqueue *vq = vi->rq[i].vq;
 
                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (vi->mergeable_rx_bufs)
-                               put_page(virt_to_head_page(buf));
-                       else if (vi->big_packets)
+                       if (vi->mergeable_rx_bufs) {
+                               unsigned long ctx = (unsigned long)buf;
+                               void *base = mergeable_ctx_to_buf_address(ctx);
+                               put_page(virt_to_head_page(base));
+                       } else if (vi->big_packets) {
                                give_pages(&vi->rq[i], buf);
-                       else
+                       } else {
                                dev_kfree_skb(buf);
+                       }
                }
        }
 }
@@ -1496,6 +1554,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
                               napi_weight);
 
                sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+               ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
                sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
        }
@@ -1532,6 +1591,33 @@ static int init_vqs(struct virtnet_info *vi)
        return ret;
 }
 
+#ifdef CONFIG_SYSFS
+static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
+               struct rx_queue_attribute *attribute, char *buf)
+{
+       struct virtnet_info *vi = netdev_priv(queue->dev);
+       unsigned int queue_index = get_netdev_rx_queue_index(queue);
+       struct ewma *avg;
+
+       BUG_ON(queue_index >= vi->max_queue_pairs);
+       avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+       return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
+}
+
+static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
+       __ATTR_RO(mergeable_rx_buffer_size);
+
+static struct attribute *virtio_net_mrg_rx_attrs[] = {
+       &mergeable_rx_buffer_size_attribute.attr,
+       NULL
+};
+
+static const struct attribute_group virtio_net_mrg_rx_group = {
+       .name = "virtio_net",
+       .attrs = virtio_net_mrg_rx_attrs
+};
+#endif
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
        int i, err;
@@ -1646,6 +1732,10 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (err)
                goto free_stats;
 
+#ifdef CONFIG_SYSFS
+       if (vi->mergeable_rx_bufs)
+               dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
+#endif
        netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
        netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
@@ -1695,9 +1785,8 @@ static int virtnet_probe(struct virtio_device *vdev)
        unregister_netdev(dev);
 free_vqs:
        cancel_delayed_work_sync(&vi->refill);
+       free_receive_page_frags(vi);
        virtnet_del_vqs(vi);
-       if (vi->alloc_frag.page)
-               put_page(vi->alloc_frag.page);
 free_stats:
        free_percpu(vi->stats);
 free:
@@ -1714,6 +1803,8 @@ static void remove_vq_common(struct virtnet_info *vi)
        free_receive_bufs(vi);
 
+       free_receive_page_frags(vi);
+
        virtnet_del_vqs(vi);
 }
@@ -1731,8 +1822,6 @@ static void virtnet_remove(struct virtio_device *vdev)
        unregister_netdev(vi->dev);
 
        remove_vq_common(vi);
-       if (vi->alloc_frag.page)
-               put_page(vi->alloc_frag.page);
 
        flush_work(&vi->config_work);
......
@@ -668,15 +668,28 @@ extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
                         u16 filter_id);
 #endif
+#endif /* CONFIG_RPS */
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
+#ifdef CONFIG_RPS
        struct rps_map __rcu            *rps_map;
        struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
        struct kobject                  kobj;
        struct net_device               *dev;
 } ____cacheline_aligned_in_smp;
-#endif /* CONFIG_RPS */
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+       struct attribute attr;
+       ssize_t (*show)(struct netdev_rx_queue *queue,
+           struct rx_queue_attribute *attr, char *buf);
+       ssize_t (*store)(struct netdev_rx_queue *queue,
+           struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
 
 #ifdef CONFIG_XPS
 /*
@@ -1313,7 +1326,7 @@ struct net_device {
                                                    unicast) */
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        struct netdev_rx_queue  *_rx;
 
        /* Number of RX queues allocated at register_netdev() time */
@@ -1424,6 +1437,8 @@ struct net_device {
        struct device           dev;
        /* space for optional device, statistics, and wireless sysfs groups */
        const struct attribute_group *sysfs_groups[4];
+       /* space for optional per-rx queue attributes */
+       const struct attribute_group *sysfs_rx_queue_group;
 
        /* rtnetlink link ops */
        const struct rtnl_link_ops *rtnl_link_ops;
@@ -2375,7 +2390,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
@@ -2394,7 +2409,7 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
                                             from_dev->real_num_tx_queues);
        if (err)
                return err;
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        return netif_set_real_num_rx_queues(to_dev,
                                            from_dev->real_num_rx_queues);
 #else
@@ -2402,6 +2417,18 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 #endif
 }
 
+#ifdef CONFIG_SYSFS
+static inline unsigned int get_netdev_rx_queue_index(
+               struct netdev_rx_queue *queue)
+{
+       struct net_device *dev = queue->dev;
+       int index = queue - dev->_rx;
+
+       BUG_ON(index >= dev->num_rx_queues);
+       return index;
+}
+#endif
+
 #define DEFAULT_MAX_NUM_RSS_QUEUES     (8)
 int netif_get_num_default_rss_queues(void);
......
@@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init);
  */
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
-       avg->internal = avg->internal ?
-               (((avg->internal << avg->weight) - avg->internal) +
+       unsigned long internal = ACCESS_ONCE(avg->internal);
+
+       ACCESS_ONCE(avg->internal) = internal ?
+               (((internal << avg->weight) - internal) +
                        (val << avg->factor)) >> avg->weight :
                (val << avg->factor);
        return avg;
......
@@ -2083,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 /**
  * netif_set_real_num_rx_queues - set actual number of RX queues used
  * @dev: Network device
@@ -5764,7 +5764,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
        unsigned int i, count = dev->num_rx_queues;
@@ -6309,7 +6309,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
                return NULL;
        }
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        if (rxqs < 1) {
                pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
                return NULL;
@@ -6365,7 +6365,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        if (netif_alloc_netdev_queues(dev))
                goto free_all;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        dev->num_rx_queues = rxqs;
        dev->real_num_rx_queues = rxqs;
        if (netif_alloc_rx_queues(dev))
@@ -6385,7 +6385,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 free_pcpu:
        free_percpu(dev->pcpu_refcnt);
        netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        kfree(dev->_rx);
 #endif
@@ -6410,7 +6410,7 @@ void free_netdev(struct net_device *dev)
        release_net(dev_net(dev));
 
        netif_free_tx_queues(dev);
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        kfree(dev->_rx);
 #endif
......
@@ -498,17 +498,7 @@ static struct attribute_group wireless_group = {
 #define net_class_groups       NULL
 #endif /* CONFIG_SYSFS */
 
-#ifdef CONFIG_RPS
-/*
- * RX queue sysfs structures and functions.
- */
-struct rx_queue_attribute {
-       struct attribute attr;
-       ssize_t (*show)(struct netdev_rx_queue *queue,
-           struct rx_queue_attribute *attr, char *buf);
-       ssize_t (*store)(struct netdev_rx_queue *queue,
-           struct rx_queue_attribute *attr, const char *buf, size_t len);
-};
+#ifdef CONFIG_SYSFS
 #define to_rx_queue_attr(_attr) container_of(_attr,           \
     struct rx_queue_attribute, attr)
@@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
        .store = rx_queue_attr_store,
 };
 
+#ifdef CONFIG_RPS
 static ssize_t show_rps_map(struct netdev_rx_queue *queue,
                            struct rx_queue_attribute *attribute, char *buf)
 {
@@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute =
 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
        __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
            show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+#endif /* CONFIG_RPS */
 
 static struct attribute *rx_queue_default_attrs[] = {
+#ifdef CONFIG_RPS
        &rps_cpus_attribute.attr,
        &rps_dev_flow_table_cnt_attribute.attr,
+#endif
        NULL
 };
 
 static void rx_queue_release(struct kobject *kobj)
 {
        struct netdev_rx_queue *queue = to_rx_queue(kobj);
+#ifdef CONFIG_RPS
        struct rps_map *map;
        struct rps_dev_flow_table *flow_table;
@@ -743,6 +738,7 @@ static void rx_queue_release(struct kobject *kobj)
                RCU_INIT_POINTER(queue->rps_flow_table, NULL);
                call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
        }
+#endif
 
        memset(kobj, 0, sizeof(*kobj));
        dev_put(queue->dev);
@@ -763,25 +759,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
        kobj->kset = net->queues_kset;
        error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
            "rx-%u", index);
-       if (error) {
-               kobject_put(kobj);
-               return error;
+       if (error)
+               goto exit;
+
+       if (net->sysfs_rx_queue_group) {
+               error = sysfs_create_group(kobj, net->sysfs_rx_queue_group);
+               if (error)
+                       goto exit;
        }
 
        kobject_uevent(kobj, KOBJ_ADD);
        dev_hold(queue->dev);
 
+       return error;
+exit:
+       kobject_put(kobj);
        return error;
 }
-#endif /* CONFIG_RPS */
+#endif /* CONFIG_SYSFS */
 
 int
 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        int i;
        int error = 0;
 
+#ifndef CONFIG_RPS
+       if (!net->sysfs_rx_queue_group)
+               return 0;
+#endif
        for (i = old_num; i < new_num; i++) {
                error = rx_queue_add_kobject(net, i);
                if (error) {
@@ -790,8 +797,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
                }
        }
 
-       while (--i >= new_num)
+       while (--i >= new_num) {
+               if (net->sysfs_rx_queue_group)
+                       sysfs_remove_group(&net->_rx[i].kobj,
+                                          net->sysfs_rx_queue_group);
                kobject_put(&net->_rx[i].kobj);
+       }
 
        return error;
 #else
@@ -1155,9 +1166,6 @@ static int register_queue_kobjects(struct net_device *net)
                NULL, &net->dev.kobj);
        if (!net->queues_kset)
                return -ENOMEM;
-#endif
-
-#ifdef CONFIG_RPS
        real_rx = net->real_num_rx_queues;
 #endif
        real_tx = net->real_num_tx_queues;
@@ -1184,7 +1192,7 @@ static void remove_queue_kobjects(struct net_device *net)
 {
        int real_rx = 0, real_tx = 0;
 
-#ifdef CONFIG_RPS
+#ifdef CONFIG_SYSFS
        real_rx = net->real_num_rx_queues;
 #endif
        real_tx = net->real_num_tx_queues;
......
@@ -1836,9 +1836,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
                put_page(pfrag->page);
        }
 
-       /* We restrict high order allocations to users that can afford to wait */
-       order = (prio & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
-
+       order = SKB_FRAG_PAGE_ORDER;
        do {
                gfp_t gfp = prio;
......