Commit 04f762e8 authored by David S. Miller

Merge branch 'xps-DCB'

Alexander Duyck says:

====================
Add support for XPS when using DCB

This patch series enables proper isolation between traffic classes when
using XPS while DCB is enabled.  Previously enabling XPS would cause the
traffic to be potentially pulled from one traffic class into another on
egress.  This change essentially multiplies the XPS map by the number of
traffic classes and allows us to do a lookup per traffic class for a given
CPU.

To guarantee the isolation I invalidate the XPS map for any queues that are
moved from one traffic class to another, or if we change the number of
traffic classes.

v2: Added sysfs to display traffic class
    Replaced do/while with for loop
    Cleaned up several other for loops throughout the patch
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 20861f26 184c449f
...@@ -732,8 +732,8 @@ struct xps_dev_maps { ...@@ -732,8 +732,8 @@ struct xps_dev_maps {
struct rcu_head rcu; struct rcu_head rcu;
struct xps_map __rcu *cpu_map[0]; struct xps_map __rcu *cpu_map[0];
}; };
#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ #define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
(nr_cpu_ids * sizeof(struct xps_map *))) (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
#endif /* CONFIG_XPS */ #endif /* CONFIG_XPS */
#define TC_MAX_QUEUE 16 #define TC_MAX_QUEUE 16
...@@ -1920,34 +1920,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) ...@@ -1920,34 +1920,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
return 0; return 0;
} }
static inline int netdev_txq_to_tc(struct net_device *dev, unsigned int txq);
void netdev_reset_tc(struct net_device *dev) void netdev_reset_tc(struct net_device *dev);
{ int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
dev->num_tc = 0; int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
}
static inline
int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
{
if (tc >= dev->num_tc)
return -EINVAL;
dev->tc_to_txq[tc].count = count;
dev->tc_to_txq[tc].offset = offset;
return 0;
}
static inline
int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
{
if (num_tc > TC_MAX_QUEUE)
return -EINVAL;
dev->num_tc = num_tc;
return 0;
}
static inline static inline
int netdev_get_num_tc(struct net_device *dev) int netdev_get_num_tc(struct net_device *dev)
......
...@@ -1948,37 +1948,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) ...@@ -1948,37 +1948,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
} }
} }
/*
 * Find the traffic class whose Tx queue range contains @txq.
 *
 * Returns 0 when the device has no traffic classes configured
 * (dev->num_tc == 0), the index of the first matching tc_to_txq[] entry
 * otherwise, or -1 when none of the TC_MAX_QUEUE entries covers @txq.
 */
int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
{
	int idx;

	if (!dev->num_tc)
		return 0;

	for (idx = 0; idx < TC_MAX_QUEUE; idx++) {
		struct netdev_tc_txq *range = &dev->tc_to_txq[idx];

		/*
		 * One compare tests offset <= txq < offset + count: txq is
		 * unsigned, so a queue below the offset is expected to wrap
		 * to a large value and fail the test.
		 */
		if ((txq - range->offset) < range->count)
			return idx;
	}

	return -1;
}
#ifdef CONFIG_XPS #ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex); static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \ #define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
int cpu, u16 index) int tci, u16 index)
{ {
struct xps_map *map = NULL; struct xps_map *map = NULL;
int pos; int pos;
if (dev_maps) if (dev_maps)
map = xmap_dereference(dev_maps->cpu_map[cpu]); map = xmap_dereference(dev_maps->cpu_map[tci]);
if (!map)
return false;
for (pos = 0; map && pos < map->len; pos++) { for (pos = map->len; pos--;) {
if (map->queues[pos] == index) { if (map->queues[pos] != index)
if (map->len > 1) { continue;
map->queues[pos] = map->queues[--map->len];
} else { if (map->len > 1) {
RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); map->queues[pos] = map->queues[--map->len];
kfree_rcu(map, rcu);
map = NULL;
}
break; break;
} }
RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
kfree_rcu(map, rcu);
return false;
} }
return map; return true;
} }
static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) static bool remove_xps_queue_cpu(struct net_device *dev,
struct xps_dev_maps *dev_maps,
int cpu, u16 offset, u16 count)
{
int num_tc = dev->num_tc ? : 1;
bool active = false;
int tci;
for (tci = cpu * num_tc; num_tc--; tci++) {
int i, j;
for (i = count, j = offset; i--; j++) {
if (!remove_xps_queue(dev_maps, cpu, j))
break;
}
active |= i < 0;
}
return active;
}
static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
u16 count)
{ {
struct xps_dev_maps *dev_maps; struct xps_dev_maps *dev_maps;
int cpu, i; int cpu, i;
...@@ -1990,21 +2033,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) ...@@ -1990,21 +2033,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
if (!dev_maps) if (!dev_maps)
goto out_no_maps; goto out_no_maps;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu)
for (i = index; i < dev->num_tx_queues; i++) { active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
if (!remove_xps_queue(dev_maps, cpu, i)) offset, count);
break;
}
if (i == dev->num_tx_queues)
active = true;
}
if (!active) { if (!active) {
RCU_INIT_POINTER(dev->xps_maps, NULL); RCU_INIT_POINTER(dev->xps_maps, NULL);
kfree_rcu(dev_maps, rcu); kfree_rcu(dev_maps, rcu);
} }
for (i = index; i < dev->num_tx_queues; i++) for (i = offset + (count - 1); count--; i--)
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
NUMA_NO_NODE); NUMA_NO_NODE);
...@@ -2012,6 +2050,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) ...@@ -2012,6 +2050,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
mutex_unlock(&xps_map_mutex); mutex_unlock(&xps_map_mutex);
} }
/*
 * Reset XPS state for Tx queue @index and every queue above it, up to
 * dev->num_tx_queues, by handing that range to netif_reset_xps_queues().
 */
static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
{
	/* Number of queues from @index through the last Tx queue. */
	u16 remaining = dev->num_tx_queues - index;

	netif_reset_xps_queues(dev, index, remaining);
}
static struct xps_map *expand_xps_map(struct xps_map *map, static struct xps_map *expand_xps_map(struct xps_map *map,
int cpu, u16 index) int cpu, u16 index)
{ {
...@@ -2051,20 +2094,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2051,20 +2094,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
u16 index) u16 index)
{ {
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
int i, cpu, tci, numa_node_id = -2;
int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map; struct xps_map *map, *new_map;
int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
int cpu, numa_node_id = -2;
bool active = false; bool active = false;
if (dev->num_tc) {
num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
}
maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
if (maps_sz < L1_CACHE_BYTES)
maps_sz = L1_CACHE_BYTES;
mutex_lock(&xps_map_mutex); mutex_lock(&xps_map_mutex);
dev_maps = xmap_dereference(dev->xps_maps); dev_maps = xmap_dereference(dev->xps_maps);
/* allocate memory for queue storage */ /* allocate memory for queue storage */
for_each_online_cpu(cpu) { for_each_cpu_and(cpu, cpu_online_mask, mask) {
if (!cpumask_test_cpu(cpu, mask))
continue;
if (!new_dev_maps) if (!new_dev_maps)
new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
if (!new_dev_maps) { if (!new_dev_maps) {
...@@ -2072,25 +2123,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2072,25 +2123,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
return -ENOMEM; return -ENOMEM;
} }
map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : tci = cpu * num_tc + tc;
map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
NULL; NULL;
map = expand_xps_map(map, cpu, index); map = expand_xps_map(map, cpu, index);
if (!map) if (!map)
goto error; goto error;
RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
} }
if (!new_dev_maps) if (!new_dev_maps)
goto out_no_new_maps; goto out_no_new_maps;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
/* copy maps belonging to foreign traffic classes */
for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
/* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
}
/* We need to explicitly update tci as previous loop
* could break out early if dev_maps is NULL.
*/
tci = cpu * num_tc + tc;
if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
/* add queue to CPU maps */ /* add queue to CPU maps */
int pos = 0; int pos = 0;
map = xmap_dereference(new_dev_maps->cpu_map[cpu]); map = xmap_dereference(new_dev_maps->cpu_map[tci]);
while ((pos < map->len) && (map->queues[pos] != index)) while ((pos < map->len) && (map->queues[pos] != index))
pos++; pos++;
...@@ -2104,26 +2168,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2104,26 +2168,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
#endif #endif
} else if (dev_maps) { } else if (dev_maps) {
/* fill in the new device map from the old device map */ /* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[cpu]); map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
} }
/* copy maps belonging to foreign traffic classes */
for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
/* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
}
} }
rcu_assign_pointer(dev->xps_maps, new_dev_maps); rcu_assign_pointer(dev->xps_maps, new_dev_maps);
/* Cleanup old maps */ /* Cleanup old maps */
if (dev_maps) { if (!dev_maps)
for_each_possible_cpu(cpu) { goto out_no_old_maps;
new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
map = xmap_dereference(dev_maps->cpu_map[cpu]); for_each_possible_cpu(cpu) {
for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
map = xmap_dereference(dev_maps->cpu_map[tci]);
if (map && map != new_map) if (map && map != new_map)
kfree_rcu(map, rcu); kfree_rcu(map, rcu);
} }
kfree_rcu(dev_maps, rcu);
} }
kfree_rcu(dev_maps, rcu);
out_no_old_maps:
dev_maps = new_dev_maps; dev_maps = new_dev_maps;
active = true; active = true;
...@@ -2138,11 +2212,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2138,11 +2212,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
/* removes queue from unused CPUs */ /* removes queue from unused CPUs */
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) for (i = tc, tci = cpu * num_tc; i--; tci++)
continue; active |= remove_xps_queue(dev_maps, tci, index);
if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
if (remove_xps_queue(dev_maps, cpu, index)) active |= remove_xps_queue(dev_maps, tci, index);
active = true; for (i = num_tc - tc, tci++; --i; tci++)
active |= remove_xps_queue(dev_maps, tci, index);
} }
/* free map if not active */ /* free map if not active */
...@@ -2158,11 +2233,14 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2158,11 +2233,14 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
error: error:
/* remove any maps that we added */ /* remove any maps that we added */
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
NULL; map = dev_maps ?
if (new_map && new_map != map) xmap_dereference(dev_maps->cpu_map[tci]) :
kfree(new_map); NULL;
if (new_map && new_map != map)
kfree(new_map);
}
} }
mutex_unlock(&xps_map_mutex); mutex_unlock(&xps_map_mutex);
...@@ -2173,6 +2251,44 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, ...@@ -2173,6 +2251,44 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
EXPORT_SYMBOL(netif_set_xps_queue); EXPORT_SYMBOL(netif_set_xps_queue);
#endif #endif
/*
 * Drop the device's traffic class configuration: num_tc, the per-class
 * queue ranges (tc_to_txq) and the priority-to-class map (prio_tc_map)
 * are all zeroed.
 */
void netdev_reset_tc(struct net_device *dev)
{
#ifdef CONFIG_XPS
	/*
	 * Reset XPS for all Tx queues first, while dev->num_tc still holds
	 * the value the existing XPS maps were indexed with.
	 */
	netif_reset_xps_queues_gt(dev, 0);
#endif

	dev->num_tc = 0;
	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
}
EXPORT_SYMBOL(netdev_reset_tc);
/*
 * Record that traffic class @tc uses @count Tx queues starting at @offset.
 *
 * Returns -EINVAL when @tc is not below dev->num_tc, 0 on success.
 */
int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
{
	if (tc >= dev->num_tc)
		return -EINVAL;

#ifdef CONFIG_XPS
	/* Reset XPS for the queues in this range before recording it. */
	netif_reset_xps_queues(dev, offset, count);
#endif

	dev->tc_to_txq[tc].count = count;
	dev->tc_to_txq[tc].offset = offset;

	return 0;
}
EXPORT_SYMBOL(netdev_set_tc_queue);
/*
 * Set the number of traffic classes on the device.
 *
 * Returns -EINVAL when @num_tc exceeds TC_MAX_QUEUE, 0 on success.
 */
int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
{
	if (num_tc > TC_MAX_QUEUE)
		return -EINVAL;

#ifdef CONFIG_XPS
	/*
	 * Reset XPS for all Tx queues before the class count changes; the
	 * reset path still reads the old dev->num_tc.
	 */
	netif_reset_xps_queues_gt(dev, 0);
#endif

	dev->num_tc = num_tc;

	return 0;
}
EXPORT_SYMBOL(netdev_set_num_tc);
/* /*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater than real_num_tx_queues stale skbs on the qdisc must be flushed. * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
...@@ -3085,8 +3201,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) ...@@ -3085,8 +3201,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
rcu_read_lock(); rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps); dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) { if (dev_maps) {
map = rcu_dereference( unsigned int tci = skb->sender_cpu - 1;
dev_maps->cpu_map[skb->sender_cpu - 1]);
if (dev->num_tc) {
tci *= dev->num_tc;
tci += netdev_get_prio_tc_map(dev, skb->priority);
}
map = rcu_dereference(dev_maps->cpu_map[tci]);
if (map) { if (map) {
if (map->len == 1) if (map->len == 1)
queue_index = map->queues[0]; queue_index = map->queues[0];
......
...@@ -1024,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, ...@@ -1024,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
return sprintf(buf, "%lu", trans_timeout); return sprintf(buf, "%lu", trans_timeout);
} }
#ifdef CONFIG_XPS
static unsigned int get_netdev_queue_index(struct netdev_queue *queue) static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{ {
struct net_device *dev = queue->dev; struct net_device *dev = queue->dev;
...@@ -1036,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) ...@@ -1036,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i; return i;
} }
/*
 * sysfs "traffic_class" show handler for a Tx queue.
 *
 * Writes the queue's traffic class to @buf as a decimal number followed by
 * a newline and returns sprintf()'s result, or -EINVAL when
 * netdev_txq_to_tc() reports a negative class for the queue.
 */
static ssize_t show_traffic_class(struct netdev_queue *queue,
				  struct netdev_queue_attribute *attribute,
				  char *buf)
{
	int tc = netdev_txq_to_tc(queue->dev, get_netdev_queue_index(queue));

	return tc < 0 ? -EINVAL : sprintf(buf, "%u\n", tc);
}
#ifdef CONFIG_XPS
static ssize_t show_tx_maxrate(struct netdev_queue *queue, static ssize_t show_tx_maxrate(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute, struct netdev_queue_attribute *attribute,
char *buf) char *buf)
...@@ -1078,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate = ...@@ -1078,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate =
static struct netdev_queue_attribute queue_trans_timeout = static struct netdev_queue_attribute queue_trans_timeout =
__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
static struct netdev_queue_attribute queue_traffic_class =
__ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
#ifdef CONFIG_BQL #ifdef CONFIG_BQL
/* /*
* Byte queue limits sysfs structures and functions. * Byte queue limits sysfs structures and functions.
...@@ -1193,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue, ...@@ -1193,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute, char *buf) struct netdev_queue_attribute *attribute, char *buf)
{ {
struct net_device *dev = queue->dev; struct net_device *dev = queue->dev;
int cpu, len, num_tc = 1, tc = 0;
struct xps_dev_maps *dev_maps; struct xps_dev_maps *dev_maps;
cpumask_var_t mask; cpumask_var_t mask;
unsigned long index; unsigned long index;
int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM; return -ENOMEM;
index = get_netdev_queue_index(queue); index = get_netdev_queue_index(queue);
if (dev->num_tc) {
num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
if (tc < 0)
return -EINVAL;
}
rcu_read_lock(); rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps); dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) { if (dev_maps) {
for_each_possible_cpu(i) { for_each_possible_cpu(cpu) {
struct xps_map *map = int i, tci = cpu * num_tc + tc;
rcu_dereference(dev_maps->cpu_map[i]); struct xps_map *map;
if (map) {
int j; map = rcu_dereference(dev_maps->cpu_map[tci]);
for (j = 0; j < map->len; j++) { if (!map)
if (map->queues[j] == index) { continue;
cpumask_set_cpu(i, mask);
break; for (i = map->len; i--;) {
} if (map->queues[i] == index) {
cpumask_set_cpu(cpu, mask);
break;
} }
} }
} }
...@@ -1263,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute = ...@@ -1263,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute =
static struct attribute *netdev_queue_default_attrs[] = { static struct attribute *netdev_queue_default_attrs[] = {
&queue_trans_timeout.attr, &queue_trans_timeout.attr,
&queue_traffic_class.attr,
#ifdef CONFIG_XPS #ifdef CONFIG_XPS
&xps_cpus_attribute.attr, &xps_cpus_attribute.attr,
&queue_tx_maxrate.attr, &queue_tx_maxrate.attr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment