Commit 542bb396 authored by David S. Miller

Merge branch 'veth-flexible-channel-numbers'

Paolo Abeni says:

====================
veth: more flexible channels number configuration

XDP setups can benefit from multiple veth RX/TX queues. Currently
veth allows setting the number of queues only at creation time, via the
'numrxqueues' and 'numtxqueues' parameters.

This series introduces support for the ethtool set_channel operation
and allows configuring the queue number via a new module parameter.

The veth default configuration is not changed.

Finally self-tests are updated to check the new features, with both
valid and invalid arguments.

This iteration is a rebase of the most recent RFC; it does not provide
a module parameter to configure the default number of queues, but I
think one could be worthwhile.

RFC v1 -> RFC v2:
 - report more consistent 'combined' count
 - make set_channel as resilient as possible to errors
 - drop module parameter - but I would still consider it.
 - more self-tests
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2c080404 1ec2230f
This diff is collapsed.
......@@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void)
return false;
/* Memcg to charge can't be determined. */
if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
return true;
return false;
......
......@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
array = kzalloc(size, GFP_KERNEL);
array = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (array == NULL)
return -ENOBUFS;
......
......@@ -10119,7 +10119,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
......@@ -10186,7 +10186,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
......@@ -10826,7 +10826,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
......
......@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
{
struct fib_rule *r;
r = kzalloc(ops->rule_size, GFP_KERNEL);
r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (r == NULL)
return -ENOMEM;
......@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (!nlrule) {
err = -ENOMEM;
goto errout;
......
......@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
{
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
......@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
......
......@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
......
......@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
static struct in_ifaddr *inet_alloc_ifa(void)
{
return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
......
......@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
{
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
0, SLAB_PANIC, NULL);
0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
LEAF_SIZE,
0, SLAB_PANIC, NULL);
0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
}
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
......
......@@ -4512,7 +4512,9 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
......
......@@ -1080,7 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
ifa = kzalloc(sizeof(*ifa), gfp_flags);
ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
......
......@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
sizeof(struct fib6_node),
0, SLAB_HWCACHE_ALIGN,
sizeof(struct fib6_node), 0,
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
......
......@@ -6638,7 +6638,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
SLAB_HWCACHE_ALIGN, NULL);
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
......
......@@ -321,7 +321,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
......@@ -334,7 +334,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
__GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
......
......@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
readonly NPROCS=`nproc`
readonly CPUS=`nproc`
ret=0
cleanup() {
......@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
# Check the channel counts that 'ethtool -l' reports for a veth device.
#
# $1: message printed as the left-hand column of the result line
# $2: device suffix; the device is veth$2 inside namespace $BASE$2
# $3: expected RX channel count
# $4: expected TX channel count
#
# The check passes only when the RX and TX values match the expected ones
# and the combined count is reported as "n/a". On a mismatch the
# expected:current pairs are printed and the script-wide 'ret' status is
# set to 1 so the failure is not lost (NOTE(review): assumes the script
# reports 'ret' as its result - confirm against the end of the script).
chk_channels() {
	local msg="$1"
	local target=$2
	local rx=$3
	local tx=$4

	local dev=veth$target
	local settings
	local cur_rx
	local cur_tx
	local cur_combined

	# Query the device once so that all three values come from the same
	# snapshot of the configuration.
	settings=`ip netns exec $BASE$target ethtool -l $dev`

	# For every field only the last matching line of the output is used.
	cur_rx=`printf '%s\n' "$settings" | grep RX: | tail -n 1 | awk '{print $2}'`
	cur_tx=`printf '%s\n' "$settings" | grep TX: | tail -n 1 | awk '{print $2}'`
	cur_combined=`printf '%s\n' "$settings" | grep Combined: | tail -n 1 | awk '{print $2}'`

	printf "%-60s" "$msg"
	if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
		echo " ok "
	else
		echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
		ret=1
	fi
}
chk_gro() {
local msg="$1"
local expected=$2
......@@ -107,11 +130,100 @@ chk_gro() {
fi
}
# Keep resizing the RX/TX channels of both veth peers until a deadline.
#
# $1: deadline, in seconds since the epoch; the loop ends once the current
#     time is past it
#
# Every pass grows both devices from 1 up to $CPUS channels and then
# shrinks them again from $CPUS - 1 down to 1, one step at a time.
__change_channels()
{
	local deadline=$1
	local now
	local nr

	# Re-read the clock before every pass; stop when the deadline is over.
	while printf -v now '%(%s)T' && [ $now -le $deadline ]; do
		# ramp up: 1 .. CPUS
		for ((nr = 1; nr <= CPUS; nr++)); do
			ip netns exec $NS_SRC ethtool -L veth$SRC rx $nr tx $nr
			ip netns exec $NS_DST ethtool -L veth$DST rx $nr tx $nr
		done

		# ramp down: CPUS - 1 .. 1
		for ((nr = CPUS - 1; nr >= 1; nr--)); do
			ip netns exec $NS_SRC ethtool -L veth$SRC rx $nr tx $nr
			ip netns exec $NS_DST ethtool -L veth$DST rx $nr tx $nr
		done
	done
}
# Keep sending data from the source namespace until a deadline.
#
# $1: deadline, in seconds since the epoch; the loop ends once the current
#     time is past it
#
# Every iteration runs one udpgso_bench_tx instance inside $NS_SRC,
# targeting the address $BM_NET_V4$DST.
__send_data() {
	local end=$1
	# Keep the timestamp local instead of leaking it into the caller's scope.
	local cur

	while true; do
		printf -v cur '%(%s)T'
		[ $cur -le $end ] || break

		ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
	done
}
# Run the optional stress test for $STRESS seconds.
#
# Both peers are first set to 3 RX and 3 TX channels and a udpgso_bench_rx
# receiver is started in the background inside $NS_DST. One
# __change_channels worker and four __send_data workers then run
# concurrently with a common deadline. Once all workers are done the
# receiver is killed and the channel configuration is set to rx 2 / tx 2
# on the source device and rx 2 / tx 1 on the destination device.
do_stress() {
	local deadline
	local rx_pid
	local ch_pid
	local -a data_pids=()
	local n

	# deadline = now + $STRESS, in seconds since the epoch
	printf -v deadline '%(%s)T'
	deadline=$((deadline + $STRESS))

	ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
	ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3

	ip netns exec $NS_DST ./udpgso_bench_rx &
	rx_pid=$!

	echo "Running stress test for $STRESS seconds..."
	__change_channels $deadline &
	ch_pid=$!

	# four concurrent senders
	for n in 1 2 3 4; do
		__send_data $deadline &
		data_pids+=($!)
	done

	wait $ch_pid "${data_pids[@]}"
	kill -9 $rx_pid
	echo "done"

	# restore previous setting
	ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
	ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
}
# Print the command line synopsis followed by one line per supported option.
usage()
{
	local opt_help

	echo "Usage: $0 [-h] [-s <seconds>]"
	for opt_help in \
		"\t-h: show this help" \
		"\t-s: run optional stress tests for the given amount of seconds"; do
		echo -e "$opt_help"
	done
}
# Parse the command line: -h prints the help text and exits, -s <seconds>
# selects the duration of the optional stress test. STRESS stays 0 unless
# -s is given.
STRESS=0
while getopts "hs:" option; do
	case "$option" in
	"h")
		usage $0
		exit 0
		;;
	"s")
		# STRESS is used in numeric tests and in arithmetic further
		# down, so reject anything that is not a plain non-negative
		# integer.
		case "$OPTARG" in
		""|*[!0-9]*)
			echo "Invalid stress test duration: $OPTARG"
			usage $0
			exit 1
			;;
		esac
		STRESS=$OPTARG
		;;
	*)
		# getopts reports unknown options and missing option
		# arguments as "?"; stop here instead of ignoring them.
		usage $0
		exit 1
		;;
	esac
done
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
......@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
chk_channels "default channels" $DST 1 1
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
......@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
CUR_TX=1
CUR_RX=1
if [ $CPUS -gt 1 ]; then
ip netns exec $NS_DST ethtool -L veth$DST tx 2
chk_channels "setting tx channels" $DST 1 2
CUR_TX=2
fi
if [ $CPUS -gt 2 ]; then
ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
chk_channels "setting both rx and tx channels" $DST 3 3
CUR_RX=3
CUR_TX=3
fi
ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
if [ $CPUS -gt 1 ]; then
# this also tests queues nr reduction
ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
printf "%-60s" "bad setting: XDP with RX nr less than TX"
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
section xdp_dummy 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
# the following tests will run with multiple channels active
ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
ip netns exec $NS_DST ethtool -L veth$DST rx 2
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
section xdp_dummy 2>/dev/null
printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
CUR_RX=2
CUR_TX=2
fi
if [ $CPUS -gt 2 ]; then
printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
chk_channels "setting invalid channels nr" $DST 2 2
fi
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
......@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
if [ $CPUS -gt 1 ]; then
ip netns exec $NS_DST ethtool -L veth$DST tx 1
chk_channels "decreasing tx channels with device down" $DST 2 1
fi
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
if [ $CPUS -gt 1 ]; then
[ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
ip -n $NS_DST link set dev veth$DST down
ip -n $NS_SRC link set dev veth$SRC down
ip netns exec $NS_DST ethtool -L veth$DST tx 2
chk_channels "increasing tx channels with device down" $DST 2 2
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
fi
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment