Commit edd79ca8 authored by David S. Miller's avatar David S. Miller

Merge branch 'pktgen'

Jesper Dangaard Brouer says:

====================
Optimizing pktgen for single CPU performance

This series focus on optimizing "pktgen" for single CPU performance.

V2-series:
 - Removed some patches
 - Doc real reason for TX ring buffer filling up

NIC tuning for pktgen:
 http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html

General overload setup according to:
 http://netoptimizer.blogspot.dk/2014/04/basic-tuning-for-network-overload.html

Hardware:
 System: CPU E5-2630
 NIC: Intel ixgbe/82599 chip

Testing done with net-next git tree on top of
 commit 6623b419 ("Merge branch 'master' of...jkirsher/net-next")

Pktgen script exercising race condition:
 https://github.com/netoptimizer/network-testing/blob/master/pktgen/unit_test01_race_add_rem_device_loop.sh

Tool for measuring LOCK overhead:
 https://github.com/netoptimizer/network-testing/blob/master/src/overhead_cmpxchg.c
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 5b9e7e16 8788370a
......@@ -24,6 +24,34 @@ For monitoring and control pktgen creates:
/proc/net/pktgen/ethX
Tuning NIC for max performance
==============================
The default NIC setting are (likely) not tuned for pktgen's artificial
overload type of benchmarking, as this could hurt the normal use-case.
Specifically increasing the TX ring buffer in the NIC:
# ethtool -G ethX tx 1024
A larger TX ring can improve pktgen's performance, while it can hurt
in the general case, 1) because the TX ring buffer might get larger
than the CPUs L1/L2 cache, 2) because it allow more queueing in the
NIC HW layer (which is bad for bufferbloat).
One should be careful to conclude, that packets/descriptors in the HW
TX ring cause delay. Drivers usually delay cleaning up the
ring-buffers (for various performance reasons), thus packets stalling
the TX ring, might just be waiting for cleanup.
This cleanup issues is specifically the case, for the driver ixgbe
(Intel 82599 chip). This driver (ixgbe) combine TX+RX ring cleanups,
and the cleanup interval is affected by the ethtool --coalesce setting
of parameter "rx-usecs".
For ixgbe use e.g "30" resulting in approx 33K interrupts/sec (1/30*10^6):
# ethtool -C ethX rx-usecs 30
Viewing threads
===============
/proc/net/pktgen/kpktgend_0
......
......@@ -69,8 +69,9 @@
* for running devices in the if_list and sends packets until count is 0 it
* also the thread checks the thread->control which is used for inter-process
* communication. controlling process "posts" operations to the threads this
* way. The if_lock should be possible to remove when add/rem_device is merged
* into this too.
* way.
* The if_list is RCU protected, and the if_lock remains to protect updating
* of if_list, from "add_device" as it invoked from userspace (via proc write).
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
......@@ -208,7 +209,7 @@
#define T_REMDEVALL (1<<2) /* Remove all devs */
#define T_REMDEV (1<<3) /* Remove one dev */
/* If lock -- can be removed after some work */
/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
#define if_unlock(t) spin_unlock(&(t->if_lock));
......@@ -241,6 +242,7 @@ struct pktgen_dev {
struct proc_dir_entry *entry; /* proc file */
struct pktgen_thread *pg_thread;/* the owner */
struct list_head list; /* chaining in the thread's run-queue */
struct rcu_head rcu; /* freed by RCU */
int running; /* if false, the test will stop */
......@@ -1737,14 +1739,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
seq_puts(seq, "Running: ");
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list)
rcu_read_lock();
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
seq_puts(seq, "\nStopped: ");
list_for_each_entry(pkt_dev, &t->if_list, list)
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
......@@ -1753,7 +1755,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
else
seq_puts(seq, "\nResult: NA\n");
if_unlock(t);
rcu_read_unlock();
return 0;
}
......@@ -1878,10 +1880,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
pkt_dev = pktgen_find_dev(t, ifname, exact);
if (pkt_dev) {
if (remove) {
if_lock(t);
pkt_dev->removal_mark = 1;
t->control |= T_REMDEV;
if_unlock(t);
}
break;
}
......@@ -1931,7 +1931,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
struct pktgen_dev *pkt_dev;
list_for_each_entry(pkt_dev, &t->if_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (pkt_dev->odev != dev)
continue;
......@@ -1946,6 +1947,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
dev->name);
break;
}
rcu_read_unlock();
}
}
......@@ -2997,8 +2999,8 @@ static void pktgen_run(struct pktgen_thread *t)
func_enter();
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
/*
* setup odev and create initial packet.
......@@ -3007,18 +3009,18 @@ static void pktgen_run(struct pktgen_thread *t)
if (pkt_dev->odev) {
pktgen_clear_counters(pkt_dev);
pkt_dev->running = 1; /* Cranke yeself! */
pkt_dev->skb = NULL;
pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
set_pkt_overhead(pkt_dev);
strcpy(pkt_dev->result, "Starting");
pkt_dev->running = 1; /* Cranke yeself! */
started++;
} else
strcpy(pkt_dev->result, "Error starting");
}
if_unlock(t);
rcu_read_unlock();
if (started)
t->control &= ~(T_STOP);
}
......@@ -3041,27 +3043,25 @@ static int thread_is_running(const struct pktgen_thread *t)
{
const struct pktgen_dev *pkt_dev;
list_for_each_entry(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
rcu_read_lock();
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (pkt_dev->running) {
rcu_read_unlock();
return 1;
}
rcu_read_unlock();
return 0;
}
static int pktgen_wait_thread_run(struct pktgen_thread *t)
{
if_lock(t);
while (thread_is_running(t)) {
if_unlock(t);
msleep_interruptible(100);
if (signal_pending(current))
goto signal;
if_lock(t);
}
if_unlock(t);
return 1;
signal:
return 0;
......@@ -3166,10 +3166,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
return -EINVAL;
}
pkt_dev->running = 0;
kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
pkt_dev->stopped_at = ktime_get();
pkt_dev->running = 0;
show_results(pkt_dev, nr_frags);
......@@ -3180,9 +3180,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev, *best = NULL;
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list) {
rcu_read_lock();
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (!pkt_dev->running)
continue;
if (best == NULL)
......@@ -3190,7 +3189,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
best = pkt_dev;
}
if_unlock(t);
rcu_read_unlock();
return best;
}
......@@ -3200,13 +3200,13 @@ static void pktgen_stop(struct pktgen_thread *t)
func_enter();
if_lock(t);
rcu_read_lock();
list_for_each_entry(pkt_dev, &t->if_list, list) {
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
pktgen_stop_device(pkt_dev);
}
if_unlock(t);
rcu_read_unlock();
}
/*
......@@ -3220,8 +3220,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
func_enter();
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
......@@ -3235,8 +3233,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
break;
}
if_unlock(t);
}
static void pktgen_rem_all_ifs(struct pktgen_thread *t)
......@@ -3248,8 +3244,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
/* Remove all devices, free mem */
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
......@@ -3258,8 +3252,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
pktgen_remove_device(t, cur);
}
if_unlock(t);
}
static void pktgen_rem_thread(struct pktgen_thread *t)
......@@ -3407,10 +3399,10 @@ static int pktgen_thread_worker(void *arg)
pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
set_current_state(TASK_INTERRUPTIBLE);
set_freezable();
__set_current_state(TASK_RUNNING);
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
......@@ -3424,8 +3416,6 @@ static int pktgen_thread_worker(void *arg)
continue;
}
__set_current_state(TASK_RUNNING);
if (likely(pkt_dev)) {
pktgen_xmit(pkt_dev);
......@@ -3456,9 +3446,8 @@ static int pktgen_thread_worker(void *arg)
}
try_to_freeze();
set_current_state(TASK_INTERRUPTIBLE);
}
set_current_state(TASK_INTERRUPTIBLE);
pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
......@@ -3485,8 +3474,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
struct pktgen_dev *p, *pkt_dev = NULL;
size_t len = strlen(ifname);
if_lock(t);
list_for_each_entry(p, &t->if_list, list)
rcu_read_lock();
list_for_each_entry_rcu(p, &t->if_list, list)
if (strncmp(p->odevname, ifname, len) == 0) {
if (p->odevname[len]) {
if (exact || p->odevname[len] != '@')
......@@ -3496,7 +3485,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
break;
}
if_unlock(t);
rcu_read_unlock();
pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
......@@ -3510,6 +3499,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
{
int rv = 0;
/* This function cannot be called concurrently, as its called
* under pktgen_thread_lock mutex, but it can run from
* userspace on another CPU than the kthread. The if_lock()
* is used here to sync with concurrent instances of
* _rem_dev_from_if_list() invoked via kthread, which is also
* updating the if_list */
if_lock(t);
if (pkt_dev->pg_thread) {
......@@ -3518,9 +3513,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
goto out;
}
list_add(&pkt_dev->list, &t->if_list);
pkt_dev->pg_thread = t;
pkt_dev->running = 0;
pkt_dev->pg_thread = t;
list_add_rcu(&pkt_dev->list, &t->if_list);
out:
if_unlock(t);
......@@ -3675,11 +3670,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
struct list_head *q, *n;
struct pktgen_dev *p;
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
p = list_entry(q, struct pktgen_dev, list);
if (p == pkt_dev)
list_del(&p->list);
list_del_rcu(&p->list);
}
if_unlock(t);
}
static int pktgen_remove_device(struct pktgen_thread *t,
......@@ -3699,20 +3696,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
pkt_dev->odev = NULL;
}
/* And update the thread if_list */
_rem_dev_from_if_list(t, pkt_dev);
/* Remove proc before if_list entry, because add_device uses
* list to determine if interface already exist, avoid race
* with proc_create_data() */
if (pkt_dev->entry)
proc_remove(pkt_dev->entry);
/* And update the thread if_list */
_rem_dev_from_if_list(t, pkt_dev);
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
if (pkt_dev->page)
put_page(pkt_dev->page);
kfree(pkt_dev);
kfree_rcu(pkt_dev, rcu);
return 0;
}
......@@ -3812,6 +3811,7 @@ static void __exit pg_cleanup(void)
{
unregister_netdevice_notifier(&pktgen_notifier_block);
unregister_pernet_subsys(&pg_net_ops);
/* Don't need rcu_barrier() due to use of kfree_rcu() */
}
module_init(pg_init);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment