Commit 83723d60, authored by Eric Dumazet, committed by Pablo Neira Ayuso

netfilter: x_tables: dont block BH while reading counters

Running "iptables -L" with a lot of rules causes too much BH (bottom half) latency.
Jesper mentioned ~6 ms and was worried about frame drops.

Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesn't need to block BH (for this CPU, but also for other CPUs).

This adds two increments of the seqlock sequence per ipt_do_table() call;
it's a reasonable cost for allowing "iptables -L" to not block BH
processing.
Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 45b9f509
...@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info); ...@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
* necessary for reading the counters. * necessary for reading the counters.
*/ */
struct xt_info_lock { struct xt_info_lock {
spinlock_t lock; seqlock_t lock;
unsigned char readers; unsigned char readers;
}; };
DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
...@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void) ...@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
local_bh_disable(); local_bh_disable();
lock = &__get_cpu_var(xt_info_locks); lock = &__get_cpu_var(xt_info_locks);
if (likely(!lock->readers++)) if (likely(!lock->readers++))
spin_lock(&lock->lock); write_seqlock(&lock->lock);
} }
static inline void xt_info_rdunlock_bh(void) static inline void xt_info_rdunlock_bh(void)
...@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void) ...@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
if (likely(!--lock->readers)) if (likely(!--lock->readers))
spin_unlock(&lock->lock); write_sequnlock(&lock->lock);
local_bh_enable(); local_bh_enable();
} }
...@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void) ...@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
*/ */
static inline void xt_info_wrlock(unsigned int cpu) static inline void xt_info_wrlock(unsigned int cpu)
{ {
spin_lock(&per_cpu(xt_info_locks, cpu).lock); write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
} }
static inline void xt_info_wrunlock(unsigned int cpu) static inline void xt_info_wrunlock(unsigned int cpu)
{ {
spin_unlock(&per_cpu(xt_info_locks, cpu).lock); write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
} }
/* /*
......
...@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t, ...@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
struct arpt_entry *iter; struct arpt_entry *iter;
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
*
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* Processing counters from other cpus, we can let bottom half enabled,
* (preemption is disabled)
*/
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpu == curcpu) seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
continue;
i = 0; i = 0;
local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) { xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt, u64 bcnt, pcnt;
iter->counters.pcnt); unsigned int start;
do {
start = read_seqbegin(lock);
bcnt = iter->counters.bcnt;
pcnt = iter->counters.pcnt;
} while (read_seqretry(lock, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; ++i;
} }
xt_info_wrunlock(cpu);
local_bh_enable();
} }
put_cpu();
} }
static struct xt_counters *alloc_counters(const struct xt_table *table) static struct xt_counters *alloc_counters(const struct xt_table *table)
...@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) ...@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
* about). * about).
*/ */
countersize = sizeof(struct xt_counters) * private->number; countersize = sizeof(struct xt_counters) * private->number;
counters = vmalloc(countersize); counters = vzalloc(countersize);
if (counters == NULL) if (counters == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name, ...@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
struct arpt_entry *iter; struct arpt_entry *iter;
ret = 0; ret = 0;
counters = vmalloc(num_counters * sizeof(struct xt_counters)); counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) { if (!counters) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
......
...@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t, ...@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
struct ipt_entry *iter; struct ipt_entry *iter;
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU.
*
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* Processing counters from other cpus, we can let bottom half enabled,
* (preemption is disabled)
*/
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpu == curcpu) seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
continue;
i = 0; i = 0;
local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) { xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt, u64 bcnt, pcnt;
iter->counters.pcnt); unsigned int start;
do {
start = read_seqbegin(lock);
bcnt = iter->counters.bcnt;
pcnt = iter->counters.pcnt;
} while (read_seqretry(lock, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; /* macro does multi eval of i */ ++i; /* macro does multi eval of i */
} }
xt_info_wrunlock(cpu);
local_bh_enable();
} }
put_cpu();
} }
static struct xt_counters *alloc_counters(const struct xt_table *table) static struct xt_counters *alloc_counters(const struct xt_table *table)
...@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) ...@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care (other than comefrom, which userspace doesn't care
about). */ about). */
countersize = sizeof(struct xt_counters) * private->number; countersize = sizeof(struct xt_counters) * private->number;
counters = vmalloc(countersize); counters = vzalloc(countersize);
if (counters == NULL) if (counters == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, ...@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter; struct ipt_entry *iter;
ret = 0; ret = 0;
counters = vmalloc(num_counters * sizeof(struct xt_counters)); counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) { if (!counters) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
......
...@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t, ...@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
struct ip6t_entry *iter; struct ip6t_entry *iter;
unsigned int cpu; unsigned int cpu;
unsigned int i; unsigned int i;
unsigned int curcpu = get_cpu();
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
*
* Bottom half has to be disabled to prevent deadlock
* if new softirq were to run and call ipt_do_table
*/
local_bh_disable();
i = 0;
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
SET_COUNTER(counters[i], iter->counters.bcnt,
iter->counters.pcnt);
++i;
}
local_bh_enable();
/* Processing counters from other cpus, we can let bottom half enabled,
* (preemption is disabled)
*/
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (cpu == curcpu) seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
continue;
i = 0; i = 0;
local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) { xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt, u64 bcnt, pcnt;
iter->counters.pcnt); unsigned int start;
do {
start = read_seqbegin(lock);
bcnt = iter->counters.bcnt;
pcnt = iter->counters.pcnt;
} while (read_seqretry(lock, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
++i; ++i;
} }
xt_info_wrunlock(cpu);
local_bh_enable();
} }
put_cpu();
} }
static struct xt_counters *alloc_counters(const struct xt_table *table) static struct xt_counters *alloc_counters(const struct xt_table *table)
...@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) ...@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care (other than comefrom, which userspace doesn't care
about). */ about). */
countersize = sizeof(struct xt_counters) * private->number; countersize = sizeof(struct xt_counters) * private->number;
counters = vmalloc(countersize); counters = vzalloc(countersize);
if (counters == NULL) if (counters == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, ...@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter; struct ip6t_entry *iter;
ret = 0; ret = 0;
counters = vmalloc(num_counters * sizeof(struct xt_counters)); counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) { if (!counters) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
......
...@@ -1325,7 +1325,8 @@ static int __init xt_init(void) ...@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
spin_lock_init(&lock->lock);
seqlock_init(&lock->lock);
lock->readers = 0; lock->readers = 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment