Commit f3c5c1bf authored by Jan Engelhardt's avatar Jan Engelhardt Committed by Patrick McHardy

netfilter: xtables: make ip_tables reentrant

Currently, the table traverser stores return addresses in the ruleset
itself (struct ip6t_entry->comefrom). This has a well-known drawback:
the jumpstack is overwritten on reentry, making it necessary for
targets to return absolute verdicts. Also, the ruleset (which might
be heavy memory-wise) needs to be replicated for each CPU that can
possibly invoke ip6t_do_table.

This patch decouples the jumpstack from struct ip6t_entry and instead
puts it into xt_table_info. Not being restricted by 'comefrom'
anymore, we can set up a stack as needed. By default, there is room
allocated for two entries into the traverser.

arp_tables is not touched though, because there is just one/two
modules and further patches seek to collapse the table traverser
anyhow.
Signed-off-by: default avatarJan Engelhardt <jengelh@medozas.de>
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
parent e281b198
...@@ -401,6 +401,13 @@ struct xt_table_info { ...@@ -401,6 +401,13 @@ struct xt_table_info {
unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int hook_entry[NF_INET_NUMHOOKS];
unsigned int underflow[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS];
/*
* Number of user chains. Since tables cannot have loops, at most
* @stacksize jumps (number of user chains) can possibly be made.
*/
unsigned int stacksize;
unsigned int *stackptr;
void ***jumpstack;
/* ipt_entry tables: one per CPU */ /* ipt_entry tables: one per CPU */
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
void *entries[1]; void *entries[1];
......
...@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, ...@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0) if (ret != 0)
...@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net, ...@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
{ {
int ret; int ret;
struct xt_table_info *newinfo; struct xt_table_info *newinfo;
struct xt_table_info bootstrap struct xt_table_info bootstrap = {0};
= { 0, 0, 0, { 0 }, { 0 }, { } };
void *loc_cpu_entry; void *loc_cpu_entry;
struct xt_table *new_table; struct xt_table *new_table;
......
...@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb, ...@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
const struct net_device *out, const struct net_device *out,
struct xt_table *table) struct xt_table *table)
{ {
#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip; const struct iphdr *ip;
bool hotdrop = false; bool hotdrop = false;
...@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb, ...@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
unsigned int verdict = NF_DROP; unsigned int verdict = NF_DROP;
const char *indev, *outdev; const char *indev, *outdev;
const void *table_base; const void *table_base;
struct ipt_entry *e, *back; struct ipt_entry *e, **jumpstack;
unsigned int *stackptr, origptr, cpu;
const struct xt_table_info *private; const struct xt_table_info *private;
struct xt_match_param mtpar; struct xt_match_param mtpar;
struct xt_target_param tgpar; struct xt_target_param tgpar;
...@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb, ...@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
IP_NF_ASSERT(table->valid_hooks & (1 << hook)); IP_NF_ASSERT(table->valid_hooks & (1 << hook));
xt_info_rdlock_bh(); xt_info_rdlock_bh();
private = table->private; private = table->private;
table_base = private->entries[smp_processor_id()]; cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = &private->stackptr[cpu];
origptr = *stackptr;
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
/* For return from builtin chain */ pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
back = get_entry(table_base, private->underflow[hook]); table->name, hook, origptr,
get_entry(table_base, private->underflow[hook]));
do { do {
const struct ipt_entry_target *t; const struct ipt_entry_target *t;
const struct xt_entry_match *ematch; const struct xt_entry_match *ematch;
IP_NF_ASSERT(e); IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
if (!ip_packet_match(ip, indev, outdev, if (!ip_packet_match(ip, indev, outdev,
&e->ip, mtpar.fragoff)) { &e->ip, mtpar.fragoff)) {
no_match: no_match:
...@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb, ...@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned)(-v) - 1; verdict = (unsigned)(-v) - 1;
break; break;
} }
e = back; if (*stackptr == 0) {
back = get_entry(table_base, back->comefrom); e = get_entry(table_base,
private->underflow[hook]);
pr_devel("Underflow (this is normal) "
"to %p\n", e);
} else {
e = jumpstack[--*stackptr];
pr_devel("Pulled %p out from pos %u\n",
e, *stackptr);
e = ipt_next_entry(e);
}
continue; continue;
} }
if (table_base + v != ipt_next_entry(e) && if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) { !(e->ip.flags & IPT_F_GOTO)) {
/* Save old back ptr in next entry */ if (*stackptr >= private->stacksize) {
struct ipt_entry *next = ipt_next_entry(e); verdict = NF_DROP;
next->comefrom = (void *)back - table_base; break;
/* set back pointer to next entry */ }
back = next; jumpstack[(*stackptr)++] = e;
pr_devel("Pushed %p into pos %u\n",
e, *stackptr - 1);
} }
e = get_entry(table_base, v); e = get_entry(table_base, v);
...@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb, ...@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
tgpar.targinfo = t->data; tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
tb_comefrom = 0xeeeeeeec;
#endif
verdict = t->u.kernel.target->target(skb, &tgpar); verdict = t->u.kernel.target->target(skb, &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
printk("Target %s reentered!\n",
t->u.kernel.target->name);
verdict = NF_DROP;
}
tb_comefrom = 0x57acc001;
#endif
/* Target might have changed stuff. */ /* Target might have changed stuff. */
ip = ip_hdr(skb); ip = ip_hdr(skb);
if (verdict == IPT_CONTINUE) if (verdict == IPT_CONTINUE)
...@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb, ...@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
break; break;
} while (!hotdrop); } while (!hotdrop);
xt_info_rdunlock_bh(); xt_info_rdunlock_bh();
pr_devel("Exiting %s; resetting sp from %u to %u\n",
__func__, *stackptr, origptr);
*stackptr = origptr;
#ifdef DEBUG_ALLOW_ALL #ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT; return NF_ACCEPT;
#else #else
...@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb, ...@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
return NF_DROP; return NF_DROP;
else return verdict; else return verdict;
#endif #endif
#undef tb_comefrom
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
...@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net, ...@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
{ {
int ret; int ret;
struct xt_table_info *newinfo; struct xt_table_info *newinfo;
struct xt_table_info bootstrap struct xt_table_info bootstrap = {0};
= { 0, 0, 0, { 0 }, { 0 }, { } };
void *loc_cpu_entry; void *loc_cpu_entry;
struct xt_table *new_table; struct xt_table *new_table;
......
...@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
const struct net_device *out, const struct net_device *out,
struct xt_table *table) struct xt_table *table)
{ {
#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
bool hotdrop = false; bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */ /* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP; unsigned int verdict = NF_DROP;
const char *indev, *outdev; const char *indev, *outdev;
const void *table_base; const void *table_base;
struct ip6t_entry *e, *back; struct ip6t_entry *e, **jumpstack;
unsigned int *stackptr, origptr, cpu;
const struct xt_table_info *private; const struct xt_table_info *private;
struct xt_match_param mtpar; struct xt_match_param mtpar;
struct xt_target_param tgpar; struct xt_target_param tgpar;
...@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
xt_info_rdlock_bh(); xt_info_rdlock_bh();
private = table->private; private = table->private;
table_base = private->entries[smp_processor_id()]; cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
stackptr = &private->stackptr[cpu];
origptr = *stackptr;
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
/* For return from builtin chain */
back = get_entry(table_base, private->underflow[hook]);
do { do {
const struct ip6t_entry_target *t; const struct ip6t_entry_target *t;
const struct xt_entry_match *ematch; const struct xt_entry_match *ematch;
IP_NF_ASSERT(e); IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
&mtpar.thoff, &mtpar.fragoff, &hotdrop)) { &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
no_match: no_match:
...@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
verdict = (unsigned)(-v) - 1; verdict = (unsigned)(-v) - 1;
break; break;
} }
e = back; if (*stackptr == 0)
back = get_entry(table_base, back->comefrom); e = get_entry(table_base,
private->underflow[hook]);
else
e = ip6t_next_entry(jumpstack[--*stackptr]);
continue; continue;
} }
if (table_base + v != ip6t_next_entry(e) && if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) { !(e->ipv6.flags & IP6T_F_GOTO)) {
/* Save old back ptr in next entry */ if (*stackptr >= private->stacksize) {
struct ip6t_entry *next = ip6t_next_entry(e); verdict = NF_DROP;
next->comefrom = (void *)back - table_base; break;
/* set back pointer to next entry */ }
back = next; jumpstack[(*stackptr)++] = e;
} }
e = get_entry(table_base, v); e = get_entry(table_base, v);
...@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
tgpar.target = t->u.kernel.target; tgpar.target = t->u.kernel.target;
tgpar.targinfo = t->data; tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
tb_comefrom = 0xeeeeeeec;
#endif
verdict = t->u.kernel.target->target(skb, &tgpar); verdict = t->u.kernel.target->target(skb, &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
printk("Target %s reentered!\n",
t->u.kernel.target->name);
verdict = NF_DROP;
}
tb_comefrom = 0x57acc001;
#endif
if (verdict == IP6T_CONTINUE) if (verdict == IP6T_CONTINUE)
e = ip6t_next_entry(e); e = ip6t_next_entry(e);
else else
...@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
break; break;
} while (!hotdrop); } while (!hotdrop);
#ifdef CONFIG_NETFILTER_DEBUG
tb_comefrom = NETFILTER_LINK_POISON;
#endif
xt_info_rdunlock_bh(); xt_info_rdunlock_bh();
*stackptr = origptr;
#ifdef DEBUG_ALLOW_ALL #ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT; return NF_ACCEPT;
...@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
return NF_DROP; return NF_DROP;
else return verdict; else return verdict;
#endif #endif
#undef tb_comefrom
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
...@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net, ...@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
{ {
int ret; int ret;
struct xt_table_info *newinfo; struct xt_table_info *newinfo;
struct xt_table_info bootstrap struct xt_table_info bootstrap = {0};
= { 0, 0, 0, { 0 }, { 0 }, { } };
void *loc_cpu_entry; void *loc_cpu_entry;
struct xt_table *new_table; struct xt_table *new_table;
......
...@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { ...@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
[NFPROTO_IPV6] = "ip6", [NFPROTO_IPV6] = "ip6",
}; };
/* Allow this many total (re)entries. */
static const unsigned int xt_jumpstack_multiplier = 2;
/* Registration hooks for targets. */ /* Registration hooks for targets. */
int int
xt_register_target(struct xt_target *target) xt_register_target(struct xt_target *target)
...@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info) ...@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
else else
vfree(info->entries[cpu]); vfree(info->entries[cpu]);
} }
if (info->jumpstack != NULL) {
if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
for_each_possible_cpu(cpu)
vfree(info->jumpstack[cpu]);
} else {
for_each_possible_cpu(cpu)
kfree(info->jumpstack[cpu]);
}
}
if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
vfree(info->jumpstack);
else
kfree(info->jumpstack);
if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
vfree(info->stackptr);
else
kfree(info->stackptr);
kfree(info); kfree(info);
} }
EXPORT_SYMBOL(xt_free_table_info); EXPORT_SYMBOL(xt_free_table_info);
...@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); ...@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
int cpu;
size = sizeof(unsigned int) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->stackptr = vmalloc(size);
else
i->stackptr = kmalloc(size, GFP_KERNEL);
if (i->stackptr == NULL)
return -ENOMEM;
memset(i->stackptr, 0, size);
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->jumpstack = vmalloc(size);
else
i->jumpstack = kmalloc(size, GFP_KERNEL);
if (i->jumpstack == NULL)
return -ENOMEM;
memset(i->jumpstack, 0, size);
i->stacksize *= xt_jumpstack_multiplier;
size = sizeof(void *) * i->stacksize;
for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size,
cpu_to_node(cpu));
else
i->jumpstack[cpu] = kmalloc_node(size,
GFP_KERNEL, cpu_to_node(cpu));
if (i->jumpstack[cpu] == NULL)
/*
* Freeing will be done later on by the callers. The
* chain is: xt_replace_table -> __do_replace ->
* do_replace -> xt_free_table_info.
*/
return -ENOMEM;
}
return 0;
}
struct xt_table_info * struct xt_table_info *
xt_replace_table(struct xt_table *table, xt_replace_table(struct xt_table *table,
...@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table, ...@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
int *error) int *error)
{ {
struct xt_table_info *private; struct xt_table_info *private;
int ret;
/* Do the substitution. */ /* Do the substitution. */
local_bh_disable(); local_bh_disable();
...@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table, ...@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
return NULL; return NULL;
} }
ret = xt_jumpstack_alloc(newinfo);
if (ret < 0) {
*error = ret;
return NULL;
}
table->private = newinfo; table->private = newinfo;
newinfo->initial_entries = private->initial_entries; newinfo->initial_entries = private->initial_entries;
...@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net, ...@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
struct xt_table_info *private; struct xt_table_info *private;
struct xt_table *t, *table; struct xt_table *t, *table;
ret = xt_jumpstack_alloc(newinfo);
if (ret < 0)
return ERR_PTR(ret);
/* Don't add one object to multiple lists. */ /* Don't add one object to multiple lists. */
table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
if (!table) { if (!table) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment