Commit 2a526ac9, authored by Rusty Russell, committed by David S. Miller

[NETFILTER]: Simplify expect handling

Now we've changed expect handling, we can simplify it significantly.

1) struct ip_conntrack_expect only exists until the connection
   matching it is created.  Now NAT is done directly at the time the
   expectation is matched, we don't need to keep this information
   around.

2) The term 'master' is used everywhere to mean the connection that
   expected this connection.  The 'master' field in the new connection
   points straight to the master connection, and holds a reference.

3) There is no direct link from the connection to the expectations it
   has created: we walk the global list to find them if we need to
   clean them up.  Each expectation holds a reference.

4) The ip_conntrack_expect_tuple_lock is now a proper subset of
   ip_conntrack_lock, so we can eliminate it.

5) Remove flags from helper: the policy of evicting the oldest
   expectation seems to be appropriate for everyone.

6) ip_conntrack_expect_find_get() and ip_conntrack_expect_put() are no
   longer required.

7) Remove reference count from expectations, and don't free when we
   fail ip_conntrack_expect_related(): have user call
   ip_conntrack_expect_free().
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 55d349b2
......@@ -102,33 +102,19 @@ struct ip_conntrack_expect
/* Internal linked list (global expectation list) */
struct list_head list;
/* reference count */
atomic_t use;
/* expectation list for this master */
struct list_head expected_list;
/* We expect this tuple, with the following mask */
struct ip_conntrack_tuple tuple, mask;
/* Function to call after setup and insertion */
void (*expectfn)(struct ip_conntrack *new,
struct ip_conntrack_expect *this);
/* The conntrack of the master connection */
struct ip_conntrack *expectant;
/* The conntrack of the sibling connection, set after
* expectation arrived */
struct ip_conntrack *sibling;
/* Tuple saved for conntrack */
struct ip_conntrack_tuple ct_tuple;
struct ip_conntrack *master;
/* Timer function; deletes the expectation. */
struct timer_list timeout;
/* Data filled out by the conntrack helpers follow: */
/* We expect this tuple, with the following mask */
struct ip_conntrack_tuple tuple, mask;
/* Function to call after setup and insertion */
void (*expectfn)(struct ip_conntrack *new);
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
......@@ -136,8 +122,6 @@ struct ip_conntrack_expect
/* Direction relative to the master connection. */
enum ip_conntrack_dir dir;
#endif
union ip_conntrack_expect_proto proto;
};
struct ip_conntrack_counter
......@@ -164,17 +148,12 @@ struct ip_conntrack
/* Accounting Information (same cache line as other written members) */
struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
#endif
/* If we were expected by an expectation, this will be it */
struct ip_conntrack *master;
/* If we're expecting another related connection, this will be
in expected linked list */
struct list_head sibling_list;
/* Current number of expected connections */
unsigned int expecting;
/* If we were expected by an expectation, this will be it */
struct ip_conntrack_expect *master;
/* Helper, if any. */
struct ip_conntrack_helper *helper;
......@@ -203,7 +182,7 @@ struct ip_conntrack
};
/* get master conntrack via master expectation */
#define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
#define master_ct(conntr) (conntr->master)
/* Alter reply tuple (maybe alter helper). */
extern void
......@@ -227,13 +206,6 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
/* decrement reference count on a conntrack */
extern inline void ip_conntrack_put(struct ip_conntrack *ct);
/* find unconfirmed expectation based on tuple */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
/* decrement reference count on an expectation */
void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
/* call to create an explicit dependency on ip_conntrack. */
extern void need_ip_conntrack(void);
......
......@@ -4,7 +4,6 @@
struct ip_conntrack_expect;
extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......
......@@ -48,6 +48,5 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock);
#endif /* _IP_CONNTRACK_CORE_H */
......@@ -34,7 +34,6 @@ struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......
......@@ -5,15 +5,11 @@
struct module;
/* Reuse expectation when max_expected reached */
#define IP_CT_HELPER_F_REUSE_EXPECT 0x01
struct ip_conntrack_helper
{
struct list_head list; /* Internal use. */
const char *name; /* name of the module */
unsigned char flags; /* Flags (see above) */
struct module *me; /* pointer to self */
unsigned int max_expected; /* Maximum number of concurrent
* expected connections */
......@@ -39,9 +35,10 @@ extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_t
/* Allocate space for an expectation: this is mandatory before calling
ip_conntrack_expect_related. */
extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void);
extern void ip_conntrack_expect_free(struct ip_conntrack_expect *exp);
/* Add an expected connection: can have more than one per connection */
extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp,
struct ip_conntrack *related_to);
extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
#endif /*_IP_CONNTRACK_HELPER_H*/
......@@ -20,7 +20,6 @@ struct ip_ct_irc_master {
#ifdef __KERNEL__
extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......
......@@ -14,7 +14,6 @@ struct tftphdr {
#define TFTP_OPCODE_ERROR 5
unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp);
......
......@@ -28,5 +28,6 @@ extern int ip_nat_seq_adjust(struct sk_buff **pskb,
/* Setup NAT on this expected conntrack so it follows master, but goes
* to port ct->master->saved_proto. */
extern void ip_nat_follow_master(struct ip_conntrack *ct);
extern void ip_nat_follow_master(struct ip_conntrack *ct,
struct ip_conntrack_expect *this);
#endif
......@@ -45,7 +45,6 @@ static char amanda_buffer[65536];
static DECLARE_LOCK(amanda_buffer_lock);
unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -110,6 +109,7 @@ static int help(struct sk_buff **pskb,
}
exp->expectfn = NULL;
exp->master = ct;
exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
exp->tuple.src.u.tcp.port = 0;
......@@ -124,11 +124,13 @@ static int help(struct sk_buff **pskb,
exp->mask.dst.u.tcp.port = 0xFFFF;
if (ip_nat_amanda_hook)
ret = ip_nat_amanda_hook(pskb, ct, ctinfo,
ret = ip_nat_amanda_hook(pskb, ctinfo,
tmp - amanda_buffer,
len, exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
}
out:
......
This diff is collapsed.
......@@ -40,7 +40,6 @@ static int loose;
module_param(loose, int, 0600);
unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......@@ -407,7 +406,7 @@ static int help(struct sk_buff **pskb,
networks, or the packet filter itself). */
if (!loose) {
ret = NF_ACCEPT;
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
goto out_update_nl;
}
exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
......@@ -423,17 +422,19 @@ static int help(struct sk_buff **pskb,
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = NULL;
exp->master = ct;
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
if (ip_nat_ftp_hook)
ret = ip_nat_ftp_hook(pskb, ct, ctinfo, search[i].ftptype,
ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
matchoff, matchlen, exp, &seq);
else {
/* Can't expect this? Best to drop packet now. */
if (ip_conntrack_expect_related(exp, ct) != 0)
if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
else
} else
ret = NF_ACCEPT;
}
......@@ -476,7 +477,6 @@ static int __init init(void)
ftp[i].mask.dst.protonum = 0xFFFF;
ftp[i].max_expected = 1;
ftp[i].timeout = 0;
ftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT;
ftp[i].me = ip_conntrack_ftp;
ftp[i].help = help;
......
......@@ -44,7 +44,6 @@ static char irc_buffer[65536];
static DECLARE_LOCK(irc_buffer_lock);
unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -220,13 +219,16 @@ static int help(struct sk_buff **pskb,
{ { 0, { 0 } },
{ 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = NULL;
exp->master = ct;
if (ip_nat_irc_hook)
ret = ip_nat_irc_hook(pskb, ct, ctinfo,
ret = ip_nat_irc_hook(pskb, ctinfo,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
goto out;
} /* for .. NUM_DCCPROTO */
} /* while data < ... */
......
......@@ -200,7 +200,6 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
/* strange seq_file api calls stop even if we fail,
* thus we need to grab lock since stop unlocks */
READ_LOCK(&ip_conntrack_lock);
READ_LOCK(&ip_conntrack_expect_tuple_lock);
if (list_empty(e))
return NULL;
......@@ -227,7 +226,6 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void exp_seq_stop(struct seq_file *s, void *v)
{
READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
READ_UNLOCK(&ip_conntrack_lock);
}
......@@ -235,14 +233,13 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct ip_conntrack_expect *expect = v;
if (expect->expectant->helper->timeout)
if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use),
expect->tuple.dst.protonum);
seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
ip_ct_find_proto(expect->tuple.dst.protonum));
......@@ -910,14 +907,12 @@ EXPORT_SYMBOL(ip_ct_protos);
EXPORT_SYMBOL(ip_ct_find_proto);
EXPORT_SYMBOL(ip_ct_find_helper);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_free);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_expect_put);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
EXPORT_SYMBOL(ip_conntrack_expect_list);
EXPORT_SYMBOL(ip_conntrack_lock);
EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
......
......@@ -39,7 +39,6 @@ MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#endif
unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp);
EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
......@@ -76,14 +75,17 @@ static int tftp_help(struct sk_buff **pskb,
exp->mask.dst.u.udp.port = 0xffff;
exp->mask.dst.protonum = 0xffff;
exp->expectfn = NULL;
exp->master = ct;
DEBUGP("expect: ");
DUMP_TUPLE(&exp->tuple);
DUMP_TUPLE(&exp->mask);
if (ip_nat_tftp_hook)
ret = ip_nat_tftp_hook(pskb, ct, ctinfo, exp);
else if (ip_conntrack_expect_related(exp, ct) != 0)
ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
else if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
ret = NF_DROP;
}
break;
case TFTP_OPCODE_DATA:
case TFTP_OPCODE_ACK:
......
......@@ -32,7 +32,6 @@ MODULE_DESCRIPTION("Amanda NAT helper");
MODULE_LICENSE("GPL");
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -52,22 +51,18 @@ static unsigned int help(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
sprintf(buffer, "%u", port);
ret = ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
matchoff, matchlen,
buffer, strlen(buffer));
if (ret != NF_ACCEPT)
......
......@@ -113,7 +113,6 @@ static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
static unsigned int ip_nat_ftp(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
......@@ -124,6 +123,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
u_int32_t newip;
u_int16_t port;
int dir = CTINFO2DIR(ctinfo);
struct ip_conntrack *ct = exp->master;
DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
......@@ -138,17 +138,13 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
......
......@@ -438,12 +438,13 @@ static void ip_nat_copy_manip(struct ip_nat_info *master,
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void ip_nat_follow_master(struct ip_conntrack *ct)
void ip_nat_follow_master(struct ip_conntrack *ct,
struct ip_conntrack_expect *this)
{
struct ip_nat_info *master = &ct->master->expectant->nat.info;
struct ip_nat_info *master = &ct->master->nat.info;
/* This must be a fresh one. */
BUG_ON(ct->nat.info.initialized);
ip_nat_copy_manip(master, ct->master, ct);
ip_nat_copy_manip(master, this, ct);
}
......@@ -37,7 +37,6 @@ MODULE_LICENSE("GPL");
/* FIXME: Time out? --RR */
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
......@@ -63,18 +62,13 @@ static unsigned int help(struct sk_buff **pskb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int err;
exp->tuple.dst.u.tcp.port = htons(port);
atomic_inc(&exp->use);
err = ip_conntrack_expect_related(exp, ct);
/* Success, or retransmit. */
if (!err || err == -EEXIST)
if (ip_conntrack_expect_related(exp) == 0)
break;
}
if (port == 0) {
ip_conntrack_expect_put(exp);
ip_conntrack_expect_free(exp);
return NF_DROP;
}
......@@ -95,7 +89,7 @@ static unsigned int help(struct sk_buff **pskb,
DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
buffer, NIPQUAD(exp->tuple.src.ip), port);
ret = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
matchoff, matchlen, buffer,
strlen(buffer));
if (ret != NF_ACCEPT)
......
......@@ -39,15 +39,14 @@ MODULE_DESCRIPTION("tftp NAT helper");
MODULE_LICENSE("GPL");
static unsigned int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp)
{
exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = ip_nat_follow_master;
if (ip_conntrack_expect_related(exp, ct) != 0) {
ip_conntrack_expect_put(exp);
if (ip_conntrack_expect_related(exp) != 0) {
ip_conntrack_expect_free(exp);
return NF_DROP;
}
return NF_ACCEPT;
......
......@@ -38,7 +38,6 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_helper_info *info = matchinfo;
struct ip_conntrack_expect *exp;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
int ret = info->invert;
......@@ -54,28 +53,21 @@ match(const struct sk_buff *skb,
return ret;
}
exp = ct->master;
READ_LOCK(&ip_conntrack_lock);
if (!exp->expectant) {
DEBUGP("ipt_helper: expectation %p without expectant !?!\n",
exp);
goto out_unlock;
}
if (!exp->expectant->helper) {
if (!ct->master->helper) {
DEBUGP("ipt_helper: master ct %p has no helper\n",
exp->expectant);
goto out_unlock;
}
DEBUGP("master's name = %s , info->name = %s\n",
exp->expectant->helper->name, info->name);
ct->master->helper->name, info->name);
if (info->name[0] == '\0')
ret ^= 1;
else
ret ^= !strncmp(exp->expectant->helper->name, info->name,
strlen(exp->expectant->helper->name));
ret ^= !strncmp(ct->master->helper->name, info->name,
strlen(ct->master->helper->name));
out_unlock:
READ_UNLOCK(&ip_conntrack_lock);
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment