Commit f1227c5c authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains Netfilter updates for your net tree,
they are:

1) Fix missing initialization of the range structure (allocated in the
   stack) in nft_masq_{ipv4, ipv6}_eval, from Daniel Borkmann.

2) Make sure the data we receive from userspace contains the req_version
   structure; otherwise return an error (-EINVAL) on incomplete, i.e.
   truncated, input. From Dan Carpenter.

3) Fix handling of skb->sk which may cause incorrect handling
   of connections from a local process. Via Simon Horman, patch from
   Calvin Owens.

4) Fix wrong netns in nft_compat when setting target and match params
   structure.

5) Relax chain type validation in nft_compat that was recently included,
   this broke the matches that need to be run from the route chain type.
   Now iptables-test.py automated regression tests report success again
   and we avoid the only possible problematic case, which is the use of
   nat targets out of nat chain type.

6) Use match->table to validate the tablename, instead of the match->name.
   Again patch for nft_compat.

7) Restore the synchronous release of objects from the commit and abort
   path in nf_tables. The deferred release via call_rcu was causing two
   major problems: splats when using nft_compat, given that matches and
   targets may sleep and the call_rcu callback is invoked from softirq
   context. Moreover, Patrick reported possible event notification
   reordering when rules refer to anonymous sets.

8) Fix race condition between packets that are being confirmed by
   conntrack and the ctnetlink flush operation. This happens since the
   removal of the central spinlock. Thanks to Jesper D. Brouer for looking
   into this.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 35717d8d 5195c14c
...@@ -396,14 +396,12 @@ struct nft_rule { ...@@ -396,14 +396,12 @@ struct nft_rule {
/** /**
* struct nft_trans - nf_tables object update in transaction * struct nft_trans - nf_tables object update in transaction
* *
* @rcu_head: rcu head to defer release of transaction data
* @list: used internally * @list: used internally
* @msg_type: message type * @msg_type: message type
* @ctx: transaction context * @ctx: transaction context
* @data: internal information related to the transaction * @data: internal information related to the transaction
*/ */
struct nft_trans { struct nft_trans {
struct rcu_head rcu_head;
struct list_head list; struct list_head list;
int msg_type; int msg_type;
struct nft_ctx ctx; struct nft_ctx ctx;
......
...@@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, ...@@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
struct nf_nat_range range; struct nf_nat_range range;
unsigned int verdict; unsigned int verdict;
memset(&range, 0, sizeof(range));
range.flags = priv->flags; range.flags = priv->flags;
verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
......
...@@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, ...@@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
struct nf_nat_range range; struct nf_nat_range range;
unsigned int verdict; unsigned int verdict;
memset(&range, 0, sizeof(range));
range.flags = priv->flags; range.flags = priv->flags;
verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
......
...@@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ...@@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
if (*op < IP_SET_OP_VERSION) { if (*op < IP_SET_OP_VERSION) {
/* Check the version at the beginning of operations */ /* Check the version at the beginning of operations */
struct ip_set_req_version *req_version = data; struct ip_set_req_version *req_version = data;
if (*len < sizeof(struct ip_set_req_version)) {
ret = -EINVAL;
goto done;
}
if (req_version->version != IPSET_PROTOCOL) { if (req_version->version != IPSET_PROTOCOL) {
ret = -EPROTO; ret = -EPROTO;
goto done; goto done;
......
...@@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, ...@@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
new_skb = skb_realloc_headroom(skb, max_headroom); new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb) if (!new_skb)
goto error; goto error;
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
consume_skb(skb); consume_skb(skb);
skb = new_skb; skb = new_skb;
} }
......
...@@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) ...@@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
*/ */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct); pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag inside the lock to prevent
a race against nf_ct_get_next_corpse() possibly called from /* We have to check the DYING flag after unlink to prevent
user context, else we insert an already 'dead' hash, blocking * a race against nf_ct_get_next_corpse() possibly called from
further use of that particular connection -JM */ * user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
nf_ct_del_from_dying_or_unconfirmed_list(ct);
if (unlikely(nf_ct_is_dying(ct))) { if (unlikely(nf_ct_is_dying(ct))) {
nf_ct_add_to_dying_list(ct);
nf_conntrack_double_unlock(hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable(); local_bh_enable();
return NF_ACCEPT; return NF_ACCEPT;
...@@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) ...@@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out; goto out;
nf_ct_del_from_dying_or_unconfirmed_list(ct);
/* Timer relative to confirmation time, not original /* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in setting time, otherwise we'd get timer wrap in
weird delay cases. */ weird delay cases. */
......
...@@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) ...@@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans)
} }
} }
/* Schedule objects for release via rcu to make sure no packets are accesing static void nf_tables_commit_release(struct nft_trans *trans)
* removed rules.
*/
static void nf_tables_commit_release_rcu(struct rcu_head *rt)
{ {
struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
switch (trans->msg_type) { switch (trans->msg_type) {
case NFT_MSG_DELTABLE: case NFT_MSG_DELTABLE:
nf_tables_table_destroy(&trans->ctx); nf_tables_table_destroy(&trans->ctx);
...@@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) ...@@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb)
} }
} }
synchronize_rcu();
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
list_del(&trans->list); list_del(&trans->list);
trans->ctx.nla = NULL; nf_tables_commit_release(trans);
call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
} }
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
...@@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) ...@@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb)
return 0; return 0;
} }
/* Schedule objects for release via rcu to make sure no packets are accesing static void nf_tables_abort_release(struct nft_trans *trans)
* aborted rules.
*/
static void nf_tables_abort_release_rcu(struct rcu_head *rt)
{ {
struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
switch (trans->msg_type) { switch (trans->msg_type) {
case NFT_MSG_NEWTABLE: case NFT_MSG_NEWTABLE:
nf_tables_table_destroy(&trans->ctx); nf_tables_table_destroy(&trans->ctx);
...@@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) ...@@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb)
} }
} }
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next, list_for_each_entry_safe_reverse(trans, next,
&net->nft.commit_list, list) { &net->nft.commit_list, list) {
list_del(&trans->list); list_del(&trans->list);
trans->ctx.nla = NULL; nf_tables_abort_release(trans);
call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
} }
return 0; return 0;
......
...@@ -21,45 +21,17 @@ ...@@ -21,45 +21,17 @@
#include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
static const struct {
const char *name;
u8 type;
} table_to_chaintype[] = {
{ "filter", NFT_CHAIN_T_DEFAULT },
{ "raw", NFT_CHAIN_T_DEFAULT },
{ "security", NFT_CHAIN_T_DEFAULT },
{ "mangle", NFT_CHAIN_T_ROUTE },
{ "nat", NFT_CHAIN_T_NAT },
{ },
};
static int nft_compat_table_to_chaintype(const char *table)
{
int i;
for (i = 0; table_to_chaintype[i].name != NULL; i++) {
if (strcmp(table_to_chaintype[i].name, table) == 0)
return table_to_chaintype[i].type;
}
return -1;
}
static int nft_compat_chain_validate_dependency(const char *tablename, static int nft_compat_chain_validate_dependency(const char *tablename,
const struct nft_chain *chain) const struct nft_chain *chain)
{ {
enum nft_chain_type type;
const struct nft_base_chain *basechain; const struct nft_base_chain *basechain;
if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) if (!tablename || !(chain->flags & NFT_BASE_CHAIN))
return 0; return 0;
type = nft_compat_table_to_chaintype(tablename);
if (type < 0)
return -EINVAL;
basechain = nft_base_chain(chain); basechain = nft_base_chain(chain);
if (basechain->type->type != type) if (strcmp(tablename, "nat") == 0 &&
basechain->type->type != NFT_CHAIN_T_NAT)
return -EINVAL; return -EINVAL;
return 0; return 0;
...@@ -117,7 +89,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, ...@@ -117,7 +89,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
struct xt_target *target, void *info, struct xt_target *target, void *info,
union nft_entry *entry, u8 proto, bool inv) union nft_entry *entry, u8 proto, bool inv)
{ {
par->net = &init_net; par->net = ctx->net;
par->table = ctx->table->name; par->table = ctx->table->name;
switch (ctx->afi->family) { switch (ctx->afi->family) {
case AF_INET: case AF_INET:
...@@ -324,7 +296,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, ...@@ -324,7 +296,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
struct xt_match *match, void *info, struct xt_match *match, void *info,
union nft_entry *entry, u8 proto, bool inv) union nft_entry *entry, u8 proto, bool inv)
{ {
par->net = &init_net; par->net = ctx->net;
par->table = ctx->table->name; par->table = ctx->table->name;
switch (ctx->afi->family) { switch (ctx->afi->family) {
case AF_INET: case AF_INET:
...@@ -374,7 +346,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, ...@@ -374,7 +346,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {}; union nft_entry e = {};
int ret; int ret;
ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
if (ret < 0) if (ret < 0)
goto err; goto err;
...@@ -448,7 +420,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, ...@@ -448,7 +420,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (!(hook_mask & match->hooks)) if (!(hook_mask & match->hooks))
return -EINVAL; return -EINVAL;
ret = nft_compat_chain_validate_dependency(match->name, ret = nft_compat_chain_validate_dependency(match->table,
ctx->chain); ctx->chain);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment