Commit fd2a55e7 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Fix broken listing of set elements when table has an owner.

2) Fix conntrack refcount leak in ctnetlink with related conntrack
   entries, from Hangyu Hua.

3) Fix use-after-free/double-free in ctnetlink conntrack insert path,
   from Florian Westphal.

4) Fix ip6t_rpfilter with VRF, from Phil Sutter.

5) Fix use-after-free in ebtables reported by syzbot, also from Florian.

6) Use skb->len in xt_length to deal with IPv6 jumbo packets,
   from Xin Long.

7) Fix NETLINK_LISTEN_ALL_NSID with ctnetlink, from Florian Westphal.

8) Fix memleak in {ip_,ip6_,arp_}tables in ENOMEM error case,
   from Pavel Tikhomirov.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: x_tables: fix percpu counter block leak on error path when creating new netns
  netfilter: ctnetlink: make event listener tracking global
  netfilter: xt_length: use skb len to match in length_mt6
  netfilter: ebtables: fix table blob use-after-free
  netfilter: ip6t_rpfilter: Fix regression with VRF interfaces
  netfilter: conntrack: fix rmmod double-free race
  netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack()
  netfilter: nf_tables: allow to fetch set elements when table has an owner
====================

Link: https://lore.kernel.org/r/20230222092137.88637-1-pablo@netfilter.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 67d93ffc 0af8c09c
......@@ -491,4 +491,9 @@ extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
*/
DECLARE_PER_CPU(bool, nf_skb_duplicated);
/**
* Contains bitmask of ctnetlink event subscribers, if any.
* Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
*/
extern u8 nf_ctnetlink_has_listener;
#endif /*__LINUX_NETFILTER_H*/
......@@ -95,7 +95,6 @@ struct nf_ip_net {
struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
u8 ctnetlink_has_listener;
bool ecache_dwork_pending;
#endif
u8 sysctl_log_invalid; /* Log invalid packets */
......
......@@ -1090,7 +1090,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
AUDIT_XT_OP_REPLACE, GFP_KERNEL);
return ret;
return 0;
free_unlock:
mutex_unlock(&ebt_mutex);
......
......@@ -1525,6 +1525,10 @@ int arpt_register_table(struct net *net,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct arpt_entry *iter;
xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
......
......@@ -1045,7 +1045,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_counters *counters;
struct ipt_entry *iter;
ret = 0;
counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
......@@ -1091,7 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
return ret;
return 0;
put_module:
module_put(t->me);
......@@ -1742,6 +1741,10 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ipt_entry *iter;
xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
......
......@@ -1062,7 +1062,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_counters *counters;
struct ip6t_entry *iter;
ret = 0;
counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
......@@ -1108,7 +1107,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
return ret;
return 0;
put_module:
module_put(t->me);
......@@ -1751,6 +1750,10 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ip6t_entry *iter;
xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
......
......@@ -72,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
goto out;
}
if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
if (rt->rt6i_idev->dev == dev ||
l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
(flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
......
......@@ -669,6 +669,9 @@ const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
u8 nf_ctnetlink_has_listener;
EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);
......
......@@ -381,7 +381,6 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
nf_conntrack_free(nfct);
......
......@@ -884,10 +884,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
if (!nf_ct_ext_valid_pre(ct->ext)) {
NF_CT_STAT_INC_ATOMIC(net, insert_failed);
return -ETIMEDOUT;
}
if (!nf_ct_ext_valid_pre(ct->ext))
return -EAGAIN;
local_bh_disable();
do {
......@@ -922,6 +920,19 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
goto chaintoolong;
}
/* If genid has changed, we can't insert anymore because ct
* extensions could have stale pointers and nf_ct_iterate_destroy
* might have completed its table scan already.
*
* Increment of the ext genid right after this check is fine:
* nf_ct_iterate_destroy blocks until locks are released.
*/
if (!nf_ct_ext_valid_post(ct->ext)) {
err = -EAGAIN;
goto out;
}
ct->status |= IPS_CONFIRMED;
smp_wmb();
/* The caller holds a reference to this object */
refcount_set(&ct->ct_general.use, 2);
......@@ -930,12 +941,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
NF_CT_STAT_INC(net, insert);
local_bh_enable();
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
NF_CT_STAT_INC_ATOMIC(net, drop);
return -ETIMEDOUT;
}
return 0;
chaintoolong:
NF_CT_STAT_INC(net, chaintoolong);
......
......@@ -309,7 +309,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp
break;
return true;
case 2: /* autodetect: no event listener, don't allocate extension. */
if (!READ_ONCE(net->ct.ctnetlink_has_listener))
if (!READ_ONCE(nf_ctnetlink_has_listener))
return true;
fallthrough;
case 1:
......
......@@ -2316,9 +2316,6 @@ ctnetlink_create_conntrack(struct net *net,
nfct_seqadj_ext_add(ct);
nfct_synproxy_ext_add(ct);
/* we must add conntrack extensions before confirmation. */
ct->status |= IPS_CONFIRMED;
if (cda[CTA_STATUS]) {
err = ctnetlink_change_status(ct, cda);
if (err < 0)
......@@ -2375,12 +2372,15 @@ ctnetlink_create_conntrack(struct net *net,
err = nf_conntrack_hash_check_insert(ct);
if (err < 0)
goto err2;
goto err3;
rcu_read_unlock();
return ct;
err3:
if (ct->master)
nf_ct_put(ct->master);
err2:
rcu_read_unlock();
err1:
......
......@@ -5507,7 +5507,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, NETLINK_CB(skb).portid);
genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
return PTR_ERR(table);
......
......@@ -29,6 +29,7 @@
#include <net/netlink.h>
#include <net/netns/generic.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
MODULE_LICENSE("GPL");
......@@ -685,12 +686,12 @@ static void nfnetlink_bind_event(struct net *net, unsigned int group)
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
v = READ_ONCE(net->ct.ctnetlink_has_listener);
v = READ_ONCE(nf_ctnetlink_has_listener);
if ((v & group_bit) == 0) {
v |= group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
WRITE_ONCE(nf_ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
......@@ -744,12 +745,12 @@ static void nfnetlink_unbind(struct net *net, int group)
spin_lock(&nfnl_grp_active_lock);
if (!nfnetlink_has_listeners(net, group)) {
u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
u8 v = READ_ONCE(nf_ctnetlink_has_listener);
v &= ~group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
WRITE_ONCE(nf_ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
......
......@@ -30,8 +30,7 @@ static bool
length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_length_info *info = par->matchinfo;
const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
sizeof(struct ipv6hdr);
u32 pktlen = skb->len;
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
......
......@@ -62,10 +62,16 @@ ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
# firewall matches to test
[ -n "$iptables" ] && ip netns exec "$ns2" \
"$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter
[ -n "$ip6tables" ] && ip netns exec "$ns2" \
"$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter
[ -n "$iptables" ] && {
common='-t raw -A PREROUTING -s 192.168.0.0/16'
ip netns exec "$ns2" "$iptables" $common -m rpfilter
ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
}
[ -n "$ip6tables" ] && {
common='-t raw -A PREROUTING -s fec0::/16'
ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
}
[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
table inet t {
chain c {
......@@ -89,6 +95,11 @@ ipt_zero_rule() { # (command)
[ -n "$1" ] || return 0
ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
}
ipt_zero_reverse_rule() { # (command)
[ -n "$1" ] || return 0
ip netns exec "$ns2" "$1" -t raw -vS | \
grep -q -- "-m rpfilter --invert -c 0 0"
}
nft_zero_rule() { # (family)
[ -n "$nft" ] || return 0
ip netns exec "$ns2" "$nft" list chain inet t c | \
......@@ -101,8 +112,7 @@ netns_ping() { # (netns, args...)
ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
}
testrun() {
# clear counters first
clear_counters() {
[ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
[ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
if [ -n "$nft" ]; then
......@@ -111,6 +121,10 @@ testrun() {
ip netns exec "$ns2" $nft -s list table inet t;
) | ip netns exec "$ns2" $nft -f -
fi
}
testrun() {
clear_counters
# test 1: martian traffic should fail rpfilter matches
netns_ping "$ns1" -I v0 192.168.42.1 && \
......@@ -120,9 +134,13 @@ testrun() {
ipt_zero_rule "$iptables" || die "iptables matched martian"
ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
nft_zero_rule ip || die "nft IPv4 matched martian"
nft_zero_rule ip6 || die "nft IPv6 matched martian"
clear_counters
# test 2: rpfilter match should pass for regular traffic
netns_ping "$ns1" 192.168.23.1 || \
die "regular ping 192.168.23.1 failed"
......@@ -131,6 +149,8 @@ testrun() {
ipt_zero_rule "$iptables" && die "iptables match not effective"
ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
nft_zero_rule ip && die "nft IPv4 match not effective"
nft_zero_rule ip6 && die "nft IPv6 match not effective"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment