Commit 33d12dc9 authored by Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Remove leftovers from flowtable modules, from Geert Uytterhoeven.

2) Missing refcount increment of conntrack template in nft_ct,
   from Florian Westphal.

3) Reduce nft_zone selftest time, also from Florian.

4) Add selftest to cover stateless NAT on fragments, from Florian Westphal.

5) Do not set net_device for reject packets from the bridge path,
   from Phil Sutter.

6) Cancel register tracking info on nft_byteorder operations.

7) Extend nft_concat_range selftest to cover set reload with no elements,
   from Florian Westphal.

8) Remove useless update of pointer in chain blob builder, reported
   by kbuild test robot.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
  netfilter: nf_tables: remove assignment with no effect in chain blob builder
  selftests: nft_concat_range: add test for reload with no element add/del
  netfilter: nft_byteorder: track register operations
  netfilter: nft_reject_bridge: Fix for missing reply from prerouting
  selftests: netfilter: check stateless nat udp checksum fixup
  selftests: netfilter: reduce zone stress test running time
  netfilter: nft_ct: fix use after free when attaching zone template
  netfilter: Remove flowtable relics
====================

Link: https://lore.kernel.org/r/20220127235235.656931-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 5aac9108 b07f4137
......@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
......@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
{
struct sk_buff *nskb;
nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
......@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
......@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
{
struct sk_buff *nskb;
nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
......
......@@ -58,10 +58,6 @@ config NF_TABLES_ARP
endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
tristate
select NF_FLOW_TABLE_INET
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
......
......@@ -47,10 +47,6 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate
select NF_FLOW_TABLE_INET
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
......
......@@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
......
......@@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
prule = (struct nft_rule_dp *)ptr;
prule->is_last = 1;
ptr += offsetof(struct nft_rule_dp, data);
/* blob size does not include the trailer rule */
}
......
......@@ -167,12 +167,24 @@ static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
static bool nft_byteorder_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
struct nft_byteorder *priv = nft_expr_priv(expr);
track->regs[priv->dreg].selector = NULL;
track->regs[priv->dreg].bitwise = NULL;
return false;
}
/*
 * Operations table for the byteorder expression: binds the expression type,
 * its private data size, and the eval/init/dump/reduce callbacks defined in
 * this file.
 */
static const struct nft_expr_ops nft_byteorder_ops = {
.type = &nft_byteorder_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
.eval = nft_byteorder_eval,
.init = nft_byteorder_init,
.dump = nft_byteorder_dump,
/* drops tracked register state for the destination register */
.reduce = nft_byteorder_reduce,
};
struct nft_expr_type nft_byteorder_type __read_mostly = {
......
......@@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
if (likely(refcount_read(&ct->ct_general.use) == 1)) {
refcount_inc(&ct->ct_general.use);
nf_ct_zone_add(ct, &zone);
} else {
/* previous skb got queued to userspace */
/* previous skb got queued to userspace, allocate temporary
* one until percpu template can be reused.
*/
ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
if (!ct) {
regs->verdict.code = NF_DROP;
......
......@@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
BUGS="flush_remove_add"
BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
......@@ -354,6 +354,23 @@ TYPE_flush_remove_add="
display Add two elements, flush, re-add
"
TYPE_reload="
display net,mac with reload
type_spec ipv4_addr . ether_addr
chain_spec ip daddr . ether saddr
dst addr4
src mac
start 1
count 1
src_delta 2000
tools sendip nc bash
proto udp
race_repeat 0
perf_duration 0
"
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
......@@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() {
nft flush ruleset
}
# Regression test for reloading a set without adding or deleting elements:
# - add ranged elements
# - flush the set and load its own listing back in a single nft batch
# - send packets and check that they still match the reloaded elements
# Returns ${KSELFTEST_SKIP} if setup fails, 1 if adding an element or
# matching a packet fails.
test_bug_reload() {
setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
# Remember the initial value: ${start} is advanced by the loop below and
# must be rewound before the matching pass.
rstart=${start}
range_size=1
for i in $(seq "${start}" $((start + count))); do
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
if [ $((end / 65534)) -gt $((start / 65534)) ]; then
start=${end}
end=$((end + 1))
fi
# NOTE(review): srcstart/srcend are not read in this function; presumably
# format() consumes them -- confirm against its definition.
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
add "$(format)" || return 1
range_size=$((range_size + 1))
start=$((end + range_size))
done
# check that the kernel does allocate the pcpu scratch map
# for a reload with no element add/delete
# The subshell emits "flush set ..." followed by the current set listing, so
# nft -f - receives the flush and the re-creation as one input.
( echo flush set inet filter test ;
nft list set inet filter test ) | nft -f -
# Replay the same range sequence as the insertion loop, from the start.
start=${rstart}
range_size=1
for i in $(seq "${start}" $((start + count))); do
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
if [ $((end / 65534)) -gt $((start / 65534)) ]; then
start=${end}
end=$((end + 1))
fi
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
# Probe values from ${start} up to ${end}, stepping by range_size / 2 + 1.
for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
start=$((end + range_size))
done
nft flush ruleset
}
test_reported_issues() {
eval test_bug_"${subtest}"
}
......
......@@ -899,6 +899,144 @@ EOF
ip netns exec "$ns0" nft delete table $family nat
}
# Check stateless NAT (header rewrite via maps in a prerouting chain at
# priority -400) for both a single ping and a fragmented UDP transfer.
#
# Steps:
#  - enable forwarding on veth0/veth1 in $ns0 and verify plain ns2->ns1 ping
#  - load "table ip stateless" in $ns0 with xlate_in/xlate_out maps
#  - ping again and verify the counters in $ns1/$ns2/$ns0
#  - if socat is available, send 4096 random bytes over UDP (fragmented),
#    compare the received file and verify the pre-defrag counter in $ns1
#  - delete the stateless table
#
# Relies on helpers/variables defined elsewhere in the script:
# reset_counters, bad_counter, $ns0/$ns1/$ns2, $ksft_skip.
#
# Returns 0 on success, 1 on failure, $ksft_skip if the rules cannot be
# loaded or socat is missing (and nothing failed before that point).
test_stateless_nat_ip()
{
	local lret=0

	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

	ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
	if [ $? -ne 0 ] ; then
		echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
		return 1
	fi

	# Heredoc content is kept unindented; the EOF terminator must stay at
	# column 0 because the heredoc is not opened with <<-.
	ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table ip stateless {
map xlate_in {
typeof meta iifname . ip saddr . ip daddr : ip daddr
elements = {
"veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
}
}
map xlate_out {
typeof meta iifname . ip saddr . ip daddr : ip daddr
elements = {
"veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
}
}
chain prerouting {
type filter hook prerouting priority -400; policy accept;
ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
}
}
EOF
	if [ $? -ne 0 ]; then
		echo "SKIP: Could not add ip statless rules"
		return $ksft_skip
	fi

	reset_counters

	ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
	if [ $? -ne 0 ] ; then
		echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
		lret=1
	fi

	# ns1 should have seen packets from .2.2, due to stateless rewrite.
	# Note: grep -q prints nothing, so cnt stays empty; only the exit
	# status of the pipeline ($?) is evaluated.
	expect="packets 1 bytes 84"
	cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
	if [ $? -ne 0 ]; then
		bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
		lret=1
	fi

	for dir in "in" "out" ; do
		cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
		if [ $? -ne 0 ]; then
			bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
			lret=1
		fi
	done

	# ns1 should not have seen packets carrying ns2's original address,
	# because the source address was rewritten in ns0.
	expect="packets 0 bytes 0"
	for dir in "in" "out" ; do
		cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
		if [ $? -ne 0 ]; then
			# Report the counter that was actually checked (ns2$dir).
			bad_counter "$ns1" ns2$dir "$expect" "test_stateless 3"
			lret=1
		fi

		cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
		if [ $? -ne 0 ]; then
			bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
			lret=1
		fi
	done

	reset_counters

	socat -h > /dev/null 2>&1
	if [ $? -ne 0 ];then
		echo "SKIP: Could not run stateless nat frag test without socat tool"

		# Always remove the table before leaving, so the skip path does
		# not leave the stateless rules loaded in ns0.
		ip netns exec "$ns0" nft delete table ip stateless

		if [ $lret -eq 0 ]; then
			return $ksft_skip
		fi
		return $lret
	fi

	local tmpfile=$(mktemp)
	dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null

	local outfile=$(mktemp)
	ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
	sc_r=$!

	# Give the receiver time to start listening before sending.
	sleep 1

	# re-do with a large UDP datagram -> ip fragmentation
	ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
	if [ $? -ne 0 ] ; then
		echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
		lret=1
	fi

	# Wait for the background receiver to finish (bounded by its timeout).
	wait

	cmp "$tmpfile" "$outfile"
	if [ $? -ne 0 ]; then
		ls -l "$tmpfile" "$outfile"
		echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
		lret=1
	fi

	rm -f "$tmpfile" "$outfile"

	# ns1 should have seen packets from 2.2, due to stateless rewrite.
	expect="packets 3 bytes 4164"
	cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
	if [ $? -ne 0 ]; then
		bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
		lret=1
	fi

	ip netns exec "$ns0" nft delete table ip stateless
	if [ $? -ne 0 ]; then
		echo "ERROR: Could not delete table ip stateless" 1>&2
		lret=1
	fi

	test $lret -eq 0 && echo "PASS: IP statless for $ns2"

	return $lret
}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
......@@ -965,6 +1103,19 @@ table inet filter {
EOF
done
# special case for stateless nat check, counter needs to
# be done before (input) ip defragmentation
ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
table inet filter {
counter ns0insl {}
chain pre {
type filter hook prerouting priority -400; policy accept;
ip saddr 10.0.2.2 counter name "ns0insl"
}
}
EOF
sleep 3
# test basic connectivity
for i in 1 2; do
......@@ -1019,6 +1170,7 @@ $test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
if [ $ret -ne 0 ];then
echo -n "FAIL: "
......
......@@ -9,7 +9,7 @@ ns="ns-$sfx"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
zones=20000
zones=2000
have_ct_tool=0
ret=0
......@@ -75,10 +75,10 @@ EOF
while [ $i -lt $max_zones ]; do
local start=$(date +%s%3N)
i=$((i + 10000))
i=$((i + 1000))
j=$((j + 1))
# nft rule in output places each packet in a different zone.
dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break
......@@ -86,7 +86,7 @@ EOF
stop=$(date +%s%3N)
local duration=$((stop-start))
echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
done
if [ $have_ct_tool -eq 1 ]; then
......@@ -128,11 +128,11 @@ test_conntrack_tool() {
break
fi
if [ $((i%10000)) -eq 0 ];then
if [ $((i%1000)) -eq 0 ];then
stop=$(date +%s%3N)
local duration=$((stop-start))
echo "PASS: added 10000 entries in $duration ms (now $i total)"
echo "PASS: added 1000 entries in $duration ms (now $i total)"
start=$stop
fi
done
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment