Commit 18f84d41 authored by Jozsef Kadlecsik

netfilter: ipset: Introduce RCU locking in hash:* types

Three types of data need to be protected in the case of the hash types:

a. The hash buckets: standard rcu pointer operations are used.
b. The element blobs in the hash buckets are stored in an array and
   a bitmap is used for book-keeping to tell which elements in the array
   are used or free.
c. The number of networks per cidr value and the cidr values themselves
   are stored in fixed-size arrays and need no protection. The values are
   modified in such an order that in the worst case an element test is
   repeated once with the same cidr value.

The ipset hash approach uses arrays instead of lists and therefore is
incompatible with rhashtable.

Performance is tested by Jesper Dangaard Brouer:

Simple drop in FORWARD
~~~~~~~~~~~~~~~~~~~~~~

Dropping via simple iptables net-mask match::

 iptables -t raw -N simple || iptables -t raw -F simple
 iptables -t raw -I simple  -s 198.18.0.0/15 -j DROP
 iptables -t raw -D PREROUTING -j simple
 iptables -t raw -I PREROUTING -j simple

Drop performance in "raw": 11.3Mpps

Generator: sending 12.2Mpps (tx:12264083 pps)

Drop via original ipset in RAW table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Create a set with lots of elements::

 sudo ./ipset destroy test
 echo "create test hash:ip hashsize 65536" > test.set
 for x in `seq 0 255`; do
    for y in `seq 0 255`; do
        echo "add test 198.18.$x.$y" >> test.set
    done
 done
 sudo ./ipset restore < test.set

Dropping via ipset::

 iptables -t raw -F
 iptables -t raw -N net198 || iptables -t raw -F net198
 iptables -t raw -I net198 -m set --match-set test src -j DROP
 iptables -t raw -I PREROUTING -j net198

Drop performance in "raw" with ipset: 8Mpps

Perf report numbers ipset drop in "raw"::

 +   24.65%  ksoftirqd/1  [ip_set]           [k] ip_set_test
 -   21.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_lock_bh
    - _raw_read_lock_bh
       + 99.88% ip_set_test
 -   19.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_unlock_bh
    - _raw_read_unlock_bh
       + 99.72% ip_set_test
 +    4.31%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_kadt
 +    2.27%  ksoftirqd/1  [ixgbe]            [k] ixgbe_fetch_rx_buffer
 +    2.18%  ksoftirqd/1  [ip_tables]        [k] ipt_do_table
 +    1.81%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_test
 +    1.61%  ksoftirqd/1  [kernel.kallsyms]  [k] __netif_receive_skb_core
 +    1.44%  ksoftirqd/1  [kernel.kallsyms]  [k] build_skb
 +    1.42%  ksoftirqd/1  [kernel.kallsyms]  [k] ip_rcv
 +    1.36%  ksoftirqd/1  [kernel.kallsyms]  [k] __local_bh_enable_ip
 +    1.16%  ksoftirqd/1  [kernel.kallsyms]  [k] dev_gro_receive
 +    1.09%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_unlock
 +    0.96%  ksoftirqd/1  [ixgbe]            [k] ixgbe_clean_rx_irq
 +    0.95%  ksoftirqd/1  [kernel.kallsyms]  [k] __netdev_alloc_frag
 +    0.88%  ksoftirqd/1  [kernel.kallsyms]  [k] kmem_cache_alloc
 +    0.87%  ksoftirqd/1  [xt_set]           [k] set_match_v3
 +    0.85%  ksoftirqd/1  [kernel.kallsyms]  [k] inet_gro_receive
 +    0.83%  ksoftirqd/1  [kernel.kallsyms]  [k] nf_iterate
 +    0.76%  ksoftirqd/1  [kernel.kallsyms]  [k] put_compound_page
 +    0.75%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_lock

Drop via ipset in RAW table with RCU-locking
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

With RCU locking, the RW-lock is gone.

Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps
Performance-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
parent 96f51428
This diff is collapsed.
...@@ -315,6 +315,7 @@ hash_ip_init(void) ...@@ -315,6 +315,7 @@ hash_ip_init(void)
static void __exit static void __exit
hash_ip_fini(void) hash_ip_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_ip_type); ip_set_type_unregister(&hash_ip_type);
} }
......
...@@ -319,6 +319,7 @@ hash_ipmark_init(void) ...@@ -319,6 +319,7 @@ hash_ipmark_init(void)
static void __exit static void __exit
hash_ipmark_fini(void) hash_ipmark_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_ipmark_type); ip_set_type_unregister(&hash_ipmark_type);
} }
......
...@@ -382,6 +382,7 @@ hash_ipport_init(void) ...@@ -382,6 +382,7 @@ hash_ipport_init(void)
static void __exit static void __exit
hash_ipport_fini(void) hash_ipport_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_ipport_type); ip_set_type_unregister(&hash_ipport_type);
} }
......
...@@ -397,6 +397,7 @@ hash_ipportip_init(void) ...@@ -397,6 +397,7 @@ hash_ipportip_init(void)
static void __exit static void __exit
hash_ipportip_fini(void) hash_ipportip_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_ipportip_type); ip_set_type_unregister(&hash_ipportip_type);
} }
......
...@@ -554,6 +554,7 @@ hash_ipportnet_init(void) ...@@ -554,6 +554,7 @@ hash_ipportnet_init(void)
static void __exit static void __exit
hash_ipportnet_fini(void) hash_ipportnet_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_ipportnet_type); ip_set_type_unregister(&hash_ipportnet_type);
} }
......
...@@ -165,6 +165,7 @@ hash_mac_init(void) ...@@ -165,6 +165,7 @@ hash_mac_init(void)
static void __exit static void __exit
hash_mac_fini(void) hash_mac_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_mac_type); ip_set_type_unregister(&hash_mac_type);
} }
......
...@@ -392,6 +392,7 @@ hash_net_init(void) ...@@ -392,6 +392,7 @@ hash_net_init(void)
static void __exit static void __exit
hash_net_fini(void) hash_net_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_net_type); ip_set_type_unregister(&hash_net_type);
} }
......
...@@ -500,6 +500,7 @@ hash_netiface_init(void) ...@@ -500,6 +500,7 @@ hash_netiface_init(void)
static void __exit static void __exit
hash_netiface_fini(void) hash_netiface_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_netiface_type); ip_set_type_unregister(&hash_netiface_type);
} }
......
...@@ -480,6 +480,7 @@ hash_netnet_init(void) ...@@ -480,6 +480,7 @@ hash_netnet_init(void)
static void __exit static void __exit
hash_netnet_fini(void) hash_netnet_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_netnet_type); ip_set_type_unregister(&hash_netnet_type);
} }
......
...@@ -498,6 +498,7 @@ hash_netport_init(void) ...@@ -498,6 +498,7 @@ hash_netport_init(void)
static void __exit static void __exit
hash_netport_fini(void) hash_netport_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_netport_type); ip_set_type_unregister(&hash_netport_type);
} }
......
...@@ -581,6 +581,7 @@ hash_netportnet_init(void) ...@@ -581,6 +581,7 @@ hash_netportnet_init(void)
static void __exit static void __exit
hash_netportnet_fini(void) hash_netportnet_fini(void)
{ {
rcu_barrier();
ip_set_type_unregister(&hash_netportnet_type); ip_set_type_unregister(&hash_netportnet_type);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment