Commit 834f9b05 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-spectrum_acl-Don-t-take-rtnl-mutex-for-region-rehash'

Ido Schimmel says:

====================
mlxsw: spectrum_acl: Don't take rtnl mutex for region rehash

Jiri says:

During region rehash, a new region is created with a more optimized set
of masks (ERPs). When transitioning to the new region, all the rules
from the old region are copied one-by-one to the new region. This
transition can be time consuming and currently done under RTNL lock.

In order to remove RTNL lock dependency during region rehash, introduce
multiple smaller locks guarding dedicated structures or parts of them.
That is the vast majority of this patchset. Only patch #1 is simple
cleanup and patches 12-15 are improving or introducing new selftests.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 731e7ccb 81d56d82
......@@ -5,11 +5,13 @@
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include "spectrum.h"
#include "spectrum_acl_tcam.h"
struct mlxsw_sp_acl_bf {
struct mutex lock; /* Protects Bloom Filter updates. */
unsigned int bank_size;
refcount_t refcnt[0];
};
......@@ -172,26 +174,36 @@ mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp,
u16 bf_index;
int err;
mutex_lock(&bf->lock);
bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
bf_index);
if (refcount_inc_not_zero(&bf->refcnt[rule_index]))
return 0;
if (refcount_inc_not_zero(&bf->refcnt[rule_index])) {
err = 0;
goto unlock;
}
peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
if (!peabfe_pl)
return -ENOMEM;
if (!peabfe_pl) {
err = -ENOMEM;
goto unlock;
}
mlxsw_reg_peabfe_pack(peabfe_pl);
mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
kfree(peabfe_pl);
if (err)
return err;
goto unlock;
refcount_set(&bf->refcnt[rule_index], 1);
return 0;
err = 0;
unlock:
mutex_unlock(&bf->lock);
return err;
}
void
......@@ -205,6 +217,8 @@ mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
char *peabfe_pl;
u16 bf_index;
mutex_lock(&bf->lock);
bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry);
rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank,
bf_index);
......@@ -212,13 +226,16 @@ mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp,
if (refcount_dec_and_test(&bf->refcnt[rule_index])) {
peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL);
if (!peabfe_pl)
return;
goto unlock;
mlxsw_reg_peabfe_pack(peabfe_pl);
mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl);
kfree(peabfe_pl);
}
unlock:
mutex_unlock(&bf->lock);
}
struct mlxsw_sp_acl_bf *
......@@ -240,10 +257,13 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
return ERR_PTR(-ENOMEM);
bf->bank_size = bf_bank_size;
mutex_init(&bf->lock);
return bf;
}
void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
{
mutex_destroy(&bf->lock);
kfree(bf);
}
......@@ -7,6 +7,7 @@
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/objagg.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
......@@ -63,6 +64,7 @@ struct mlxsw_sp_acl_erp_table {
unsigned int num_ctcam_erps;
unsigned int num_deltas;
struct objagg *objagg;
struct mutex objagg_lock; /* guards objagg manipulation */
};
struct mlxsw_sp_acl_erp_table_ops {
......@@ -1001,17 +1003,15 @@ struct mlxsw_sp_acl_erp_mask *
mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion,
const char *mask, bool ctcam)
{
struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
struct mlxsw_sp_acl_erp_key key;
struct objagg_obj *objagg_obj;
/* eRPs are allocated from a shared resource, but currently all
* allocations are done under RTNL.
*/
ASSERT_RTNL();
memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
key.ctcam = ctcam;
objagg_obj = objagg_obj_get(aregion->erp_table->objagg, &key);
mutex_lock(&erp_table->objagg_lock);
objagg_obj = objagg_obj_get(erp_table->objagg, &key);
mutex_unlock(&erp_table->objagg_lock);
if (IS_ERR(objagg_obj))
return ERR_CAST(objagg_obj);
return (struct mlxsw_sp_acl_erp_mask *) objagg_obj;
......@@ -1021,8 +1021,11 @@ void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion,
struct mlxsw_sp_acl_erp_mask *erp_mask)
{
struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask;
struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
objagg_obj_put(aregion->erp_table->objagg, objagg_obj);
mutex_lock(&erp_table->objagg_lock);
objagg_obj_put(erp_table->objagg, objagg_obj);
mutex_unlock(&erp_table->objagg_lock);
}
int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
......@@ -1034,7 +1037,6 @@ int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj);
unsigned int erp_bank;
ASSERT_RTNL();
if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table))
return 0;
......@@ -1334,6 +1336,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion,
erp_table->ops = &erp_no_mask_ops;
INIT_LIST_HEAD(&erp_table->atcam_erps_list);
erp_table->aregion = aregion;
mutex_init(&erp_table->objagg_lock);
return erp_table;
......@@ -1346,6 +1349,7 @@ static void
mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table)
{
WARN_ON(!list_empty(&erp_table->atcam_erps_list));
mutex_destroy(&erp_table->objagg_lock);
objagg_destroy(erp_table->objagg);
kfree(erp_table);
}
......@@ -1376,14 +1380,16 @@ mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_atcam_region *aregion,
struct objagg_hints *hints, bool *p_rehash_needed)
{
struct objagg *objagg = aregion->erp_table->objagg;
struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
const struct objagg_stats *ostats;
const struct objagg_stats *hstats;
int err;
*p_rehash_needed = false;
ostats = objagg_stats_get(objagg);
mutex_lock(&erp_table->objagg_lock);
ostats = objagg_stats_get(erp_table->objagg);
mutex_unlock(&erp_table->objagg_lock);
if (IS_ERR(ostats)) {
dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n");
return PTR_ERR(ostats);
......@@ -1411,13 +1417,16 @@ mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp,
void *
mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion)
{
struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
struct objagg_hints *hints;
bool rehash_needed;
int err;
hints = objagg_hints_get(aregion->erp_table->objagg,
mutex_lock(&erp_table->objagg_lock);
hints = objagg_hints_get(erp_table->objagg,
OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
mutex_unlock(&erp_table->objagg_lock);
if (IS_ERR(hints)) {
dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n");
return ERR_CAST(hints);
......
......@@ -17,6 +17,7 @@ struct mlxsw_sp_acl_tcam {
unsigned long *used_groups; /* bit array */
unsigned int max_groups;
unsigned int max_group_size;
struct mutex lock; /* guards vregion list */
struct list_head vregion_list;
u32 vregion_rehash_intrvl; /* ms */
unsigned long priv[0];
......@@ -78,6 +79,8 @@ struct mlxsw_sp_acl_tcam_vregion;
struct mlxsw_sp_acl_tcam_region {
struct mlxsw_sp_acl_tcam_vregion *vregion;
struct mlxsw_sp_acl_tcam_group *group;
struct list_head list; /* Member of a TCAM group */
enum mlxsw_reg_ptar_key_type key_type;
u16 id; /* ACL ID and region ID - they are same */
char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
......
......@@ -73,6 +73,26 @@ TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate,
__entry->mlxsw_sp, __entry->vregion)
);
TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate_end,
TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_tcam_vregion *vregion),
TP_ARGS(mlxsw_sp, vregion),
TP_STRUCT__entry(
__field(const void *, mlxsw_sp)
__field(const void *, vregion)
),
TP_fast_assign(
__entry->mlxsw_sp = mlxsw_sp;
__entry->vregion = vregion;
),
TP_printk("mlxsw_sp %p, vregion %p",
__entry->mlxsw_sp, __entry->vregion)
);
TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis,
TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_tcam_vregion *vregion),
......
......@@ -540,11 +540,15 @@ delta_simple_rehash_test()
check_err $? "Rehash trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
check_err $? "Migrate trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
check_err $? "Migrate end trace was not hit"
tp_record_all mlxsw:* 3
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
check_err $? "Rehash trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
check_fail $? "Migrate trace was hit when no migration should happen"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
check_fail $? "Migrate end trace was hit when no migration should happen"
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
......@@ -565,6 +569,247 @@ delta_simple_rehash_test()
log_test "delta simple rehash test ($tcflags)"
}
delta_simple_ipv6_rehash_test()
{
RET=0
if [[ "$tcflags" != "skip_sw" ]]; then
return 0;
fi
devlink dev param set $DEVLINK_DEV \
name acl_region_rehash_interval cmode runtime value 0
check_err $? "Failed to set ACL region rehash interval"
tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
check_fail $? "Rehash trace was hit even when rehash should be disabled"
devlink dev param set $DEVLINK_DEV \
name acl_region_rehash_interval cmode runtime value 3000
check_err $? "Failed to set ACL region rehash interval"
sleep 1
tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
$tcflags dst_ip 2001:db8:1::0/121 action drop
tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
$tcflags dst_ip 2001:db8:2::2 action drop
tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
$tcflags dst_ip 2001:db8:3::0/120 action drop
$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched a wrong filter"
tc_check_packets "dev $h2 ingress" 103 1
check_fail $? "Matched a wrong filter"
tc_check_packets "dev $h2 ingress" 102 1
check_err $? "Did not match on correct filter"
tp_record_all mlxsw:* 3
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
check_err $? "Rehash trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
check_err $? "Migrate trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
check_err $? "Migrate end trace was not hit"
tp_record_all mlxsw:* 3
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
check_err $? "Rehash trace was not hit"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
check_fail $? "Migrate trace was hit when no migration should happen"
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
check_fail $? "Migrate end trace was hit when no migration should happen"
$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched a wrong filter after rehash"
tc_check_packets "dev $h2 ingress" 103 1
check_fail $? "Matched a wrong filter after rehash"
tc_check_packets "dev $h2 ingress" 102 2
check_err $? "Did not match on correct filter after rehash"
tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
log_test "delta simple IPv6 rehash test ($tcflags)"
}
TEST_RULE_BASE=256
declare -a test_rules_inserted
test_rule_add()
{
local iface=$1
local tcflags=$2
local index=$3
if ! [ ${test_rules_inserted[$index]} ] ; then
test_rules_inserted[$index]=false
fi
if ${test_rules_inserted[$index]} ; then
return
fi
local number=$(( $index + $TEST_RULE_BASE ))
printf -v hexnumber '%x' $number
batch="${batch}filter add dev $iface ingress protocol ipv6 pref 1 \
handle $number flower $tcflags \
src_ip 2001:db8:1::$hexnumber action drop\n"
test_rules_inserted[$index]=true
}
test_rule_del()
{
local iface=$1
local index=$2
if ! [ ${test_rules_inserted[$index]} ] ; then
test_rules_inserted[$index]=false
fi
if ! ${test_rules_inserted[$index]} ; then
return
fi
local number=$(( $index + $TEST_RULE_BASE ))
printf -v hexnumber '%x' $number
batch="${batch}filter del dev $iface ingress protocol ipv6 pref 1 \
handle $number flower\n"
test_rules_inserted[$index]=false
}
test_rule_add_or_remove()
{
local iface=$1
local tcflags=$2
local index=$3
if ! [ ${test_rules_inserted[$index]} ] ; then
test_rules_inserted[$index]=false
fi
if ${test_rules_inserted[$index]} ; then
test_rule_del $iface $index
else
test_rule_add $iface $tcflags $index
fi
}
test_rule_add_or_remove_random_batch()
{
local iface=$1
local tcflags=$2
local total_count=$3
local skip=0
local count=0
local MAXSKIP=20
local MAXCOUNT=20
for ((i=1;i<=total_count;i++)); do
if (( $skip == 0 )) && (($count == 0)); then
((skip=$RANDOM % $MAXSKIP + 1))
((count=$RANDOM % $MAXCOUNT + 1))
fi
if (( $skip != 0 )); then
((skip-=1))
else
((count-=1))
test_rule_add_or_remove $iface $tcflags $i
fi
done
}
delta_massive_ipv6_rehash_test()
{
RET=0
if [[ "$tcflags" != "skip_sw" ]]; then
return 0;
fi
devlink dev param set $DEVLINK_DEV \
name acl_region_rehash_interval cmode runtime value 0
check_err $? "Failed to set ACL region rehash interval"
tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
check_fail $? "Rehash trace was hit even when rehash should be disabled"
RANDOM=4432897
declare batch=""
test_rule_add_or_remove_random_batch $h2 $tcflags 5000
echo -n -e $batch | tc -b -
declare batch=""
test_rule_add_or_remove_random_batch $h2 $tcflags 5000
devlink dev param set $DEVLINK_DEV \
name acl_region_rehash_interval cmode runtime value 3000
check_err $? "Failed to set ACL region rehash interval"
sleep 1
tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
$tcflags dst_ip 2001:db8:1::0/121 action drop
tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
$tcflags dst_ip 2001:db8:2::2 action drop
tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
$tcflags dst_ip 2001:db8:3::0/120 action drop
$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched a wrong filter"
tc_check_packets "dev $h2 ingress" 103 1
check_fail $? "Matched a wrong filter"
tc_check_packets "dev $h2 ingress" 102 1
check_err $? "Did not match on correct filter"
echo -n -e $batch | tc -b -
devlink dev param set $DEVLINK_DEV \
name acl_region_rehash_interval cmode runtime value 0
check_err $? "Failed to set ACL region rehash interval"
$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched a wrong filter after rehash"
tc_check_packets "dev $h2 ingress" 103 1
check_fail $? "Matched a wrong filter after rehash"
tc_check_packets "dev $h2 ingress" 102 2
check_err $? "Did not match on correct filter after rehash"
tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
declare batch=""
for i in {1..5000}; do
test_rule_del $h2 $tcflags $i
done
echo -e $batch | tc -b -
log_test "delta massive IPv6 rehash test ($tcflags)"
}
bloom_simple_test()
{
# Bloom filter requires that the eRP table is used. This test
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment