Commit 981f4045 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'net-sched-cls_api-support-hardware-miss-to-tc-action'

Paul Blakey says:

====================
net/sched: cls_api: Support hardware miss to tc action

This series adds support for hardware miss to instruct tc to continue execution
in a specific tc action instance on a filter's action list. The mlx5 driver patch
(besides the refactors) shows its usage instead of using just chain restore.

Currently a filter's action list must be executed all together or
not at all as driver are only able to tell tc to continue executing from a
specific tc chain, and not a specific filter/action.

This is troublesome with regards to action CT, where new connections should
be sent to software (via tc chain restore), and established connections can
be handled in hardware.

Checking for new connections is done when executing the ct action in hardware
(by checking the packet's tuple against known established tuples).
But if there is a packet modification (pedit) action before action CT and the
checked tuple is a new connection, hardware will need to revert the previous
packet modifications before sending it back to software so it can
re-match the same tc filter in software and re-execute its CT action.

The following is an example configuration of stateless nat
on mlx5 driver that isn't supported before this patchet:

 #Setup corrosponding mlx5 VFs in namespaces
 $ ip netns add ns0
 $ ip netns add ns1
 $ ip link set dev enp8s0f0v0 netns ns0
 $ ip netns exec ns0 ifconfig enp8s0f0v0 1.1.1.1/24 up
 $ ip link set dev enp8s0f0v1 netns ns1
 $ ip netns exec ns1 ifconfig enp8s0f0v1 1.1.1.2/24 up

 #Setup tc arp and ct rules on mxl5 VF representors
 $ tc qdisc add dev enp8s0f0_0 ingress
 $ tc qdisc add dev enp8s0f0_1 ingress
 $ ifconfig enp8s0f0_0 up
 $ ifconfig enp8s0f0_1 up

 #Original side
 $ tc filter add dev enp8s0f0_0 ingress chain 0 proto ip flower \
    ct_state -trk ip_proto tcp dst_port 8888 \
      action pedit ex munge tcp dport set 5001 pipe \
      action csum ip tcp pipe \
      action ct pipe \
      action goto chain 1
 $ tc filter add dev enp8s0f0_0 ingress chain 1 proto ip flower \
    ct_state +trk+est \
      action mirred egress redirect dev enp8s0f0_1
 $ tc filter add dev enp8s0f0_0 ingress chain 1 proto ip flower \
    ct_state +trk+new \
      action ct commit pipe \
      action mirred egress redirect dev enp8s0f0_1
 $ tc filter add dev enp8s0f0_0 ingress chain 0 proto arp flower \
      action mirred egress redirect dev enp8s0f0_1

 #Reply side
 $ tc filter add dev enp8s0f0_1 ingress chain 0 proto arp flower \
      action mirred egress redirect dev enp8s0f0_0
 $ tc filter add dev enp8s0f0_1 ingress chain 0 proto ip flower \
    ct_state -trk ip_proto tcp \
      action ct pipe \
      action pedit ex munge tcp sport set 8888 pipe \
      action csum ip tcp pipe \
      action mirred egress redirect dev enp8s0f0_0

 #Run traffic
 $ ip netns exec ns1 iperf -s -p 5001&
 $ sleep 2 #wait for iperf to fully open
 $ ip netns exec ns0 iperf -c 1.1.1.2 -p 8888

 #dump tc filter stats on enp8s0f0_0 chain 0 rule and see hardware packets:
 $ tc -s filter show dev enp8s0f0_0 ingress chain 0 proto ip | grep "hardware.*pkt"
        Sent hardware 9310116832 bytes 6149672 pkt
        Sent hardware 9310116832 bytes 6149672 pkt
        Sent hardware 9310116832 bytes 6149672 pkt

A new connection executing the first filter in hardware will first rewrite
the dst port to the new port, and then the ct action is executed,
because this is a new connection, hardware will need to be send this back
to software, on chain 0, to execute the first filter again in software.
The dst port needs to be reverted otherwise it won't re-match the old
dst port in the first filter. Because of that, currently mlx5 driver will
reject offloading the above action ct rule.

This series adds support for hardware partially executing a filter's action list,
and letting tc software continue processing in the specific action instance
where hardware left off (in the above case after the "action pedit ex munge tcp
dport... of the first rule") allowing support for scenarios such as the above.
====================

Link: https://lore.kernel.org/r/20230217223620.28508-1-paulb@nvidia.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 871489dd 67027828
......@@ -85,7 +85,7 @@ config MLX5_BRIDGE
config MLX5_CLS_ACT
bool "MLX5 TC classifier action support"
depends on MLX5_ESWITCH && NET_CLS_ACT
depends on MLX5_ESWITCH && NET_CLS_ACT && NET_TC_SKB_EXT
default y
help
mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
......@@ -100,7 +100,7 @@ config MLX5_CLS_ACT
config MLX5_TC_CT
bool "MLX5 TC connection tracking offload support"
depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT
default y
help
Say Y here if you want to support offloading connection tracking rules
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */
#include <net/dst_metadata.h>
#include <linux/netdevice.h>
#include <linux/if_macvlan.h>
#include <linux/list.h>
......@@ -665,232 +664,54 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
mlx5e_rep_indr_block_unbind);
}
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5e_tc_update_priv *tc_priv,
u32 tunnel_id)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct tunnel_match_enc_opts enc_opts = {};
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct metadata_dst *tun_dst;
struct tunnel_match_key key;
u32 tun_id, enc_opts_id;
struct net_device *dev;
int err;
enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
tun_id = tunnel_id >> ENC_OPTS_BITS;
if (!tun_id)
return true;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
if (err) {
netdev_dbg(priv->netdev,
"Couldn't find tunnel for tun_id: %d, err: %d\n",
tun_id, err);
return false;
}
if (enc_opts_id) {
err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
enc_opts_id, &enc_opts);
if (err) {
netdev_dbg(priv->netdev,
"Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
enc_opts_id, err);
return false;
}
}
if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
key.enc_ip.tos, key.enc_ip.ttl,
key.enc_tp.dst, TUNNEL_KEY,
key32_to_tunnel_id(key.enc_key_id.keyid),
enc_opts.key.len);
} else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
key.enc_ip.tos, key.enc_ip.ttl,
key.enc_tp.dst, 0, TUNNEL_KEY,
key32_to_tunnel_id(key.enc_key_id.keyid),
enc_opts.key.len);
} else {
netdev_dbg(priv->netdev,
"Couldn't restore tunnel, unsupported addr_type: %d\n",
key.enc_control.addr_type);
return false;
}
if (!tun_dst) {
netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
return false;
}
tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
if (enc_opts.key.len)
ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
enc_opts.key.data,
enc_opts.key.len,
enc_opts.key.dst_opt_type);
skb_dst_set(skb, (struct dst_entry *)tun_dst);
dev = dev_get_by_index(&init_net, key.filter_ifindex);
if (!dev) {
netdev_dbg(priv->netdev,
"Couldn't find tunnel device with ifindex: %d\n",
key.filter_ifindex);
return false;
}
/* Set fwd_dev so we do dev_put() after datapath */
tc_priv->fwd_dev = dev;
skb->dev = dev;
return true;
}
static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
struct mlx5e_tc_update_priv *tc_priv)
{
struct mlx5e_priv *priv = netdev_priv(skb->dev);
u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (chain) {
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct tc_skb_ext *tc_skb_ext;
struct mlx5_eswitch *esw;
u32 zone_restore_id;
tc_skb_ext = tc_skb_ext_alloc(skb);
if (!tc_skb_ext) {
WARN_ON(1);
return false;
}
tc_skb_ext->chain = chain;
zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
esw = priv->mdev->priv.eswitch;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
zone_restore_id))
return false;
}
#endif /* CONFIG_NET_TC_SKB_EXT */
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}
static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
if (tc_priv->fwd_dev)
dev_put(tc_priv->fwd_dev);
}
static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5_mapped_obj *mapped_obj,
struct mlx5e_tc_update_priv *tc_priv)
{
if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
netdev_dbg(priv->netdev,
"Failed to restore tunnel info for sampled packet\n");
return;
}
mlx5e_tc_sample_skb(skb, mapped_obj);
mlx5_rep_tc_post_napi_receive(tc_priv);
}
static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5_mapped_obj *mapped_obj,
struct mlx5e_tc_update_priv *tc_priv,
bool *forward_tx,
u32 reg_c1)
{
u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
/* Tunnel restore takes precedence over int port restore */
if (tunnel_id)
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
mapped_obj->int_port_metadata, forward_tx)) {
/* Set fwd_dev for future dev_put */
tc_priv->fwd_dev = skb->dev;
return true;
}
return false;
}
void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
struct sk_buff *skb)
{
u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
u32 reg_c0, reg_c1, zone_restore_id, tunnel_id;
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5_mapped_obj mapped_obj;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct mlx5_tc_ct_priv *ct_priv;
struct mapping_ctx *mapping_ctx;
struct mlx5_eswitch *esw;
bool forward_tx = false;
struct mlx5e_priv *priv;
u32 reg_c0;
int err;
reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
goto forward;
/* If reg_c0 is not equal to the default flow tag then skb->mark
/* If mapped_obj_id is not equal to the default flow tag then skb->mark
* is not supported and must be reset back to 0.
*/
skb->mark = 0;
priv = netdev_priv(skb->dev);
esw = priv->mdev->priv.eswitch;
err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
if (err) {
netdev_dbg(priv->netdev,
"Couldn't find mapped object for reg_c0: %d, err: %d\n",
reg_c0, err);
goto free_skb;
}
mapping_ctx = esw->offloads.reg_c0_obj_pool;
reg_c1 = be32_to_cpu(cqe->ft_metadata);
zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
!mlx5_ipsec_is_rx_flow(cqe))
goto free_skb;
} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
goto free_skb;
} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
&forward_tx, reg_c1))
goto free_skb;
} else {
netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
ct_priv = uplink_priv->ct_priv;
if (!mlx5_ipsec_is_rx_flow(cqe) &&
!mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, zone_restore_id, tunnel_id,
&tc_priv))
goto free_skb;
}
forward:
if (forward_tx)
if (tc_priv.skb_done)
goto free_skb;
if (tc_priv.forward_tx)
dev_queue_xmit(skb);
else
napi_gro_receive(rq->cq.napi, skb);
mlx5_rep_tc_post_napi_receive(&tc_priv);
if (tc_priv.fwd_dev)
dev_put(tc_priv.fwd_dev);
return;
......
......@@ -237,7 +237,7 @@ sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
int err;
err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
CHAIN_TO_REG, obj_id);
MAPPED_OBJ_TO_REG, obj_id);
if (err)
goto err_set_regc0;
......
......@@ -59,6 +59,7 @@ struct mlx5_tc_ct_debugfs {
struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
struct mlx5e_priv *priv;
const struct net_device *netdev;
struct mod_hdr_tbl *mod_hdr_tbl;
struct xarray tuple_ids;
......@@ -85,7 +86,6 @@ struct mlx5_ct_flow {
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
struct mlx5_ct_ft *ft;
u32 chain_mapping;
};
struct mlx5_ct_zone_rule {
......@@ -1445,6 +1445,7 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
attr->ct_attr.zone = act->ct.zone;
attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
attr->ct_attr.act_miss_cookie = act->miss_cookie;
return 0;
}
......@@ -1782,7 +1783,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* + ft prio (tc chain) +
* + original match +
* +---------------------+
* | set chain miss mapping
* | set act_miss_cookie mapping
* | set fte_id
* | set tunnel_id
* | do decap
......@@ -1827,7 +1828,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_ct_flow *ct_flow;
int chain_mapping = 0, err;
int act_miss_mapping = 0, err;
struct mlx5_ct_ft *ft;
u16 zone;
......@@ -1862,22 +1863,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
/* Write chain miss tag for miss in ct table as we
* don't go though all prios of this chain as normal tc rules
* miss.
*/
err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
&chain_mapping);
err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie,
&act_miss_mapping);
if (err) {
ct_dbg("Failed to get chain register mapping for chain");
goto err_get_chain;
ct_dbg("Failed to get register mapping for act miss");
goto err_get_act_miss;
}
ct_flow->chain_mapping = chain_mapping;
attr->ct_attr.act_miss_mapping = act_miss_mapping;
err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
CHAIN_TO_REG, chain_mapping);
MAPPED_OBJ_TO_REG, act_miss_mapping);
if (err) {
ct_dbg("Failed to set chain register mapping");
ct_dbg("Failed to set act miss register mapping");
goto err_mapping;
}
......@@ -1941,8 +1938,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
mlx5e_mod_hdr_dealloc(pre_mod_acts);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping);
err_get_act_miss:
kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
......@@ -1981,7 +1978,7 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping);
mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
kfree(ct_flow->pre_ct_attr);
......@@ -2078,13 +2075,6 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
const char *err_msg = NULL;
int err = 0;
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
/* cannot restore chain ID on HW miss */
err_msg = "tc skb extension missing";
err = -EOPNOTSUPP;
goto out_err;
#endif
if (IS_ERR_OR_NULL(post_act)) {
/* Ignore_flow_level support isn't supported by default for VFs and so post_act
* won't be supported. Skip showing error msg.
......@@ -2161,6 +2151,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
}
spin_lock_init(&ct_priv->ht_lock);
ct_priv->priv = priv;
ct_priv->ns_type = ns_type;
ct_priv->chains = chains;
ct_priv->netdev = priv->netdev;
......
......@@ -28,6 +28,8 @@ struct mlx5_ct_attr {
struct mlx5_ct_flow *ct_flow;
struct nf_flowtable *nf_ft;
u32 ct_labels_id;
u32 act_miss_mapping;
u64 act_miss_cookie;
};
#define zone_to_reg_ct {\
......
......@@ -1792,7 +1792,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
if (mlx5e_cqe_regb_chain(cqe))
if (!mlx5e_tc_update_skb(cqe, skb)) {
if (!mlx5e_tc_update_skb_nic(cqe, skb)) {
dev_kfree_skb_any(skb);
goto free_wqe;
}
......@@ -2259,7 +2259,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
if (mlx5e_cqe_regb_chain(cqe))
if (!mlx5e_tc_update_skb(cqe, skb)) {
if (!mlx5e_tc_update_skb_nic(cqe, skb)) {
dev_kfree_skb_any(skb);
goto mpwrq_cqe_out;
}
......
......@@ -59,6 +59,8 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
struct mlx5e_tc_update_priv {
struct net_device *fwd_dev;
bool skb_done;
bool forward_tx;
};
struct mlx5_nic_flow_attr {
......@@ -95,6 +97,7 @@ struct mlx5_flow_attr {
struct mlx5_flow_attr *branch_true;
struct mlx5_flow_attr *branch_false;
struct mlx5_flow_attr *jumping_attr;
struct mlx5_flow_handle *act_id_restore_rule;
/* keep this union last */
union {
DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr);
......@@ -225,7 +228,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
enum mlx5e_tc_attr_to_reg {
CHAIN_TO_REG,
MAPPED_OBJ_TO_REG,
VPORT_TO_REG,
TUNNEL_TO_REG,
CTSTATE_TO_REG,
......@@ -234,7 +237,7 @@ enum mlx5e_tc_attr_to_reg {
MARK_TO_REG,
LABELS_TO_REG,
FTEID_TO_REG,
NIC_CHAIN_TO_REG,
NIC_MAPPED_OBJ_TO_REG,
NIC_ZONE_RESTORE_TO_REG,
PACKET_COLOR_TO_REG,
};
......@@ -368,7 +371,6 @@ struct mlx5e_tc_table *mlx5e_tc_table_alloc(void);
void mlx5e_tc_table_free(struct mlx5e_tc_table *tc);
static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 chain, reg_b;
reg_b = be32_to_cpu(cqe->ft_metadata);
......@@ -379,20 +381,29 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
if (chain)
return true;
#endif
return false;
}
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb);
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
struct mlx5_tc_ct_priv *ct_priv,
u32 zone_restore_id, u32 tunnel_id,
struct mlx5e_tc_update_priv *tc_priv);
#else /* CONFIG_MLX5_CLS_ACT */
static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; }
static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {}
static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
{ return false; }
static inline bool
mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
{ return true; }
#endif
int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u64 act_miss_cookie, u32 *act_miss_mapping);
void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u32 act_miss_mapping);
#endif /* __MLX5_EN_TC_H__ */
......@@ -52,12 +52,14 @@ enum mlx5_mapped_obj_type {
MLX5_MAPPED_OBJ_CHAIN,
MLX5_MAPPED_OBJ_SAMPLE,
MLX5_MAPPED_OBJ_INT_PORT_METADATA,
MLX5_MAPPED_OBJ_ACT_MISS,
};
struct mlx5_mapped_obj {
enum mlx5_mapped_obj_type type;
union {
u32 chain;
u64 act_miss_cookie;
struct {
u32 group_id;
u32 rate;
......
......@@ -214,7 +214,7 @@ create_chain_restore(struct fs_chain *chain)
struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
struct mlx5_fs_chains *chains = chain->chains;
enum mlx5e_tc_attr_to_reg chain_to_reg;
enum mlx5e_tc_attr_to_reg mapped_obj_to_reg;
struct mlx5_modify_hdr *mod_hdr;
u32 index;
int err;
......@@ -242,7 +242,7 @@ create_chain_restore(struct fs_chain *chain)
chain->id = index;
if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
chain_to_reg = CHAIN_TO_REG;
mapped_obj_to_reg = MAPPED_OBJ_TO_REG;
chain->restore_rule = esw_add_restore_rule(esw, chain->id);
if (IS_ERR(chain->restore_rule)) {
err = PTR_ERR(chain->restore_rule);
......@@ -253,7 +253,7 @@ create_chain_restore(struct fs_chain *chain)
* since we write the metadata to reg_b
* that is passed to SW directly.
*/
chain_to_reg = NIC_CHAIN_TO_REG;
mapped_obj_to_reg = NIC_MAPPED_OBJ_TO_REG;
} else {
err = -EINVAL;
goto err_rule;
......@@ -261,12 +261,12 @@ create_chain_restore(struct fs_chain *chain)
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, modact, field,
mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mfield);
MLX5_SET(set_action_in, modact, offset,
mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset);
mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].moffset);
MLX5_SET(set_action_in, modact, length,
mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ?
0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen);
mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen == 32 ?
0 : mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen);
MLX5_SET(set_action_in, modact, data, chain->id);
mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
1, modact);
......
......@@ -103,7 +103,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
}
ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress,
act->cookie, extack);
act->user_cookie, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action");
return err;
......
......@@ -319,12 +319,16 @@ struct nf_bridge_info {
* and read by ovs to recirc_id.
*/
struct tc_skb_ext {
__u32 chain;
union {
u64 act_miss_cookie;
__u32 chain;
};
__u16 mru;
__u16 zone;
u8 post_ct:1;
u8 post_ct_snat:1;
u8 post_ct_dnat:1;
u8 act_miss:1; /* Set if act_miss_cookie is used */
};
#endif
......
......@@ -39,7 +39,7 @@ struct tc_action {
struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie __rcu *act_cookie;
struct tc_cookie __rcu *user_cookie;
struct tcf_chain __rcu *goto_chain;
u32 tcfa_flags;
u8 hw_stats;
......
......@@ -228,7 +228,8 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
struct flow_action_entry {
enum flow_action_id id;
u32 hw_index;
unsigned long act_cookie;
unsigned long cookie;
u64 miss_cookie;
enum flow_action_hw_stats hw_stats;
action_destr destructor;
void *destructor_priv;
......@@ -321,7 +322,7 @@ struct flow_action_entry {
u16 sid;
} pppoe;
};
struct flow_action_cookie *cookie; /* user defined action cookie */
struct flow_action_cookie *user_cookie; /* user defined action cookie */
};
struct flow_action {
......
......@@ -59,6 +59,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
void tcf_block_put(struct tcf_block *block);
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei);
int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
int police, struct tcf_proto *tp, u32 handle, bool used_action_miss);
static inline bool tcf_block_shared(struct tcf_block *block)
{
......@@ -229,6 +231,7 @@ struct tcf_exts {
struct tc_action **actions;
struct net *net;
netns_tracker ns_tracker;
struct tcf_exts_miss_cookie_node *miss_cookie_node;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
......@@ -240,21 +243,11 @@ struct tcf_exts {
static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
int action, int police)
{
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
exts->nr_actions = 0;
/* Note: we do not own yet a reference on net.
* This reference might be taken later from tcf_exts_get_net().
*/
exts->net = net;
exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
GFP_KERNEL);
if (!exts->actions)
return -ENOMEM;
#ifdef CONFIG_NET_CLS
return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false);
#else
return -EOPNOTSUPP;
#endif
exts->action = action;
exts->police = police;
return 0;
}
/* Return false if the netns is being destroyed in cleanup_net(). Callers
......@@ -360,6 +353,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
return TC_ACT_OK;
}
static inline int
tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index,
struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
return tcf_action_exec(skb, exts->actions + act_index,
exts->nr_actions - act_index, res);
#else
return TC_ACT_OK;
#endif
}
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
struct tcf_exts *exts, u32 flags,
......@@ -584,6 +589,7 @@ int tc_setup_offload_action(struct flow_action *flow_action,
void tc_cleanup_offload_action(struct flow_action *flow_action);
int tc_setup_action(struct flow_action *flow_action,
struct tc_action *actions[],
u32 miss_cookie_base,
struct netlink_ext_ack *extack);
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
......
......@@ -369,6 +369,8 @@ struct tcf_proto_ops {
struct nlattr **tca,
struct netlink_ext_ack *extack);
void (*tmplt_destroy)(void *tmplt_priv);
struct tcf_exts * (*get_exts)(const struct tcf_proto *tp,
u32 handle);
/* rtnetlink specific */
int (*dump)(struct net*, struct tcf_proto*, void *,
......
......@@ -1041,7 +1041,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
key->recirc_id = tc_ext && !tc_ext->act_miss ?
tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
......
......@@ -125,7 +125,7 @@ static void free_tcf(struct tc_action *p)
free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->act_cookie, NULL);
tcf_set_action_cookie(&p->user_cookie, NULL);
if (chain)
tcf_chain_put_by_act(chain);
......@@ -268,7 +268,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
if (err)
goto fl_err;
err = tc_setup_action(&fl_action->action, actions, extack);
err = tc_setup_action(&fl_action->action, actions, 0, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to setup tc actions for offload");
......@@ -431,14 +431,14 @@ EXPORT_SYMBOL(tcf_idr_release);
static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
{
struct tc_cookie *act_cookie;
struct tc_cookie *user_cookie;
u32 cookie_len = 0;
rcu_read_lock();
act_cookie = rcu_dereference(act->act_cookie);
user_cookie = rcu_dereference(act->user_cookie);
if (act_cookie)
cookie_len = nla_total_size(act_cookie->len);
if (user_cookie)
cookie_len = nla_total_size(user_cookie->len);
rcu_read_unlock();
return nla_total_size(0) /* action number nested */
......@@ -488,7 +488,7 @@ tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act)
goto nla_put_failure;
rcu_read_lock();
cookie = rcu_dereference(a->act_cookie);
cookie = rcu_dereference(a->user_cookie);
if (cookie) {
if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
rcu_read_unlock();
......@@ -1362,9 +1362,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
{
bool police = flags & TCA_ACT_FLAGS_POLICE;
struct nla_bitfield32 userflags = { 0, 0 };
struct tc_cookie *user_cookie = NULL;
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_cookie *cookie = NULL;
struct tc_action *a;
int err;
......@@ -1375,8 +1375,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err < 0)
return ERR_PTR(err);
if (tb[TCA_ACT_COOKIE]) {
cookie = nla_memdup_cookie(tb);
if (!cookie) {
user_cookie = nla_memdup_cookie(tb);
if (!user_cookie) {
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
......@@ -1402,7 +1402,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
*init_res = err;
if (!police && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
tcf_set_action_cookie(&a->user_cookie, user_cookie);
if (!police)
a->hw_stats = hw_stats;
......@@ -1410,9 +1410,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return a;
err_out:
if (cookie) {
kfree(cookie->data);
kfree(cookie);
if (user_cookie) {
kfree(user_cookie->data);
kfree(user_cookie);
}
return ERR_PTR(err);
}
......
This diff is collapsed.
......@@ -529,6 +529,15 @@ static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
return f;
}
static struct tcf_exts *fl_get_exts(const struct tcf_proto *tp, u32 handle)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
struct cls_fl_filter *f;
f = idr_find(&head->handle_idr, handle);
return f ? &f->exts : NULL;
}
static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
bool *last, bool rtnl_held,
struct netlink_ext_ack *extack)
......@@ -2187,10 +2196,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
INIT_LIST_HEAD(&fnew->hw_list);
refcount_set(&fnew->refcnt, 1);
err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
if (err < 0)
goto errout;
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
......@@ -2200,15 +2205,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
if (!fold) {
spin_lock(&tp->lock);
if (!handle) {
handle = 1;
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
INT_MAX, GFP_ATOMIC);
} else {
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
handle, GFP_ATOMIC);
/* Filter with specified handle was concurrently
* inserted after initial check in cls_api. This is not
* necessarily an error if NLM_F_EXCL is not set in
* message flags. Returning EAGAIN will cause cls_api to
* try to update concurrently inserted rule.
*/
if (err == -ENOSPC)
err = -EAGAIN;
}
spin_unlock(&tp->lock);
if (err)
goto errout;
}
fnew->handle = handle;
err = tcf_exts_init_ex(&fnew->exts, net, TCA_FLOWER_ACT, 0, tp, handle,
!tc_skip_hw(fnew->flags));
if (err < 0)
goto errout_idr;
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
tp->chain->tmplt_priv, flags, fnew->flags,
extack);
if (err)
goto errout;
goto errout_idr;
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
goto errout;
goto errout_idr;
err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
......@@ -2274,29 +2310,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
refcount_dec(&fold->refcnt);
__fl_put(fold);
} else {
if (handle) {
/* user specifies a handle and it doesn't exist */
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
handle, GFP_ATOMIC);
/* Filter with specified handle was concurrently
* inserted after initial check in cls_api. This is not
* necessarily an error if NLM_F_EXCL is not set in
* message flags. Returning EAGAIN will cause cls_api to
* try to update concurrently inserted rule.
*/
if (err == -ENOSPC)
err = -EAGAIN;
} else {
handle = 1;
err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
INT_MAX, GFP_ATOMIC);
}
if (err)
goto errout_hw;
idr_replace(&head->handle_idr, fnew, fnew->handle);
refcount_inc(&fnew->refcnt);
fnew->handle = handle;
list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
spin_unlock(&tp->lock);
}
......@@ -2319,6 +2335,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
fnew->mask->filter_ht_params);
errout_mask:
fl_mask_put(head, fnew->mask);
errout_idr:
idr_remove(&head->handle_idr, fnew->handle);
errout:
__fl_put(fnew);
errout_tb:
......@@ -3436,6 +3454,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
.tmplt_dump = fl_tmplt_dump,
.get_exts = fl_get_exts,
.owner = THIS_MODULE,
.flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment