Commit bf3347c4 authored by David S. Miller's avatar David S. Miller
parents 93e61613 b8ce9037
......@@ -3570,7 +3570,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters_2);
MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
} else {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
......
......@@ -34,7 +34,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2018 Mellanox Technologies */
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/hashtable.h>
#include "mapping.h"
#define MAPPING_GRACE_PERIOD 2000
/* State of one data <-> id mapping table, created by mapping_create(). */
struct mapping_ctx {
/* id -> mapping_item lookup; ids are allocated from it in mapping_add() */
struct xarray xarray;
/* data -> mapping_item lookup, bucketed by jhash() of the data blob */
DECLARE_HASHTABLE(ht, 8);
struct mutex lock; /* Guards hashtable and xarray */
/* Upper limit handed to xa_alloc(); UINT_MAX when created with max_id == 0 */
unsigned long max_id;
/* Number of bytes hashed, compared and copied for every data blob */
size_t data_size;
/* When true, released items wait on pending_list before being erased */
bool delayed_removal;
/* The three members below are only initialised when delayed_removal is set */
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
};
/* One mapped entry: an allocated id together with a copy of its data. */
struct mapping_item {
/* Passed to kfree_rcu() when the item is released */
struct rcu_head rcu;
/* Link on mapping_ctx::pending_list while waiting for delayed removal */
struct list_head list;
/* jiffies value after which a pending item may be erased and freed */
unsigned long timeout;
/* Link in mapping_ctx::ht */
struct hlist_node node;
/* Incremented by mapping_add(), decremented by mapping_remove() */
int cnt;
/* Index of this item in mapping_ctx::xarray */
u32 id;
/* data_size bytes copied from the caller's buffer in mapping_add() */
char data[];
};
/* Map @data to an id, reusing the existing id when an identical blob is
 * already mapped.
 *
 * @ctx:  mapping context
 * @data: blob of ctx->data_size bytes used as the lookup key
 * @id:   on success, receives the id that stands for @data
 *
 * Every successful call takes one reference on the item; it is dropped by
 * mapping_remove().
 *
 * Return: 0 on success, -ENOMEM if the item cannot be allocated, or the
 * error returned by xa_alloc() when no id can be assigned.
 */
int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id)
{
	struct mapping_item *item;
	u32 key;
	int ret;

	mutex_lock(&ctx->lock);

	/* Look for an existing item carrying exactly the same bytes. */
	key = jhash(data, ctx->data_size, 0);
	hash_for_each_possible(ctx->ht, item, node, key) {
		if (memcmp(data, item->data, ctx->data_size) == 0)
			goto take_ref;
	}

	item = kzalloc(sizeof(*item) + ctx->data_size, GFP_KERNEL);
	if (!item) {
		ret = -ENOMEM;
		goto unlock;
	}

	memcpy(item->data, data, ctx->data_size);
	hash_add(ctx->ht, &item->node, key);

	ret = xa_alloc(&ctx->xarray, &item->id, item,
		       XA_LIMIT(1, ctx->max_id), GFP_KERNEL);
	if (ret) {
		/* No id available: undo the hashtable insertion. */
		hash_del(&item->node);
		kfree(item);
		goto unlock;
	}

take_ref:
	item->cnt++;
	*id = item->id;
	ret = 0;
unlock:
	mutex_unlock(&ctx->lock);

	return ret;
}
/* Erase @mi from the id xarray and free it through kfree_rcu(). */
static void mapping_remove_and_free(struct mapping_ctx *ctx,
				    struct mapping_item *mi)
{
	unsigned long index = mi->id;

	xa_erase(&ctx->xarray, index);
	kfree_rcu(mi, rcu);
}
/* Release an item whose reference count dropped to zero.
 *
 * Without delayed removal the item is erased and freed right away.
 * Otherwise it is stamped with a timeout of MAPPING_GRACE_PERIOD
 * milliseconds from now, queued on the pending list, and the delayed work
 * is scheduled to reap it.
 */
static void mapping_free_item(struct mapping_ctx *ctx,
			      struct mapping_item *mi)
{
	unsigned long grace;

	if (!ctx->delayed_removal) {
		mapping_remove_and_free(ctx, mi);
		return;
	}

	/* MAPPING_GRACE_PERIOD is in milliseconds; both the item timeout and
	 * the work delay must be expressed in jiffies.
	 */
	grace = msecs_to_jiffies(MAPPING_GRACE_PERIOD);
	mi->timeout = jiffies + grace;

	spin_lock(&ctx->pending_list_lock);
	list_add_tail(&mi->list, &ctx->pending_list);
	spin_unlock(&ctx->pending_list_lock);

	schedule_delayed_work(&ctx->dwork, grace);
}
/* Drop one reference on the mapping identified by @id.
 *
 * When the last reference goes away the item is unhashed and released via
 * mapping_free_item().
 *
 * Return: 0 on success, -ENOENT if @id is not present in the xarray.
 */
int mapping_remove(struct mapping_ctx *ctx, u32 id)
{
	struct mapping_item *item;
	int ret = 0;

	mutex_lock(&ctx->lock);

	item = xa_load(&ctx->xarray, (unsigned long)id);
	if (!item) {
		ret = -ENOENT;
	} else if (--item->cnt <= 0) {
		/* Last user is gone: no further lookups by data. */
		hash_del(&item->node);
		mapping_free_item(ctx, item);
	}

	mutex_unlock(&ctx->lock);

	return ret;
}
/* Copy the data mapped to @id into @data.
 *
 * @data must have room for ctx->data_size bytes. The lookup runs under
 * rcu_read_lock() and does not take ctx->lock.
 *
 * Return: 0 on success, -ENOENT if @id is not present in the xarray.
 */
int mapping_find(struct mapping_ctx *ctx, u32 id, void *data)
{
	struct mapping_item *item;
	int ret = -ENOENT;

	rcu_read_lock();
	item = xa_load(&ctx->xarray, id);
	if (item) {
		memcpy(data, item->data, ctx->data_size);
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}
static void
mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list)
{
struct mapping_item *mi;
list_for_each_entry(mi, list, list)
mapping_remove_and_free(ctx, mi);
}
static void mapping_work_handler(struct work_struct *work)
{
unsigned long min_timeout = 0, now = jiffies;
struct mapping_item *mi, *next;
LIST_HEAD(pending_items);
struct mapping_ctx *ctx;
ctx = container_of(work, struct mapping_ctx, dwork.work);
spin_lock(&ctx->pending_list_lock);
list_for_each_entry_safe(mi, next, &ctx->pending_list, list) {
if (time_after(now, mi->timeout))
list_move(&mi->list, &pending_items);
else if (!min_timeout ||
time_before(mi->timeout, min_timeout))
min_timeout = mi->timeout;
}
spin_unlock(&ctx->pending_list_lock);
mapping_remove_and_free_list(ctx, &pending_items);
if (min_timeout)
schedule_delayed_work(&ctx->dwork, abs(min_timeout - now));
}
/* Stop the reaper work and release everything still on the pending list.
 * Does nothing for contexts created without delayed removal.
 */
static void mapping_flush_work(struct mapping_ctx *ctx)
{
	if (ctx->delayed_removal) {
		cancel_delayed_work_sync(&ctx->dwork);
		mapping_remove_and_free_list(ctx, &ctx->pending_list);
	}
}
/* Allocate and initialise a mapping context.
 *
 * @data_size:       size in bytes of the blobs that will be mapped
 * @max_id:          largest id to hand out; 0 selects UINT_MAX
 * @delayed_removal: park released ids on a pending list for a grace period
 *                   instead of erasing them immediately
 *
 * Return: the new context, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct mapping_ctx *
mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
{
	struct mapping_ctx *new_ctx = kzalloc(sizeof(*new_ctx), GFP_KERNEL);

	if (!new_ctx)
		return ERR_PTR(-ENOMEM);

	new_ctx->data_size = data_size;
	if (max_id)
		new_ctx->max_id = max_id;
	else
		new_ctx->max_id = UINT_MAX;

	mutex_init(&new_ctx->lock);
	xa_init_flags(&new_ctx->xarray, XA_FLAGS_ALLOC1);

	if (delayed_removal) {
		spin_lock_init(&new_ctx->pending_list_lock);
		INIT_LIST_HEAD(&new_ctx->pending_list);
		INIT_DELAYED_WORK(&new_ctx->dwork, mapping_work_handler);
		new_ctx->delayed_removal = true;
	}

	return new_ctx;
}
/* Tear down a context created by mapping_create().
 *
 * Delayed-removal work is cancelled and items still on the pending list are
 * released first; then the xarray and the mutex are destroyed and the
 * context is freed.
 * NOTE(review): items that were added but never removed are not walked
 * here; presumably callers balance every mapping_add() before destroying
 * the context - confirm.
 */
void mapping_destroy(struct mapping_ctx *ctx)
{
mapping_flush_work(ctx);
xa_destroy(&ctx->xarray);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies */
#ifndef __MLX5_MAPPING_H__
#define __MLX5_MAPPING_H__
struct mapping_ctx;
int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id);
int mapping_remove(struct mapping_ctx *ctx, u32 id);
int mapping_find(struct mapping_ctx *ctx, u32 id, void *data);
/* mapping uses an xarray to allocate ids for data in add(), and to look the
 * data up by id in find().
 * For locking, add()/remove() are serialized by an internal mutex, while
 * find() only uses rcu_read_lock().
 * Choosing delayed_removal postpones the removal of a previously mapped
 * id by MAPPING_GRACE_PERIOD milliseconds.
 * This is to avoid races against hardware: a packet may already be marked
 * in hardware with an id when a quick remove() followed by add() reuses
 * that same id. find() would then return the new mapping instead of the
 * old one that was used to mark the packet.
 */
struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
bool delayed_removal);
void mapping_destroy(struct mapping_ctx *ctx);
#endif /* __MLX5_MAPPING_H__ */
......@@ -469,10 +469,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level)
u8 *match_level)
{
struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
struct netlink_ext_ack *extack = f->common.extack;
int err = 0;
if (!tunnel) {
......@@ -499,6 +504,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
goto out;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_match_control match;
u16 addr_type;
flow_rule_match_enc_control(rule, &match);
addr_type = match.key->addr_type;
/* For tunnel addr_type used same key id`s as for non-tunnel */
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
struct flow_match_ipv4_addrs match;
flow_rule_match_enc_ipv4_addrs(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.key->dst));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IP);
} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
struct flow_match_ipv6_addrs match;
flow_rule_match_enc_ipv6_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IPV6);
}
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
struct flow_match_ip match;
flow_rule_match_enc_ip(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
match.mask->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
match.key->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
match.key->tos >> 2);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
match.mask->ttl);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
match.key->ttl);
if (match.mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB
(priv->mdev,
ft_field_support.outer_ipv4_ttl)) {
NL_SET_ERR_MSG_MOD(extack,
"Matching on TTL is not supported");
err = -EOPNOTSUPP;
goto out;
}
}
/* Enforce DMAC when offloading incoming tunneled flows.
* Flow counters require a match on the DMAC.
*/
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16), priv->netdev->dev_addr);
/* let software handle IP fragments */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
return 0;
out:
return err;
}
......
......@@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level);
u8 *match_level);
int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
......
......@@ -1952,7 +1952,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_update_ndo_stats,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
.max_tc = 1,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_rep_stats_grps,
......@@ -1972,7 +1972,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
.max_tc = MLX5E_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_ul_rep_stats_grps,
......
......@@ -81,6 +81,11 @@ struct mlx5_rep_uplink_priv {
struct mutex unready_flows_lock;
struct list_head unready_flows;
struct work_struct reoffload_flows_work;
/* maps tun_info to a unique id*/
struct mapping_ctx *tunnel_mapping;
/* maps tun_enc_opts to a unique id*/
struct mapping_ctx *tunnel_enc_opts_mapping;
};
struct mlx5e_rep_priv {
......@@ -192,6 +197,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
......
......@@ -1195,6 +1195,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct mlx5e_wqe_frag_info *wi;
struct sk_buff *skb;
......@@ -1227,13 +1228,78 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (rep->vlan && skb_vlan_tag_present(skb))
skb_vlan_pop(skb);
if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
goto free_wqe;
napi_gro_receive(rq->cq.napi, skb);
mlx5_tc_rep_post_napi_receive(&tc_priv);
free_wqe:
mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
/* Multi-packet WQE completion handler installed as handle_rx_cqe_mpwqe by
 * the representor profiles.
 *
 * Accounts the strides consumed by @cqe, builds an skb for it, passes the
 * skb through mlx5e_tc_rep_update_skb() and hands it to GRO. Once every
 * stride of the WQE has been consumed, the WQE is freed and popped from the
 * linked-list work queue.
 */
void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe)
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
/* Byte offset of the first stride of this packet inside the WQE buffer */
u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
/* Split that offset into a page index and an offset within the page */
u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
u32 page_idx = wqe_offset >> PAGE_SHIFT;
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5_wq_ll *wq;
struct sk_buff *skb;
u16 cqe_bcnt;
/* Strides are accounted even for error and filler CQEs */
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
if (unlikely(mpwrq_is_filler_cqe(cqe))) {
struct mlx5e_rq_stats *stats = rq->stats;
stats->mpwqe_filler_cqes++;
stats->mpwqe_filler_strides += cstrides;
goto mpwrq_cqe_out;
}
cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
mlx5e_skb_from_cqe_mpwrq_linear,
mlx5e_skb_from_cqe_mpwrq_nonlinear,
rq, wi, cqe_bcnt, head_offset, page_idx);
if (!skb)
goto mpwrq_cqe_out;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
/* NOTE(review): on this path the skb is not handed to
 * napi_gro_receive(); whether mlx5e_tc_rep_update_skb() releases it when
 * it returns false is not visible here - confirm.
 */
if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
goto mpwrq_cqe_out;
napi_gro_receive(rq->cq.napi, skb);
mlx5_tc_rep_post_napi_receive(&tc_priv);
mpwrq_cqe_out:
/* Keep the WQE until all of its strides have been consumed */
if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
return;
wq = &rq->mpwqe.wq;
wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
mlx5e_free_rx_mpwqe(rq, wi, true);
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
#endif
struct sk_buff *
......
......@@ -55,10 +55,13 @@
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
struct mlx5_nic_flow_attr {
u32 action;
u32 flow_tag;
......@@ -134,6 +137,8 @@ struct mlx5e_tc_flow {
refcount_t refcnt;
struct rcu_head rcu_head;
struct completion init_done;
int tunnel_id; /* the mapped tunnel id of this flow */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
......@@ -144,15 +149,112 @@ struct mlx5e_tc_flow_parse_attr {
const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
int max_mod_hdr_actions;
void *mod_hdr_actions;
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
/* Key describing the tunnel (outer header) match of a flow.
 *
 * mlx5e_get_flow_tunnel_id() zeroes the whole structure, fills it from the
 * rule's ENC_* dissector keys plus the filter device ifindex, and passes it
 * to mapping_add() to obtain the tunnel id.
 * NOTE(review): the mapping compares keys as raw bytes, so the structure
 * presumably has to stay fully zero-initialised (padding included) before
 * it is filled - confirm against the mapping_create() data_size.
 */
struct tunnel_match_key {
struct flow_dissector_key_control enc_control;
struct flow_dissector_key_keyid enc_key_id;
struct flow_dissector_key_ports enc_tp;
struct flow_dissector_key_ip enc_ip;
/* Which member is filled depends on enc_control.addr_type */
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
int filter_ifindex;
};
/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
* Upper TUNNEL_INFO_BITS for general tunnel info.
* Lower ENC_OPTS_BITS bits for enc_opts.
*/
#define TUNNEL_INFO_BITS 6
#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
#define ENC_OPTS_BITS 2
#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
/* Per-attribute description of where a value lives in the metadata
 * registers, indexed by enum mlx5e_tc_attr_to_reg.
 *
 * mfield/moffset/mlen are consumed by mlx5e_tc_match_to_reg_set() to build a
 * set action (moffset and mlen are multiplied by 8 there, i.e. they are in
 * bytes); soffset/mlen are consumed by mlx5e_tc_match_to_reg_match() as the
 * byte offset and length inside the match buffers.
 */
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
/* No .soffset is given for this entry, so it is zero-initialised */
[CHAIN_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
.moffset = 0,
.mlen = 2,
},
[TUNNEL_TO_REG] = {
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
.moffset = 3,
.mlen = 1,
.soffset = MLX5_BYTE_OFF(fte_match_param,
misc_parameters_2.metadata_reg_c_1),
},
};
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
/* Add a match on the register-backed attribute @type to @spec.
 *
 * @spec: flow spec whose match_criteria/match_value buffers are written
 * @type: index into mlx5e_tc_attr_to_reg_mappings[]
 * @data: value to match
 * @mask: mask to match with
 *
 * Writes mlen bytes of @mask and @data at byte offset soffset of the
 * criteria and value buffers respectively, and enables matching on
 * misc_parameters_2.
 */
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
enum mlx5e_tc_attr_to_reg type,
u32 data,
u32 mask)
{
int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
void *headers_c = spec->match_criteria;
void *headers_v = spec->match_value;
void *fmask, *fval;
fmask = headers_c + soffset;
fval = headers_v + soffset;
/* Convert to big-endian, then shift so that the bytes to be written end
 * up in the part of the word that the memcpy() below copies.
 */
mask = cpu_to_be32(mask) >> (32 - (match_len * 8));
data = cpu_to_be32(data) >> (32 - (match_len * 8));
memcpy(fmask, &mask, match_len);
memcpy(fval, &data, match_len);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}
/* Append a "set register" modify-header action for attribute @type.
 *
 * @mdev:         core device, passed to alloc_mod_hdr_actions()
 * @mod_hdr_acts: action buffer the new action is appended to
 * @type:         index into mlx5e_tc_attr_to_reg_mappings[]
 * @data:         value the register field is set to
 *
 * Return: 0 on success, or the error from alloc_mod_hdr_actions().
 */
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
enum mlx5e_tc_attr_to_reg type,
u32 data)
{
int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
char *modact;
int err;
/* presumably makes sure there is room for one more action - TODO confirm */
err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB,
mod_hdr_acts);
if (err)
return err;
/* Address of the first unused action slot */
modact = mod_hdr_acts->actions +
(mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
/* Firmware has 5bit length field and 0 means 32bits */
if (mlen == 4)
mlen = 0;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, modact, field, mfield);
/* moffset and mlen are in bytes; the action fields take bits */
MLX5_SET(set_action_in, modact, offset, moffset * 8);
MLX5_SET(set_action_in, modact, length, mlen * 8);
MLX5_SET(set_action_in, modact, data, data);
mod_hdr_acts->num_actions++;
return 0;
}
struct mlx5e_hairpin {
struct mlx5_hairpin *pair;
......@@ -210,8 +312,6 @@ struct mlx5e_mod_hdr_entry {
int compl_result;
};
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
......@@ -361,10 +461,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
struct mod_hdr_key key;
u32 hash_key;
num_actions = parse_attr->num_mod_hdr_actions;
num_actions = parse_attr->mod_hdr_acts.num_actions;
actions_size = MLX5_MH_ACT_SZ * num_actions;
key.actions = parse_attr->mod_hdr_actions;
key.actions = parse_attr->mod_hdr_acts.actions;
key.num_actions = num_actions;
hash_key = hash_mod_hdr_info(&key);
......@@ -954,7 +1054,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
flow_act.modify_hdr = attr->modify_hdr;
kfree(parse_attr->mod_hdr_actions);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
if (err)
return err;
}
......@@ -1224,7 +1324,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
kfree(parse_attr->mod_hdr_actions);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
if (err)
return err;
}
......@@ -1274,6 +1374,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
int out_index;
mlx5e_put_flow_tunnel_id(flow);
if (flow_flag_test(flow, NOT_READY)) {
remove_unready_flow(flow);
kvfree(attr->parse_attr);
......@@ -1662,150 +1764,267 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
}
/* Tell whether the rule offloaded by @f contains a FLOW_ACTION_GOTO action.
 *
 * Return: true if at least one such action is present, false otherwise.
 */
static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_action *actions = &rule->action;
	const struct flow_action_entry *entry;
	int idx;

	flow_action_for_each(idx, entry, actions) {
		if (entry->id == FLOW_ACTION_GOTO)
			return true;
	}

	return false;
}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct net_device *filter_dev, u8 *match_level)
/* Classify a tunnel-options mask as "don't care" or full match.
 *
 * @priv:      used only for the netdev in the warning message
 * @opts:      options mask, walked as a sequence of struct geneve_opt
 * @extack:    receives the error message on failure
 * @dont_care: set to true if every option's class, type and data are zero,
 *             false otherwise
 *
 * Once an option with any non-zero field is seen, that option and every
 * later one must be fully set (class == U16_MAX, type == U8_MAX, data all
 * 0xFF).
 *
 * Return: 0 on success, -EOPNOTSUPP if an option is only partially masked.
 */
static int
enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
struct flow_dissector_key_enc_opts *opts,
struct netlink_ext_ack *extack,
bool *dont_care)
{
struct geneve_opt *opt;
int off = 0;
*dont_care = true;
while (opts->len > off) {
opt = (struct geneve_opt *)&opts->data[off];
/* opt->length is multiplied by 4 to get the data size in bytes */
if (!(*dont_care) || opt->opt_class || opt->type ||
memchr_inv(opt->opt_data, 0, opt->length * 4)) {
*dont_care = false;
if (opt->opt_class != U16_MAX ||
opt->type != U8_MAX ||
memchr_inv(opt->opt_data, 0xFF,
opt->length * 4)) {
NL_SET_ERR_MSG(extack,
"Partial match of tunnel options in chain > 0 isn't supported");
netdev_warn(priv->netdev,
"Partial match of tunnel options in chain > 0 isn't supported");
return -EOPNOTSUPP;
}
}
/* Advance past the option header and its data */
off += sizeof(struct geneve_opt) + opt->length * 4;
}
return 0;
}
/* Copy the match key that @rule holds for dissector key @diss_key into
 * *@dst; the number of bytes copied is sizeof(*dst). @rule and @dst are
 * each evaluated once.
 */
#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
struct flow_rule *__rule = (rule);\
typeof(dst) __dst = dst;\
\
memcpy(__dst,\
skb_flow_dissector_target(__rule->match.dissector,\
diss_key,\
__rule->match.key),\
sizeof(*__dst));\
})
static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct flow_cls_offload *f,
struct net_device *filter_dev)
{
struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
struct flow_match_enc_opts enc_opts_match;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct tunnel_match_key tunnel_key;
bool enc_opts_is_dont_care = true;
u32 tun_id, enc_opts_id = 0;
struct mlx5_eswitch *esw;
u32 value, mask;
int err;
err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
headers_c, headers_v, match_level);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"failed to parse tunnel attributes");
esw = priv->mdev->priv.eswitch;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
memset(&tunnel_key, 0, sizeof(tunnel_key));
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
&tunnel_key.enc_control);
if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
&tunnel_key.enc_ipv4);
else
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
&tunnel_key.enc_ipv6);
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
&tunnel_key.enc_tp);
COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
&tunnel_key.enc_key_id);
tunnel_key.filter_ifindex = filter_dev->ifindex;
err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
if (err)
return err;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_match_control match;
u16 addr_type;
flow_rule_match_enc_opts(rule, &enc_opts_match);
err = enc_opts_is_dont_care_or_full_match(priv,
enc_opts_match.mask,
extack,
&enc_opts_is_dont_care);
if (err)
goto err_enc_opts;
flow_rule_match_enc_control(rule, &match);
addr_type = match.key->addr_type;
if (!enc_opts_is_dont_care) {
err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
enc_opts_match.key, &enc_opts_id);
if (err)
goto err_enc_opts;
}
/* For tunnel addr_type used same key id`s as for non-tunnel */
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
struct flow_match_ipv4_addrs match;
value = tun_id << ENC_OPTS_BITS | enc_opts_id;
mask = enc_opts_id ? TUNNEL_ID_MASK :
(TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
flow_rule_match_enc_ipv4_addrs(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.key->src));
if (attr->chain) {
mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
TUNNEL_TO_REG, value, mask);
} else {
mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
err = mlx5e_tc_match_to_reg_set(priv->mdev,
mod_hdr_acts,
TUNNEL_TO_REG, value);
if (err)
goto err_set;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.key->dst));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IP);
} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
struct flow_match_ipv6_addrs match;
flow_rule_match_enc_ipv6_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IPV6);
}
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
struct flow_match_ip match;
flow->tunnel_id = value;
return 0;
flow_rule_match_enc_ip(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
match.mask->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
match.key->tos & 0x3);
err_set:
if (enc_opts_id)
mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
enc_opts_id);
err_enc_opts:
mapping_remove(uplink_priv->tunnel_mapping, tun_id);
return err;
}
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
match.key->tos >> 2);
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
struct mlx5_eswitch *esw;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
match.mask->ttl);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
match.key->ttl);
esw = flow->priv->mdev->priv.eswitch;
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
if (tun_id)
mapping_remove(uplink_priv->tunnel_mapping, tun_id);
if (enc_opts_id)
mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
enc_opts_id);
}
if (match.mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB
(priv->mdev,
ft_field_support.outer_ipv4_ttl)) {
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct net_device *filter_dev,
u8 *match_level,
bool *match_inner)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
bool needs_mapping, sets_mapping;
int err;
if (!mlx5e_is_eswitch_flow(flow))
return -EOPNOTSUPP;
needs_mapping = !!flow->esw_attr->chain;
sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f);
*match_inner = !needs_mapping;
if ((needs_mapping || sets_mapping) &&
!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
NL_SET_ERR_MSG(extack,
"Chains on tunnel devices isn't supported without register metadata support");
netdev_warn(priv->netdev,
"Chains on tunnel devices isn't supported without register metadata support");
return -EOPNOTSUPP;
}
if (!flow->esw_attr->chain) {
err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
match_level);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Matching on TTL is not supported");
return -EOPNOTSUPP;
"Failed to parse tunnel attributes");
netdev_warn(priv->netdev,
"Failed to parse tunnel attributes");
return err;
}
flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
}
/* Enforce DMAC when offloading incoming tunneled flows.
* Flow counters require a match on the DMAC.
*/
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16), priv->netdev->dev_addr);
if (!needs_mapping && !sets_mapping)
return 0;
/* let software handle IP fragments */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
}
return 0;
static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
{
return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
inner_headers);
}
static void *get_match_headers_criteria(u32 flags,
struct mlx5_flow_spec *spec)
static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
inner_headers) :
MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
return MLX5_ADDR_OF(fte_match_param, spec->match_value,
inner_headers);
}
static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
{
return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
}
static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
{
return MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
}
static void *get_match_headers_value(u32 flags,
struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
MLX5_ADDR_OF(fte_match_param, spec->match_value,
inner_headers) :
MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
get_match_inner_headers_value(spec) :
get_match_outer_headers_value(spec);
}
static void *get_match_headers_criteria(u32 flags,
struct mlx5_flow_spec *spec)
{
return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
get_match_inner_headers_criteria(spec) :
get_match_outer_headers_criteria(spec);
}
static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
......@@ -1843,6 +2062,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
}
static int __parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
struct net_device *filter_dev,
......@@ -1892,18 +2112,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
}
if (mlx5e_get_tc_tun(filter_dev)) {
if (parse_tunnel_attr(priv, spec, f, filter_dev,
outer_match_level))
return -EOPNOTSUPP;
bool match_inner = false;
/* At this point, header pointers should point to the inner
* headers, outer header were already set by parse_tunnel_attr
*/
match_level = inner_match_level;
headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
spec);
headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
spec);
err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
outer_match_level, &match_inner);
if (err)
return err;
if (match_inner) {
/* header pointers should point to the inner headers
* if the packet was decapsulated already.
* outer headers are set by parse_tunnel_attr.
*/
match_level = inner_match_level;
headers_c = get_match_inner_headers_criteria(spec);
headers_v = get_match_inner_headers_value(spec);
}
}
err = mlx5e_flower_parse_meta(filter_dev, f);
......@@ -2220,8 +2444,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
inner_match_level = MLX5_MATCH_NONE;
outer_match_level = MLX5_MATCH_NONE;
err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
&outer_match_level);
err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
&inner_match_level, &outer_match_level);
non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
outer_match_level : inner_match_level;
......@@ -2381,25 +2605,26 @@ static struct mlx5_fields fields[] = {
OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
};
/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
* max from the SW pedit action. On success, attr->num_mod_hdr_actions
* says how many HW actions were actually parsed.
*/
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
static int offload_pedit_fields(struct mlx5e_priv *priv,
int namespace,
struct pedit_headers_action *hdrs,
struct mlx5e_tc_flow_parse_attr *parse_attr,
u32 *action_flags,
struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
int i, action_size, nactions, max_actions, first, last, next_z;
int i, action_size, first, last, next_z;
void *headers_c, *headers_v, *action, *vals_p;
u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
struct mlx5_fields *f;
unsigned long mask;
__be32 mask_be32;
__be16 mask_be16;
int err;
u8 cmd;
mod_acts = &parse_attr->mod_hdr_acts;
headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
......@@ -2409,11 +2634,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
add_vals = &hdrs[1].vals;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
action = parse_attr->mod_hdr_actions +
parse_attr->num_mod_hdr_actions * action_size;
max_actions = parse_attr->max_mod_hdr_actions;
nactions = parse_attr->num_mod_hdr_actions;
for (i = 0; i < ARRAY_SIZE(fields); i++) {
bool skip;
......@@ -2439,13 +2659,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
return -EOPNOTSUPP;
}
if (nactions == max_actions) {
NL_SET_ERR_MSG_MOD(extack,
"too many pedit actions, can't offload");
printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
return -EOPNOTSUPP;
}
skip = false;
if (s_mask) {
void *match_mask = headers_c + f->match_offset;
......@@ -2492,6 +2705,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
return -EOPNOTSUPP;
}
err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"too many pedit actions, can't offload");
mlx5_core_warn(priv->mdev,
"mlx5: parsed %d pedit actions, can't do more\n",
mod_acts->num_actions);
return err;
}
action = mod_acts->actions +
(mod_acts->num_actions * action_size);
MLX5_SET(set_action_in, action, action_type, cmd);
MLX5_SET(set_action_in, action, field, f->field);
......@@ -2514,11 +2739,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
else if (f->field_bsize == 8)
MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
action += action_size;
nactions++;
++mod_acts->num_actions;
}
parse_attr->num_mod_hdr_actions = nactions;
return 0;
}
......@@ -2531,29 +2754,48 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
}
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
struct pedit_headers_action *hdrs,
int namespace,
struct mlx5e_tc_flow_parse_attr *parse_attr)
int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
int namespace,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
int nkeys, action_size, max_actions;
int action_size, new_num_actions, max_hw_actions;
size_t new_sz, old_sz;
void *ret;
nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
return 0;
max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
/* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
max_actions = min(max_actions, nkeys * 16);
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
if (!parse_attr->mod_hdr_actions)
max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
namespace);
new_num_actions = min(max_hw_actions,
mod_hdr_acts->actions ?
mod_hdr_acts->max_actions * 2 : 1);
if (mod_hdr_acts->max_actions == new_num_actions)
return -ENOSPC;
new_sz = action_size * new_num_actions;
old_sz = mod_hdr_acts->max_actions * action_size;
ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
if (!ret)
return -ENOMEM;
parse_attr->max_mod_hdr_actions = max_actions;
memset(ret + old_sz, 0, new_sz - old_sz);
mod_hdr_acts->actions = ret;
mod_hdr_acts->max_actions = new_num_actions;
return 0;
}
/* Free the modify-header action buffer and reset the bookkeeping fields so
 * the structure is left empty (no buffer, zero used, zero capacity).
 */
void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	void *buf = mod_hdr_acts->actions;

	/* Detach the buffer from the structure first, then release it. */
	mod_hdr_acts->max_actions = 0;
	mod_hdr_acts->num_actions = 0;
	mod_hdr_acts->actions = NULL;

	kfree(buf);
}
static const struct pedit_headers zero_masks = {};
static int parse_tc_pedit_action(struct mlx5e_priv *priv,
......@@ -2605,13 +2847,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
int err;
u8 cmd;
if (!parse_attr->mod_hdr_actions) {
err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
if (err)
goto out_err;
}
err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
action_flags, extack);
if (err < 0)
goto out_dealloc_parsed_actions;
......@@ -2631,8 +2868,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
return 0;
out_dealloc_parsed_actions:
kfree(parse_attr->mod_hdr_actions);
out_err:
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
return err;
}
......@@ -2761,6 +2997,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct net_device *filter_dev = parse_attr->filter_dev;
bool drop_action, pop_action;
u32 actions;
if (mlx5e_is_eswitch_flow(flow))
......@@ -2768,11 +3006,17 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
else
actions = flow->nic_attr->action;
if (flow_flag_test(flow, EGRESS) &&
!((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
(actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
(actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
return false;
drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP;
pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
if (flow_flag_test(flow, EGRESS) && !drop_action) {
/* We only support filters on tunnel device, or on vlan
* devices if they have pop/drop action
*/
if (!mlx5e_get_tc_tun(filter_dev) ||
(is_vlan_dev(filter_dev) && !pop_action))
return false;
}
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
return modify_header_match_supported(&parse_attr->spec,
......@@ -2968,9 +3212,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
/* in case all pedit actions are skipped, remove the MOD_HDR
* flag.
*/
if (parse_attr->num_mod_hdr_actions == 0) {
if (parse_attr->mod_hdr_acts.num_actions == 0) {
action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
kfree(parse_attr->mod_hdr_actions);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
}
}
......@@ -3366,9 +3610,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
bool encap = false, decap = false;
u32 action = attr->action;
int err, i, if_count = 0;
bool encap = false;
u32 action = 0;
if (!flow_action_has_entries(flow_action))
return -EINVAL;
......@@ -3571,7 +3815,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
attr->split_count = attr->out_count;
break;
case FLOW_ACTION_TUNNEL_DECAP:
action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
decap = true;
break;
case FLOW_ACTION_GOTO:
err = mlx5_validate_goto_chain(esw, flow, act, action,
......@@ -3610,9 +3854,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
* flag. we might have set split_count either by pedit or
* pop/push. if there is no pop/push either, reset it too.
*/
if (parse_attr->num_mod_hdr_actions == 0) {
if (parse_attr->mod_hdr_acts.num_actions == 0) {
action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
kfree(parse_attr->mod_hdr_actions);
dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
(action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
attr->split_count = 0;
......@@ -3624,6 +3868,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
if (attr->dest_chain) {
if (decap) {
/* It can be supported if we'll create a mapping for
* the tunnel device only (without tunnel), and set
* this tunnel id with this decap flow.
*
* On restore (miss), we'll just set this saved tunnel
* device.
*/
NL_SET_ERR_MSG(extack,
"Decap with goto isn't supported");
netdev_warn(priv->netdev,
"Decap with goto isn't supported");
return -EOPNOTSUPP;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
NL_SET_ERR_MSG_MOD(extack,
"Mirroring goto chain rules isn't supported");
......@@ -4353,12 +4613,55 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
return rhashtable_init(tc_ht, &tc_ht_params);
const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts);
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *priv;
struct mapping_ctx *mapping;
int err;
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
mapping = mapping_create(sizeof(struct tunnel_match_key),
TUNNEL_INFO_BITS_MASK, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_tun_mapping;
}
uplink_priv->tunnel_mapping = mapping;
mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
}
uplink_priv->tunnel_enc_opts_mapping = mapping;
err = rhashtable_init(tc_ht, &tc_ht_params);
if (err)
goto err_ht_init;
return err;
err_ht_init:
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
return err;
}
/* Tear down the eswitch TC state: delete every flow still held in @tc_ht,
 * then destroy the two tunnel mappings stored in the uplink private data
 * that embeds @tc_ht.
 */
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	/* tc_ht is a member of mlx5_rep_uplink_priv; recover the container. */
	struct mlx5_rep_uplink_priv *uplink_priv =
		container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);

	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);
}
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
......@@ -4390,3 +4693,138 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
}
mutex_unlock(&rpriv->unready_flows_lock);
}
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
/* Re-attach tunnel metadata to @skb from the @tunnel_id carried by the packet.
 *
 * @tunnel_id packs two mapping ids: the low ENC_OPTS_BITS bits select an
 * entry in the enc-opts mapping, the remaining upper bits select an entry in
 * the tunnel mapping. A zero tunnel part means there is nothing to restore.
 *
 * On success the skb gets a metadata dst describing the tunnel, skb->dev is
 * switched to the device found by the stored ifindex, and that device (with
 * the reference taken by dev_get_by_index()) is recorded in @tc_priv->tun_dev.
 *
 * Returns true when there was nothing to restore or the restore succeeded,
 * false if a lookup or allocation failed.
 */
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				 struct mlx5e_tc_update_priv *tc_priv,
				 u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u32 enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	u32 tun_id = tunnel_id >> ENC_OPTS_BITS;
	struct flow_dissector_key_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct ip_tunnel_info *tun_info;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	struct net_device *tun_dev;
	int err;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	/* Resolve the tunnel match key first; it is mandatory. */
	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		WARN_ON_ONCE(true);
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	/* Tunnel options are optional; enc_opts stays zeroed when absent. */
	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	tun_dst = tun_rx_dst(enc_opts.len);
	if (!tun_dst) {
		WARN_ON_ONCE(true);
		return false;
	}

	tun_info = &tun_dst->u.tun_info;
	ip_tunnel_key_init(&tun_info->key,
			   key.enc_ipv4.src, key.enc_ipv4.dst,
			   key.enc_ip.tos, key.enc_ip.ttl,
			   0, /* label */
			   key.enc_tp.src, key.enc_tp.dst,
			   key32_to_tunnel_id(key.enc_key_id.keyid),
			   TUNNEL_KEY);

	if (enc_opts.len)
		ip_tunnel_info_opts_set(tun_info, enc_opts.data,
					enc_opts.len, enc_opts.dst_opt_type);

	/* The dst is attached to the skb before the device lookup below. */
	skb_dst_set(skb, (struct dst_entry *)tun_dst);

	tun_dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!tun_dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set tun_dev so we do dev_put() after datapath */
	tc_priv->tun_dev = tun_dev;
	skb->dev = tun_dev;

	return true;
}
#endif /* CONFIG_NET_TC_SKB_EXT */
/* Restore TC state on a received representor packet from its CQE.
 *
 * With CONFIG_NET_TC_SKB_EXT enabled, the chain tag is read from
 * cqe->sop_drop_qpn (masked with MLX5E_TC_FLOW_ID_MASK). A zero tag or the
 * default flow tag means there is nothing to restore. Otherwise the tag is
 * translated to a chain number, a non-zero chain is stored in a TC skb
 * extension, and the tunnel id taken from cqe->imm_inval_pkey is handed to
 * mlx5e_restore_tunnel().
 *
 * Returns false if a lookup or allocation failed, true otherwise. Without
 * CONFIG_NET_TC_SKB_EXT the function always returns true.
 */
bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
			     struct sk_buff *skb,
			     struct mlx5e_tc_update_priv *tc_priv)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain_tag, tun_reg, chain = 0;
	struct mlx5e_priv *priv;
	struct tc_skb_ext *ext;
	int tun_byte_off;
	int err;

	chain_tag = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
	if (!chain_tag || chain_tag == MLX5_FS_DEFAULT_FLOW_TAG)
		return true;

	priv = netdev_priv(skb->dev);

	err = mlx5_eswitch_get_chain_for_tag(priv->mdev->priv.eswitch,
					     chain_tag, &chain);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   chain_tag, err);
		return false;
	}

	if (chain) {
		ext = skb_ext_add(skb, TC_SKB_EXT);
		if (!ext) {
			WARN_ON(1);
			return false;
		}

		ext->chain = chain;
	}

	/* moffset is a byte offset into the register, hence the * 8. */
	tun_reg = be32_to_cpu(cqe->imm_inval_pkey);
	tun_byte_off = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;

	return mlx5e_restore_tunnel(priv, skb, tc_priv,
				    tun_reg >> (8 * tun_byte_off));
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}
/* Drop the device reference recorded in @tc_priv->tun_dev, if any. */
void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
	struct net_device *tun_dev = tc_priv->tun_dev;

	if (!tun_dev)
		return;

	dev_put(tun_dev);
}
......@@ -91,9 +91,54 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
/* Index into mlx5e_tc_attr_to_reg_mappings[]: selects which attribute's
 * register mapping (chain or tunnel) is being used.
 */
enum mlx5e_tc_attr_to_reg {
CHAIN_TO_REG,
TUNNEL_TO_REG,
};
/* Describes where one attribute lives inside a metadata register.
 * moffset and mlen are in bytes: users multiply them by 8 to get the bit
 * offset and bit length they program or shift by.
 */
struct mlx5e_tc_attr_to_reg_mapping {
int mfield; /* rewrite field */
int moffset; /* offset of mfield */
int mlen; /* bytes to rewrite/match */
int soffset; /* offset of spec for match */
};
extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev);
/* Per-packet state passed from mlx5e_tc_rep_update_skb() to
 * mlx5_tc_rep_post_napi_receive().
 */
struct mlx5e_tc_update_priv {
/* Tunnel device looked up during tunnel restore; if set,
 * mlx5_tc_rep_post_napi_receive() calls dev_put() on it.
 */
struct net_device *tun_dev;
};
bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
struct mlx5e_tc_update_priv *tc_priv);
void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
/* Growable array of modify-header actions. alloc_mod_hdr_actions() enlarges
 * the buffer when it is full; dealloc_mod_hdr_actions() frees it and resets
 * all three fields.
 */
struct mlx5e_tc_mod_hdr_acts {
int num_actions; /* number of actions currently filled in */
int max_actions; /* number of actions the buffer has room for */
void *actions; /* action buffer, NULL when nothing is allocated */
};
int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
enum mlx5e_tc_attr_to_reg type,
u32 data);
void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
enum mlx5e_tc_attr_to_reg type,
u32 data,
u32 mask);
int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
int namespace,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
......
......@@ -197,6 +197,10 @@ struct mlx5_eswitch_fdb {
};
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads_restore;
struct mlx5_flow_group *restore_group;
struct mlx5_modify_hdr *restore_copy_hdr_id;
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
......@@ -636,6 +640,11 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw);
void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
u32
esw_get_max_restore_tag(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
......@@ -651,6 +660,12 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
/* Stub for builds without CONFIG_MLX5_ESWITCH: restore rules cannot be
 * created, so always report -EOPNOTSUPP.
 *
 * Must be "static inline" like the other stubs in this header; a plain
 * static function defined in a header triggers an unused-function warning
 * in every file that includes it without calling it.
 */
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
	return ERR_PTR(-EOPNOTSUPP);
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
......@@ -260,7 +260,8 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
attr->in_rep->vport));
misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
......@@ -781,9 +782,11 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
esw_vport_context.fdb_to_vport_reg_c_id);
if (enable)
fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0 |
MLX5_FDB_TO_VPORT_REG_C_1;
else
fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
fdb_to_vport_reg_c_id &= ~(MLX5_FDB_TO_VPORT_REG_C_0 |
MLX5_FDB_TO_VPORT_REG_C_1);
MLX5_SET(modify_esw_vport_context_in, in,
esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
......@@ -805,7 +808,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters_2);
MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
} else {
......@@ -1020,6 +1024,56 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
return err;
}
/* Add a rule to the restore table that matches @tag in the chain-tag bits of
 * metadata reg_c_0. A matching packet gets @tag as its flow tag, has the
 * restore copy modify-header applied, and is forwarded to the offloads table.
 *
 * Returns the new rule handle, or an ERR_PTR() on failure.
 */
struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
	struct mlx5_flow_act flow_act = {
		.flags = FLOW_ACT_NO_APPEND,
		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,
		.modify_hdr = esw->offloads.restore_copy_hdr_id,
	};
	struct mlx5_flow_table *restore_ft = esw->offloads.ft_offloads_restore;
	struct mlx5_flow_destination dest;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	void *criteria, *value;

	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return ERR_PTR(-ENOMEM);

	/* Match only on the chain-tag bits of reg_c_0. */
	criteria = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, criteria, metadata_reg_c_0,
		 ESW_CHAIN_TAG_METADATA_MASK);

	value = MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, value, metadata_reg_c_0, tag);

	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;

	/* Tag the packet with the same value that was matched. */
	spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
	spec->flow_context.flow_tag = tag;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = esw->offloads.ft_offloads;

	rule = mlx5_add_flow_rules(restore_ft, spec, &flow_act, &dest, 1);
	kfree(spec);

	if (IS_ERR(rule))
		esw_warn(esw->dev,
			 "Failed to create restore rule for tag: %d, err(%d)\n",
			 tag, (int)PTR_ERR(rule));

	return rule;
}
/* Return the largest tag value usable with restore rules, which is the
 * chain-tag metadata mask. @esw is not used.
 */
u32
esw_get_max_restore_tag(struct mlx5_eswitch *esw)
{
return ESW_CHAIN_TAG_METADATA_MASK;
}
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
......@@ -1035,8 +1089,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS_2);
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
misc_parameters_2.metadata_reg_c_0);
MLX5_SET(fte_match_param, match_criteria,
misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
} else {
MLX5_SET(create_flow_group_in, flow_group_in,
match_criteria_enable,
......@@ -1241,6 +1296,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
}
ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
ft_attr.prio = 1;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft_offloads)) {
......@@ -1318,7 +1374,8 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
} else {
......@@ -1344,6 +1401,7 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
return flow_rule;
}
static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
......@@ -1379,6 +1437,102 @@ static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode
out:
*mode = mlx5_mode;
return 0;
}
/* Undo esw_create_restore_table(): release the copy modify-header, then the
 * flow group, then the restore flow table itself.
 */
static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
{
	mlx5_modify_header_dealloc(esw->dev,
				   esw->offloads.restore_copy_hdr_id);
	mlx5_destroy_flow_group(esw->offloads.restore_group);
	mlx5_destroy_flow_table(esw->offloads.ft_offloads_restore);
}
static int esw_create_restore_table(struct mlx5_eswitch *esw)
{
u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *ns;
struct mlx5_modify_hdr *mod_hdr;
void *match_criteria, *misc;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *g;
u32 *flow_group_in;
int err = 0;
ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
if (!ns) {
esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
return -EOPNOTSUPP;
}
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in) {
err = -ENOMEM;
goto out_free;
}
ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS;
ft = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
esw_warn(esw->dev, "Failed to create restore table, err %d\n",
err);
goto out_free;
}
memset(flow_group_in, 0, inlen);
match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
match_criteria);
misc = MLX5_ADDR_OF(fte_match_param, match_criteria,
misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
ESW_CHAIN_TAG_METADATA_MASK);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
ft_attr.max_fte - 1);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS_2);
g = mlx5_create_flow_group(ft, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
esw_warn(dev, "Failed to create restore flow group, err: %d\n",
err);
goto err_group;
}
MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY);
MLX5_SET(copy_action_in, modact, src_field,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
MLX5_SET(copy_action_in, modact, dst_field,
MLX5_ACTION_IN_FIELD_METADATA_REG_B);
mod_hdr = mlx5_modify_header_alloc(esw->dev,
MLX5_FLOW_NAMESPACE_KERNEL, 1,
modact);
if (IS_ERR(mod_hdr)) {
esw_warn(dev, "Failed to create restore mod header, err: %d\n",
err);
err = PTR_ERR(mod_hdr);
goto err_mod_hdr;
}
esw->offloads.ft_offloads_restore = ft;
esw->offloads.restore_group = g;
esw->offloads.restore_copy_hdr_id = mod_hdr;
return 0;
err_mod_hdr:
mlx5_destroy_flow_group(g);
err_group:
mlx5_destroy_flow_table(ft);
out_free:
kvfree(flow_group_in);
return err;
}
static int esw_offloads_start(struct mlx5_eswitch *esw,
......@@ -1825,11 +1979,19 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_act flow_act = {};
int err = 0;
u32 key;
key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
MLX5_SET(set_action_in, action, data,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
MLX5_SET(set_action_in, action, field,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
MLX5_SET(set_action_in, action, data, key);
MLX5_SET(set_action_in, action, offset,
ESW_SOURCE_PORT_METADATA_OFFSET);
MLX5_SET(set_action_in, action, length,
ESW_SOURCE_PORT_METADATA_BITS);
vport->ingress.offloads.modify_metadata =
mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
......@@ -2144,13 +2306,17 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
if (err)
return err;
err = esw_create_offloads_fdb_tables(esw, total_vports);
err = esw_create_offloads_table(esw, total_vports);
if (err)
goto create_fdb_err;
goto create_offloads_err;
err = esw_create_offloads_table(esw, total_vports);
err = esw_create_restore_table(esw);
if (err)
goto create_ft_err;
goto create_restore_err;
err = esw_create_offloads_fdb_tables(esw, total_vports);
if (err)
goto create_fdb_err;
err = esw_create_vport_rx_group(esw, total_vports);
if (err)
......@@ -2162,12 +2328,12 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
return 0;
create_fg_err:
esw_destroy_offloads_table(esw);
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
create_fdb_err:
esw_destroy_restore_table(esw);
create_restore_err:
esw_destroy_offloads_table(esw);
create_offloads_err:
esw_destroy_uplink_offloads_acl_tables(esw);
return err;
......@@ -2177,8 +2343,9 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
{
mutex_destroy(&esw->fdb_table.offloads.vports.lock);
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
esw_destroy_restore_table(esw);
esw_destroy_offloads_table(esw);
esw_destroy_uplink_offloads_acl_tables(esw);
}
......@@ -2670,9 +2837,41 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
}
EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num)
{
return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0);
u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
u32 val;
/* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
/* Trim vhca_id to ESW_VHCA_ID_BITS */
vhca_id &= vhca_id_mask;
/* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they
* don't overlap with VF numbers, and themselves, after trimming.
*/
WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) <
vport_num_mask - 1);
WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) <
vport_num_mask - 1);
WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) ==
(MLX5_VPORT_ECPF & vport_num_mask));
/* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't
* overlap with pf and ecpf.
*/
if (vport_num != MLX5_VPORT_UPLINK &&
vport_num != MLX5_VPORT_ECPF)
WARN_ON_ONCE(vport_num >= vport_num_mask - 1);
/* We can now trim vport_num to ESW_VPORT_BITS */
vport_num &= vport_num_mask;
val = (vhca_id << ESW_VPORT_BITS) | vport_num;
return val << (32 - ESW_SOURCE_PORT_METADATA_BITS);
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
......@@ -6,14 +6,17 @@
#include <linux/mlx5/fs.h>
#include "eswitch_offloads_chains.h"
#include "en/mapping.h"
#include "mlx5_core.h"
#include "fs_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping)
#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
......@@ -43,6 +46,7 @@ struct mlx5_esw_chains_priv {
struct mutex lock;
struct mlx5_flow_table *tc_end_fdb;
struct mapping_ctx *chains_mapping;
int fdb_left[ARRAY_SIZE(ESW_POOLS)];
};
......@@ -53,9 +57,12 @@ struct fdb_chain {
u32 chain;
int ref;
int id;
struct mlx5_eswitch *esw;
struct list_head prios_list;
struct mlx5_flow_handle *restore_rule;
struct mlx5_modify_hdr *miss_modify_hdr;
};
struct fdb_prio_key {
......@@ -261,6 +268,70 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
mlx5_destroy_flow_table(fdb);
}
/* Set up what is needed to restore @fdb_chain's chain number after a miss:
 * allocate a mapping id for the chain, build a modify-header that writes
 * that id into the chain register, and add a restore rule for the id.
 * Nothing is done for the FT chain.
 *
 * On success fdb_chain->id, ->miss_modify_hdr and ->restore_rule are set.
 * Returns 0 on success or a negative errno, with the mapping released.
 */
static int
create_fdb_chain_restore(struct fdb_chain *fdb_chain)
{
	/* Zero the buffer: MLX5_SET() only writes the named fields, so the
	 * reserved bits would otherwise carry stack garbage to the device.
	 */
	char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
	struct mlx5_eswitch *esw = fdb_chain->esw;
	struct mlx5_modify_hdr *mod_hdr;
	u32 index;
	int err;

	if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw))
		return 0;

	err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index);
	if (err)
		return err;
	if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
		/* we got the special default flow tag id, so we won't know
		 * if we actually marked the packet with the restore rule
		 * we create.
		 *
		 * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
		 *
		 * NOTE(review): mapping_add() appears to look up existing
		 * entries by data first, so this second call may return the
		 * same id again - confirm if the default tag ever changes.
		 */
		err = mapping_add(esw_chains_mapping(esw),
				  &fdb_chain->chain, &index);
		mapping_remove(esw_chains_mapping(esw),
			       MLX5_FS_DEFAULT_FLOW_TAG);
		if (err)
			return err;
	}

	fdb_chain->id = index;

	/* moffset/mlen are in bytes; the action wants bit offset/length. */
	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field,
		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield);
	MLX5_SET(set_action_in, modact, offset,
		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8);
	MLX5_SET(set_action_in, modact, length,
		 mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8);
	MLX5_SET(set_action_in, modact, data, fdb_chain->id);
	mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
					   1, modact);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		goto err_mod_hdr;
	}
	fdb_chain->miss_modify_hdr = mod_hdr;

	fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id);
	if (IS_ERR(fdb_chain->restore_rule)) {
		err = PTR_ERR(fdb_chain->restore_rule);
		goto err_rule;
	}

	return 0;

err_rule:
	mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr);
err_mod_hdr:
	/* Datapath can't find this mapping, so we can safely remove it */
	mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
	return err;
}
static struct fdb_chain *
mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
{
......@@ -275,6 +346,10 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
fdb_chain->chain = chain;
INIT_LIST_HEAD(&fdb_chain->prios_list);
err = create_fdb_chain_restore(fdb_chain);
if (err)
goto err_restore;
err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
chain_params);
if (err)
......@@ -283,6 +358,12 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
return fdb_chain;
err_insert:
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
mlx5_del_flow_rules(fdb_chain->restore_rule);
mlx5_modify_header_dealloc(esw->dev,
fdb_chain->miss_modify_hdr);
}
err_restore:
kvfree(fdb_chain);
return ERR_PTR(err);
}
......@@ -294,6 +375,15 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain)
rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
chain_params);
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
mlx5_del_flow_rules(fdb_chain->restore_rule);
mlx5_modify_header_dealloc(esw->dev,
fdb_chain->miss_modify_hdr);
mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
}
kvfree(fdb_chain);
}
......@@ -316,10 +406,12 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
}
static struct mlx5_flow_handle *
mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain,
struct mlx5_flow_table *fdb,
struct mlx5_flow_table *next_fdb)
{
static const struct mlx5_flow_spec spec = {};
struct mlx5_eswitch *esw = fdb_chain->esw;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act act = {};
......@@ -328,6 +420,11 @@ mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = next_fdb;
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
act.modify_hdr = fdb_chain->miss_modify_hdr;
act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
}
......@@ -351,7 +448,8 @@ mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
list_for_each_entry_continue_reverse(pos,
&fdb_chain->prios_list,
list) {
miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain,
pos->fdb,
next_fdb);
if (IS_ERR(miss_rules[n])) {
err = PTR_ERR(miss_rules[n]);
......@@ -465,7 +563,7 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
}
/* Add miss rule to next_fdb */
miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb);
if (IS_ERR(miss_rule)) {
err = PTR_ERR(miss_rule);
goto err_miss_rule;
......@@ -630,6 +728,7 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw)
struct mlx5_esw_chains_priv *chains_priv;
struct mlx5_core_dev *dev = esw->dev;
u32 max_flow_counter, fdb_max;
struct mapping_ctx *mapping;
int err;
chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
......@@ -666,10 +765,20 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw)
if (err)
goto init_prios_ht_err;
mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw),
true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto mapping_err;
}
esw_chains_mapping(esw) = mapping;
mutex_init(&esw_chains_lock(esw));
return 0;
mapping_err:
rhashtable_destroy(&esw_prios_ht(esw));
init_prios_ht_err:
rhashtable_destroy(&esw_chains_ht(esw));
init_chains_ht_err:
......@@ -681,6 +790,7 @@ static void
mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
{
mutex_destroy(&esw_chains_lock(esw));
mapping_destroy(esw_chains_mapping(esw));
rhashtable_destroy(&esw_prios_ht(esw));
rhashtable_destroy(&esw_chains_ht(esw));
......@@ -759,3 +869,17 @@ mlx5_esw_chains_destroy(struct mlx5_eswitch *esw)
mlx5_esw_chains_close(esw);
mlx5_esw_chains_cleanup(esw);
}
/* Translate a chain restore tag back into the chain index it was mapped from.
 *
 * @esw:   eswitch whose chains mapping is consulted
 * @tag:   tag value to look up
 * @chain: output, filled in by mapping_find() on success
 *
 * Returns 0 on success. Any lookup failure is logged and reported to the
 * caller as -ENOENT, regardless of the error mapping_find() returned.
 */
int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag,
				   u32 *chain)
{
	int err;

	err = mapping_find(esw_chains_mapping(esw), tag, chain);
	if (err) {
		/* tag is a u32, so print it unsigned */
		esw_warn(esw->dev, "Can't find chain for tag: %u\n", tag);
		return -ENOENT;
	}

	return 0;
}
......@@ -28,5 +28,7 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
#endif /* __ML5_ESW_CHAINS_H__ */
int
mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
#endif /* __ML5_ESW_CHAINS_H__ */
......@@ -111,8 +111,8 @@
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
#define OFFLOADS_MAX_FT 2
#define OFFLOADS_NUM_PRIOS 1
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
#define OFFLOADS_NUM_PRIOS 2
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
......
......@@ -71,7 +71,28 @@ enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
/* Reg C0 usage:
 * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT_BITS(8) | ESW_CHAIN_TAG(16) >
 *
 * The highest 8 bits of reg C0 hold the vhca_id, the next 8 bits hold the
 * vport_num, and the rest (the lowest 16 bits) are left for tc chain tag
 * restoration.
 * VHCA_ID + VPORT together comprise the SOURCE_PORT matching.
 */
#define ESW_VHCA_ID_BITS 8
#define ESW_VPORT_BITS 8
#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS)
#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\
0)
/* Mask selecting the source-port metadata bits of reg C0, i.e. the top
 * ESW_SOURCE_PORT_METADATA_BITS bits of the 32-bit register.
 */
static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
{
	const u32 source_port_mask =
		GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS);

	return source_port_mask;
}
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num);
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
......@@ -94,11 +115,17 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
};
static inline u32
mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
int vport_num)
{
return 0;
};
/* Stub for builds without CONFIG_MLX5_ESWITCH: no metadata bits to match. */
static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
{
	return 0;
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif
......@@ -72,6 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
int tcf_classify_ingress(struct sk_buff *skb,
const struct tcf_block *ingress_block,
const struct tcf_proto *tp, struct tcf_result *res,
bool compat_mode);
#else
static inline bool tcf_block_shared(struct tcf_block *block)
......@@ -133,6 +137,15 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
return TC_ACT_UNSPEC;
}
/* Fallback used in the #else branch of this header: no classification is
 * performed and every packet is reported as TC_ACT_UNSPEC.
 */
static inline int
tcf_classify_ingress(struct sk_buff *skb,
		     const struct tcf_block *ingress_block,
		     const struct tcf_proto *tp,
		     struct tcf_result *res, bool compat_mode)
{
	return TC_ACT_UNSPEC;
}
#endif
static inline unsigned long
......
......@@ -1269,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
*/
struct mini_Qdisc {
struct tcf_proto *filter_list;
struct tcf_block *block;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
struct rcu_head rcu;
......@@ -1295,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block);
static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
{
......
......@@ -4848,7 +4848,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
&cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
......
......@@ -22,6 +22,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
......@@ -354,7 +355,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
list_add_tail(&chain->list, &block->chain_list);
list_add_tail_rcu(&chain->list, &block->chain_list);
mutex_init(&chain->filter_chain_lock);
chain->block = block;
chain->index = chain_index;
......@@ -394,7 +395,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
ASSERT_BLOCK_LOCKED(block);
list_del(&chain->list);
list_del_rcu(&chain->list);
if (!chain->index)
block->chain0.chain = NULL;
......@@ -453,6 +454,20 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
return NULL;
}
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
/* Find the chain with index @chain_index on @block's chain list.
 *
 * The list is walked with list_for_each_entry_rcu(), so the caller is
 * expected to provide the RCU read-side protection that iterator requires.
 *
 * Returns the matching chain, or NULL if the block has no such chain.
 */
static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
					      u32 chain_index)
{
	struct tcf_chain *found = NULL;
	struct tcf_chain *pos;

	list_for_each_entry_rcu(pos, &block->chain_list, list) {
		if (pos->index != chain_index)
			continue;

		found = pos;
		break;
	}

	return found;
}
#endif
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
u32 seq, u16 flags, int event, bool unicast);
......@@ -1559,12 +1574,15 @@ static int tcf_block_setup(struct tcf_block *block,
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
*/
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
static inline int __tcf_classify(struct sk_buff *skb,
const struct tcf_proto *tp,
const struct tcf_proto *orig_tp,
struct tcf_result *res,
bool compat_mode,
u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
const int max_reclassify_loop = 4;
const struct tcf_proto *orig_tp = tp;
const struct tcf_proto *first_tp;
int limit = 0;
......@@ -1582,21 +1600,11 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_ACT
if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
first_tp = orig_tp;
*last_executed_chain = first_tp->chain->index;
goto reset;
} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
first_tp = res->goto_tp;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
{
struct tc_skb_ext *ext;
ext = skb_ext_add(skb, TC_SKB_EXT);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = err & TC_ACT_EXT_VAL_MASK;
}
#endif
*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
goto reset;
}
#endif
......@@ -1619,8 +1627,64 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
goto reclassify;
#endif
}
/* Classify @skb starting from filter @tp.
 *
 * Thin wrapper around __tcf_classify(): @tp is passed as both the starting
 * filter and the original filter, and the last-executed-chain value that
 * __tcf_classify() reports is discarded.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	u32 ignored_chain = 0;
	int verdict;

	verdict = __tcf_classify(skb, tp, tp, res, compat_mode,
				 &ignored_chain);

	return verdict;
}
EXPORT_SYMBOL(tcf_classify);
/* Ingress classification with support for resuming from a chain recorded in
 * the skb's TC_SKB_EXT extension.
 *
 * @skb:           packet being classified
 * @ingress_block: block whose chain list is searched for the resume chain
 * @tp:            first filter of the default starting point (may be NULL)
 * @res:           classification result, filled in by __tcf_classify()
 * @compat_mode:   passed through to __tcf_classify()
 *
 * Without CONFIG_NET_TC_SKB_EXT this behaves exactly like tcf_classify().
 *
 * With it, if the skb carries an extension with a non-zero chain, that
 * extension is consumed and classification starts at the chain it names;
 * TC_ACT_SHOT is returned if the chain does not exist on @ingress_block.
 * If classification ends in TC_ACT_UNSPEC on a non-zero chain, the chain
 * index is written into a fresh extension on the skb (TC_ACT_SHOT if the
 * extension cannot be added).
 *
 * Returns the verdict from __tcf_classify(), or TC_ACT_SHOT as noted above.
 */
int tcf_classify_ingress(struct sk_buff *skb,
			 const struct tcf_block *ingress_block,
			 const struct tcf_proto *tp,
			 struct tcf_result *res, bool compat_mode)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 last_executed_chain = 0;

	return __tcf_classify(skb, tp, tp, res, compat_mode,
			      &last_executed_chain);
#else
	u32 last_executed_chain = tp ? tp->chain->index : 0;
	const struct tcf_proto *orig_tp = tp;
	struct tc_skb_ext *ext;
	int ret;

	ext = skb_ext_find(skb, TC_SKB_EXT);

	if (ext && ext->chain) {
		struct tcf_chain *fchain;

		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
		if (!fchain)
			return TC_ACT_SHOT;

		/* Consume, so cloned/redirect skbs won't inherit ext */
		skb_ext_del(skb, TC_SKB_EXT);

		tp = rcu_dereference_bh(fchain->filter_chain);
		/* Classification now starts on fchain. Record it so that a
		 * miss on this chain re-adds the extension below instead of
		 * losing the chain information that was just consumed.
		 */
		last_executed_chain = fchain->index;
	}

	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
			     &last_executed_chain);

	/* If we missed on some chain */
	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
		ext = skb_ext_add(skb, TC_SKB_EXT);
		if (WARN_ON_ONCE(!ext))
			return TC_ACT_SHOT;

		ext->chain = last_executed_chain;
	}

	return ret;
#endif
}
EXPORT_SYMBOL(tcf_classify_ingress);
struct tcf_chain_info {
struct tcf_proto __rcu **pprev;
struct tcf_proto __rcu *next;
......
......@@ -1391,6 +1391,14 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block)
{
miniqp->miniq1.block = block;
miniqp->miniq2.block = block;
}
EXPORT_SYMBOL(mini_qdisc_pair_block_init);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq)
{
......
......@@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
net_inc_ingress_queue();
......@@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
q->block_info.chain_head_change = clsact_chain_head_change;
q->block_info.chain_head_change_priv = &q->miniqp;
return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
if (err)
return err;
mini_qdisc_pair_block_init(&q->miniqp, q->block);
return 0;
}
static void ingress_destroy(struct Qdisc *sch)
......@@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
if (err)
return err;
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment