Commit 7937963a authored by David S. Miller's avatar David S. Miller

Merge branch 'mlx5-enhanced-flow-steering'

Or Gerlitz says:

====================
net/mlx5_core: Enhance flow steering support

v0 --> v1 changes:
  - fixed improperly formatted comments.
  - compare value of ib_spec->eth.mask.ether_type in network byte order
     in ('IB/mlx5: Add flow steering utilities').

v1 --> v2 changes:
  - made sure that service functions added in the IB driver are only static-fied
    on the last commit, to make sure bisection with -Werror works fine.

v2 --> v3 changes:
   - squashed patches 11 and 12 into one patch, s.t Dave's comment
     on unused static functions gcc complaints during bisection is
     correctly addressed.

v3 has been generated against net-next commit c9c99311 "Merge tag
'batman-adv-for-davem' of git://git.open-mesh.org/linux-merge"

The series is signed by Matan who was revently assigned to a maintainer for
the mlx5_core and IB drivers (this is a 4.5-rc1 change to the maintainers file coming
from the rdma tree) -- as such I didn't see a neeed to add my signature (Or).

This series adds three new functionalists to the driver flow-steering
infrastructure: auto-grouped flow tables, chaining of flow tables and
updates for the root flow table.

1. Auto-grouped flow tables - Flow table with auto grouping management.
When a flow table is created, hints regarding the number of rule types
and the number of rules are given in advance. Thus, a flow table is
divided into #NUM_TYPES+1 groups each contains
(#NUM_RULES)/(#NUM_TYPES+1) rules. The first #NUM_TYPES parts are groups
which are filled if the added rule matches the group specification or
the group is empty. The last part is filled by rules that can't fit
any of the former groups.

2. Chaining flow tables - Flow tables from different priorities are chained
together, if there is no match in flow table of priority i we continue
searching for a match in priority i+1. This is both true if priorities
i and i+1 belongs to the same namespace or not.

3. Updating the root flow table - the root flow table is the flow table
with the lowest level. The hardware start searching for a match in the
root flow table and continue according to the matches it find along
the way.

The first usage for the new functionality is flow steering for user-space
ConnectX-4 offloaded HW Eth RX queues done through the mlx5 IB driver.

When the mlx5 core driver is loaded, it opens three flow namespaces:
1. By-pass namespace (used by mlx5 IB driver).
2. Kernel namespace (used in order to get packets to the networking stack
through mlx5 EN driver).
3. Leftovers namespace (used by mlx5 IB and future sniffer)

The series is built as follows:

Patch #1 introduces auto-grouped flow tables support.

Patch #2 add utility functions for finding the next and the previous
flow tables in different priorities. This is used in order to chain
the flow tables in a downstream patch.

Patch #3 introduces a firmware command for updating the root flow table.

Patch #4 introduces modify flow table firmware command, this command is used
when we want to change the next flow table of an existing flow table.
This is used for chaining flow tables as well.

Patch #5 connect/disconnect flow tables. This is actually the chaining
process when we want to link flow tables. This means that if we couldn't
find a match in the first flow table, we'll continue in the chained
flow table.

Patch #6 updates priority's attributes that is required for flow table
level allocation. We update both the max_fts (the number of allowed FTs
in the sub-tree of this priority) and the start_level (which is the first
level we'll assign to the flow-tables created inside the priority).

Patch #7 adds checking of required device capabilities. Some namespaces
could be only created if the hardware supports certain attributes.
This is especially true for the Bypass and leftovers namespaces. This
adds a generic mechanism to check these required attributes.

Patch #8 creates two additional namespaces:
	a. Bypass flow rules(has nine priorities)
	b. Leftovers packets(have one priority) - for unmatched packets.

Patch #9 re-factors ipv4/ipv6 match fields in the mlx5 firmware interface
header to be more clear.

Patch #10 exports the flow steering API for mlx5_ib usage

Patch #11 implements the required support in mlx5_ib in order
to support the RDMA flow steering verbs.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 23c09c26 038d2ef8
......@@ -43,6 +43,9 @@
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
#include "user.h"
#include "mlx5_ib.h"
......@@ -835,6 +838,457 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
static bool outer_header_zero(u32 *match_criteria)
{
int size = MLX5_ST_SZ_BYTES(fte_match_param);
char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
outer_headers);
return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
outer_headers_c + 1,
size - 1);
}
static int parse_flow_attr(u32 *match_c, u32 *match_v,
union ib_flow_spec *ib_spec)
{
void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
if (ib_spec->size != sizeof(ib_spec->eth))
return -EINVAL;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
dmac_47_16),
ib_spec->eth.mask.dst_mac);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
vlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_vid, ntohs(ib_spec->eth.val.vlan_tag));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_cfi,
ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_cfi,
ntohs(ib_spec->eth.val.vlan_tag) >> 12);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
first_prio,
ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
first_prio,
ntohs(ib_spec->eth.val.vlan_tag) >> 13);
}
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
ethertype, ntohs(ib_spec->eth.mask.ether_type));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
ethertype, ntohs(ib_spec->eth.val.ether_type));
break;
case IB_FLOW_SPEC_IPV4:
if (ib_spec->size != sizeof(ib_spec->ipv4))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
ethertype, 0xffff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
ethertype, ETH_P_IP);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.src_ip,
sizeof(ib_spec->ipv4.mask.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.src_ip,
sizeof(ib_spec->ipv4.val.src_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.dst_ip,
sizeof(ib_spec->ipv4.mask.dst_ip));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
break;
case IB_FLOW_SPEC_TCP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
IPPROTO_TCP);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
case IB_FLOW_SPEC_UDP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
IPPROTO_UDP);
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
default:
return -EINVAL;
}
return 0;
}
/* If a flow could catch both multicast and unicast packets,
* it won't fall into the multicast flow steering table and this rule
* could steal other multicast packets.
*/
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
struct ib_flow_spec_eth *eth_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
ib_attr->size < sizeof(struct ib_flow_attr) +
sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
if (eth_spec->type != IB_FLOW_SPEC_ETH ||
eth_spec->size != sizeof(*eth_spec))
return false;
return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
static bool is_valid_attr(struct ib_flow_attr *flow_attr)
{
union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
bool has_ipv4_spec = false;
bool eth_type_ipv4 = true;
unsigned int spec_index;
/* Validate that ethertype is correct */
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
if (ib_spec->type == IB_FLOW_SPEC_ETH &&
ib_spec->eth.mask.ether_type) {
if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
eth_type_ipv4 = false;
} else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
has_ipv4_spec = true;
}
ib_spec = (void *)ib_spec + ib_spec->size;
}
return !has_ipv4_spec || eth_type_ipv4;
}
static void put_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio, bool ft_added)
{
prio->refcount -= !!ft_added;
if (!prio->refcount) {
mlx5_destroy_flow_table(prio->flow_table);
prio->flow_table = NULL;
}
}
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
struct mlx5_ib_flow_handler *handler = container_of(flow_id,
struct mlx5_ib_flow_handler,
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
mutex_lock(&dev->flow_db.lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rule(iter->rule);
list_del(&iter->list);
kfree(iter);
}
mlx5_del_flow_rule(handler->rule);
put_flow_table(dev, &dev->flow_db.prios[handler->prio], true);
mutex_unlock(&dev->flow_db.lock);
kfree(handler);
return 0;
}
#define MLX5_FS_MAX_TYPES 10
#define MLX5_FS_MAX_ENTRIES 32000UL
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr)
{
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
int num_entries;
int num_groups;
int priority;
int err = 0;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_is_multicast_only(flow_attr))
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = flow_attr->priority;
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
prio = &dev->flow_db.prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_LEFTOVERS);
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
}
if (!ns)
return ERR_PTR(-ENOTSUPP);
ft = prio->flow_table;
if (!ft) {
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups);
if (!IS_ERR(ft)) {
prio->refcount = 0;
prio->flow_table = ft;
} else {
err = PTR_ERR(ft);
}
}
return err ? ERR_PTR(err) : prio;
}
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
void *ib_flow = flow_attr + 1;
u8 match_criteria_enable = 0;
unsigned int spec_index;
u32 *match_c;
u32 *match_v;
int err = 0;
if (!is_valid_attr(flow_attr))
return ERR_PTR(-EINVAL);
match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !match_c || !match_v) {
err = -ENOMEM;
goto free;
}
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(match_c, match_v, ib_flow);
if (err < 0)
goto free;
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
/* Outer header support only */
match_criteria_enable = (!outer_header_zero(match_c)) << 0;
handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
match_c, match_v,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
MLX5_FS_DEFAULT_FLOW_TAG,
dst);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
goto free;
}
handler->prio = ft_prio - dev->flow_db.prios;
ft_prio->flow_table = ft;
free:
if (err)
kfree(handler);
kfree(match_c);
kfree(match_v);
return err ? ERR_PTR(err) : handler;
}
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
};
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_ib_flow_handler *handler_ucast = NULL;
struct mlx5_ib_flow_handler *handler = NULL;
static struct {
struct ib_flow_attr flow_attr;
struct ib_flow_spec_eth eth_flow;
} leftovers_specs[] = {
[LEFTOVERS_MC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {0x1} }
}
},
[LEFTOVERS_UC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {} }
}
}
};
handler = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_MC].flow_attr,
dst);
if (!IS_ERR(handler) &&
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
handler_ucast = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_UC].flow_attr,
dst);
if (IS_ERR(handler_ucast)) {
kfree(handler);
handler = handler_ucast;
} else {
list_add(&handler_ucast->list, &handler->list);
}
}
return handler;
}
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio;
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOSPC);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
flow_attr->flags)
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
mutex_lock(&dev->flow_db.lock);
ft_prio = get_flow_table(dev, flow_attr);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
handler = create_flow_rule(dev, ft_prio, flow_attr,
dst);
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
handler = create_leftovers_rule(dev, ft_prio, flow_attr,
dst);
} else {
err = -EINVAL;
goto destroy_ft;
}
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
handler = NULL;
goto destroy_ft;
}
ft_prio->refcount++;
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
return &handler->ibflow;
destroy_ft:
put_flow_table(dev, ft_prio, false);
unlock:
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
......@@ -1439,10 +1893,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
}
err = init_node_data(dev);
if (err)
goto err_dealloc;
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
err = create_dev_resources(&dev->devr);
......
......@@ -105,6 +105,36 @@ struct mlx5_ib_pd {
u32 pdn;
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
#error "Invalid number of bypass priorities"
#endif
#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1)
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
unsigned int refcount;
};
struct mlx5_ib_flow_handler {
struct list_head list;
struct ib_flow ibflow;
unsigned int prio;
struct mlx5_flow_rule *rule;
};
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
* simultaneously.
*/
struct mutex lock;
};
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
......@@ -171,9 +201,21 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
struct mlx5_ib_rq {
u32 tirn;
};
struct mlx5_ib_raw_packet_qp {
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
struct mlx5_core_qp mqp;
union {
struct mlx5_core_qp mqp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
struct mlx5_buf buf;
struct mlx5_db db;
......@@ -431,6 +473,7 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
#endif
struct mlx5_ib_flow_db flow_db;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
......
......@@ -38,9 +38,28 @@
#include "fs_cmd.h"
#include "mlx5_core.h"
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
memset(in, 0, sizeof(in));
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
sizeof(out));
}
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, unsigned int *table_id)
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id)
{
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
......@@ -51,6 +70,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
if (next_ft) {
MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
}
MLX5_SET(create_flow_table_in, in, table_type, type);
MLX5_SET(create_flow_table_in, in, level, level);
MLX5_SET(create_flow_table_in, in, log_size, log_size);
......@@ -83,6 +106,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
sizeof(out));
}
int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
memset(in, 0, sizeof(in));
memset(out, 0, sizeof(out));
MLX5_SET(modify_flow_table_in, in, opcode,
MLX5_CMD_OP_MODIFY_FLOW_TABLE);
MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
MLX5_SET(modify_flow_table_in, in, modify_field_select,
MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
if (next_ft) {
MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
} else {
MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
}
return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
sizeof(out));
}
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in,
......
......@@ -35,11 +35,16 @@
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, unsigned int *table_id);
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id);
int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft);
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in, unsigned int *group_id);
......@@ -62,4 +67,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned int index);
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
#endif
......@@ -40,51 +40,83 @@
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
#define INIT_PRIO(min_level_val, max_ft_val,\
start_level_val, ...) {.type = FS_TYPE_PRIO,\
#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\
...) {.type = FS_TYPE_PRIO,\
.min_ft_level = min_level_val,\
.start_level = start_level_val,\
.max_ft = max_ft_val,\
.num_leaf_prios = num_prios_val,\
.caps = caps_val,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
#define ADD_PRIO(min_level_val, max_ft_val, start_level_val, ...)\
INIT_PRIO(min_level_val, max_ft_val, start_level_val,\
__VA_ARGS__)\
#define ADD_FT_PRIO(max_ft_val, start_level_val, ...)\
INIT_PRIO(0, max_ft_val, start_level_val,\
__VA_ARGS__)\
#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\
ADD_PRIO(num_prios_val, 0, max_ft_val, {},\
__VA_ARGS__)\
#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
.children = (struct init_tree_node[]) {__VA_ARGS__},\
.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
#define KERNEL_START_LEVEL 0
#define KERNEL_P0_START_LEVEL KERNEL_START_LEVEL
#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
sizeof(long))
#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
.caps = (long[]) {__VA_ARGS__} }
#define LEFTOVERS_MAX_FT 1
#define LEFTOVERS_NUM_PRIOS 1
#define BY_PASS_PRIO_MAX_FT 1
#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
LEFTOVERS_MAX_FT)
#define KERNEL_MAX_FT 2
#define KERNEL_NUM_PRIOS 1
#define KENREL_MIN_LEVEL 2
struct node_caps {
size_t arr_sz;
long *caps;
};
static struct init_tree_node {
enum fs_node_type type;
struct init_tree_node *children;
int ar_size;
struct node_caps caps;
int min_ft_level;
int num_leaf_prios;
int prio;
int max_ft;
int start_level;
} root_fs = {
.type = FS_TYPE_NAMESPACE,
.ar_size = 1,
.ar_size = 3,
.children = (struct init_tree_node[]) {
ADD_PRIO(KENREL_MIN_LEVEL, KERNEL_MAX_FT,
KERNEL_START_LEVEL,
ADD_NS(ADD_FT_PRIO(KERNEL_MAX_FT,
KERNEL_P0_START_LEVEL))),
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
FS_CAP(flow_table_properties_nic_receive.modify_root),
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))),
ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {},
ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))),
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
FS_CAP(flow_table_properties_nic_receive.modify_root),
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))),
}
};
enum fs_i_mutex_lock_class {
FS_MUTEX_GRANDPARENT,
FS_MUTEX_PARENT,
FS_MUTEX_CHILD
};
static void del_rule(struct fs_node *node);
static void del_flow_table(struct fs_node *node);
static void del_flow_group(struct fs_node *node);
......@@ -119,10 +151,11 @@ static void tree_get_node(struct fs_node *node)
atomic_inc(&node->refcount);
}
static void nested_lock_ref_node(struct fs_node *node)
static void nested_lock_ref_node(struct fs_node *node,
enum fs_i_mutex_lock_class class)
{
if (node) {
mutex_lock_nested(&node->lock, SINGLE_DEPTH_NESTING);
mutex_lock_nested(&node->lock, class);
atomic_inc(&node->refcount);
}
}
......@@ -436,10 +469,162 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
return ft;
}
/* If reverse is false, then we search for the first flow table in the
* root sub-tree from start(closest from right), else we search for the
* last flow table in the root sub-tree till start(closest from left).
*/
static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct list_head *start,
bool reverse)
{
#define list_advance_entry(pos, reverse) \
((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
#define list_for_each_advance_continue(pos, head, reverse) \
for (pos = list_advance_entry(pos, reverse); \
&pos->list != (head); \
pos = list_advance_entry(pos, reverse))
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;
if (!root)
return NULL;
list_for_each_advance_continue(iter, &root->children, reverse) {
if (iter->type == FS_TYPE_FLOW_TABLE) {
fs_get_obj(ft, iter);
return ft;
}
ft = find_closest_ft_recursive(iter, &iter->children, reverse);
if (ft)
return ft;
}
return ft;
}
/* If reverse if false then return the first flow table in next priority of
* prio in the tree, else return the last flow table in the previous priority
* of prio in the tree.
*/
static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
{
struct mlx5_flow_table *ft = NULL;
struct fs_node *curr_node;
struct fs_node *parent;
parent = prio->node.parent;
curr_node = &prio->node;
while (!ft && parent) {
ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
curr_node = parent;
parent = curr_node->parent;
}
return ft;
}
/* Assuming all the tree is locked by mutex chain lock */
static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
{
return find_closest_ft(prio, false);
}
/* Assuming all the tree is locked by mutex chain lock */
static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
{
return find_closest_ft(prio, true);
}
static int connect_fts_in_prio(struct mlx5_core_dev *dev,
struct fs_prio *prio,
struct mlx5_flow_table *ft)
{
struct mlx5_flow_table *iter;
int i = 0;
int err;
fs_for_each_ft(iter, prio) {
i++;
err = mlx5_cmd_modify_flow_table(dev,
iter,
ft);
if (err) {
mlx5_core_warn(dev, "Failed to modify flow table %d\n",
iter->id);
/* The driver is out of sync with the FW */
if (i > 1)
WARN_ON(true);
return err;
}
}
return 0;
}
/* Connect flow tables from previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
struct mlx5_flow_table *prev_ft;
prev_ft = find_prev_chained_ft(prio);
if (prev_ft) {
struct fs_prio *prev_prio;
fs_get_obj(prev_prio, prev_ft->node.parent);
return connect_fts_in_prio(dev, prev_prio, ft);
}
return 0;
}
static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
*prio)
{
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
int min_level = INT_MAX;
int err;
if (root->root_ft)
min_level = root->root_ft->level;
if (ft->level >= min_level)
return 0;
err = mlx5_cmd_update_root_ft(root->dev, ft);
if (err)
mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
ft->id);
else
root->root_ft = ft;
return err;
}
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
int err = 0;
/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
if (list_empty(&prio->node.children)) {
err = connect_prev_fts(dev, ft, prio);
if (err)
return err;
}
if (MLX5_CAP_FLOWTABLE(dev,
flow_table_properties_nic_receive.modify_root))
err = update_root_ft_create(ft, prio);
return err;
}
struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int max_fte)
{
struct mlx5_flow_table *next_ft = NULL;
struct mlx5_flow_table *ft;
int err;
int log_table_sz;
......@@ -452,14 +637,15 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
return ERR_PTR(-ENODEV);
}
mutex_lock(&root->chain_lock);
fs_prio = find_prio(ns, prio);
if (!fs_prio)
return ERR_PTR(-EINVAL);
lock_ref_node(&fs_prio->node);
if (!fs_prio) {
err = -EINVAL;
goto unlock_root;
}
if (fs_prio->num_ft == fs_prio->max_ft) {
err = -ENOSPC;
goto unlock_prio;
goto unlock_root;
}
ft = alloc_flow_table(find_next_free_level(fs_prio),
......@@ -467,32 +653,63 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
root->table_type);
if (!ft) {
err = -ENOMEM;
goto unlock_prio;
goto unlock_root;
}
tree_init_node(&ft->node, 1, del_flow_table);
log_table_sz = ilog2(ft->max_fte);
next_ft = find_next_chained_ft(fs_prio);
err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level,
log_table_sz, &ft->id);
log_table_sz, next_ft, &ft->id);
if (err)
goto free_ft;
err = connect_flow_table(root->dev, ft, fs_prio);
if (err)
goto destroy_ft;
lock_ref_node(&fs_prio->node);
tree_add_node(&ft->node, &fs_prio->node);
list_add_tail(&ft->node.list, &fs_prio->node.children);
fs_prio->num_ft++;
unlock_ref_node(&fs_prio->node);
mutex_unlock(&root->chain_lock);
return ft;
destroy_ft:
mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
kfree(ft);
unlock_prio:
unlock_ref_node(&fs_prio->node);
unlock_root:
mutex_unlock(&root->chain_lock);
return ERR_PTR(err);
}
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups)
{
struct mlx5_flow_table *ft;
if (max_num_groups > num_flow_table_entries)
return ERR_PTR(-EINVAL);
ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries);
if (IS_ERR(ft))
return ft;
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
/* Flow table should be locked */
static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
u32 *fg_in,
struct list_head
*prev_fg,
bool is_auto_fg)
{
struct mlx5_flow_group *fg;
struct mlx5_core_dev *dev = get_dev(&ft->node);
......@@ -505,18 +722,33 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (IS_ERR(fg))
return fg;
lock_ref_node(&ft->node);
err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
if (err) {
kfree(fg);
unlock_ref_node(&ft->node);
return ERR_PTR(err);
}
if (ft->autogroup.active)
ft->autogroup.num_groups++;
/* Add node to tree */
tree_init_node(&fg->node, 1, del_flow_group);
tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
tree_add_node(&fg->node, &ft->node);
/* Add node to group list */
list_add(&fg->node.list, ft->node.children.prev);
return fg;
}
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
struct mlx5_flow_group *fg;
if (ft->autogroup.active)
return ERR_PTR(-EPERM);
lock_ref_node(&ft->node);
fg = create_flow_group_common(ft, fg_in, &ft->node.children, false);
unlock_ref_node(&ft->node);
return fg;
......@@ -614,7 +846,63 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
return fte;
}
/* Assuming parent fg(flow table) is locked */
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
u32 *match_criteria)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct list_head *prev = &ft->node.children;
unsigned int candidate_index = 0;
struct mlx5_flow_group *fg;
void *match_criteria_addr;
unsigned int group_size = 0;
u32 *in;
if (!ft->autogroup.active)
return ERR_PTR(-ENOENT);
in = mlx5_vzalloc(inlen);
if (!in)
return ERR_PTR(-ENOMEM);
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
/* We save place for flow groups in addition to max types */
group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
/* ft->max_fte == ft->autogroup.max_types */
if (group_size == 0)
group_size = 1;
/* sorted by start_index */
fs_for_each_fg(fg, ft) {
if (candidate_index + group_size > fg->start_index)
candidate_index = fg->start_index + fg->max_ftes;
else
break;
prev = &fg->node.list;
}
if (candidate_index + group_size > ft->max_fte) {
fg = ERR_PTR(-ENOSPC);
goto out;
}
MLX5_SET(create_flow_group_in, in, match_criteria_enable,
match_criteria_enable);
MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index +
group_size - 1);
match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
in, match_criteria);
memcpy(match_criteria_addr, match_criteria,
MLX5_ST_SZ_BYTES(fte_match_param));
fg = create_flow_group_common(ft, in, prev, true);
out:
kvfree(in);
return fg;
}
static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
u32 *match_value,
u8 action,
......@@ -626,9 +914,9 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
struct mlx5_flow_table *ft;
struct list_head *prev;
lock_ref_node(&fg->node);
nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
fs_for_each_fte(fte, fg) {
nested_lock_ref_node(&fte->node);
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
if (compare_match_value(&fg->mask, match_value, &fte->val) &&
action == fte->action && flow_tag == fte->flow_tag) {
rule = add_rule_fte(fte, fg, dest);
......@@ -669,6 +957,33 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
return rule;
}
static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
u32 *match_criteria,
u32 *match_value,
u8 action,
u32 flow_tag,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_rule *rule;
struct mlx5_flow_group *g;
g = create_autogroup(ft, match_criteria_enable, match_criteria);
if (IS_ERR(g))
return (void *)g;
rule = add_rule_fg(g, match_value,
action, flow_tag, dest);
if (IS_ERR(rule)) {
/* Remove assumes refcount > 0 and autogroup creates a group
* with a refcount = 0.
*/
tree_get_node(&g->node);
tree_remove_node(&g->node);
}
return rule;
}
struct mlx5_flow_rule *
mlx5_add_flow_rule(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
......@@ -679,39 +994,115 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule = ERR_PTR(-EINVAL);
struct mlx5_flow_rule *rule;
tree_get_node(&ft->node);
lock_ref_node(&ft->node);
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
match_criteria_enable,
g->mask.match_criteria,
match_criteria)) {
unlock_ref_node(&ft->node);
rule = add_rule_fg(g, match_value,
action, flow_tag, dest);
goto put;
if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
goto unlock;
}
rule = add_rule_to_auto_fg(ft, match_criteria_enable, match_criteria,
match_value, action, flow_tag, dest);
unlock:
unlock_ref_node(&ft->node);
put:
tree_put_node(&ft->node);
return rule;
}
EXPORT_SYMBOL(mlx5_add_flow_rule);
void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
{
tree_remove_node(&rule->node);
}
EXPORT_SYMBOL(mlx5_del_flow_rule);
/* Assuming prio->node.children(flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
struct fs_prio *prio;
fs_get_obj(prio, ft->node.parent);
if (!list_is_last(&ft->node.list, &prio->node.children))
return list_next_entry(ft, node.list);
return find_next_chained_ft(prio);
}
static int update_root_ft_destroy(struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
struct mlx5_flow_table *new_root_ft = NULL;
if (root->root_ft != ft)
return 0;
new_root_ft = find_next_ft(ft);
if (new_root_ft) {
int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft);
if (err) {
mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
ft->id);
return err;
}
root->root_ft = new_root_ft;
}
return 0;
}
/* Connect flow table from previous priority to
* the next flow table.
*/
static int disconnect_flow_table(struct mlx5_flow_table *ft)
{
struct mlx5_core_dev *dev = get_dev(&ft->node);
struct mlx5_flow_table *next_ft;
struct fs_prio *prio;
int err = 0;
err = update_root_ft_destroy(ft);
if (err)
return err;
fs_get_obj(prio, ft->node.parent);
if (!(list_first_entry(&prio->node.children,
struct mlx5_flow_table,
node.list) == ft))
return 0;
next_ft = find_next_chained_ft(prio);
err = connect_prev_fts(dev, next_ft, prio);
if (err)
mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
ft->id);
return err;
}
int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
int err = 0;
mutex_lock(&root->chain_lock);
err = disconnect_flow_table(ft);
if (err) {
mutex_unlock(&root->chain_lock);
return err;
}
if (tree_remove_node(&ft->node))
mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
ft->id);
mutex_unlock(&root->chain_lock);
return 0;
return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);
void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
......@@ -732,8 +1123,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return NULL;
switch (type) {
case MLX5_FLOW_NAMESPACE_BYPASS:
case MLX5_FLOW_NAMESPACE_KERNEL:
prio = 0;
case MLX5_FLOW_NAMESPACE_LEFTOVERS:
prio = type;
break;
case MLX5_FLOW_NAMESPACE_FDB:
if (dev->priv.fdb_root_ns)
......@@ -754,10 +1147,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return ns;
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);
static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
unsigned prio, int max_ft,
int start_level)
unsigned prio, int max_ft)
{
struct fs_prio *fs_prio;
......@@ -770,7 +1163,6 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->max_ft = max_ft;
fs_prio->prio = prio;
fs_prio->start_level = start_level;
list_add_tail(&fs_prio->node.list, &ns->node.children);
return fs_prio;
......@@ -800,11 +1192,45 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
return ns;
}
static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *init_node,
static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node
*prio_metadata)
{
struct fs_prio *fs_prio;
int i;
for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft);
if (IS_ERR(fs_prio))
return PTR_ERR(fs_prio);
}
return 0;
}
#define FLOW_TABLE_BIT_SZ 1
#define GET_FLOW_TABLE_CAP(dev, offset) \
((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \
offset / 32)) >> \
(32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
{
int i;
for (i = 0; i < caps->arr_sz; i++) {
if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
return false;
}
return true;
}
static int init_root_tree_recursive(struct mlx5_core_dev *dev,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node,
struct init_tree_node *init_parent_node,
int index)
{
int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
flow_table_properties_nic_receive.
max_ft_level);
struct mlx5_flow_namespace *fs_ns;
struct fs_prio *fs_prio;
struct fs_node *base;
......@@ -812,12 +1238,14 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
int err;
if (init_node->type == FS_TYPE_PRIO) {
if (init_node->min_ft_level > max_ft_level)
return -ENOTSUPP;
if ((init_node->min_ft_level > max_ft_level) ||
!has_required_caps(dev, &init_node->caps))
return 0;
fs_get_obj(fs_ns, fs_parent_node);
fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft,
init_node->start_level);
if (init_node->num_leaf_prios)
return create_leaf_prios(fs_ns, init_node);
fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft);
if (IS_ERR(fs_prio))
return PTR_ERR(fs_prio);
base = &fs_prio->node;
......@@ -831,9 +1259,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
return -EINVAL;
}
for (i = 0; i < init_node->ar_size; i++) {
err = init_root_tree_recursive(max_ft_level,
&init_node->children[i], base,
init_node, i);
err = init_root_tree_recursive(dev, &init_node->children[i],
base, init_node, i);
if (err)
return err;
}
......@@ -841,7 +1268,8 @@ static int init_root_tree_recursive(int max_ft_level, struct init_tree_node *ini
return 0;
}
static int init_root_tree(int max_ft_level, struct init_tree_node *init_node,
static int init_root_tree(struct mlx5_core_dev *dev,
struct init_tree_node *init_node,
struct fs_node *fs_parent_node)
{
int i;
......@@ -850,8 +1278,7 @@ static int init_root_tree(int max_ft_level, struct init_tree_node *init_node,
fs_get_obj(fs_ns, fs_parent_node);
for (i = 0; i < init_node->ar_size; i++) {
err = init_root_tree_recursive(max_ft_level,
&init_node->children[i],
err = init_root_tree_recursive(dev, &init_node->children[i],
&fs_ns->node,
init_node, i);
if (err)
......@@ -877,25 +1304,65 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev
ns = &root_ns->ns;
fs_init_namespace(ns);
mutex_init(&root_ns->chain_lock);
tree_init_node(&ns->node, 1, NULL);
tree_add_node(&ns->node, NULL);
return root_ns;
}
static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
{
struct fs_prio *prio;
fs_for_each_prio(prio, ns) {
/* This updates prio start_level and max_ft */
set_prio_attrs_in_prio(prio, acc_level);
acc_level += prio->max_ft;
}
return acc_level;
}
static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
{
struct mlx5_flow_namespace *ns;
int acc_level_ns = acc_level;
prio->start_level = acc_level;
fs_for_each_ns(ns, prio)
/* This updates start_level and max_ft of ns's priority descendants */
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
if (!prio->max_ft)
prio->max_ft = acc_level_ns - prio->start_level;
WARN_ON(prio->max_ft < acc_level_ns - prio->start_level);
}
static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
{
struct mlx5_flow_namespace *ns = &root_ns->ns;
struct fs_prio *prio;
int start_level = 0;
fs_for_each_prio(prio, ns) {
set_prio_attrs_in_prio(prio, start_level);
start_level += prio->max_ft;
}
}
static int init_root_ns(struct mlx5_core_dev *dev)
{
int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
flow_table_properties_nic_receive.
max_ft_level);
dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX);
if (IS_ERR_OR_NULL(dev->priv.root_ns))
goto cleanup;
if (init_root_tree(max_ft_level, &root_fs, &dev->priv.root_ns->ns.node))
if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node))
goto cleanup;
set_prio_attrs(dev->priv.root_ns);
return 0;
cleanup:
......@@ -1019,7 +1486,7 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev)
return -ENOMEM;
/* Create single prio */
prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1, 0);
prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1);
if (IS_ERR(prio)) {
cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
return PTR_ERR(prio);
......
......@@ -77,6 +77,11 @@ struct mlx5_flow_table {
unsigned int max_fte;
unsigned int level;
enum fs_flow_table_type type;
struct {
bool active;
unsigned int required_groups;
unsigned int num_groups;
} autogroup;
};
/* Type of children is mlx5_flow_rule */
......@@ -124,6 +129,9 @@ struct mlx5_flow_root_namespace {
struct mlx5_flow_namespace ns;
enum fs_flow_table_type table_type;
struct mlx5_core_dev *dev;
struct mlx5_flow_table *root_ft;
/* Should be held when chaining flow tables */
struct mutex chain_lock;
};
int mlx5_init_fs(struct mlx5_core_dev *dev);
......@@ -143,6 +151,12 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
#define fs_for_each_prio(pos, ns) \
fs_list_for_each_entry(pos, &(ns)->node.children)
#define fs_for_each_ns(pos, prio) \
fs_list_for_each_entry(pos, &(prio)->node.children)
#define fs_for_each_ft(pos, prio) \
fs_list_for_each_entry(pos, &(prio)->node.children)
#define fs_for_each_fg(pos, ft) \
fs_list_for_each_entry(pos, &(ft)->node.children)
......
......@@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}
#define MLX5_BY_PASS_NUM_PRIOS 9
#endif /* MLX5_DEVICE_H */
......@@ -38,8 +38,20 @@
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
#define LEFTOVERS_RULE_NUM 2
static inline void build_leftovers_ft_param(int *priority,
int *n_ent,
int *n_grp)
{
*priority = 0; /* Priority of leftovers_prio-0 */
*n_ent = LEFTOVERS_RULE_NUM;
*n_grp = LEFTOVERS_RULE_NUM;
}
enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_BYPASS,
MLX5_FLOW_NAMESPACE_KERNEL,
MLX5_FLOW_NAMESPACE_LEFTOVERS,
MLX5_FLOW_NAMESPACE_FDB,
};
......@@ -61,6 +73,12 @@ struct mlx5_flow_namespace *
mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type);
struct mlx5_flow_table *
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups);
struct mlx5_flow_table *
mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
......
......@@ -185,6 +185,7 @@ enum {
MLX5_CMD_OP_MODIFY_RQT = 0x917,
MLX5_CMD_OP_DESTROY_RQT = 0x918,
MLX5_CMD_OP_QUERY_RQT = 0x919,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f,
MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930,
MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931,
MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932,
......@@ -193,7 +194,8 @@ enum {
MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935,
MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936,
MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937,
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c
};
struct mlx5_ifc_flow_table_fields_supported_bits {
......@@ -258,7 +260,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 ft_support[0x1];
u8 reserved_0[0x2];
u8 flow_modify_en[0x1];
u8 reserved_1[0x1c];
u8 modify_root[0x1];
u8 identified_miss_table_mode[0x1];
u8 flow_table_modify[0x1];
u8 reserved_1[0x19];
u8 reserved_2[0x2];
u8 log_max_ft_size[0x6];
......@@ -293,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_1[0x1a];
};
struct mlx5_ifc_ipv4_layout_bits {
u8 reserved_0[0x60];
u8 ipv4[0x20];
};
struct mlx5_ifc_ipv6_layout_bits {
u8 ipv6[16][0x8];
};
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
u8 reserved_0[0x80];
};
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
......@@ -323,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 udp_sport[0x10];
u8 udp_dport[0x10];
u8 src_ip[4][0x20];
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
u8 dst_ip[4][0x20];
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
};
struct mlx5_ifc_fte_match_set_misc_bits {
......@@ -5667,12 +5688,16 @@ struct mlx5_ifc_create_flow_table_in_bits {
u8 reserved_4[0x20];
u8 reserved_5[0x8];
u8 reserved_5[0x4];
u8 table_miss_mode[0x4];
u8 level[0x8];
u8 reserved_6[0x8];
u8 log_size[0x8];
u8 reserved_7[0x120];
u8 reserved_7[0x8];
u8 table_miss_id[0x18];
u8 reserved_8[0x100];
};
struct mlx5_ifc_create_flow_group_out_bits {
......@@ -6946,4 +6971,72 @@ union mlx5_ifc_uplink_pci_interface_document_bits {
u8 reserved_0[0x20060];
};
struct mlx5_ifc_set_flow_table_root_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
u8 syndrome[0x20];
u8 reserved_1[0x40];
};
struct mlx5_ifc_set_flow_table_root_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
u8 reserved_2[0x40];
u8 table_type[0x8];
u8 reserved_3[0x18];
u8 reserved_4[0x8];
u8 table_id[0x18];
u8 reserved_5[0x140];
};
enum {
MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
};
struct mlx5_ifc_modify_flow_table_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
u8 syndrome[0x20];
u8 reserved_1[0x40];
};
struct mlx5_ifc_modify_flow_table_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
u8 reserved_2[0x20];
u8 reserved_3[0x10];
u8 modify_field_select[0x10];
u8 table_type[0x8];
u8 reserved_4[0x18];
u8 reserved_5[0x8];
u8 table_id[0x18];
u8 reserved_6[0x4];
u8 table_miss_mode[0x4];
u8 reserved_7[0x18];
u8 reserved_8[0x8];
u8 table_miss_id[0x18];
u8 reserved_9[0x100];
};
#endif /* MLX5_IFC_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment