Commit 073bb189 authored by Saeed Mahameed's avatar Saeed Mahameed Committed by David S. Miller

net/mlx5: Introducing E-Switch and l2 table

E-Switch is the software entity that represents and manages ConnectX4
inter-HCA ethernet l2 switching.

E-Switch has its own Virtual Ports, each Vport/vNIC/VF can be
connected to the device through a vport of an e-switch.

Each e-switch is managed by one vNIC identified by
HCA_CAP.vport_group_manager (usually it is the PF/vport[0]),
and its main responsibility is to forward each packet to the
right vport.

e-Switch needs to manage its own l2-table and FDB tables.

L2 table is a flow table that is managed by FW, it is needed for
Multi-host (Multi PF) configuration for inter HCA switching between
PFs.

FDB table is a flow table that is totally managed by e-Switch driver,
its main responsibility is to switch packets between e-Swtich internal
vports and uplink vport that belong to the same.

This patch introduces only e-Swtich l2 table management, FDB managemnt
will come later when ethernet SRIOV/VFs will be enabled.

preperation for ethernet sriov and l2 table management.
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent aad9e6e4
......@@ -3,6 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \
en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \
en_txrx.o
......@@ -35,6 +35,9 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
#endif
enum {
MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
......@@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
break;
#endif
#ifdef CONFIG_MLX5_CORE_EN
case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
break;
#endif
default:
mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
eqe->type, eq->eqn);
......@@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, pg))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
MLX5_CAP_GEN(dev, vport_group_manager) &&
mlx5_core_is_pf(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
"mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
......
This diff is collapsed.
/*
* Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __MLX5_ESWITCH_H__
#define __MLX5_ESWITCH_H__
#include <linux/mlx5/device.h>
#define MLX5_MAX_UC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
#define MLX5_L2_ADDR_HASH(addr) (addr[5])
/* L2 -mac address based- hash helpers */
struct l2addr_node {
struct hlist_node hlist;
u8 addr[ETH_ALEN];
};
#define for_each_l2hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
#define l2addr_hash_find(hash, mac, type) ({ \
int ix = MLX5_L2_ADDR_HASH(mac); \
bool found = false; \
type *ptr = NULL; \
\
hlist_for_each_entry(ptr, &hash[ix], node.hlist) \
if (ether_addr_equal(ptr->node.addr, mac)) {\
found = true; \
break; \
} \
if (!found) \
ptr = NULL; \
ptr; \
})
#define l2addr_hash_add(hash, mac, type, gfp) ({ \
int ix = MLX5_L2_ADDR_HASH(mac); \
type *ptr = NULL; \
\
ptr = kzalloc(sizeof(type), gfp); \
if (ptr) { \
ether_addr_copy(ptr->node.addr, mac); \
hlist_add_head(&ptr->node.hlist, &hash[ix]);\
} \
ptr; \
})
#define l2addr_hash_del(ptr) ({ \
hlist_del(&ptr->node.hlist); \
kfree(ptr); \
})
struct mlx5_vport {
struct mlx5_core_dev *dev;
int vport;
struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
struct work_struct vport_change_handler;
/* This spinlock protects access to vport data, between
* "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event"
* once vport marked as disabled new interrupts are discarded.
*/
spinlock_t lock; /* vport events sync */
bool enabled;
};
struct mlx5_l2_table {
struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE];
u32 size;
unsigned long *bitmap;
};
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_l2_table l2_table;
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
int total_vports;
};
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
#endif /* __MLX5_ESWITCH_H__ */
......@@ -49,6 +49,9 @@
#include <linux/delay.h>
#include <linux/mlx5/mlx5_ifc.h>
#include "mlx5_core.h"
#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
#endif
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
......@@ -1052,6 +1055,14 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_srq_table(dev);
mlx5_init_mr_table(dev);
#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_init(dev);
if (err) {
dev_err(&pdev->dev, "eswitch init failed %d\n", err);
goto err_reg_dev;
}
#endif
err = mlx5_sriov_init(dev);
if (err) {
dev_err(&pdev->dev, "sriov init failed %d\n", err);
......@@ -1078,6 +1089,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
if (mlx5_sriov_cleanup(dev))
dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
#endif
err_reg_dev:
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
......@@ -1147,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto out;
}
mlx5_unregister_device(dev);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
#endif
mlx5_cleanup_mr_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
......
......@@ -251,6 +251,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
MLX5_EVENT_TYPE_PAGE_FAULT = 0xc,
MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd,
};
enum {
......@@ -520,6 +521,12 @@ struct mlx5_eqe_page_fault {
__be32 flags_qpn;
} __packed;
struct mlx5_eqe_vport_change {
u8 rsvd0[2];
__be16 vport_num;
__be32 rsvd1[6];
} __packed;
union ev_data {
__be32 raw[7];
struct mlx5_eqe_cmd cmd;
......@@ -532,6 +539,7 @@ union ev_data {
struct mlx5_eqe_stall_vl stall_vl;
struct mlx5_eqe_page_req req_pages;
struct mlx5_eqe_page_fault page_fault;
struct mlx5_eqe_vport_change vport_change;
} __packed;
struct mlx5_eqe {
......
......@@ -441,6 +441,8 @@ struct mlx5_irq_info {
char name[MLX5_MAX_IRQ_NAME];
};
struct mlx5_eswitch;
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
......@@ -496,6 +498,8 @@ struct mlx5_priv {
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
struct mlx5_eswitch *eswitch;
struct mlx5_core_sriov sriov;
unsigned long pci_dev_data;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment