Commit c446d9da authored by Mark Bloch, committed by Saeed Mahameed

RDMA/mlx5: Add shared FDB support

Shared FDB allows creating a single RDMA device that holds representors
from both eswitches. As shared FDB is only active when both uplink
representors are enslaved, there is a single RDMA port that represents
both uplinks.

The number of ports is the number of vports on both eswitches minus one
as we only need 1 port for both uplinks.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 979bf468
...@@ -8,13 +8,15 @@ ...@@ -8,13 +8,15 @@
#include "srq.h" #include "srq.h"
static int static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
struct mlx5_eswitch_rep *rep,
int vport_index)
{ {
struct mlx5_ib_dev *ibdev; struct mlx5_ib_dev *ibdev;
int vport_index;
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
vport_index = rep->vport_index; if (!ibdev)
return -EINVAL;
ibdev->port[vport_index].rep = rep; ibdev->port[vport_index].rep = rep;
rep->rep_data[REP_IB].priv = ibdev; rep->rep_data[REP_IB].priv = ibdev;
...@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ...@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
return 0; return 0;
} }
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
static int static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{ {
u32 num_ports = mlx5_eswitch_get_total_vports(dev); u32 num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile; const struct mlx5_ib_profile *profile;
struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev; struct mlx5_ib_dev *ibdev;
u32 peer_num_ports;
int vport_index; int vport_index;
int ret; int ret;
vport_index = rep->vport_index;
if (mlx5_lag_is_shared_fdb(dev)) {
peer_dev = mlx5_lag_get_peer_mdev(dev);
peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
if (mlx5_lag_is_master(dev)) {
/* Only 1 ib port is the representor for both uplinks */
num_ports += peer_num_ports - 1;
} else {
if (rep->vport == MLX5_VPORT_UPLINK)
return 0;
vport_index += peer_num_ports;
dev = peer_dev;
}
}
if (rep->vport == MLX5_VPORT_UPLINK) if (rep->vport == MLX5_VPORT_UPLINK)
profile = &raw_eth_profile; profile = &raw_eth_profile;
else else
return mlx5_ib_set_vport_rep(dev, rep); return mlx5_ib_set_vport_rep(dev, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev) if (!ibdev)
...@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ...@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto fail_add; goto fail_add;
rep->rep_data[REP_IB].priv = ibdev; rep->rep_data[REP_IB].priv = ibdev;
if (mlx5_lag_is_shared_fdb(dev))
mlx5_ib_register_peer_vport_reps(dev);
return 0; return 0;
...@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) ...@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
static void static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{ {
struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
int vport_index = rep->vport_index;
struct mlx5_ib_port *port; struct mlx5_ib_port *port;
port = &dev->port[rep->vport_index]; if (WARN_ON(!mdev))
return;
if (mlx5_lag_is_shared_fdb(mdev) &&
!mlx5_lag_is_master(mdev)) {
struct mlx5_core_dev *peer_mdev;
if (rep->vport == MLX5_VPORT_UPLINK)
return;
peer_mdev = mlx5_lag_get_peer_mdev(mdev);
vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
}
if (!dev)
return;
port = &dev->port[vport_index];
write_lock(&port->roce.netdev_lock); write_lock(&port->roce.netdev_lock);
port->roce.netdev = NULL; port->roce.netdev = NULL;
write_unlock(&port->roce.netdev_lock); write_unlock(&port->roce.netdev_lock);
rep->rep_data[REP_IB].priv = NULL; rep->rep_data[REP_IB].priv = NULL;
port->rep = NULL; port->rep = NULL;
if (rep->vport == MLX5_VPORT_UPLINK) if (rep->vport == MLX5_VPORT_UPLINK) {
struct mlx5_core_dev *peer_mdev;
struct mlx5_eswitch *esw;
if (mlx5_lag_is_shared_fdb(mdev)) {
peer_mdev = mlx5_lag_get_peer_mdev(mdev);
esw = peer_mdev->priv.eswitch;
mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
} }
static const struct mlx5_eswitch_rep_ops rep_ops = { static const struct mlx5_eswitch_rep_ops rep_ops = {
...@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { ...@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
.get_proto_dev = mlx5_ib_rep_to_dev, .get_proto_dev = mlx5_ib_rep_to_dev,
}; };
/*
 * Register rep_ops for REP_IB on the eswitch of the peer device returned
 * by mlx5_lag_get_peer_mdev(). Does nothing when there is no peer device.
 */
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer = mlx5_lag_get_peer_mdev(mdev);

	if (peer)
		mlx5_eswitch_register_vport_reps(peer->priv.eswitch,
						 &rep_ops, REP_IB);
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num) u16 vport_num)
{ {
......
...@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev, ...@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev, struct net_device *ndev,
struct net_device *upper,
u32 *port_num) u32 *port_num)
{ {
struct net_device *rep_ndev; struct net_device *rep_ndev;
...@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, ...@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
if (!port->rep) if (!port->rep)
continue; continue;
if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
*port_num = i + 1;
return &port->roce;
}
if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
continue;
read_lock(&port->roce.netdev_lock); read_lock(&port->roce.netdev_lock);
rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw, rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
port->rep->vport); port->rep->vport);
...@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this, ...@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this,
} }
if (ibdev->is_rep) if (ibdev->is_rep)
roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
if (!roce) if (!roce)
return NOTIFY_DONE; return NOTIFY_DONE;
if ((upper == ndev || (!upper && ndev == roce->netdev)) if ((upper == ndev ||
&& ibdev->ib_active) { ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
ibdev->ib_active) {
struct ib_event ibev = { }; struct ib_event ibev = { };
enum ib_port_state port_state; enum ib_port_state port_state;
...@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) ...@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
struct mlx5_flow_table *ft; struct mlx5_flow_table *ft;
int err; int err;
if (!ns || !mlx5_lag_is_roce(mdev)) if (!ns || !mlx5_lag_is_active(mdev))
return 0; return 0;
err = mlx5_cmd_create_vport_lag(mdev); err = mlx5_cmd_create_vport_lag(mdev);
...@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) ...@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{ {
int err; int err;
if (!dev->is_rep && dev->profile != &raw_eth_profile) {
err = mlx5_nic_vport_enable_roce(dev->mdev); err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err) if (err)
return err; return err;
}
err = mlx5_eth_lag_init(dev); err = mlx5_eth_lag_init(dev);
if (err) if (err)
...@@ -3085,6 +3097,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) ...@@ -3085,6 +3097,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0; return 0;
err_disable_roce: err_disable_roce:
if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev); mlx5_nic_vport_disable_roce(dev->mdev);
return err; return err;
...@@ -3093,6 +3106,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) ...@@ -3093,6 +3106,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
static void mlx5_disable_eth(struct mlx5_ib_dev *dev) static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{ {
mlx5_eth_lag_cleanup(dev); mlx5_eth_lag_cleanup(dev);
if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev); mlx5_nic_vport_disable_roce(dev->mdev);
} }
...@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ...@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
/* Register only for native ports */ /* Register only for native ports */
err = mlx5_add_netdev_notifier(dev, port_num); err = mlx5_add_netdev_notifier(dev, port_num);
if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev)) if (err)
/*
* We don't enable ETH interface for
* 1. IB representors
* 2. User disabled ROCE through devlink interface
*/
return err; return err;
err = mlx5_enable_eth(dev); err = mlx5_enable_eth(dev);
...@@ -3980,7 +3989,6 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) ...@@ -3980,7 +3989,6 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) { if (ll == IB_LINK_LAYER_ETHERNET) {
if (!dev->is_rep)
mlx5_disable_eth(dev); mlx5_disable_eth(dev);
port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_num = mlx5_core_native_port_num(dev->mdev) - 1;
...@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) ...@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{ {
const char *name; const char *name;
if (!mlx5_lag_is_roce(dev->mdev)) if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d"; name = "mlx5_%d";
else else
name = "mlx5_bond_%d"; name = "mlx5_bond_%d";
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment