Commit 026a4259 authored by Mark Zhang's avatar Mark Zhang Committed by Leon Romanovsky

RDMA/mlx5: Support plane device and driver APIs to add and delete it

This patch supports driver APIs "add_sub_dev" and "del_sub_dev", to
add and delete a plane device respectively.
A mlx5 plane device is an rdma SMI device; it provides the SMI capability
through user MAD for its parent, the logical multi-plane aggregated
device. For a plane port:
- It supports QP0 only;
- When adding a plane device, all plane ports are added;
- For some commands like mad_ifc, both plane_index and native portnum
  are needed;
- When querying or modifying a plane port context, the native portnum
  must be used, as the query/modify_hca_vport_context command doesn't
  support plane port.
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/e933cd0562aece181f8657af2ca0f5b387d0f14e.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
parent a9e0faca
......@@ -177,7 +177,7 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
}
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port)
{
int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
......@@ -195,12 +195,18 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
MLX5_SET(mad_ifc_in, in, op_mod, opmod);
MLX5_SET(mad_ifc_in, in, port, port);
if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
MLX5_SET(mad_ifc_in, in, plane_index, port);
MLX5_SET(mad_ifc_in, in, port,
smi_to_native_portnum(dev, port));
} else {
MLX5_SET(mad_ifc_in, in, port, port);
}
data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out);
err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out);
if (err)
goto out;
......
......@@ -54,7 +54,7 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
......
......@@ -69,7 +69,7 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier,
port);
}
......
......@@ -282,6 +282,14 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_port *port;
if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
if (native_port_num)
*native_port_num = smi_to_native_portnum(ibdev,
ib_port_num);
return ibdev->mdev;
}
if (!mlx5_core_mp_enabled(ibdev->mdev) ||
ll != IB_LINK_LAYER_ETHERNET) {
if (native_port_num)
......@@ -1347,6 +1355,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
/* props being zeroed by the caller, avoid zeroing it here */
if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
port = smi_to_native_portnum(dev, port);
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
goto out;
......@@ -1362,7 +1373,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
if (dev->num_plane) {
props->port_cap_flags |= IB_PORT_SM_DISABLED;
props->port_cap_flags &= ~IB_PORT_SM;
}
} else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
props->port_cap_flags &= ~IB_PORT_CM_SUP;
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
......@@ -2812,7 +2824,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
if (dev->num_plane) {
dev->port_caps[port - 1].has_smi = false;
continue;
} else if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) {
} else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) ||
dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
dev->port_caps[port - 1].has_smi = true;
continue;
}
......@@ -3026,6 +3039,8 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD |
RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA |
RDMA_CORE_CAP_AF_IB;
else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI;
if (ll == IB_LINK_LAYER_INFINIBAND)
return ret | RDMA_CORE_PORT_IBA_IB;
......@@ -3062,6 +3077,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
return err;
if (ll == IB_LINK_LAYER_INFINIBAND) {
if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
port_num = smi_to_native_portnum(dev, port_num);
err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
&rep);
if (err)
......@@ -3862,12 +3880,18 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
return err;
}
static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
enum rdma_nl_dev_type type,
const char *name);
static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev);
static const struct ib_device_ops mlx5_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MLX5,
.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
.add_gid = mlx5_ib_add_gid,
.add_sub_dev = mlx5_ib_add_sub_dev,
.alloc_mr = mlx5_ib_alloc_mr,
.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
......@@ -3882,6 +3906,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.dealloc_pd = mlx5_ib_dealloc_pd,
.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
.del_gid = mlx5_ib_del_gid,
.del_sub_dev = mlx5_ib_del_sub_dev,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
.destroy_cq = mlx5_ib_destroy_cq,
......@@ -4171,7 +4196,9 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
if (!mlx5_lag_is_active(dev->mdev))
if (dev->sub_dev_name)
name = dev->sub_dev_name;
else if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
......@@ -4432,6 +4459,89 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL),
};
/*
 * Init/cleanup stage table for a plane (SMI) sub-device. This is a
 * reduced subset of the full mlx5_ib profile: a plane device only needs
 * the core tables (QP/SRQ), device resources, bfregs and IB registration
 * — no ODP, counters, RoCE or other stages used by full devices.
 * Stages run top-to-bottom on add and bottom-to-top on remove.
 */
static const struct mlx5_ib_profile plane_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_INIT,
		     mlx5_ib_stage_init_init,
		     mlx5_ib_stage_init_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
		     mlx5_ib_stage_caps_init,
		     mlx5_ib_stage_caps_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
		     mlx5_ib_stage_non_default_cb,
		     NULL),
	STAGE_CREATE(MLX5_IB_STAGE_QP,
		     mlx5_init_qp_table,
		     mlx5_cleanup_qp_table),
	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
		     mlx5_init_srq_table,
		     mlx5_cleanup_srq_table),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
		     mlx5_ib_dev_res_init,
		     mlx5_ib_dev_res_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_BFREG,
		     mlx5_ib_stage_bfrag_init,
		     mlx5_ib_stage_bfrag_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
		     mlx5_ib_stage_ib_reg_init,
		     mlx5_ib_stage_ib_reg_cleanup),
};
/*
 * ib_device_ops::add_sub_dev callback: create a plane (SMI) sub-device
 * under @parent. Only one SMI sub-device may exist per parent, and only
 * on a multi-plane IB parent whose firmware supports multiplane UD QPs.
 * Returns the new ib_device on success, or an ERR_PTR on failure.
 */
static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
					     enum rdma_nl_dev_type type,
					     const char *name)
{
	struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane;
	enum rdma_link_layer ll;
	int ret;

	/* At most one plane device per parent. */
	if (mparent->smi_dev)
		return ERR_PTR(-EEXIST);

	ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev,
							port_type));
	/* Plane devices are SMI-type only, on multi-plane IB parents. */
	if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane ||
	    ll != IB_LINK_LAYER_INFINIBAND ||
	    !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
		return ERR_PTR(-EOPNOTSUPP);

	mplane = ib_alloc_device(mlx5_ib_dev, ib_dev);
	if (!mplane)
		return ERR_PTR(-ENOMEM);

	/* Adding a plane device adds all plane ports of all parent ports. */
	mplane->port = kcalloc(mparent->num_plane * mparent->num_ports,
			       sizeof(*mplane->port), GFP_KERNEL);
	if (!mplane->port) {
		ret = -ENOMEM;
		goto fail_kcalloc;
	}

	mplane->ib_dev.type = type;
	/* The plane device shares the parent's core device. */
	mplane->mdev = mparent->mdev;
	mplane->num_ports = mparent->num_plane;
	/* Recorded so mlx5_ib_stage_ib_reg_init() registers under this name. */
	mplane->sub_dev_name = name;

	ret = __mlx5_ib_add(mplane, &plane_profile);
	if (ret)
		goto fail_ib_add;

	mparent->smi_dev = mplane;
	return &mplane->ib_dev;

fail_ib_add:
	kfree(mplane->port);
fail_kcalloc:
	ib_dealloc_device(&mplane->ib_dev);
	return ERR_PTR(ret);
}
/*
 * ib_device_ops::del_sub_dev callback: tear down a plane (SMI)
 * sub-device. The parent's reference is cleared first so a concurrent
 * add_sub_dev sees the slot free only after this teardown has started.
 */
static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev)
{
	struct mlx5_ib_dev *mplane = to_mdev(sub_dev);

	to_mdev(sub_dev->parent)->smi_dev = NULL;
	__mlx5_ib_remove(mplane, mplane->profile, MLX5_IB_STAGE_MAX);
}
static int mlx5r_mp_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
......
......@@ -1191,6 +1191,8 @@ struct mlx5_ib_dev {
#endif
u8 num_plane;
struct mlx5_ib_dev *smi_dev;
const char *sub_dev_name;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
......@@ -1699,4 +1701,10 @@ static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr);
/*
 * Map a 1-based plane (SMI) port number to the 1-based native port
 * number of the parent device. Consecutive runs of dev->num_ports
 * plane ports collapse onto one native port.
 */
static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
{
	u32 plane_idx = port - 1;

	return plane_idx / dev->num_ports + 1;
}
#endif /* MLX5_IB_H */
......@@ -4217,7 +4217,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
/* todo implement counter_index functionality */
if (is_sqp(qp->type))
if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI && is_qp0(qp->type)) {
MLX5_SET(ads, pri_path, vhca_port_num,
smi_to_native_portnum(dev, qp->port));
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)
MLX5_SET(ads, pri_path, plane_index, qp->port);
} else if (is_sqp(qp->type))
MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
if (attr_mask & IB_QP_PORT)
......
......@@ -249,7 +249,8 @@ int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
if (err)
goto err_cmd;
mlx5_debug_qp_add(dev->mdev, qp);
if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
mlx5_debug_qp_add(dev->mdev, qp);
return 0;
......@@ -307,7 +308,8 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
{
u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
mlx5_debug_qp_remove(dev->mdev, qp);
if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
mlx5_debug_qp_remove(dev->mdev, qp);
destroy_resource_common(dev, qp);
......@@ -504,7 +506,9 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
xa_init(&table->dct_xa);
mlx5_qp_debugfs_init(dev->mdev);
if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
mlx5_qp_debugfs_init(dev->mdev);
table->nb.notifier_call = rsc_event_notifier;
mlx5_notifier_register(dev->mdev, &table->nb);
......@@ -517,7 +521,8 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
struct mlx5_qp_table *table = &dev->qp_table;
mlx5_notifier_unregister(dev->mdev, &table->nb);
mlx5_qp_debugfs_cleanup(dev->mdev);
if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
mlx5_qp_debugfs_cleanup(dev->mdev);
}
int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment