Commit 39e7d095 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlx4-next'

Or Gerlitz says:

====================
mlx4: Add SRIOV support for RoCE

This series adds SRIOV support for RoCE (RDMA over Ethernet) to the mlx4 driver.

The patches are against net-next, as of commit 2d8d40af "pkt_sched: fq:
do not hold qdisc lock while allocating memory"

changes from V1:
 - addressed feedback from Dave on patch #3 and changed get_real_sgid_index()
   to be called fill_in_real_sgid_index() and be a void  function.
 - removed some checkpatch warnings on long lines

changes from V0:
  - always check the return code of mlx4_get_roce_gid_from_slave().
    The call we fixed is introduced in patch #1 and later removed by
    patch #3 that allows guests to have multiple GIDS. The 1..3
    separation was done for proper division of patches to logical changes.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 36f6fdb7 aa9a2d51
......@@ -61,6 +61,11 @@ struct cm_generic_msg {
__be32 remote_comm_id;
};
struct cm_sidr_generic_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
};
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
......@@ -69,28 +74,62 @@ struct cm_req_msg {
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->local_comm_id = cpu_to_be32(cm_id);
if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
struct cm_sidr_generic_msg *msg =
(struct cm_sidr_generic_msg *)mad;
msg->request_id = cpu_to_be32(cm_id);
} else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
pr_err("trying to set local_comm_id in SIDR_REP\n");
return;
} else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->local_comm_id = cpu_to_be32(cm_id);
}
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->local_comm_id);
if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
struct cm_sidr_generic_msg *msg =
(struct cm_sidr_generic_msg *)mad;
return be32_to_cpu(msg->request_id);
} else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
pr_err("trying to set local_comm_id in SIDR_REP\n");
return -1;
} else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->local_comm_id);
}
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->remote_comm_id = cpu_to_be32(cm_id);
if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
struct cm_sidr_generic_msg *msg =
(struct cm_sidr_generic_msg *)mad;
msg->request_id = cpu_to_be32(cm_id);
} else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
pr_err("trying to set remote_comm_id in SIDR_REQ\n");
return;
} else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->remote_comm_id = cpu_to_be32(cm_id);
}
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->remote_comm_id);
if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
struct cm_sidr_generic_msg *msg =
(struct cm_sidr_generic_msg *)mad;
return be32_to_cpu(msg->request_id);
} else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
pr_err("trying to set remote_comm_id in SIDR_REQ\n");
return -1;
} else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->remote_comm_id);
}
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
......@@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
u32 sl_cm_id;
int pv_cm_id = -1;
sl_cm_id = get_local_comm_id(mad);
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
return 0;
} else {
sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
......@@ -315,14 +356,18 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
struct ib_mad *mad)
{
u32 pv_cm_id;
struct id_map_entry *id;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
if (!slave)
return 0;
gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
......@@ -341,7 +386,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
return -ENOENT;
}
*slave = id->slave_id;
if (slave)
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
......
......@@ -564,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe)
unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
......@@ -574,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
if (is_eth) {
wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
}
return 0;
}
......@@ -594,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
int is_eth;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
......@@ -778,11 +787,15 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
break;
}
is_eth = (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET);
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
return use_tunnel_data(*cur_qp, cq, wc, tail,
cqe, is_eth);
}
wc->slid = be16_to_cpu(cqe->rlid);
......@@ -793,20 +806,21 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
if (is_eth) {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
if (be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_VLAN_PRESENT_MASK) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
} else {
wc->vlan_id = 0xffff;
}
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->vlan_id = 0xffff;
}
wc->wc_flags |= IB_WC_WITH_VLAN;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
}
return 0;
......
......@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int ret = 0;
u16 tun_pkey_ix;
u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
if (dest_qpt > IB_QPT_GSI)
return -EINVAL;
......@@ -509,6 +510,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
attr.port_num = port;
if (is_eth) {
memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
attr.ah_flags = IB_AH_GRH;
}
ah = ib_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
......@@ -540,11 +545,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
if (is_eth) {
u16 vlan = 0;
if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
NULL)) {
/* VST mode */
if (vlan != wc->vlan_id)
/* Packet vlan is not the VST-assigned vlan.
* Drop the packet.
*/
goto out;
else
/* Remove the vlan tag before forwarding
* the packet to the VF.
*/
vlan = 0xffff;
} else {
vlan = wc->vlan_id;
}
tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
} else {
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
}
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
......@@ -580,6 +610,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
int err;
int slave;
u8 *slave_id;
int is_eth = 0;
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
is_eth = 0;
else
is_eth = 1;
if (is_eth) {
if (!(wc->wc_flags & IB_WC_GRH)) {
mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
return -EINVAL;
}
if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
mlx4_ib_warn(ibdev, "failed matching grh\n");
return -ENOENT;
}
if (slave >= dev->dev->caps.sqp_demux) {
mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
slave, dev->dev->caps.sqp_demux);
return -ENOENT;
}
if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
return 0;
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
slave, err);
return 0;
}
/* Initially assume that this mad is for us */
slave = mlx4_master_func_num(dev->dev);
......@@ -1076,8 +1141,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
......@@ -1166,6 +1232,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
......@@ -1174,6 +1243,34 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
return ret;
}
static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
{
int gids;
int vfs;
if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
return slave;
gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
vfs = dev->dev->num_vfs;
if (slave == 0)
return 0;
if (slave <= gids % vfs)
return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
}
static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
struct ib_ah_attr *ah_attr)
{
if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
ah_attr->grh.sgid_index = slave;
else
ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
}
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
{
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
......@@ -1260,12 +1357,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
ah.ibah.device = ctx->ib_dev;
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
if ((ah_attr.ah_flags & IB_AH_GRH) &&
(ah_attr.grh.sgid_index != slave)) {
mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
slave, ah_attr.grh.sgid_index);
return;
}
if (ah_attr.ah_flags & IB_AH_GRH)
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if slave have default vlan use it */
mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
&ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
......@@ -1273,7 +1372,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
&ah_attr, &tunnel->mad);
&ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
......
......@@ -1888,14 +1888,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
pr_info_once("%s", mlx4_ib_version);
mlx4_foreach_non_ib_transport_port(i, dev)
num_ports++;
if (mlx4_is_mfunc(dev) && num_ports) {
dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
return NULL;
}
num_ports = 0;
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
......
......@@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
}
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
&ah_attr, NULL, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
......
......@@ -241,6 +241,22 @@ struct mlx4_ib_proxy_sqp_hdr {
struct mlx4_rcv_tunnel_hdr tun;
} __packed;
struct mlx4_roce_smac_vlan_info {
u64 smac;
int smac_index;
int smac_port;
u64 candidate_smac;
int candidate_smac_index;
int candidate_smac_port;
u16 vid;
int vlan_index;
int vlan_port;
u16 candidate_vid;
int candidate_vlan_index;
int candidate_vlan_port;
int update_vid;
};
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
......@@ -273,8 +289,9 @@ struct mlx4_ib_qp {
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
u64 reg_id;
};
struct mlx4_ib_srq {
......@@ -720,9 +737,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
......
......@@ -662,10 +662,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
} else {
qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
if (!qp)
return -ENOMEM;
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
}
} else
qp = *caller_qp;
......@@ -940,11 +944,32 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
{
struct mlx4_ib_cq *send_cq, *recv_cq;
if (qp->state != IB_QPS_RESET)
if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
if (qp->pri.smac) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
qp->alt.smac = 0;
}
if (qp->pri.vid < 0x1000) {
mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
qp->pri.vid = 0xFFFF;
qp->pri.candidate_vid = 0xFFFF;
qp->pri.update_vid = 0;
}
if (qp->alt.vid < 0x1000) {
mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
qp->alt.vid = 0xFFFF;
qp->alt.candidate_vid = 0xFFFF;
qp->alt.update_vid = 0;
}
}
get_cqs(qp, &send_cq, &recv_cq);
......@@ -1057,6 +1082,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
/* fall through */
case IB_QPT_UD:
{
......@@ -1188,12 +1215,13 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
u8 port)
struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
int vidx;
int smac_index;
int err;
path->grh_mylmc = ah->src_path_bits & 0x7f;
......@@ -1223,61 +1251,103 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
if (is_eth) {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 7) << 3);
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
memcpy(path->dmac, ah->dmac, ETH_ALEN);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* find the index into MAC table for IBoE */
if (!is_zero_ether_addr((const u8 *)&smac)) {
if (mlx4_find_cached_mac(dev->dev, port, smac,
&smac_index))
return -ENOENT;
} else {
smac_index = 0;
}
path->grh_mylmc &= 0x80 | smac_index;
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 7) << 3);
path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
return -ENOENT;
path->vlan_index = vidx;
path->fl = 1 << 6;
if (smac_info->vid < 0x1000) {
/* both valid vlan ids */
if (smac_info->vid != vlan_tag) {
/* different VIDs. unreg old and reg new */
err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
if (err)
return err;
smac_info->candidate_vid = vlan_tag;
smac_info->candidate_vlan_index = vidx;
smac_info->candidate_vlan_port = port;
smac_info->update_vid = 1;
path->vlan_index = vidx;
} else {
path->vlan_index = smac_info->vlan_index;
}
} else {
/* no current vlan tag in qp */
err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
if (err)
return err;
smac_info->candidate_vid = vlan_tag;
smac_info->candidate_vlan_index = vidx;
smac_info->candidate_vlan_port = port;
smac_info->update_vid = 1;
path->vlan_index = vidx;
}
path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
path->fl = 1 << 6;
} else {
/* have current vlan tag. unregister it at modify-qp success */
if (smac_info->vid < 0x1000) {
smac_info->candidate_vid = 0xFFFF;
smac_info->update_vid = 1;
}
}
} else
/* get smac_index for RoCE use.
* If no smac was yet assigned, register one.
* If one was already assigned, but the new mac differs,
* unregister the old one and register the new one.
*/
if (!smac_info->smac || smac_info->smac != smac) {
/* register candidate now, unreg if needed, after success */
smac_index = mlx4_register_mac(dev->dev, port, smac);
if (smac_index >= 0) {
smac_info->candidate_smac_index = smac_index;
smac_info->candidate_smac = smac;
smac_info->candidate_smac_port = port;
} else {
return -EINVAL;
}
} else {
smac_index = smac_info->smac_index;
}
memcpy(path->dmac, ah->dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80;
} else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
}
return 0;
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
enum ib_qp_attr_mask qp_attr_mask,
struct mlx4_ib_qp *mqp,
struct mlx4_qp_path *path, u8 port)
{
return _mlx4_set_path(dev, &qp->ah_attr,
mlx4_mac_to_u64((u8 *)qp->smac),
(qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
path, port);
path, &mqp->pri, port);
}
static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
const struct ib_qp_attr *qp,
enum ib_qp_attr_mask qp_attr_mask,
struct mlx4_ib_qp *mqp,
struct mlx4_qp_path *path, u8 port)
{
return _mlx4_set_path(dev, &qp->alt_ah_attr,
mlx4_mac_to_u64((u8 *)qp->alt_smac),
(qp_attr_mask & IB_QP_ALT_VID) ?
qp->alt_vlan_id : 0xffff,
path, port);
path, &mqp->alt, port);
}
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
......@@ -1292,6 +1362,37 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
struct mlx4_qp_context *context)
{
struct net_device *ndev;
u64 u64_mac;
int smac_index;
ndev = dev->iboe.netdevs[qp->port - 1];
if (ndev) {
smac = ndev->dev_addr;
u64_mac = mlx4_mac_to_u64(smac);
} else {
u64_mac = dev->dev->caps.def_mac[qp->port];
}
context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
if (!qp->pri.smac) {
smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
if (smac_index >= 0) {
qp->pri.candidate_smac_index = smac_index;
qp->pri.candidate_smac = u64_mac;
qp->pri.candidate_smac_port = qp->port;
context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
} else {
return -ENOENT;
}
}
return 0;
}
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
......@@ -1403,7 +1504,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path,
if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port))
goto out;
......@@ -1426,7 +1527,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path,
if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
&context->alt_path,
attr->alt_port_num))
goto out;
......@@ -1532,6 +1634,20 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET) {
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
context->pri_path.feup = 1 << 7; /* don't fsm */
/* handle smac_index */
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
if (err)
return -EINVAL;
}
}
}
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
......@@ -1619,28 +1735,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq): NULL);
if (send_cq != recv_cq)
mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
if (new_state == IB_QPS_RESET) {
if (!ibqp->uobject) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq_next_wqe = 0;
if (qp->rq.wqe_cnt)
*qp->db.db = 0;
qp->rq.head = 0;
qp->rq.tail = 0;
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq_next_wqe = 0;
if (qp->rq.wqe_cnt)
*qp->db.db = 0;
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_reg(dev, qp, 0);
}
if (qp->pri.smac) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
qp->alt.smac = 0;
}
if (qp->pri.vid < 0x1000) {
mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
qp->pri.vid = 0xFFFF;
qp->pri.candidate_vid = 0xFFFF;
qp->pri.update_vid = 0;
}
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_reg(dev, qp, 0);
if (qp->alt.vid < 0x1000) {
mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
qp->alt.vid = 0xFFFF;
qp->alt.candidate_vid = 0xFFFF;
qp->alt.update_vid = 0;
}
}
out:
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
if (qp->pri.candidate_smac) {
if (err) {
mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
} else {
if (qp->pri.smac)
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = qp->pri.candidate_smac;
qp->pri.smac_index = qp->pri.candidate_smac_index;
qp->pri.smac_port = qp->pri.candidate_smac_port;
}
qp->pri.candidate_smac = 0;
qp->pri.candidate_smac_index = 0;
qp->pri.candidate_smac_port = 0;
}
if (qp->alt.candidate_smac) {
if (err) {
mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
} else {
if (qp->alt.smac)
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
qp->alt.smac = qp->alt.candidate_smac;
qp->alt.smac_index = qp->alt.candidate_smac_index;
qp->alt.smac_port = qp->alt.candidate_smac_port;
}
qp->alt.candidate_smac = 0;
qp->alt.candidate_smac_index = 0;
qp->alt.candidate_smac_port = 0;
}
if (qp->pri.update_vid) {
if (err) {
if (qp->pri.candidate_vid < 0x1000)
mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
qp->pri.candidate_vid);
} else {
if (qp->pri.vid < 0x1000)
mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
qp->pri.vid);
qp->pri.vid = qp->pri.candidate_vid;
qp->pri.vlan_port = qp->pri.candidate_vlan_port;
qp->pri.vlan_index = qp->pri.candidate_vlan_index;
}
qp->pri.candidate_vid = 0xFFFF;
qp->pri.update_vid = 0;
}
if (qp->alt.update_vid) {
if (err) {
if (qp->alt.candidate_vid < 0x1000)
mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
qp->alt.candidate_vid);
} else {
if (qp->alt.vid < 0x1000)
mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
qp->alt.vid);
qp->alt.vid = qp->alt.candidate_vid;
qp->alt.vlan_port = qp->alt.candidate_vlan_port;
qp->alt.vlan_index = qp->alt.candidate_vlan_index;
}
qp->alt.candidate_vid = 0xFFFF;
qp->alt.update_vid = 0;
}
return err;
}
......@@ -1842,9 +2043,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
struct net_device *ndev;
union ib_gid sgid;
u16 pkey;
int send_size;
......@@ -1868,12 +2069,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
* we must use our own cache */
sgid.global.subnet_prefix =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
subnet_prefix;
sgid.global.interface_id =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
guid_cache[ah->av.ib.gid_index];
err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid.raw[0]);
if (err)
return err;
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
......@@ -1902,6 +2102,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
if (is_eth)
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
else {
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
......@@ -1917,6 +2120,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
&sqp->ud_header.grh.source_gid);
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
......@@ -1949,16 +2153,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (is_eth) {
u8 *smac;
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
mlx->sched_prio = cpu_to_be16(pcp);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
/* FIXME: cache smac value? */
ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
if (!ndev)
return -ENODEV;
smac = ndev->dev_addr;
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
smac = to_mdev(sqp->qp.ibqp.device)->
iboe.netdevs[sqp->qp.port - 1]->dev_addr;
else /* use the src mac of the tunnel */
smac = ah->av.eth.s_mac;
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
......@@ -2190,6 +2401,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
memcpy(hdr.mac, ah->av.eth.mac, 6);
hdr.vlan = ah->av.eth.vlan;
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
......
......@@ -2289,6 +2289,30 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
}
EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
/* mlx4_get_slave_default_vlan -
* return true if VST ( default vlan)
* if VST, will return vlan & qos (if not NULL)
*/
bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
u16 *vlan, u8 *qos)
{
struct mlx4_vport_oper_state *vp_oper;
struct mlx4_priv *priv;
priv = mlx4_priv(dev);
vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
if (MLX4_VGT != vp_oper->state.default_vlan) {
if (vlan)
*vlan = vp_oper->state.default_vlan;
if (qos)
*qos = vp_oper->state.default_qos;
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan);
int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
{
struct mlx4_priv *priv = mlx4_priv(dev);
......
......@@ -934,7 +934,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
MLX4_PUT(outbox->buf, port_type,
QUERY_PORT_SUPPORTED_TYPE_OFFSET);
short_field = 1; /* slave max gids */
if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH)
short_field = mlx4_get_slave_num_gids(dev, slave);
else
short_field = 1; /* slave max gids */
MLX4_PUT(outbox->buf, short_field,
QUERY_PORT_CUR_MAX_GID_OFFSET);
......
......@@ -1462,7 +1462,11 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
int i;
for (i = 1; i <= dev->caps.num_ports; i++) {
dev->caps.gid_table_len[i] = 1;
if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
dev->caps.gid_table_len[i] =
mlx4_get_slave_num_gids(dev, 0);
else
dev->caps.gid_table_len[i] = 1;
dev->caps.pkey_table_len[i] =
dev->phys_caps.pkey_phys_table_len[i] - 1;
}
......
......@@ -788,6 +788,10 @@ enum {
MLX4_USE_RR = 1,
};
struct mlx4_roce_gid_entry {
u8 raw[16];
};
struct mlx4_priv {
struct mlx4_dev dev;
......@@ -834,6 +838,7 @@ struct mlx4_priv {
int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
__be64 slave_node_guids[MLX4_MFUNC_MAX];
struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][MLX4_ROCE_MAX_GIDS];
atomic_t opreq_count;
struct work_struct opreq_task;
......@@ -1282,4 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
void mlx4_init_quotas(struct mlx4_dev *dev);
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave);
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave);
#endif /* MLX4_H */
......@@ -505,6 +505,32 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
mlx4_free_cmd_mailbox(dev, outmailbox);
return err;
}
static struct mlx4_roce_gid_entry zgid_entry;
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave)
{
if (slave == 0)
return MLX4_ROCE_PF_GIDS;
if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs))
return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1;
return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs;
}
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave)
{
int gids;
int vfs;
gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
vfs = dev->num_vfs;
if (slave == 0)
return 0;
if (slave <= gids % vfs)
return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
}
static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
u8 op_mod, struct mlx4_cmd_mailbox *inbox)
......@@ -515,14 +541,18 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
struct mlx4_slave_state *slave_st = &master->slave_state[slave];
struct mlx4_set_port_rqp_calc_context *qpn_context;
struct mlx4_set_port_general_context *gen_context;
struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
int reset_qkey_viols;
int port;
int is_eth;
int num_gids;
int base;
u32 in_modifier;
u32 promisc;
u16 mtu, prev_mtu;
int err;
int i;
int i, j;
int offset;
__be32 agg_cap_mask;
__be32 slave_cap_mask;
__be32 new_cap_mask;
......@@ -535,7 +565,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
/* Slaves cannot perform SET_PORT operations except changing MTU */
if (is_eth) {
if (slave != dev->caps.function &&
in_modifier != MLX4_SET_PORT_GENERAL) {
in_modifier != MLX4_SET_PORT_GENERAL &&
in_modifier != MLX4_SET_PORT_GID_TABLE) {
mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
slave);
return -EINVAL;
......@@ -581,6 +612,67 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
break;
case MLX4_SET_PORT_GID_TABLE:
/* change to MULTIPLE entries: number of guest's gids
* need a FOR-loop here over number of gids the guest has.
* 1. Check no duplicates in gids passed by slave
*/
num_gids = mlx4_get_slave_num_gids(dev, slave);
base = mlx4_get_base_gid_ix(dev, slave);
gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
sizeof(zgid_entry)))
continue;
gid_entry_mb1 = gid_entry_mbox + 1;
for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
if (!memcmp(gid_entry_mb1->raw,
zgid_entry.raw, sizeof(zgid_entry)))
continue;
if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
sizeof(gid_entry_mbox->raw))) {
/* found duplicate */
return -EINVAL;
}
}
}
/* 2. Check that do not have duplicates in OTHER
* entries in the port GID table
*/
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (i >= base && i < base + num_gids)
continue; /* don't compare to slave's current gids */
gid_entry_tbl = &priv->roce_gids[port - 1][i];
if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
continue;
gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
sizeof(zgid_entry)))
continue;
if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
sizeof(gid_entry_tbl->raw))) {
/* found duplicate */
mlx4_warn(dev, "requested gid entry for slave:%d "
"is a duplicate of gid at index %d\n",
slave, i);
return -EINVAL;
}
}
}
/* insert slave GIDs with memcpy, starting at slave's base index */
gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16);
/* Now, copy roce port gids table to current mailbox for passing to FW */
gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16);
break;
}
return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
......@@ -927,3 +1019,51 @@ void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
}
EXPORT_SYMBOL(mlx4_set_stats_bitmap);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
int *slave_id)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i, found_ix = -1;
int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
if (!mlx4_is_mfunc(dev))
return -EINVAL;
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
found_ix = i;
break;
}
}
if (found_ix >= 0) {
if (found_ix < MLX4_ROCE_PF_GIDS)
*slave_id = 0;
else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) *
(vf_gids / dev->num_vfs + 1))
*slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) /
(vf_gids / dev->num_vfs + 1)) + 1;
else
*slave_id =
((found_ix - MLX4_ROCE_PF_GIDS -
((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) /
(vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1;
}
return (found_ix >= 0) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id,
u8 *gid)
{
struct mlx4_priv *priv = mlx4_priv(dev);
if (!mlx4_is_master(dev))
return -EINVAL;
memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
return 0;
}
EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
......@@ -52,6 +52,8 @@
struct mac_res {
struct list_head list;
u64 mac;
int ref_count;
u8 smac_index;
u8 port;
};
......@@ -219,6 +221,11 @@ struct res_fs_rule {
int qpn;
};
static int mlx4_is_eth(struct mlx4_dev *dev, int port)
{
return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
}
static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
{
struct rb_node *node = root->rb_node;
......@@ -600,15 +607,34 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
int port;
if (MLX4_QP_ST_UD == ts)
qp_ctx->pri_path.mgid_index = 0x80 | slave;
if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
qp_ctx->pri_path.mgid_index = slave & 0x7F;
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
qp_ctx->alt_path.mgid_index = slave & 0x7F;
if (MLX4_QP_ST_UD == ts) {
port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
if (mlx4_is_eth(dev, port))
qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80;
else
qp_ctx->pri_path.mgid_index = slave | 0x80;
} else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) {
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
if (mlx4_is_eth(dev, port)) {
qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
qp_ctx->pri_path.mgid_index &= 0x7f;
} else {
qp_ctx->pri_path.mgid_index = slave & 0x7F;
}
}
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
if (mlx4_is_eth(dev, port)) {
qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
qp_ctx->alt_path.mgid_index &= 0x7f;
} else {
qp_ctx->alt_path.mgid_index = slave & 0x7F;
}
}
}
}
......@@ -619,7 +645,6 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
struct mlx4_qp_context *qpc = inbox->buf + 8;
struct mlx4_vport_oper_state *vp_oper;
struct mlx4_priv *priv;
u32 qp_type;
int port;
port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
......@@ -627,12 +652,6 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
if (MLX4_VGT != vp_oper->state.default_vlan) {
qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
if (MLX4_QP_ST_RC == qp_type ||
(MLX4_QP_ST_UD == qp_type &&
!mlx4_is_qp_reserved(dev, qpn)))
return -EINVAL;
/* the reserved QPs (special, proxy, tunnel)
* do not operate over vlans
*/
......@@ -1659,11 +1678,39 @@ static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
return err;
}
static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port)
static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port,
u8 smac_index, u64 *mac)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
struct mac_res *res;
struct list_head *mac_list =
&tracker->slave_list[slave].res_list[RES_MAC];
struct mac_res *res, *tmp;
list_for_each_entry_safe(res, tmp, mac_list, list) {
if (res->smac_index == smac_index && res->port == (u8) port) {
*mac = res->mac;
return 0;
}
}
return -ENOENT;
}
static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
struct list_head *mac_list =
&tracker->slave_list[slave].res_list[RES_MAC];
struct mac_res *res, *tmp;
list_for_each_entry_safe(res, tmp, mac_list, list) {
if (res->mac == mac && res->port == (u8) port) {
/* mac found. update ref count */
++res->ref_count;
return 0;
}
}
if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
return -EINVAL;
......@@ -1674,6 +1721,8 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port)
}
res->mac = mac;
res->port = (u8) port;
res->smac_index = smac_index;
res->ref_count = 1;
list_add_tail(&res->list,
&tracker->slave_list[slave].res_list[RES_MAC]);
return 0;
......@@ -1690,9 +1739,11 @@ static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac,
list_for_each_entry_safe(res, tmp, mac_list, list) {
if (res->mac == mac && res->port == (u8) port) {
list_del(&res->list);
mlx4_release_resource(dev, slave, RES_MAC, 1, port);
kfree(res);
if (!--res->ref_count) {
list_del(&res->list);
mlx4_release_resource(dev, slave, RES_MAC, 1, port);
kfree(res);
}
break;
}
}
......@@ -1705,10 +1756,13 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave)
struct list_head *mac_list =
&tracker->slave_list[slave].res_list[RES_MAC];
struct mac_res *res, *tmp;
int i;
list_for_each_entry_safe(res, tmp, mac_list, list) {
list_del(&res->list);
__mlx4_unregister_mac(dev, res->port, res->mac);
/* dereference the mac the num times the slave referenced it */
for (i = 0; i < res->ref_count; i++)
__mlx4_unregister_mac(dev, res->port, res->mac);
mlx4_release_resource(dev, slave, RES_MAC, 1, res->port);
kfree(res);
}
......@@ -1720,6 +1774,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
int err = -EINVAL;
int port;
u64 mac;
u8 smac_index;
if (op != RES_OP_RESERVE_AND_MAP)
return err;
......@@ -1729,12 +1784,13 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
err = __mlx4_register_mac(dev, port, mac);
if (err >= 0) {
smac_index = err;
set_param_l(out_param, err);
err = 0;
}
if (!err) {
err = mac_add_to_slave(dev, slave, mac, port);
err = mac_add_to_slave(dev, slave, mac, port, smac_index);
if (err)
__mlx4_unregister_mac(dev, port, mac);
}
......@@ -2734,6 +2790,8 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
u32 qp_type;
struct mlx4_qp_context *qp_ctx;
enum mlx4_qp_optpar optpar;
int port;
int num_gids;
qp_ctx = inbox->buf + 8;
qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
......@@ -2741,6 +2799,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
switch (qp_type) {
case MLX4_QP_ST_RC:
case MLX4_QP_ST_XRC:
case MLX4_QP_ST_UC:
switch (transition) {
case QP_TRANS_INIT2RTR:
......@@ -2749,13 +2808,24 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
case QP_TRANS_SQD2SQD:
case QP_TRANS_SQD2RTS:
if (slave != mlx4_master_func_num(dev))
/* slaves have only gid index 0 */
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
if (qp_ctx->pri_path.mgid_index)
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
num_gids = mlx4_get_slave_num_gids(dev, slave);
else
num_gids = 1;
if (qp_ctx->pri_path.mgid_index >= num_gids)
return -EINVAL;
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
if (qp_ctx->alt_path.mgid_index)
}
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
num_gids = mlx4_get_slave_num_gids(dev, slave);
else
num_gids = 1;
if (qp_ctx->alt_path.mgid_index >= num_gids)
return -EINVAL;
}
break;
default:
break;
......@@ -3268,6 +3338,25 @@ int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
static int roce_verify_mac(struct mlx4_dev *dev, int slave,
struct mlx4_qp_context *qpc,
struct mlx4_cmd_mailbox *inbox)
{
u64 mac;
int port;
u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
u8 sched = *(u8 *)(inbox->buf + 64);
u8 smac_ix;
port = (sched >> 6 & 1) + 1;
if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) {
smac_ix = qpc->pri_path.grh_mylmc & 0x7f;
if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac))
return -ENOENT;
}
return 0;
}
int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
......@@ -3290,6 +3379,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
if (roce_verify_mac(dev, slave, qpc, inbox))
return -EINVAL;
update_pkey_index(dev, slave, inbox);
update_gid(dev, inbox, (u8)slave);
adjust_proxy_tun_qkey(dev, vhcr, qpc);
......
......@@ -240,6 +240,13 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf);
int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state);
/*
* mlx4_get_slave_default_vlan -
* return true if VST ( default vlan)
* if VST, will return vlan & qos (if not NULL)
*/
bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
u16 *vlan, u8 *qos);
#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
......
......@@ -48,6 +48,9 @@
#define MSIX_LEGACY_SZ 4
#define MIN_MSIX_P_PORT 5
#define MLX4_ROCE_MAX_GIDS 128
#define MLX4_ROCE_PF_GIDS 16
enum {
MLX4_FLAG_MSI_X = 1 << 0,
MLX4_FLAG_OLD_PORT_CMDS = 1 << 1,
......@@ -629,7 +632,8 @@ struct mlx4_eth_av {
u8 hop_limit;
__be32 sl_tclass_flowlabel;
u8 dgid[16];
u32 reserved4[2];
u8 s_mac[6];
u8 reserved4[2];
__be16 vlan;
u8 mac[ETH_ALEN];
};
......@@ -1183,6 +1187,11 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
int *slave_id);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id,
u8 *gid);
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
u32 max_range_qpn);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment