Commit 32043830 authored by Doug Ledford

Merge branches '32bit_lid' and 'irq_affinity' into k.o/merge-test

Conflicts:
	drivers/infiniband/hw/mlx5/main.c - Both add new code
	include/rdma/ib_verbs.h - Both add new code
Signed-off-by: Doug Ledford <dledford@redhat.com>
......@@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO
depends on BLOCK && VIRTIO
default y
config BLK_MQ_RDMA
bool
depends on BLOCK && INFINIBAND
default y
source block/Kconfig.iosched
......@@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
......
/*
* Copyright (c) 2017 Sagi Grimberg.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/blk-mq.h>
#include <linux/blk-mq-rdma.h>
#include <rdma/ib_verbs.h>
/**
* blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
* @set: tagset to provide the mapping for
* @dev: rdma device associated with @set.
* @first_vec: first interrupt vector to use for queues (usually 0)
*
* This function assumes the rdma device @dev has at least as many available
* interrupt vectors as @set has queues. It will then query its affinity mask
* and build a queue mapping that maps a queue to the CPUs that have irq
* affinity for the corresponding vector.
*
* In case either the driver passed a @dev with fewer vectors than
* @set->nr_hw_queues, or @dev does not provide an affinity mask for a
* vector, we fall back to the naive mapping.
*/
int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
struct ib_device *dev, int first_vec)
{
const struct cpumask *mask;
unsigned int queue, cpu;
for (queue = 0; queue < set->nr_hw_queues; queue++) {
mask = ib_get_vector_affinity(dev, first_vec + queue);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask)
set->mq_map[cpu] = queue;
}
return 0;
fallback:
return blk_mq_map_queues(set);
}
EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
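For context, here is a minimal sketch of how an RDMA block driver is expected to consume this helper from its blk_mq_ops ->map_queues callback. The nvme_rdma_* names mirror the nvme-rdma hookup that appears later in this merge; any other ULP would substitute its own tag set private data:
static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_rdma_ctrl *ctrl = set->driver_data;
	/*
	 * Map each hw queue to the CPUs that share irq affinity with its
	 * completion vector; blk_mq_rdma_map_queues() itself falls back to
	 * blk_mq_map_queues() when the device exposes no affinity mask.
	 */
	return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
}
The callback is then wired up via .map_queues = nvme_rdma_map_queues in the driver's blk_mq_ops, as shown in the nvme-rdma hunks below.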
......@@ -38,6 +38,7 @@
#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
#include "mad_priv.h"
......
......@@ -64,7 +64,7 @@ struct mad_rmpp_recv {
__be64 tid;
u32 src_qp;
u16 slid;
u32 slid;
u8 mgmt_class;
u8 class_version;
u8 method;
......
......@@ -618,7 +618,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
if (result)
goto out;
ib_copy_qp_attr_to_user(&resp, &qp_attr);
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
......
......@@ -248,14 +248,15 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
dst->qp_num = src->qp_num;
}
static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
static void ucma_copy_ud_event(struct ib_device *device,
struct rdma_ucm_ud_param *dst,
struct rdma_ud_param *src)
{
if (src->private_data_len)
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
dst->qp_num = src->qp_num;
dst->qkey = src->qkey;
}
......@@ -335,7 +336,8 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
uevent->resp.event = event->event;
uevent->resp.status = event->status;
if (cm_id->qp_type == IB_QPT_UD)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
&event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
......@@ -1157,7 +1159,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (ret)
goto out;
ib_copy_qp_attr_to_user(&resp, &qp_attr);
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
ret = -EFAULT;
......
......@@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.status = 0;
packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
......
......@@ -275,8 +275,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.bad_pkey_cntr = attr.bad_pkey_cntr;
resp.qkey_viol_cntr = attr.qkey_viol_cntr;
resp.pkey_tbl_len = attr.pkey_tbl_len;
resp.lid = attr.lid;
resp.sm_lid = attr.sm_lid;
if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
} else {
resp.lid = (u16)attr.lid;
resp.sm_lid = (u16)attr.sm_lid;
}
resp.lmc = attr.lmc;
resp.max_vl_num = attr.max_vl_num;
resp.sm_sl = attr.sm_sl;
......@@ -1185,7 +1190,8 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
return ret ? ret : in_len;
}
static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
struct ib_wc *wc)
{
struct ib_uverbs_wc tmp;
......@@ -1199,7 +1205,10 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
tmp.pkey_index = wc->pkey_index;
tmp.slid = wc->slid;
if (rdma_cap_opa_ah(ib_dev, wc->port_num))
tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid);
else
tmp.slid = ib_slid_cpu16(wc->slid);
tmp.sl = wc->sl;
tmp.dlid_path_bits = wc->dlid_path_bits;
tmp.port_num = wc->port_num;
......@@ -1243,7 +1252,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (!ret)
break;
ret = copy_wc_to_user(data_ptr, &wc);
ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
if (ret)
goto out_put;
......
......@@ -33,10 +33,47 @@
#include <linux/export.h>
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
struct rdma_ah_attr *src)
#define OPA_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
static int rdma_ah_conv_opa_to_ib(struct ib_device *dev,
struct rdma_ah_attr *ib,
struct rdma_ah_attr *opa)
{
struct ib_port_attr port_attr;
int ret = 0;
/* Do structure copy and then over-write fields */
*ib = *opa;
ib->type = RDMA_AH_ATTR_TYPE_IB;
rdma_ah_set_grh(ib, NULL, 0, 0, 1, 0);
if (ib_query_port(dev, opa->port_num, &port_attr)) {
/* Set to default subnet to indicate error */
rdma_ah_set_subnet_prefix(ib, OPA_DEFAULT_GID_PREFIX);
ret = -EINVAL;
} else {
rdma_ah_set_subnet_prefix(ib,
cpu_to_be64(port_attr.subnet_prefix));
}
rdma_ah_set_interface_id(ib, OPA_MAKE_ID(rdma_ah_get_dlid(opa)));
return ret;
}
void ib_copy_ah_attr_to_user(struct ib_device *device,
struct ib_uverbs_ah_attr *dst,
struct rdma_ah_attr *ah_attr)
{
struct rdma_ah_attr *src = ah_attr;
struct rdma_ah_attr conv_ah;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
(rdma_ah_get_dlid(ah_attr) >=
be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr)))
src = &conv_ah;
dst->dlid = rdma_ah_get_dlid(src);
dst->sl = rdma_ah_get_sl(src);
dst->src_path_bits = rdma_ah_get_path_bits(src);
......@@ -57,7 +94,8 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
void ib_copy_qp_attr_to_user(struct ib_device *device,
struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->qp_state = src->qp_state;
......@@ -76,8 +114,8 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->max_recv_sge = src->cap.max_recv_sge;
dst->max_inline_data = src->cap.max_inline_data;
ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
ib_copy_ah_attr_to_user(device, &dst->alt_ah_attr, &src->alt_ah_attr);
dst->pkey_index = src->pkey_index;
dst->alt_pkey_index = src->alt_pkey_index;
......
......@@ -4216,7 +4216,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
const struct ib_wc *in_wc)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u16 slid = in_wc->slid;
u16 slid = ib_slid_cpu16(in_wc->slid);
u16 pkey;
if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
......
......@@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev,
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);
guid_info_rec.lid = cpu_to_be16((u16)attr.lid);
guid_info_rec.block_num = index;
memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
......
......@@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
op_modifier |= 0x4;
in_modifier |= in_wc->slid << 16;
in_modifier |= ib_slid_cpu16(in_wc->slid) << 16;
}
err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
......@@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
} else {
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid);
}
ib_dma_sync_single_for_device(&dev->ib_dev,
......@@ -826,7 +826,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
}
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
forward_trap(to_mdev(ibdev), port_num, in_mad);
......@@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
prev_lid = pattr.lid;
prev_lid = (u16)pattr.lid;
err = mlx4_MAD_IFC(to_mdev(ibdev),
(mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
......
......@@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 slid;
int err;
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
......
......@@ -3816,6 +3816,14 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev,
int comp_vector)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
return mlx5_get_vector_affinity(dev->mdev, comp_vector);
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
......@@ -3946,6 +3954,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
......
......@@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET);
MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET);
MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
if (in_grh)
......@@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
op_modifier |= 0x4;
in_modifier |= in_wc->slid << 16;
in_modifier |= ib_slid_cpu16(in_wc->slid) << 16;
}
err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
......
......@@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev,
u16 *out_mad_pkey_index)
{
int err;
u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
u16 slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
u16 prev_lid = 0;
struct ib_port_attr pattr;
const struct ib_mad *in_mad = (const struct ib_mad *)in;
......@@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev,
in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
prev_lid = pattr.lid;
prev_lid = (u16)pattr.lid;
err = mthca_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
......
......@@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
wc->uqueue[head].pkey_index = entry->pkey_index;
wc->uqueue[head].slid = entry->slid;
wc->uqueue[head].slid = ib_slid_cpu16(entry->slid);
wc->uqueue[head].sl = entry->sl;
wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
wc->uqueue[head].port_num = entry->port_num;
......
......@@ -366,7 +366,7 @@ struct ipoib_dev_priv {
u32 qkey;
union ib_gid local_gid;
u16 local_lid;
u32 local_lid;
unsigned int admin_mtu;
unsigned int mcast_mtu;
......
......@@ -328,8 +328,8 @@ struct srpt_port {
u8 port_guid[24];
u8 port_gid[64];
u8 port;
u16 sm_lid;
u16 lid;
u32 sm_lid;
u32 lid;
union ib_gid gid;
struct work_struct work;
struct se_portal_group port_guid_tpg;
......
......@@ -587,7 +587,6 @@ struct mlx5e_channel {
struct mlx5_core_dev *mdev;
struct mlx5e_tstamp *tstamp;
int ix;
int cpu;
};
struct mlx5e_channels {
......
......@@ -71,6 +71,11 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};
static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
{
return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
}
static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_GEN(mdev, striding_rq) &&
......@@ -397,7 +402,7 @@ static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
}
static inline int mlx5e_get_wqe_mtt_sz(void)
......@@ -444,16 +449,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
int mtt_sz = mlx5e_get_wqe_mtt_sz();
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
int node = mlx5e_get_node(c->priv, c->ix);
int i;
rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
GFP_KERNEL, cpu_to_node(c->cpu));
GFP_KERNEL, node);
if (!rq->mpwqe.info)
goto err_out;
/* We allocate more than mtt_sz as we will align the pointer */
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
cpu_to_node(c->cpu));
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
GFP_KERNEL, node);
if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;
......@@ -561,7 +567,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
int err;
int i;
rqp->wq.db_numa_node = cpu_to_node(c->cpu);
rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);
......@@ -628,7 +634,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
default: /* MLX5_WQ_TYPE_LINKED_LIST */
rq->wqe.frag_info =
kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
GFP_KERNEL, cpu_to_node(c->cpu));
GFP_KERNEL,
mlx5e_get_node(c->priv, c->ix));
if (!rq->wqe.frag_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;
......@@ -993,13 +1000,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
if (err)
goto err_sq_wq_destroy;
......@@ -1047,13 +1054,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
if (err)
goto err_sq_wq_destroy;
......@@ -1119,13 +1126,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
if (err)
goto err_sq_wq_destroy;
......@@ -1497,8 +1504,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->priv->mdev;
int err;
param->wq.buf_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->eq_ix = c->ix;
err = mlx5e_alloc_cq_common(mdev, param, cq);
......@@ -1597,11 +1604,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
{
return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
}
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
......@@ -1750,11 +1752,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
{
struct mlx5e_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
int err;
c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
if (!c)
return -ENOMEM;
......@@ -1762,7 +1763,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->mdev = priv->mdev;
c->tstamp = &priv->tstamp;
c->ix = ix;
c->cpu = cpu;
c->pdev = &priv->mdev->pdev->dev;
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
......@@ -1848,7 +1848,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
netif_set_xps_queue(c->netdev,
mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
......@@ -3793,18 +3794,8 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
u32 *indirection_rqt, int len,
int num_channels)
{
int node = mdev->priv.numa_node;
int node_num_of_cores;
int i;
if (node == -1)
node = first_online_node;
node_num_of_cores = cpumask_weight(cpumask_of_node(node));
if (node_num_of_cores)
num_channels = min_t(int, num_channels, node_num_of_cores);
for (i = 0; i < len; i++)
indirection_rqt[i] = i % num_channels;
}
......
......@@ -604,7 +604,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
name, pci_name(dev->pdev));
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = priv->msix_arr[vecidx].vector;
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
err = request_irq(eq->irqn, handler, 0,
......@@ -639,7 +639,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
return 0;
err_irq:
free_irq(priv->msix_arr[vecidx].vector, eq);
free_irq(eq->irqn, eq);
err_eq:
mlx5_cmd_destroy_eq(dev, eq->eqn);
......@@ -680,11 +680,6 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
}
EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx)
{
return dev->priv.msix_arr[MLX5_EQ_VEC_ASYNC].vector;
}
int mlx5_eq_init(struct mlx5_core_dev *dev)
{
int err;
......
......@@ -1585,7 +1585,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
/* Mark this vport as disabled to discard new events */
vport->enabled = false;
synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
......
......@@ -81,7 +81,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
u64 vector;
/* wait for pending handlers to complete */
synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
if (!vector)
......
......@@ -313,13 +313,15 @@ static void release_bar(struct pci_dev *pdev)
pci_release_regions(pdev);
}
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_eq_table *table = &priv->eq_table;
struct irq_affinity irqdesc = {
.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
};
int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
int nvec;
int i;
nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
MLX5_EQ_VEC_COMP_BASE;
......@@ -327,17 +329,14 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev)
if (nvec <= MLX5_EQ_VEC_COMP_BASE)
return -ENOMEM;
priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL);
priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
if (!priv->msix_arr || !priv->irq_info)
if (!priv->irq_info)
goto err_free_msix;
for (i = 0; i < nvec; i++)
priv->msix_arr[i].entry = i;
nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
MLX5_EQ_VEC_COMP_BASE + 1, nvec);
nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
MLX5_EQ_VEC_COMP_BASE + 1, nvec,
PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
&irqdesc);
if (nvec < 0)
return nvec;
......@@ -347,17 +346,15 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev)
err_free_msix:
kfree(priv->irq_info);
kfree(priv->msix_arr);
return -ENOMEM;
}
static void mlx5_disable_msix(struct mlx5_core_dev *dev)
static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
pci_disable_msix(dev->pdev);
pci_free_irq_vectors(dev->pdev);
kfree(priv->irq_info);
kfree(priv->msix_arr);
}
struct mlx5_reg_host_endianness {
......@@ -625,65 +622,6 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
return (u64)timer_l | (u64)timer_h1 << 32;
}
static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
struct msix_entry *msix = priv->msix_arr;
int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
return -ENOMEM;
}
cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
priv->irq_info[i].mask);
if (IS_ENABLED(CONFIG_SMP) &&
irq_set_affinity_hint(irq, priv->irq_info[i].mask))
mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
return 0;
}
static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
struct msix_entry *msix = priv->msix_arr;
int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
irq_set_affinity_hint(irq, NULL);
free_cpumask_var(priv->irq_info[i].mask);
}
static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
{
int err;
int i;
for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
err = mlx5_irq_set_affinity_hint(mdev, i);
if (err)
goto err_out;
}
return 0;
err_out:
for (i--; i >= 0; i--)
mlx5_irq_clear_affinity_hint(mdev, i);
return err;
}
static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
{
int i;
for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
mlx5_irq_clear_affinity_hint(mdev, i);
}
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn)
{
......@@ -773,8 +711,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
}
#ifdef CONFIG_RFS_ACCEL
irq_cpu_rmap_add(dev->rmap,
dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector);
irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
MLX5_EQ_VEC_COMP_BASE + i));
#endif
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
err = mlx5_create_map_eq(dev, eq,
......@@ -1132,9 +1070,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_stop_poll;
}
err = mlx5_enable_msix(dev);
err = mlx5_alloc_irq_vectors(dev);
if (err) {
dev_err(&pdev->dev, "enable msix failed\n");
dev_err(&pdev->dev, "alloc irq vectors failed\n");
goto err_cleanup_once;
}
......@@ -1156,12 +1094,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_stop_eqs;
}
err = mlx5_irq_set_affinity_hints(dev);
if (err) {
dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
goto err_affinity_hints;
}
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
......@@ -1227,9 +1159,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_cleanup_fs(dev);
err_fs:
mlx5_irq_clear_affinity_hints(dev);
err_affinity_hints:
free_comp_eqs(dev);
err_stop_eqs:
......@@ -1239,7 +1168,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_put_uars_page(dev, priv->uar);
err_disable_msix:
mlx5_disable_msix(dev);
mlx5_free_irq_vectors(dev);
err_cleanup_once:
if (boot)
......@@ -1302,11 +1231,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_eswitch_detach(dev->priv.eswitch);
#endif
mlx5_cleanup_fs(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
mlx5_put_uars_page(dev, priv->uar);
mlx5_disable_msix(dev);
mlx5_free_irq_vectors(dev);
if (cleanup)
mlx5_cleanup_once(dev);
mlx5_stop_health_poll(dev);
......
......@@ -110,7 +110,6 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
void mlx5_cq_tasklet_cb(unsigned long data);
......
......@@ -19,6 +19,7 @@
#include <linux/string.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-rdma.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
......@@ -463,14 +464,10 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
ibdev = queue->device->dev;
/*
* The admin queue is barely used once the controller is live, so don't
* bother to spread it out.
* Spread I/O queue completion vectors according to their queue index.
* Admin queues can always go on completion vector 0.
*/
if (idx == 0)
comp_vector = 0;
else
comp_vector = idx % ibdev->num_comp_vectors;
comp_vector = idx == 0 ? idx : idx - 1;
/* +1 for ib_stop_cq */
queue->ib_cq = ib_alloc_cq(ibdev, queue,
......@@ -611,10 +608,20 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
struct ib_device *ibdev = ctrl->device->dev;
unsigned int nr_io_queues;
int i, ret;
nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
/*
* we map queues according to the device irq vectors for
* optimal locality so we don't need more queues than
* completion vectors.
*/
nr_io_queues = min_t(unsigned int, nr_io_queues,
ibdev->num_comp_vectors);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
return ret;
......@@ -1498,6 +1505,13 @@ static void nvme_rdma_complete_rq(struct request *rq)
nvme_complete_rq(rq);
}
static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = set->driver_data;
return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
}
static const struct blk_mq_ops nvme_rdma_mq_ops = {
.queue_rq = nvme_rdma_queue_rq,
.complete = nvme_rdma_complete_rq,
......@@ -1507,6 +1521,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
.init_hctx = nvme_rdma_init_hctx,
.poll = nvme_rdma_poll,
.timeout = nvme_rdma_timeout,
.map_queues = nvme_rdma_map_queues,
};
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
......
#ifndef _LINUX_BLK_MQ_RDMA_H
#define _LINUX_BLK_MQ_RDMA_H
struct blk_mq_tag_set;
struct ib_device;
int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
struct ib_device *dev, int first_vec);
#endif /* _LINUX_BLK_MQ_RDMA_H */
......@@ -545,7 +545,6 @@ struct mlx5_core_sriov {
};
struct mlx5_irq_info {
cpumask_var_t mask;
char name[MLX5_MAX_IRQ_NAME];
};
......@@ -608,7 +607,6 @@ struct mlx5_port_module_event_stats {
struct mlx5_priv {
char name[MLX5_MAX_NAME_LEN];
struct mlx5_eq_table eq_table;
struct msix_entry *msix_arr;
struct mlx5_irq_info *irq_info;
/* pages stuff */
......@@ -1189,4 +1187,10 @@ enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
static inline const struct cpumask *
mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector)
{
return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector);
}
#endif /* MLX5_DRIVER_H */
......@@ -38,10 +38,12 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_sa.h>
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
void ib_copy_qp_attr_to_user(struct ib_device *device,
struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src);
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
void ib_copy_ah_attr_to_user(struct ib_device *device,
struct ib_uverbs_ah_attr *dst,
struct rdma_ah_attr *src);
void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
......
......@@ -549,8 +549,8 @@ struct ib_port_attr {
u32 bad_pkey_cntr;
u32 qkey_viol_cntr;
u16 pkey_tbl_len;
u16 lid;
u16 sm_lid;
u32 sm_lid;
u32 lid;
u8 lmc;
u8 max_vl_num;
u8 sm_sl;
......@@ -951,7 +951,7 @@ struct ib_wc {
u32 src_qp;
int wc_flags;
u16 pkey_index;
u16 slid;
u32 slid;
u8 sl;
u8 dlid_path_bits;
u8 port_num; /* valid only for DR SMPs on switches */
......@@ -2306,6 +2306,8 @@ struct ib_device {
*/
int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len);
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
};
struct ib_client {
......@@ -3717,4 +3719,38 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
else
return RDMA_AH_ATTR_TYPE_IB;
}
/* Return slid in 16bit CPU encoding */
static inline u16 ib_slid_cpu16(u32 slid)
{
return (u16)slid;
}
/* Return slid in 16bit BE encoding */
static inline u16 ib_slid_be16(u32 slid)
{
return cpu_to_be16((u16)slid);
}
/**
* ib_get_vector_affinity - Get the affinity mappings of a given completion
* vector
* @device: the rdma device
* @comp_vector: index of completion vector
*
* Returns NULL if @comp_vector is out of range or if the device driver does
* not implement get_vector_affinity, otherwise a corresponding cpu map of
* the completion vector.
*/
static inline const struct cpumask *
ib_get_vector_affinity(struct ib_device *device, int comp_vector)
{
if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
!device->get_vector_affinity)
return NULL;
return device->get_vector_affinity(device, comp_vector);
}
#endif /* IB_VERBS_H */
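A provider opts into this mechanism by filling the new ib_device callback at registration time, as the mlx5 hunk earlier in this merge does; roughly:
	dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
Drivers that leave the pointer NULL make ib_get_vector_affinity() return NULL, which callers such as blk_mq_rdma_map_queues() treat as the cue to fall back to the naive blk_mq_map_queues() spread.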
......@@ -50,7 +50,8 @@
#define OPA_SPECIAL_OUI (0x00066AULL)
#define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x)))
#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \
? 0 : x)
/**
* ib_is_opa_gid: Returns true if the top 24 bits of the gid
* contains the OPA_STL_OUI identifier. This identifies that
......@@ -76,4 +77,22 @@ static inline u32 opa_get_lid_from_gid(union ib_gid *gid)
{
return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF;
}
/**
* opa_is_extended_lid: Returns true if dlid or slid are
* extended.
*
* @dlid: The DLID
* @slid: The SLID
*/
static inline bool opa_is_extended_lid(u32 dlid, u32 slid)
{
if ((be32_to_cpu(dlid) >=
be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
(be32_to_cpu(slid) >=
be16_to_cpu(IB_MULTICAST_LID_BASE)))
return true;
else
return false;
}
#endif /* OPA_ADDR_H */