Commit 0099baa8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Many bug fixes in several drivers:

   - Fix misuse of the DMA API in rtrs

   - Several irdma issues: hung task due to SQ flushing, incorrect
     capability reporting to userspace, improper error handling for MW
     corners, touching an uninitialized SGL for during invalidation.

   - hns was using the wrong page size limits for the HW, an incorrect
     calculation of wqe_shift causing WQE corruption, and mis computed a
     timer id.

   - Fix a crash in SRP triggered by blktests

   - Fix compiler errors by calling virt_to_page() with the proper type
     in siw

   - Userspace triggerable deadlock in ODP

   - mlx5 could use the wrong profile due to some driver loading races,
     counters were not working in some device configurations, and a
     crash on error unwind"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/irdma: Report RNR NAK generation in device caps
  RDMA/irdma: Use s/g array in post send only when its valid
  RDMA/irdma: Return correct WC error for bind operation failure
  RDMA/irdma: Return error on MR deregister CQP failure
  RDMA/irdma: Report the correct max cqes from query device
  MAINTAINERS: Update maintainers of HiSilicon RoCE
  RDMA/mlx5: Fix UMR cleanup on error flow of driver init
  RDMA/mlx5: Set local port to one when accessing counters
  RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile
  IB/core: Fix a nested dead lock as part of ODP flow
  RDMA/siw: Pass a pointer to virt_to_page()
  RDMA/srp: Set scmnd->result only when scmnd is not NULL
  RDMA/hns: Remove the num_qpc_timer variable
  RDMA/hns: Fix wrong fixed value of qp->rq.wqe_shift
  RDMA/hns: Fix supported page size
  RDMA/cma: Fix arguments order in net device validation
  RDMA/irdma: Fix drain SQ hang with no completion
  RDMA/rtrs-srv: Pass the correct number of entries for dma mapped SGL
  RDMA/rtrs-clt: Use the right sg_cnt after ib_dma_map_sg
parents b7e00d6f a261786f
......@@ -9208,8 +9208,8 @@ F: Documentation/ABI/testing/debugfs-hisi-zip
F: drivers/crypto/hisilicon/zip/
HISILICON ROCE DRIVER
M: Haoyue Xu <xuhaoyue1@hisilicon.com>
M: Wenpeng Liang <liangwenpeng@huawei.com>
M: Weihang Li <liweihang@huawei.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
......
......@@ -1841,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
(struct sockaddr *)&req->listen_addr_storage,
(struct sockaddr *)&req->src_addr_storage)) {
(struct sockaddr *)&req->src_addr_storage,
(struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
......
......@@ -462,7 +462,7 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
mutex_unlock(&umem_odp->umem_mutex);
out_put_mm:
mmput(owning_mm);
mmput_async(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
......
......@@ -730,7 +730,6 @@ struct hns_roce_caps {
u32 num_qps;
u32 num_pi_qps;
u32 reserved_qps;
int num_qpc_timer;
u32 num_srqs;
u32 max_wqes;
u32 max_srq_wrs;
......
......@@ -1977,7 +1977,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
caps->qpc_timer_bt_num = HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM;
caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
......@@ -2273,7 +2273,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
......
......@@ -36,11 +36,11 @@
#include <linux/bitops.h>
#define HNS_ROCE_V2_MAX_QP_NUM 0x1000
#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
#define HNS_ROCE_V2_MAX_SRQ_SGE 64
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM 0x100
#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
......@@ -83,7 +83,7 @@
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x0
#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
......
......@@ -725,7 +725,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
HEM_TYPE_QPC_TIMER,
hr_dev->caps.qpc_timer_entry_sz,
hr_dev->caps.num_qpc_timer, 1);
hr_dev->caps.qpc_timer_bt_num, 1);
if (ret) {
dev_err(dev,
"Failed to init QPC timer memory, aborting.\n");
......
......@@ -462,11 +462,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
else
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
hr_qp->rq.max_gs);
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
hr_qp->rq.max_gs);
hr_qp->rq.wqe_cnt = cnt;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
......
......@@ -497,7 +497,8 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
i = 0;
} else {
qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list,
qp->wqe_ops.iw_set_fragment(wqe, 0,
frag_cnt ? op_info->sg_list : NULL,
qp->swqe_polarity);
i = 1;
}
......@@ -1005,6 +1006,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
int ret_code;
bool move_cq_head = true;
u8 polarity;
u8 op_type;
bool ext_valid;
__le64 *ext_cqe;
......@@ -1187,7 +1189,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
do {
__le64 *sw_wqe;
u64 wqe_qword;
u8 op_type;
u32 tail;
tail = qp->sq_ring.tail;
......@@ -1204,6 +1205,8 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
break;
}
} while (1);
if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
info->minor_err = FLUSH_MW_BIND_ERR;
qp->sq_flush_seen = true;
if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
qp->sq_flush_complete = true;
......
......@@ -590,11 +590,14 @@ static int irdma_wait_event(struct irdma_pci_f *rf,
cqp_error = cqp_request->compl_info.error;
if (cqp_error) {
err_code = -EIO;
if (cqp_request->compl_info.maj_err_code == 0xFFFF &&
cqp_request->compl_info.min_err_code == 0x8029) {
if (!rf->reset) {
rf->reset = true;
rf->gen_ops.request_reset(rf);
if (cqp_request->compl_info.maj_err_code == 0xFFFF) {
if (cqp_request->compl_info.min_err_code == 0x8002)
err_code = -EBUSY;
else if (cqp_request->compl_info.min_err_code == 0x8029) {
if (!rf->reset) {
rf->reset = true;
rf->gen_ops.request_reset(rf);
}
}
}
}
......@@ -2598,7 +2601,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
irdma_comp_handler(iwqp->iwscq);
} else {
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
......
......@@ -39,15 +39,18 @@ static int irdma_query_device(struct ib_device *ibdev,
props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
props->max_cq = rf->max_cq - rf->used_cqs;
props->max_cqe = rf->max_cqe;
props->max_cqe = rf->max_cqe - 1;
props->max_mr = rf->max_mr - rf->used_mrs;
props->max_mw = props->max_mr;
props->max_pd = rf->max_pd - rf->used_pds;
props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
props->max_qp_rd_atom = hw_attrs->max_hw_ird;
props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
if (rdma_protocol_roce(ibdev, 1))
if (rdma_protocol_roce(ibdev, 1)) {
props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
props->max_pkeys = IRDMA_PKEY_TBL_SZ;
}
props->max_ah = rf->max_ah;
props->max_mcast_grp = rf->max_mcg;
props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
......@@ -3009,6 +3012,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
int status;
if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
if (iwmr->region) {
......@@ -3039,8 +3043,11 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
cqp_info->post_sq = 1;
cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
irdma_handle_cqp_op(iwdev->rf, cqp_request);
status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
if (status)
return status;
irdma_free_stag(iwdev, iwmr->stag);
done:
if (iwpbl->pbl_allocated)
......
......@@ -166,6 +166,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
mdev = dev->mdev;
mdev_port_num = 1;
}
if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
/* set local port to one for Function-Per-Port HCA. */
mdev = dev->mdev;
mdev_port_num = 1;
}
/* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
......
......@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
dev->mdev = mdev;
dev->num_ports = num_ports;
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
profile = &raw_eth_profile;
else
profile = &pf_profile;
......
......@@ -708,6 +708,7 @@ struct mlx5_ib_umr_context {
};
enum {
MLX5_UMR_STATE_UNINIT,
MLX5_UMR_STATE_ACTIVE,
MLX5_UMR_STATE_RECOVER,
MLX5_UMR_STATE_ERR,
......
......@@ -177,6 +177,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
sema_init(&dev->umrc.sem, MAX_UMR_WR);
mutex_init(&dev->umrc.lock);
dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
return 0;
......@@ -191,6 +192,8 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
return;
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
ib_dealloc_pd(dev->umrc.pd);
......
......@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
if (paddr)
return virt_to_page(paddr);
return virt_to_page((void *)paddr);
return NULL;
}
......@@ -533,13 +533,23 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
kunmap_local(kaddr);
}
} else {
u64 va = sge->laddr + sge_off;
/*
* Cast to an uintptr_t to preserve all 64 bits
* in sge->laddr.
*/
uintptr_t va = (uintptr_t)(sge->laddr + sge_off);
page_array[seg] = virt_to_page(va & PAGE_MASK);
/*
* virt_to_page() takes a (void *) pointer
* so cast to a (void *) meaning it will be 64
* bits on a 64 bit platform and 32 bits on a
* 32 bit platform.
*/
page_array[seg] = virt_to_page((void *)(va & PAGE_MASK));
if (do_crc)
crypto_shash_update(
c_tx->mpa_crc_hd,
(void *)(uintptr_t)va,
(void *)va,
plen);
}
......
......@@ -1004,7 +1004,8 @@ rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path,
static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
struct rtrs_clt_io_req *req,
struct rtrs_rbuf *rbuf, bool fr_en,
u32 size, u32 imm, struct ib_send_wr *wr,
u32 count, u32 size, u32 imm,
struct ib_send_wr *wr,
struct ib_send_wr *tail)
{
struct rtrs_clt_path *clt_path = to_clt_path(con->c.path);
......@@ -1024,12 +1025,12 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con,
num_sge = 2;
ptail = tail;
} else {
for_each_sg(req->sglist, sg, req->sg_cnt, i) {
for_each_sg(req->sglist, sg, count, i) {
sge[i].addr = sg_dma_address(sg);
sge[i].length = sg_dma_len(sg);
sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey;
}
num_sge = 1 + req->sg_cnt;
num_sge = 1 + count;
}
sge[i].addr = req->iu->dma_addr;
sge[i].length = size;
......@@ -1142,7 +1143,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req)
*/
rtrs_clt_update_all_stats(req, WRITE);
ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en,
ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count,
req->usr_len + sizeof(*msg),
imm, wr, &inv_wr);
if (ret) {
......
......@@ -595,7 +595,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
struct sg_table *sgt = &srv_mr->sgt;
struct scatterlist *s;
struct ib_mr *mr;
int nr, chunks;
int nr, nr_sgt, chunks;
chunks = chunks_per_mr * mri;
if (!always_invalidate)
......@@ -610,19 +610,19 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
sg_set_page(s, srv->chunks[chunks + i],
max_chunk_size, 0);
nr = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl,
nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl,
sgt->nents, DMA_BIDIRECTIONAL);
if (nr < sgt->nents) {
err = nr < 0 ? nr : -EINVAL;
if (!nr_sgt) {
err = -EINVAL;
goto free_sg;
}
mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG,
sgt->nents);
nr_sgt);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto unmap_sg;
}
nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents,
nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
NULL, max_chunk_size);
if (nr < 0 || nr < sgt->nents) {
err = nr < 0 ? nr : -EINVAL;
......@@ -641,7 +641,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
}
}
/* Eventually dma addr for each chunk can be cached */
for_each_sg(sgt->sgl, s, sgt->orig_nents, i)
for_each_sg(sgt->sgl, s, nr_sgt, i)
srv_path->dma_addr[chunks + i] = sg_dma_address(s);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
......
......@@ -1961,7 +1961,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (scmnd) {
req = scsi_cmd_priv(scmnd);
scmnd = srp_claim_req(ch, req, NULL, scmnd);
} else {
}
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
"Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
rsp->tag, ch - target->ch, ch->qp->qp_num);
......
......@@ -494,6 +494,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
return err;
}
bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
union devlink_param_value val;
int err;
err = devlink_param_driverinit_value_get(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
&val);
if (!err)
return val.vbool;
mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
return MLX5_CAP_GEN(dev, roce);
}
EXPORT_SYMBOL(mlx5_is_roce_on);
static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
{
void *set_hca_cap;
......@@ -597,7 +615,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
if (MLX5_CAP_GEN(dev, roce_rw_supported))
MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
mlx5_is_roce_on(dev));
max_uc_list = max_uc_list_get_devlink_param(dev);
if (max_uc_list > 0)
......@@ -623,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
*/
static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
{
return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
}
......
......@@ -1280,16 +1280,17 @@ enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
bool mlx5_is_roce_on(struct mlx5_core_dev *dev);
static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
union devlink_param_value val;
int err;
err = devlink_param_driverinit_value_get(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
&val);
return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
if (MLX5_CAP_GEN(dev, roce_rw_supported))
return MLX5_CAP_GEN(dev, roce);
/* If RoCE cap is read-only in FW, get RoCE state from devlink
* in order to support RoCE enable/disable feature
*/
return mlx5_is_roce_on(dev);
}
#endif /* MLX5_DRIVER_H */
......@@ -1225,6 +1225,7 @@ void mmput_async(struct mm_struct *mm)
schedule_work(&mm->async_put_work);
}
}
EXPORT_SYMBOL_GPL(mmput_async);
#endif
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment