Commit 5eeb6335 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Still not much going on, the usual set of oops and driver fixes this
  time:

   - Fix two uapi breakage regressions in mlx5 drivers

   - Various oops fixes in hfi1, mlx4, umem, uverbs, and ipoib

   - A protocol bug fix for hfi1 preventing it from implementing the
     verbs API properly, and a compatability fix for EXEC STACK user
     programs

   - Fix missed refcounting in the 'advise_mr' patches merged this
     cycle.

   - Fix wrong use of the uABI in the hns SRQ patches merged this cycle"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/uverbs: Fix OOPs in uverbs_user_mmap_disassociate
  IB/ipoib: Fix for use-after-free in ipoib_cm_tx_start
  IB/uverbs: Fix ioctl query port to consider device disassociation
  RDMA/mlx5: Fix flow creation on representors
  IB/uverbs: Fix OOPs upon device disassociation
  RDMA/umem: Add missing initialization of owning_mm
  RDMA/hns: Update the kernel header file of hns
  IB/mlx5: Fix how advise_mr() launches async work
  RDMA/device: Expose ib_device_try_get(()
  IB/hfi1: Add limit test for RC/UC send via loopback
  IB/hfi1: Remove overly conservative VM_EXEC flag check
  IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM
  IB/mlx4: Fix using wrong function to destroy sqp AHs under SRIOV
  RDMA/mlx5: Fix check for supported user flags when creating a QP
parents 9ace868a 7b21b69a
......@@ -267,7 +267,6 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
void ib_device_put(struct ib_device *device);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
......
......@@ -156,19 +156,26 @@ struct ib_device *ib_device_get_by_index(u32 index)
down_read(&lists_rwsem);
device = __ib_device_get_by_index(index);
if (device) {
/* Do not return a device if unregistration has started. */
if (!refcount_inc_not_zero(&device->refcount))
if (!ib_device_try_get(device))
device = NULL;
}
up_read(&lists_rwsem);
return device;
}
/**
* ib_device_put - Release IB device reference
* @device: device whose reference to be released
*
* ib_device_put() releases reference to the IB device to allow it to be
* unregistered and eventually free.
*/
void ib_device_put(struct ib_device *device)
{
if (refcount_dec_and_test(&device->refcount))
complete(&device->unreg_completion);
}
EXPORT_SYMBOL(ib_device_put);
static struct ib_device *__ib_device_get_by_name(const char *name)
{
......@@ -303,7 +310,6 @@ struct ib_device *ib_alloc_device(size_t size)
rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
refcount_set(&device->refcount, 1);
init_completion(&device->unreg_completion);
return device;
......@@ -620,6 +626,7 @@ int ib_register_device(struct ib_device *device, const char *name,
goto cg_cleanup;
}
refcount_set(&device->refcount, 1);
device->reg_state = IB_DEV_REGISTERED;
list_for_each_entry(client, &client_list, list)
......
......@@ -352,6 +352,8 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
umem->writable = 1;
umem->is_odp = 1;
odp_data->per_mm = per_mm;
umem->owning_mm = per_mm->mm;
mmgrab(umem->owning_mm);
mutex_init(&odp_data->umem_mutex);
init_completion(&odp_data->notifier_completion);
......@@ -384,6 +386,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
out_page_list:
vfree(odp_data->page_list);
out_odp_data:
mmdrop(umem->owning_mm);
kfree(odp_data);
return ERR_PTR(ret);
}
......
......@@ -204,6 +204,9 @@ void ib_uverbs_release_file(struct kref *ref)
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
if (file->async_file)
kref_put(&file->async_file->ref,
ib_uverbs_release_async_event_file);
put_device(&file->device->dev);
kfree(file);
}
......@@ -964,11 +967,19 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
/* Get an arbitrary mm pointer that hasn't been cleaned yet */
mutex_lock(&ufile->umap_lock);
if (!list_empty(&ufile->umaps)) {
mm = list_first_entry(&ufile->umaps,
struct rdma_umap_priv, list)
->vma->vm_mm;
mmget(mm);
while (!list_empty(&ufile->umaps)) {
int ret;
priv = list_first_entry(&ufile->umaps,
struct rdma_umap_priv, list);
mm = priv->vma->vm_mm;
ret = mmget_not_zero(mm);
if (!ret) {
list_del_init(&priv->list);
mm = NULL;
continue;
}
break;
}
mutex_unlock(&ufile->umap_lock);
if (!mm)
......@@ -1096,10 +1107,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
list_del_init(&file->list);
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
kref_put(&file->async_file->ref,
ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
return 0;
......
......@@ -168,12 +168,18 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
struct uverbs_attr_bundle *attrs)
{
struct ib_device *ib_dev = attrs->ufile->device->ib_dev;
struct ib_device *ib_dev;
struct ib_port_attr attr = {};
struct ib_uverbs_query_port_resp_ex resp = {};
struct ib_ucontext *ucontext;
int ret;
u8 port_num;
ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
/* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
if (!ib_dev->ops.query_port)
return -EOPNOTSUPP;
......
......@@ -488,7 +488,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
if (flags & VM_WRITE) {
ret = -EPERM;
goto done;
}
......
......@@ -987,7 +987,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
......
......@@ -210,6 +210,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_ib_create_srq_resp resp = {};
struct hns_roce_srq *srq;
int srq_desc_size;
int srq_buf_size;
......@@ -378,16 +379,21 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
srq->event = hns_roce_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->srqn;
resp.srqn = srq->srqn;
if (udata) {
if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
if (ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)))) {
ret = -EFAULT;
goto err_wrid;
goto err_srqc_alloc;
}
}
return &srq->ibsrq;
err_srqc_alloc:
hns_roce_srq_free(hr_dev, srq);
err_wrid:
kvfree(srq->wrid);
......
......@@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
if (sqp->tx_ring[wire_tx_ix].ah)
rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
......@@ -1902,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
......@@ -1931,7 +1931,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
......
......@@ -630,8 +630,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE(
UVERBS_OBJECT_FLOW,
&mlx5_ib_fs,
UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
&mlx5_ib_fs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_actions),
{},
......
......@@ -1595,10 +1595,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
struct prefetch_mr_work *w =
container_of(work, struct prefetch_mr_work, work);
if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED)
if (ib_device_try_get(&w->dev->ib_dev)) {
mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list,
w->num_sge);
ib_device_put(&w->dev->ib_dev);
}
put_device(&w->dev->ib_dev.dev);
kfree(w);
}
......@@ -1617,15 +1619,13 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list,
num_sge);
if (dev->ib_dev.reg_state != IB_DEV_REGISTERED)
return -ENODEV;
work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
if (!work)
return -ENOMEM;
memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
get_device(&dev->ib_dev.dev);
work->dev = dev;
work->pf_flags = pf_flags;
work->num_sge = num_sge;
......
......@@ -1912,14 +1912,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
if (!check_flags_mask(ucmd.flags,
MLX5_QP_FLAG_SIGNATURE |
MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
MLX5_QP_FLAG_BFREG_INDEX |
MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE |
MLX5_QP_FLAG_SCATTER_CQE |
MLX5_QP_FLAG_SIGNATURE |
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
MLX5_QP_FLAG_TUNNEL_OFFLOADS |
MLX5_QP_FLAG_BFREG_INDEX |
MLX5_QP_FLAG_TYPE_DCT |
MLX5_QP_FLAG_TYPE_DCI |
MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
MLX5_QP_FLAG_TYPE_DCT))
return -EINVAL;
err = get_qp_user_index(to_mucontext(pd->uobject->context),
......
......@@ -512,7 +512,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
......
......@@ -2910,6 +2910,8 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
goto op_err;
if (!ret)
goto rnr_nak;
if (wqe->length > qp->r_len)
goto inv_err;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
......@@ -3078,7 +3080,10 @@ void rvt_ruc_loopback(struct rvt_qp *sqp)
goto err;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
send_status =
sqp->ibqp.qp_type == IB_QPT_RC ?
IB_WC_REM_INV_REQ_ERR :
IB_WC_SUCCESS;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
......
......@@ -248,7 +248,6 @@ struct ipoib_cm_tx {
struct list_head list;
struct net_device *dev;
struct ipoib_neigh *neigh;
struct ipoib_path *path;
struct ipoib_tx_buf *tx_ring;
unsigned int tx_head;
unsigned int tx_tail;
......
......@@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
neigh->cm = tx;
tx->neigh = neigh;
tx->path = path;
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
......@@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
neigh->daddr + QPN_AND_OPTIONS_OFFSET);
goto free_neigh;
}
memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
......
......@@ -2579,9 +2579,10 @@ struct ib_device {
const struct uapi_definition *driver_def;
enum rdma_driver_id driver_id;
/*
* Provides synchronization between device unregistration and netlink
* commands on a device. To be used only by core.
* Positive refcount indicates that the device is currently
* registered and cannot be unregistered.
*/
refcount_t refcount;
struct completion unreg_completion;
......@@ -3926,6 +3927,25 @@ static inline bool ib_access_writable(int access_flags)
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
/**
* ib_device_try_get: Hold a registration lock
* device: The device to lock
*
* A device under an active registration lock cannot become unregistered. It
* is only possible to obtain a registration lock on a device that is fully
* registered, otherwise this function returns false.
*
* The registration lock is only necessary for actions which require the
* device to still be registered. Uses that only require the device pointer to
* be valid should use get_device(&ibdev->dev) to hold the memory.
*
*/
static inline bool ib_device_try_get(struct ib_device *dev)
{
return refcount_inc_not_zero(&dev->refcount);
}
void ib_device_put(struct ib_device *device);
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
......
......@@ -52,6 +52,11 @@ struct hns_roce_ib_create_srq {
__aligned_u64 que_addr;
};
struct hns_roce_ib_create_srq_resp {
__u32 srqn;
__u32 reserved;
};
struct hns_roce_ib_create_qp {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment