Commit e29bff46 authored by Doug Ledford

Merge branch 'k.o/for-4.6-rc' into testing/4.6

parents d53e181c e6bd18f5
...@@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi* ...@@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi*
ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
M: Or Gerlitz <ogerlitz@mellanox.com> M: Or Gerlitz <ogerlitz@mellanox.com>
M: Sagi Grimberg <sagig@mellanox.com> M: Sagi Grimberg <sagi@grimberg.me>
M: Roi Dayan <roid@mellanox.com> M: Roi Dayan <roid@mellanox.com>
L: linux-rdma@vger.kernel.org L: linux-rdma@vger.kernel.org
S: Supported S: Supported
...@@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/ ...@@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/ulp/iser/ F: drivers/infiniband/ulp/iser/
ISCSI EXTENSIONS FOR RDMA (ISER) TARGET ISCSI EXTENSIONS FOR RDMA (ISER) TARGET
M: Sagi Grimberg <sagig@mellanox.com> M: Sagi Grimberg <sagi@grimberg.me>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
L: linux-rdma@vger.kernel.org L: linux-rdma@vger.kernel.org
L: target-devel@vger.kernel.org L: target-devel@vger.kernel.org
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h> #include <rdma/ib_cm.h>
#include <rdma/ib_user_cm.h> #include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h> #include <rdma/ib_marshall.h>
...@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, ...@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
struct ib_ucm_cmd_hdr hdr; struct ib_ucm_cmd_hdr hdr;
ssize_t result; ssize_t result;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (len < sizeof(hdr)) if (len < sizeof(hdr))
return -EINVAL; return -EINVAL;
......
...@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, ...@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
struct rdma_ucm_cmd_hdr hdr; struct rdma_ucm_cmd_hdr hdr;
ssize_t ret; ssize_t ret;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (len < sizeof(hdr)) if (len < sizeof(hdr))
return -EINVAL; return -EINVAL;
......
...@@ -48,6 +48,8 @@ ...@@ -48,6 +48,8 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <rdma/ib.h>
#include "uverbs.h" #include "uverbs.h"
MODULE_AUTHOR("Roland Dreier"); MODULE_AUTHOR("Roland Dreier");
...@@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, ...@@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
int srcu_key; int srcu_key;
ssize_t ret; ssize_t ret;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (count < sizeof hdr) if (count < sizeof hdr)
return -EINVAL; return -EINVAL;
......
...@@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq); ...@@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq);
void ib_drain_qp(struct ib_qp *qp) void ib_drain_qp(struct ib_qp *qp)
{ {
ib_drain_sq(qp); ib_drain_sq(qp);
if (!qp->srq)
ib_drain_rq(qp); ib_drain_rq(qp);
} }
EXPORT_SYMBOL(ib_drain_qp); EXPORT_SYMBOL(ib_drain_qp);
...@@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev) ...@@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp; dev->ibdev.iwcm->get_qp = iwch_get_qp;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL); ret = ib_register_device(&dev->ibdev, NULL);
if (ret) if (ret)
......
...@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, ...@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid, &cq->bar2_qid,
user ? &cq->bar2_pa : NULL); user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_va) { if (user && !cq->bar2_pa) {
pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), cq->cqid); pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL; ret = -EINVAL;
......
...@@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev) ...@@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp; dev->ibdev.iwcm->get_qp = c4iw_get_qp;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL); ret = ib_register_device(&dev->ibdev, NULL);
if (ret) if (ret)
......
...@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, ...@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
if (pbar2_pa) if (pbar2_pa)
*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
if (is_t4(rdev->lldi.adapter_type))
return NULL;
return rdev->bar2_kva + bar2_qoffset; return rdev->bar2_kva + bar2_qoffset;
} }
...@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, ...@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
/* /*
* User mode must have bar2 access. * User mode must have bar2 access.
*/ */
if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma; goto free_dma;
...@@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void c4iw_drain_sq(struct ib_qp *ibqp) void c4iw_drain_sq(struct ib_qp *ibqp)
{ {
struct c4iw_qp *qp = to_c4iw_qp(ibqp); struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_sq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
if (need_to_wait)
wait_for_completion(&qp->sq_drained); wait_for_completion(&qp->sq_drained);
} }
void c4iw_drain_rq(struct ib_qp *ibqp) void c4iw_drain_rq(struct ib_qp *ibqp)
{ {
struct c4iw_qp *qp = to_c4iw_qp(ibqp); struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_rq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
if (need_to_wait)
wait_for_completion(&qp->rq_drained); wait_for_completion(&qp->rq_drained);
} }
...@@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, ...@@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg); sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge; props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
......
...@@ -45,6 +45,8 @@ ...@@ -45,6 +45,8 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <rdma/ib.h>
#include "qib.h" #include "qib.h"
#include "qib_common.h" #include "qib_common.h"
#include "qib_user_sdma.h" #include "qib_user_sdma.h"
...@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ...@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ssize_t ret = 0; ssize_t ret = 0;
void *dest; void *dest;
if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
return -EACCES;
if (count < sizeof(cmd.type)) { if (count < sizeof(cmd.type)) {
ret = -EINVAL; ret = -EINVAL;
goto bail; goto bail;
......
...@@ -1637,9 +1637,9 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ...@@ -1637,9 +1637,9 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_unlock_irqrestore(&qp->s_hlock, flags); spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) { if (nreq) {
if (call_send) if (call_send)
rdi->driver_f.schedule_send_no_lock(qp);
else
rdi->driver_f.do_send(qp); rdi->driver_f.do_send(qp);
else
rdi->driver_f.schedule_send_no_lock(qp);
} }
return err; return err;
} }
......
...@@ -3,4 +3,4 @@ July, 2015 ...@@ -3,4 +3,4 @@ July, 2015
- Remove unneeded file entries in sysfs - Remove unneeded file entries in sysfs
- Remove software processing of IB protocol and place in library for use - Remove software processing of IB protocol and place in library for use
by qib, ipath (if still present), hfi1, and eventually soft-roce by qib, ipath (if still present), hfi1, and eventually soft-roce
- Replace incorrect uAPI
...@@ -49,6 +49,8 @@ ...@@ -49,6 +49,8 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/io.h> #include <linux/io.h>
#include <rdma/ib.h>
#include "hfi.h" #include "hfi.h"
#include "pio.h" #include "pio.h"
#include "device.h" #include "device.h"
...@@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, ...@@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
int uctxt_required = 1; int uctxt_required = 1;
int must_be_root = 0; int must_be_root = 0;
/* FIXME: This interface cannot continue out of staging */
if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
return -EACCES;
if (count < sizeof(cmd)) { if (count < sizeof(cmd)) {
ret = -EINVAL; ret = -EINVAL;
goto bail; goto bail;
...@@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) ...@@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
spin_unlock_irqrestore(&dd->uctxt_lock, flags); spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL; dd->rcd[uctxt->ctxt] = NULL;
hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
uctxt->rcvwait_to = 0; uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0; uctxt->piowait_to = 0;
uctxt->rcvnowait = 0; uctxt->rcvnowait = 0;
uctxt->pionowait = 0; uctxt->pionowait = 0;
uctxt->event_flags = 0; uctxt->event_flags = 0;
hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
hfi1_stats.sps_ctxts--; hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts) if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd); aspm_enable_all(dd);
...@@ -1127,27 +1134,13 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) ...@@ -1127,27 +1134,13 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
static int user_init(struct file *fp) static int user_init(struct file *fp)
{ {
int ret;
unsigned int rcvctrl_ops = 0; unsigned int rcvctrl_ops = 0;
struct hfi1_filedata *fd = fp->private_data; struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_ctxtdata *uctxt = fd->uctxt;
/* make sure that the context has already been setup */ /* make sure that the context has already been setup */
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) { if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
ret = -EFAULT; return -EFAULT;
goto done;
}
/*
* Subctxts don't need to initialize anything since master
* has done it.
*/
if (fd->subctxt) {
ret = wait_event_interruptible(uctxt->wait, !test_bit(
HFI1_CTXT_MASTER_UNINIT,
&uctxt->event_flags));
goto expected;
}
/* initialize poll variables... */ /* initialize poll variables... */
uctxt->urgent = 0; uctxt->urgent = 0;
...@@ -1202,19 +1195,7 @@ static int user_init(struct file *fp) ...@@ -1202,19 +1195,7 @@ static int user_init(struct file *fp)
wake_up(&uctxt->wait); wake_up(&uctxt->wait);
} }
expected: return 0;
/*
* Expected receive has to be setup for all processes (including
* shared contexts). However, it has to be done after the master
* context has been fully configured as it depends on the
* eager/expected split of the RcvArray entries.
* Setting it up here ensures that the subcontexts will be waiting
* (due to the above wait_event_interruptible() until the master
* is setup.
*/
ret = hfi1_user_exp_rcv_init(fp);
done:
return ret;
} }
static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
...@@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp) ...@@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp)
int ret = 0; int ret = 0;
/* /*
* Context should be set up only once (including allocation and * Context should be set up only once, including allocation and
* programming of eager buffers. This is done if context sharing * programming of eager buffers. This is done if context sharing
* is not requested or by the master process. * is not requested or by the master process.
*/ */
...@@ -1282,8 +1263,27 @@ static int setup_ctxt(struct file *fp) ...@@ -1282,8 +1263,27 @@ static int setup_ctxt(struct file *fp)
if (ret) if (ret)
goto done; goto done;
} }
} else {
ret = wait_event_interruptible(uctxt->wait, !test_bit(
HFI1_CTXT_MASTER_UNINIT,
&uctxt->event_flags));
if (ret)
goto done;
} }
ret = hfi1_user_sdma_alloc_queues(uctxt, fp); ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
if (ret)
goto done;
/*
* Expected receive has to be setup for all processes (including
* shared contexts). However, it has to be done after the master
* context has been fully configured as it depends on the
* eager/expected split of the RcvArray entries.
* Setting it up here ensures that the subcontexts will be waiting
* (due to the above wait_event_interruptible()) until the master
* is set up.
*/
ret = hfi1_user_exp_rcv_init(fp);
if (ret) if (ret)
goto done; goto done;
...@@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) ...@@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
{ {
struct hfi1_devdata *dd = filp->private_data; struct hfi1_devdata *dd = filp->private_data;
switch (whence) { return fixed_size_llseek(filp, offset, whence,
case SEEK_SET: (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
break;
case SEEK_CUR:
offset += filp->f_pos;
break;
case SEEK_END:
offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) -
offset;
break;
default:
return -EINVAL;
}
if (offset < 0)
return -EINVAL;
if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE)
return -EINVAL;
filp->f_pos = offset;
return filp->f_pos;
} }
/* NOTE: assumes unsigned long is 8 bytes */ /* NOTE: assumes unsigned long is 8 bytes */
......
...@@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *, ...@@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *, struct mm_struct *,
unsigned long, unsigned long); unsigned long, unsigned long);
static void mmu_notifier_mem_invalidate(struct mmu_notifier *, static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
struct mm_struct *,
unsigned long, unsigned long); unsigned long, unsigned long);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long); unsigned long, unsigned long);
...@@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) ...@@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
rbnode = rb_entry(node, struct mmu_rb_node, node); rbnode = rb_entry(node, struct mmu_rb_node, node);
rb_erase(node, root); rb_erase(node, root);
if (handler->ops->remove) if (handler->ops->remove)
handler->ops->remove(root, rbnode, false); handler->ops->remove(root, rbnode, NULL);
} }
} }
...@@ -176,7 +177,7 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) ...@@ -176,7 +177,7 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
return ret; return ret;
} }
/* Caller must host handler lock */ /* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long addr,
unsigned long len) unsigned long len)
...@@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, ...@@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node; return node;
} }
/* Caller must *not* hold handler lock. */
static void __mmu_rb_remove(struct mmu_rb_handler *handler, static void __mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *node, bool arg) struct mmu_rb_node *node, struct mm_struct *mm)
{ {
unsigned long flags;
/* Validity of handler and node pointers has been checked by caller. */ /* Validity of handler and node pointers has been checked by caller. */
hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
node->len); node->len);
spin_lock_irqsave(&handler->lock, flags);
__mmu_int_rb_remove(node, handler->root); __mmu_int_rb_remove(node, handler->root);
spin_unlock_irqrestore(&handler->lock, flags);
if (handler->ops->remove) if (handler->ops->remove)
handler->ops->remove(handler->root, node, arg); handler->ops->remove(handler->root, node, mm);
} }
struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
...@@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, ...@@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{ {
struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_handler *handler = find_mmu_handler(root);
unsigned long flags;
if (!handler || !node) if (!handler || !node)
return; return;
spin_lock_irqsave(&handler->lock, flags); __mmu_rb_remove(handler, node, NULL);
__mmu_rb_remove(handler, node, false);
spin_unlock_irqrestore(&handler->lock, flags);
} }
static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
...@@ -260,7 +264,7 @@ static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) ...@@ -260,7 +264,7 @@ static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
static inline void mmu_notifier_page(struct mmu_notifier *mn, static inline void mmu_notifier_page(struct mmu_notifier *mn,
struct mm_struct *mm, unsigned long addr) struct mm_struct *mm, unsigned long addr)
{ {
mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE); mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
} }
static inline void mmu_notifier_range_start(struct mmu_notifier *mn, static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
...@@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn, ...@@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
unsigned long start, unsigned long start,
unsigned long end) unsigned long end)
{ {
mmu_notifier_mem_invalidate(mn, start, end); mmu_notifier_mem_invalidate(mn, mm, start, end);
} }
static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end) unsigned long start, unsigned long end)
{ {
struct mmu_rb_handler *handler = struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn); container_of(mn, struct mmu_rb_handler, mn);
struct rb_root *root = handler->root; struct rb_root *root = handler->root;
struct mmu_rb_node *node; struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&handler->lock, flags); spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, start, end - 1); node; for (node = __mmu_int_rb_iter_first(root, start, end - 1);
node = __mmu_int_rb_iter_next(node, start, end - 1)) { node; node = ptr) {
/* Guard against node removal. */
ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len); node->addr, node->len);
if (handler->ops->invalidate(root, node)) if (handler->ops->invalidate(root, node)) {
__mmu_rb_remove(handler, node, true); spin_unlock_irqrestore(&handler->lock, flags);
__mmu_rb_remove(handler, node, mm);
spin_lock_irqsave(&handler->lock, flags);
}
} }
spin_unlock_irqrestore(&handler->lock, flags); spin_unlock_irqrestore(&handler->lock, flags);
} }
...@@ -59,7 +59,8 @@ struct mmu_rb_node { ...@@ -59,7 +59,8 @@ struct mmu_rb_node {
struct mmu_rb_ops { struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
int (*insert)(struct rb_root *, struct mmu_rb_node *); int (*insert)(struct rb_root *, struct mmu_rb_node *);
void (*remove)(struct rb_root *, struct mmu_rb_node *, bool); void (*remove)(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
int (*invalidate)(struct rb_root *, struct mmu_rb_node *); int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
}; };
......
...@@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait) ...@@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait)
* do the flush work until that QP's * do the flush work until that QP's
* sdma work has finished. * sdma work has finished.
*/ */
spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_WAIT_DMA) { if (qp->s_flags & RVT_S_WAIT_DMA) {
qp->s_flags &= ~RVT_S_WAIT_DMA; qp->s_flags &= ~RVT_S_WAIT_DMA;
hfi1_schedule_send(qp); hfi1_schedule_send(qp);
} }
spin_unlock(&qp->s_lock);
} }
/** /**
......
...@@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); ...@@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32, static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned); struct tid_group *, struct page **, unsigned);
static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *, static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **, struct tid_pageset *, unsigned, u16, struct page **,
...@@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) ...@@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr; struct tid_group *grp, *gptr;
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
return 0;
/* /*
* The notifier would have been removed when the process's mm * The notifier would have been removed when the process's mm
* was freed. * was freed.
...@@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, ...@@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
if (!node || node->rcventry != (uctxt->expected_base + rcventry)) if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF; return -EBADF;
if (HFI1_CAP_IS_USET(TID_UNMAP)) if (HFI1_CAP_IS_USET(TID_UNMAP))
mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false); mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
else else
hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
...@@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, ...@@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
continue; continue;
if (HFI1_CAP_IS_USET(TID_UNMAP)) if (HFI1_CAP_IS_USET(TID_UNMAP))
mmu_rb_remove(&fd->tid_rb_root, mmu_rb_remove(&fd->tid_rb_root,
&node->mmu, false); &node->mmu, NULL);
else else
hfi1_mmu_rb_remove(&fd->tid_rb_root, hfi1_mmu_rb_remove(&fd->tid_rb_root,
&node->mmu); &node->mmu);
...@@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) ...@@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
} }
static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
bool notifier) struct mm_struct *mm)
{ {
struct hfi1_filedata *fdata = struct hfi1_filedata *fdata =
container_of(root, struct hfi1_filedata, tid_rb_root); container_of(root, struct hfi1_filedata, tid_rb_root);
......
...@@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *); ...@@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
static void user_sdma_free_request(struct user_sdma_request *, bool); static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *, static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *); struct user_sdma_iovec *);
static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned); static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
unsigned);
static int check_header_template(struct user_sdma_request *, static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32); struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *, static int set_txreq_header(struct user_sdma_request *,
...@@ -299,7 +300,8 @@ static int defer_packet_queue( ...@@ -299,7 +300,8 @@ static int defer_packet_queue(
static void activate_packet_queue(struct iowait *, int); static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = { static struct mmu_rb_ops sdma_rb_ops = {
...@@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req,
rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
(unsigned long)iovec->iov.iov_base, (unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len); iovec->iov.iov_len);
if (rb_node) if (rb_node && !IS_ERR(rb_node))
node = container_of(rb_node, struct sdma_mmu_node, rb); node = container_of(rb_node, struct sdma_mmu_node, rb);
else
rb_node = NULL;
if (!node) { if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL); node = kzalloc(sizeof(*node), GFP_KERNEL);
...@@ -1107,7 +1111,8 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1107,7 +1111,8 @@ static int pin_vector_pages(struct user_sdma_request *req,
goto bail; goto bail;
} }
if (pinned != npages) { if (pinned != npages) {
unpin_vector_pages(current->mm, pages, pinned); unpin_vector_pages(current->mm, pages, node->npages,
pinned);
ret = -EFAULT; ret = -EFAULT;
goto bail; goto bail;
} }
...@@ -1147,9 +1152,9 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1147,9 +1152,9 @@ static int pin_vector_pages(struct user_sdma_request *req,
} }
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
unsigned npages) unsigned start, unsigned npages)
{ {
hfi1_release_user_pages(mm, pages, npages, 0); hfi1_release_user_pages(mm, pages + start, npages, 0);
kfree(pages); kfree(pages);
} }
...@@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) ...@@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
&req->pq->sdma_rb_root, &req->pq->sdma_rb_root,
(unsigned long)req->iovs[i].iov.iov_base, (unsigned long)req->iovs[i].iov.iov_base,
req->iovs[i].iov.iov_len); req->iovs[i].iov.iov_len);
if (!mnode) if (!mnode || IS_ERR(mnode))
continue; continue;
node = container_of(mnode, struct sdma_mmu_node, rb); node = container_of(mnode, struct sdma_mmu_node, rb);
...@@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) ...@@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
} }
static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
bool notifier) struct mm_struct *mm)
{ {
struct sdma_mmu_node *node = struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb); container_of(mnode, struct sdma_mmu_node, rb);
...@@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, ...@@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
node->pq->n_locked -= node->npages; node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock); spin_unlock(&node->pq->evict_lock);
unpin_vector_pages(notifier ? NULL : current->mm, node->pages, /*
* If mm is set, we are being called by the MMU notifier and we
* should not pass a mm_struct to unpin_vector_pages(). This is to
* prevent a deadlock when hfi1_release_user_pages() attempts to
* take the mmap_sem, which the MMU notifier has already taken.
*/
unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
node->npages); node->npages);
/* /*
* If called by the MMU notifier, we have to adjust the pinned * If called by the MMU notifier, we have to adjust the pinned
* page count ourselves. * page count ourselves.
*/ */
if (notifier) if (mm)
current->mm->pinned_vm -= node->npages; mm->pinned_vm -= node->npages;
kfree(node); kfree(node);
} }
......
...@@ -392,6 +392,17 @@ enum { ...@@ -392,6 +392,17 @@ enum {
MLX5_CAP_OFF_CMDIF_CSUM = 46, MLX5_CAP_OFF_CMDIF_CSUM = 46,
}; };
enum {
/*
* Max wqe size for rdma read is 512 bytes, so this
* limits our max_sge_rd as the wqe needs to fit:
* - ctrl segment (16 bytes)
* - rdma segment (16 bytes)
* - scatter elements (16 bytes each)
*/
MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16
};
struct mlx5_inbox_hdr { struct mlx5_inbox_hdr {
__be16 opcode; __be16 opcode;
u8 rsvd[4]; u8 rsvd[4];
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define _RDMA_IB_H #define _RDMA_IB_H
#include <linux/types.h> #include <linux/types.h>
#include <linux/sched.h>
struct ib_addr { struct ib_addr {
union { union {
...@@ -86,4 +87,19 @@ struct sockaddr_ib { ...@@ -86,4 +87,19 @@ struct sockaddr_ib {
__u64 sib_scope_id; __u64 sib_scope_id;
}; };
/*
* The IB interfaces that use write() as bi-directional ioctl() are
* fundamentally unsafe, since there are lots of ways to trigger "write()"
* calls from various contexts with elevated privileges. That includes the
* traditional suid executable error message writes, but also various kernel
* interfaces that can write to file descriptors.
*
* This function provides protection for the legacy API by restricting the
* calling context.
*/
static inline bool ib_safe_file_access(struct file *filp)
{
return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
}
#endif /* _RDMA_IB_H */ #endif /* _RDMA_IB_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment