Commit 0b776eb5 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  mlx4_core: Increase command timeout for INIT_HCA to 10 seconds
  IPoIB/cm: Use common CQ for CM send completions
  IB/uverbs: Fix checking of userspace object ownership
  IB/mlx4: Sanity check userspace send queue sizes
  IPoIB: Rewrite "if (!likely(...))" as "if (unlikely(!(...)))"
  IB/ehca: Enable large page MRs by default
  IB/ehca: Change meaning of hca_cap_mr_pgsize
  IB/ehca: Fix ehca_encode_hwpage_size() and alloc_fmr()
  IB/ehca: Fix masking error in {,re}reg_phys_mr()
  IB/ehca: Supply QP token for SRQ base QPs
  IPoIB: Use round_jiffies() for ah_reap_task
  RDMA/cma: Fix deadlock destroying listen requests
  RDMA/cma: Add locking around QP accesses
  IB/mthca: Avoid alignment traps when writing doorbells
  mlx4_core: Kill mlx4_write64_raw()
parents 0d681009 77109cc2
@@ -114,13 +114,16 @@ struct rdma_id_private {
 	struct rdma_bind_list *bind_list;
 	struct hlist_node node;
-	struct list_head list;
-	struct list_head listen_list;
+	struct list_head list; /* listen_any_list or cma_device.list */
+	struct list_head listen_list; /* per device listens */
 	struct cma_device *cma_dev;
 	struct list_head mc_list;
+	int internal_id;
 	enum cma_state state;
 	spinlock_t lock;
+	struct mutex qp_mutex;
 	struct completion comp;
 	atomic_t refcount;
 	wait_queue_head_t wait_remove;
@@ -389,6 +392,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
+	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +478,86 @@ EXPORT_SYMBOL(rdma_create_qp);
 void rdma_destroy_qp(struct rdma_cm_id *id)
 {
-	ib_destroy_qp(id->qp);
+	struct rdma_id_private *id_priv;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	ib_destroy_qp(id_priv->id.qp);
+	id_priv->id.qp = NULL;
+	mutex_unlock(&id_priv->qp_mutex);
 }
 EXPORT_SYMBOL(rdma_destroy_qp);
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 	/* Need to update QP attributes from default values. */
 	qp_attr.qp_state = IB_QPS_INIT;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
-	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 	qp_attr.qp_state = IB_QPS_RTR;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 	qp_attr.qp_state = IB_QPS_RTS;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
+	int ret;
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 	qp_attr.qp_state = IB_QPS_ERR;
-	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -717,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
 	}
 }
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
-	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
-		cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
-	cma_exch(id_priv, CMA_DESTROYING);
-	if (id_priv->cma_dev) {
-		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-		case RDMA_TRANSPORT_IB:
-			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
-				ib_destroy_cm_id(id_priv->cm_id.ib);
-			break;
-		case RDMA_TRANSPORT_IWARP:
-			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
-				iw_destroy_cm_id(id_priv->cm_id.iw);
-			break;
-		default:
-			break;
-		}
-		cma_detach_from_dev(id_priv);
-	}
-	list_del(&id_priv->listen_list);
-	cma_deref_id(id_priv);
-	wait_for_completion(&id_priv->comp);
-	kfree(id_priv);
-}
 static void cma_cancel_listens(struct rdma_id_private *id_priv)
 {
 	struct rdma_id_private *dev_id_priv;
+	/*
+	 * Remove from listen_any_list to prevent added devices from spawning
+	 * additional listen requests.
+	 */
 	mutex_lock(&lock);
 	list_del(&id_priv->list);
 	while (!list_empty(&id_priv->listen_list)) {
 		dev_id_priv = list_entry(id_priv->listen_list.next,
 					 struct rdma_id_private, listen_list);
-		cma_destroy_listen(dev_id_priv);
+		/* sync with device removal to avoid duplicate destruction */
+		list_del_init(&dev_id_priv->list);
+		list_del(&dev_id_priv->listen_list);
+		mutex_unlock(&lock);
+		rdma_destroy_id(&dev_id_priv->id);
+		mutex_lock(&lock);
 	}
 	mutex_unlock(&lock);
 }
@@ -848,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	cma_deref_id(id_priv);
 	wait_for_completion(&id_priv->comp);
+	if (id_priv->internal_id)
+		cma_deref_id(id_priv->id.context);
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
 }
@@ -857,11 +866,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto reject;
-	ret = cma_modify_qp_rts(&id_priv->id);
+	ret = cma_modify_qp_rts(id_priv);
 	if (ret)
 		goto reject;
@@ -871,7 +880,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 	return 0;
 reject:
-	cma_modify_qp_err(&id_priv->id);
+	cma_modify_qp_err(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -947,7 +956,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* ignore event */
 		goto out;
 	case IB_CM_REJ_RECEIVED:
-		cma_modify_qp_err(&id_priv->id);
+		cma_modify_qp_err(id_priv);
 		event.status = ib_event->param.rej_rcvd.reason;
 		event.event = RDMA_CM_EVENT_REJECTED;
 		event.param.conn.private_data = ib_event->private_data;
@@ -1404,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+	atomic_inc(&id_priv->refcount);
+	dev_id_priv->internal_id = 1;
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		goto err;
-	return;
-err:
-	cma_destroy_listen(dev_id_priv);
+		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+		       "listening on device %s", ret, cma_dev->device->name);
 }
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2264,7 +2272,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
 	cm_id->remote_addr = *sin;
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto out;
@@ -2331,7 +2339,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	int qp_attr_mask, ret;
 	if (id_priv->id.qp) {
-		ret = cma_modify_qp_rtr(&id_priv->id);
+		ret = cma_modify_qp_rtr(id_priv);
 		if (ret)
 			goto out;
@@ -2370,7 +2378,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 	struct iw_cm_conn_param iw_param;
 	int ret;
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		return ret;
@@ -2442,7 +2450,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	return 0;
 reject:
-	cma_modify_qp_err(id);
+	cma_modify_qp_err(id_priv);
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
@@ -2512,7 +2520,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_modify_qp_err(id);
+		ret = cma_modify_qp_err(id_priv);
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
@@ -2543,9 +2551,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
 		return 0;
+	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
 					 multicast->rec.mlid);
+	mutex_unlock(&id_priv->qp_mutex);
 	memset(&event, 0, sizeof event);
 	event.status = status;
@@ -2757,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
 		id_priv = list_entry(cma_dev->id_list.next,
 				     struct rdma_id_private, list);
-		if (cma_internal_listen(id_priv)) {
-			cma_destroy_listen(id_priv);
-			continue;
-		}
+		list_del(&id_priv->listen_list);
 		list_del_init(&id_priv->list);
 		atomic_inc(&id_priv->refcount);
 		mutex_unlock(&lock);
-		ret = cma_remove_id_dev(id_priv);
+		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
 		cma_deref_id(id_priv);
 		if (ret)
 			rdma_destroy_id(&id_priv->id);
...
@@ -147,8 +147,12 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 	spin_lock(&ib_uverbs_idr_lock);
 	uobj = idr_find(idr, id);
-	if (uobj)
-		kref_get(&uobj->ref);
+	if (uobj) {
+		if (uobj->context == context)
+			kref_get(&uobj->ref);
+		else
+			uobj = NULL;
+	}
 	spin_unlock(&ib_uverbs_idr_lock);
 	return uobj;
...
@@ -323,7 +323,6 @@ extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
-extern int ehca_mr_largepage;
 struct ipzu_queue_resp {
 	u32 qe_size; /* queue entry size */
...
@@ -77,6 +77,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 	}
 	memset(props, 0, sizeof(struct ib_device_attr));
+	props->page_size_cap = shca->hca_cap_mr_pgsize;
 	props->fw_ver = rblock->hw_ver;
 	props->max_mr_size = rblock->max_mr_size;
 	props->vendor_id = rblock->vendor_id >> 8;
...
@@ -65,7 +65,7 @@ int ehca_port_act_time = 30;
 int ehca_poll_all_eqs = 1;
 int ehca_static_rate = -1;
 int ehca_scaling_code = 0;
-int ehca_mr_largepage = 0;
+int ehca_mr_largepage = 1;
 module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
 module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -260,13 +260,20 @@ static struct cap_descr {
 	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
 };
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
 {
 	int i, ret = 0;
 	u64 h_ret;
 	struct hipz_query_hca *rblock;
 	struct hipz_query_port *port;
+	static const u32 pgsize_map[] = {
+		HCA_CAP_MR_PGSIZE_4K, 0x1000,
+		HCA_CAP_MR_PGSIZE_64K, 0x10000,
+		HCA_CAP_MR_PGSIZE_1M, 0x100000,
+		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+	};
 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_gen_err("Cannot allocate rblock memory.");
@@ -329,8 +336,15 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
 			ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
-	shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+	/* translate supported MR page sizes; always support 4K */
+	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+	if (ehca_mr_largepage) { /* support extra sizes only if enabled */
+		for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+			if (rblock->memory_page_size_supported & pgsize_map[i])
+				shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+	}
+	/* query max MTU from first port -- it's the same for all ports */
 	port = (struct hipz_query_port *)rblock;
 	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
 	if (h_ret != H_SUCCESS) {
...
@@ -72,24 +72,14 @@ enum ehca_mr_pgsize {
 static u32 ehca_encode_hwpage_size(u32 pgsize)
 {
-	u32 idx = 0;
+	int log = ilog2(pgsize);
-	pgsize >>= 12;
-	/*
-	 * map mr page size into hw code:
-	 * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
-	 */
-	while (!(pgsize & 1)) {
-		idx++;
-		pgsize >>= 4;
-	}
-	return idx;
+	WARN_ON(log < 12 || log > 24 || log & 3);
+	return (log - 12) / 4;
 }
 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
 {
-	if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
-		return EHCA_MR_PGSIZE16M;
-	return EHCA_MR_PGSIZE4K;
+	return 1UL << ilog2(shca->hca_cap_mr_pgsize);
 }
 static struct ehca_mr *ehca_mr_new(void)
@@ -259,7 +249,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 		pginfo.u.phy.num_phys_buf = num_phys_buf;
 		pginfo.u.phy.phys_buf_array = phys_buf_array;
 		pginfo.next_hwpage =
-			((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
 				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -296,7 +286,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		container_of(pd->device, struct ehca_shca, ib_device);
 	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 	struct ehca_mr_pginfo pginfo;
-	int ret;
+	int ret, page_shift;
 	u32 num_kpages;
 	u32 num_hwpages;
 	u64 hwpage_size;
@@ -351,9 +341,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	/* determine number of MR pages */
 	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
 	/* select proper hw_pgsize */
-	if (ehca_mr_largepage &&
-	    (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
-		int page_shift = PAGE_SHIFT;
+	page_shift = PAGE_SHIFT;
 	if (e_mr->umem->hugetlb) {
 		/* determine page_shift, clamp between 4K and 16M */
 		page_shift = (fls64(length - 1) + 3) & ~3;
@@ -361,9 +349,12 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				 EHCA_MR_PGSHIFT16M);
 	}
 	hwpage_size = 1UL << page_shift;
-	} else
-		hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
-	ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
+	/* now that we have the desired page size, shift until it's
+	 * supported, too. 4K is always supported, so this terminates.
+	 */
+	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+		hwpage_size >>= 4;
 reg_user_mr_fallback:
 	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
@@ -547,7 +538,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
 		pginfo.u.phy.num_phys_buf = num_phys_buf;
 		pginfo.u.phy.phys_buf_array = phys_buf_array;
 		pginfo.next_hwpage =
-			((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+			((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 	}
 	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
 		new_acl = mr_access_flags;
@@ -809,8 +800,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 		ib_fmr = ERR_PTR(-EINVAL);
 		goto alloc_fmr_exit0;
 	}
-	hw_pgsize = ehca_get_max_hwpage_size(shca);
-	if ((1 << fmr_attr->page_shift) != hw_pgsize) {
+	hw_pgsize = 1 << fmr_attr->page_shift;
+	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
 		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
 			 fmr_attr->page_shift);
 		ib_fmr = ERR_PTR(-EINVAL);
@@ -826,6 +818,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 	/* register MR on HCA */
 	memset(&pginfo, 0, sizeof(pginfo));
+	pginfo.hwpage_size = hw_pgsize;
 	/*
 	 * pginfo.num_hwpages==0, ie register_rpages() will not be called
 	 * but deferred to map_phys_fmr()
...
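For reference, the reworked ehca_encode_hwpage_size() above maps the supported MR page sizes 4K, 64K, 1M and 16M to hardware codes 0-3 via (log2(size) - 12) / 4. A minimal userspace sketch of that arithmetic (illustrative only, not part of the patch; ilog2() is open-coded here):

#include <stdio.h>

static unsigned int encode_hwpage_size(unsigned long pgsize)
{
	unsigned int log = 0;

	while ((1UL << log) < pgsize)	/* ilog2() for a power of two */
		log++;
	return (log - 12) / 4;
}

int main(void)
{
	unsigned long sizes[] = { 0x1000, 0x10000, 0x100000, 0x1000000 };

	for (int i = 0; i < 4; i++)	/* prints codes 0, 1, 2, 3 */
		printf("%#lx -> %u\n", sizes[i], encode_hwpage_size(sizes[i]));
	return 0;
}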
@@ -451,7 +451,6 @@ static struct ehca_qp *internal_create_qp(
 		has_srq = 1;
 		parms.ext_type = EQPT_SRQBASE;
 		parms.srq_qpn = my_srq->real_qp_num;
-		parms.srq_token = my_srq->token;
 	}
 	if (is_llqp && has_srq) {
@@ -583,6 +582,9 @@ static struct ehca_qp *internal_create_qp(
 		goto create_qp_exit1;
 	}
+	if (has_srq)
+		parms.srq_token = my_qp->token;
 	parms.servicetype = ibqptype2servicetype(qp_type);
 	if (parms.servicetype < 0) {
 		ret = -EINVAL;
...
@@ -63,6 +63,10 @@ struct mlx4_ib_sqp {
 	u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
 };
+enum {
+	MLX4_IB_MIN_SQ_STRIDE = 6
+};
 static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
 	[IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
@@ -285,9 +289,17 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	return 0;
 }
-static int set_user_sq_size(struct mlx4_ib_qp *qp,
+static int set_user_sq_size(struct mlx4_ib_dev *dev,
+			    struct mlx4_ib_qp *qp,
 			    struct mlx4_ib_create_qp *ucmd)
 {
+	/* Sanity check SQ size before proceeding */
+	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
+	    ucmd->log_sq_stride >
+		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
+	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+		return -EINVAL;
 	qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
 	qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -330,7 +342,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		qp->sq_no_prefetch = ucmd.sq_no_prefetch;
-		err = set_user_sq_size(qp, &ucmd);
+		err = set_user_sq_size(dev, qp, &ucmd);
 		if (err)
 			goto err;
...
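The sanity check added to set_user_sq_size() above can be read in isolation as the following userspace sketch (illustrative only; the helper name and example cap values are made up, not taken from the driver):

#include <stdbool.h>
#include <stdio.h>

#define MIN_SQ_STRIDE 6		/* log2 of the smallest legal WQE stride */

static bool user_sq_size_ok(unsigned int log_sq_bb_count, unsigned int log_sq_stride,
			    unsigned int max_wqes, unsigned int log_max_sq_desc_sz)
{
	/* reject SQ sizes beyond the device caps or strides below the minimum */
	return (1u << log_sq_bb_count) <= max_wqes &&
	       log_sq_stride <= log_max_sq_desc_sz &&
	       log_sq_stride >= MIN_SQ_STRIDE;
}

int main(void)
{
	/* hypothetical caps: 16384 WQEs, 1024-byte max descriptor (log2 = 10) */
	printf("%d\n", user_sq_size_ok(10, 6, 16384, 10));	/* 1: accepted */
	printf("%d\n", user_sq_size_ok(15, 6, 16384, 10));	/* 0: too many WQEs */
	printf("%d\n", user_sq_size_ok(10, 5, 16384, 10));	/* 0: stride too small */
	return 0;
}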
@@ -204,16 +204,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
 static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
 				     int incr)
 {
-	__be32 doorbell[2];
 	if (mthca_is_memfree(dev)) {
 		*cq->set_ci_db = cpu_to_be32(cq->cons_index);
 		wmb();
 	} else {
-		doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
-		doorbell[1] = cpu_to_be32(incr - 1);
-		mthca_write64(doorbell,
+		mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
 			      dev->kar + MTHCA_CQ_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		/*
@@ -731,17 +726,12 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
 int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
 {
-	__be32 doorbell[2];
-	doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) ==
-				   IB_CQ_SOLICITED ?
-				   MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
-				   MTHCA_TAVOR_CQ_DB_REQ_NOT) |
-				  to_mcq(cq)->cqn);
-	doorbell[1] = (__force __be32) 0xffffffff;
-	mthca_write64(doorbell,
-		      to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+	u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+		    MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+		    MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+		   to_mcq(cq)->cqn;
+	mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
 	return 0;
@@ -750,19 +740,16 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
 int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct mthca_cq *cq = to_mcq(ibcq);
-	__be32 doorbell[2];
-	u32 sn;
-	__be32 ci;
-	sn = cq->arm_sn & 3;
-	ci = cpu_to_be32(cq->cons_index);
-	doorbell[0] = ci;
-	doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
-				  ((flags & IB_CQ_SOLICITED_MASK) ==
-				   IB_CQ_SOLICITED ? 1 : 2));
-	mthca_write_db_rec(doorbell, cq->arm_db);
+	__be32 db_rec[2];
+	u32 dbhi;
+	u32 sn = cq->arm_sn & 3;
+	db_rec[0] = cpu_to_be32(cq->cons_index);
+	db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+				((flags & IB_CQ_SOLICITED_MASK) ==
+				 IB_CQ_SOLICITED ? 1 : 2));
+	mthca_write_db_rec(db_rec, cq->arm_db);
 	/*
 	 * Make sure that the doorbell record in host memory is
@@ -770,14 +757,12 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	 */
 	wmb();
-	doorbell[0] = cpu_to_be32((sn << 28) |
-				  ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
-				   MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
-				   MTHCA_ARBEL_CQ_DB_REQ_NOT) |
-				  cq->cqn);
-	doorbell[1] = ci;
-	mthca_write64(doorbell,
+	dbhi = (sn << 28) |
+	       ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+		MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+		MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
+	mthca_write64(dbhi, cq->cons_index,
 		      to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
...
@@ -58,10 +58,10 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
 	__raw_writeq((__force u64) val, dest);
 }
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
 				 spinlock_t *doorbell_lock)
 {
-	__raw_writeq(*(u64 *) val, dest);
+	__raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
 }
 static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
@@ -87,14 +87,17 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
 	__raw_writel(((__force u32 *) &val)[1], dest + 4);
 }
-static inline void mthca_write64(__be32 val[2], void __iomem *dest,
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
 				 spinlock_t *doorbell_lock)
 {
 	unsigned long flags;
+	hi = (__force u32) cpu_to_be32(hi);
+	lo = (__force u32) cpu_to_be32(lo);
 	spin_lock_irqsave(doorbell_lock, flags);
-	__raw_writel((__force u32) val[0], dest);
-	__raw_writel((__force u32) val[1], dest + 4);
+	__raw_writel(hi, dest);
+	__raw_writel(lo, dest + 4);
 	spin_unlock_irqrestore(doorbell_lock, flags);
 }
...
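The mthca_write64() change above replaces the __be32[2] doorbell array argument with two host-order 32-bit halves, so the helper no longer has to dereference a possibly 4-byte-aligned array as a u64 (the source of the alignment traps named in the merge summary). A userspace sketch of the same idea (illustrative only, not driver code; htobe64() stands in for cpu_to_be64()):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

static void write64(uint32_t hi, uint32_t lo, volatile uint64_t *dest)
{
	/* build the 64-bit doorbell value in a register instead of
	 * loading it from a potentially misaligned 32-bit array */
	uint64_t val = ((uint64_t) hi << 32) | lo;

	*dest = htobe64(val);
}

int main(void)
{
	uint64_t db;

	write64(0x12345678, 0x9abcdef0, &db);
	printf("%016llx\n", (unsigned long long) db);
	return 0;
}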
@@ -173,11 +173,6 @@ static inline u64 async_mask(struct mthca_dev *dev)
 static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 {
-	__be32 doorbell[2];
-	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
-	doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
 	/*
 	 * This barrier makes sure that all updates to ownership bits
 	 * done by set_eqe_hw() hit memory before the consumer index
@@ -187,7 +182,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
 	 * having set_eqe_hw() overwrite the owner field.
 	 */
 	wmb();
-	mthca_write64(doorbell,
+	mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
 		      dev->kar + MTHCA_EQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
@@ -212,12 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
 static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
 {
-	__be32 doorbell[2];
-	doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
-	doorbell[1] = 0;
-	mthca_write64(doorbell,
+	mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
 		      dev->kar + MTHCA_EQ_DOORBELL,
 		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 }
@@ -230,12 +220,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
 static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
 {
 	if (!mthca_is_memfree(dev)) {
-		__be32 doorbell[2];
-		doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
-		doorbell[1] = cpu_to_be32(cqn);
-		mthca_write64(doorbell,
+		mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
 			      dev->kar + MTHCA_EQ_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
...
@@ -1799,15 +1799,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 out:
 	if (likely(nreq)) {
-		__be32 doorbell[2];
-		doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
-					   qp->send_wqe_offset) | f0 | op0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 		wmb();
-		mthca_write64(doorbell,
+		mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+			       qp->send_wqe_offset) | f0 | op0,
+			      (qp->qpn << 8) | size0,
 			      dev->kar + MTHCA_SEND_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		/*
@@ -1829,7 +1825,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int nreq;
@@ -1907,13 +1902,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
 			nreq = 0;
-			doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-			doorbell[1] = cpu_to_be32(qp->qpn << 8);
 			wmb();
-			mthca_write64(doorbell,
-				      dev->kar + MTHCA_RECEIVE_DOORBELL,
+			mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+				      qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 			qp->rq.next_ind = ind;
@@ -1923,13 +1915,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 out:
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
 		wmb();
-		mthca_write64(doorbell,
-			      dev->kar + MTHCA_RECEIVE_DOORBELL,
+		mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+			      qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
@@ -1951,7 +1940,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	__be32 doorbell[2];
+	u32 dbhi;
 	void *wqe;
 	void *prev_wqe;
 	unsigned long flags;
@@ -1981,10 +1970,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
 			nreq = 0;
-			doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
-						  ((qp->sq.head & 0xffff) << 8) |
-						  f0 | op0);
-			doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+			dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+			       ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 			qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
@@ -2000,7 +1987,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			 * write MMIO send doorbell.
 			 */
 			wmb();
-			mthca_write64(doorbell,
+			mthca_write64(dbhi, (qp->qpn << 8) | size0,
 				      dev->kar + MTHCA_SEND_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 		}
@@ -2154,10 +2142,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 out:
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32((nreq << 24) |
-					  ((qp->sq.head & 0xffff) << 8) |
-					  f0 | op0);
-		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+		dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 		qp->sq.head += nreq;
@@ -2173,8 +2158,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		 * write MMIO send doorbell.
 		 */
 		wmb();
-		mthca_write64(doorbell,
-			      dev->kar + MTHCA_SEND_DOORBELL,
+		mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
...
@@ -491,7 +491,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
-	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int first_ind;
@@ -563,16 +562,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
 			nreq = 0;
-			doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
-			doorbell[1] = cpu_to_be32(srq->srqn << 8);
 			/*
 			 * Make sure that descriptors are written
 			 * before doorbell is rung.
 			 */
 			wmb();
-			mthca_write64(doorbell,
+			mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
 				      dev->kar + MTHCA_RECEIVE_DOORBELL,
 				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
@@ -581,16 +577,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 	}
 	if (likely(nreq)) {
-		doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
-		doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
 		/*
 		 * Make sure that descriptors are written before
 		 * doorbell is rung.
 		 */
 		wmb();
-		mthca_write64(doorbell,
+		mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
 			      dev->kar + MTHCA_RECEIVE_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
...
@@ -84,9 +84,8 @@ enum {
 	IPOIB_MCAST_RUN = 6,
 	IPOIB_STOP_REAPER = 7,
 	IPOIB_MCAST_STARTED = 8,
-	IPOIB_FLAG_NETIF_STOPPED = 9,
-	IPOIB_FLAG_ADMIN_CM = 10,
-	IPOIB_FLAG_UMCAST = 11,
+	IPOIB_FLAG_ADMIN_CM = 9,
+	IPOIB_FLAG_UMCAST = 10,
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -98,9 +97,9 @@ enum {
 #define IPOIB_OP_RECV (1ul << 31)
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
-#define IPOIB_CM_OP_SRQ (1ul << 30)
+#define IPOIB_OP_CM (1ul << 30)
 #else
-#define IPOIB_CM_OP_SRQ (0)
+#define IPOIB_OP_CM (0)
 #endif
 /* structs */
@@ -197,7 +196,6 @@ struct ipoib_cm_rx {
 struct ipoib_cm_tx {
 	struct ib_cm_id *id;
-	struct ib_cq *cq;
 	struct ib_qp *qp;
 	struct list_head list;
 	struct net_device *dev;
@@ -294,6 +292,7 @@ struct ipoib_dev_priv {
 	unsigned tx_tail;
 	struct ib_sge tx_sge;
 	struct ib_send_wr tx_wr;
+	unsigned tx_outstanding;
 	struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -504,6 +503,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
 void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
 			   unsigned int mtu);
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
 #else
 struct ipoib_cm_tx;
@@ -592,6 +592,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *w
 {
 }
+static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
 #endif
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
...
@@ -87,7 +87,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
 	struct ib_recv_wr *bad_wr;
 	int i, ret;
-	priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -401,7 +401,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
 	struct sk_buff *skb, *newskb;
 	struct ipoib_cm_rx *p;
 	unsigned long flags;
@@ -412,7 +412,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		       wr_id, wc->status);
 	if (unlikely(wr_id >= ipoib_recvq_size)) {
-		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
+		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
 			spin_lock_irqsave(&priv->lock, flags);
 			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 			ipoib_cm_start_rx_drain(priv);
@@ -434,7 +434,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		goto repost;
 	}
-	if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
+	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
 		p = wc->qp->qp_context;
 		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
 			spin_lock_irqsave(&priv->lock, flags);
@@ -498,7 +498,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	priv->tx_sge.addr = addr;
 	priv->tx_sge.length = len;
-	priv->tx_wr.wr_id = wr_id;
+	priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
 	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
 }
@@ -549,20 +549,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 		dev->trans_start = jiffies;
 		++tx->tx_head;
-		if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+		if (++priv->tx_outstanding == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
 				  tx->qp->qp_num);
 			netif_stop_queue(dev);
-			set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
 		}
 	}
 }
-static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
-				  struct ib_wc *wc)
+void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id;
+	struct ipoib_cm_tx *tx = wc->qp->qp_context;
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
 	struct ipoib_tx_buf *tx_req;
 	unsigned long flags;
@@ -587,11 +586,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 	spin_lock_irqsave(&priv->tx_lock, flags);
 	++tx->tx_tail;
-	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
-	    tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
-		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+	    netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
-	}
 	if (wc->status != IB_WC_SUCCESS &&
 	    wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -614,11 +612,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 		tx->neigh = NULL;
 	}
-	/* queue would be re-started anyway when TX is destroyed,
-	 * but it makes sense to do it ASAP here. */
-	if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
-		netif_wake_queue(dev);
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		list_move(&tx->list, &priv->cm.reap_list);
 		queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -632,19 +625,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 }
-static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
-{
-	struct ipoib_cm_tx *tx = tx_ptr;
-	int n, i;
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-	do {
-		n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
-		for (i = 0; i < n; ++i)
-			ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
-	} while (n == IPOIB_NUM_WC);
-}
 int ipoib_cm_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -807,17 +787,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	return 0;
 }
-static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
-		.send_cq = cq,
+		.send_cq = priv->cq,
 		.recv_cq = priv->cq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = ipoib_sendq_size,
 		.cap.max_send_sge = 1,
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type = IB_QPT_RC,
+		.qp_context = tx
 	};
 	return ib_create_qp(priv->pd, &attr);
@@ -899,21 +880,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 		goto err_tx;
 	}
-	p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
-			     ipoib_sendq_size + 1, 0);
-	if (IS_ERR(p->cq)) {
-		ret = PTR_ERR(p->cq);
-		ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
-		goto err_cq;
-	}
-	ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
-	if (ret) {
-		ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
-		goto err_req_notify;
-	}
-	p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
 	if (IS_ERR(p->qp)) {
 		ret = PTR_ERR(p->qp);
 		ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -950,12 +917,8 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 err_id:
 	p->id = NULL;
 	ib_destroy_qp(p->qp);
-err_req_notify:
 err_qp:
 	p->qp = NULL;
-	ib_destroy_cq(p->cq);
-err_cq:
-	p->cq = NULL;
 err_tx:
 	return ret;
 }
@@ -964,6 +927,8 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
+	unsigned long begin;
 	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
 		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -971,27 +936,40 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 	if (p->id)
 		ib_destroy_cm_id(p->id);
-	if (p->qp)
-		ib_destroy_qp(p->qp);
-	if (p->cq)
-		ib_destroy_cq(p->cq);
-	if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
-		netif_wake_queue(p->dev);
-	if (p->tx_ring) {
+	if (p->tx_ring) {
+		/* Wait for all sends to complete */
+		begin = jiffies;
+		while ((int) p->tx_tail - (int) p->tx_head < 0) {
+			if (time_after(jiffies, begin + 5 * HZ)) {
+				ipoib_warn(priv, "timing out; %d sends not completed\n",
+					   p->tx_head - p->tx_tail);
+				goto timeout;
+			}
+			msleep(1);
+		}
+	}
+timeout:
 	while ((int) p->tx_tail - (int) p->tx_head < 0) {
 		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
 		ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
 				    DMA_TO_DEVICE);
 		dev_kfree_skb_any(tx_req->skb);
 		++p->tx_tail;
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+		    netif_queue_stopped(p->dev) &&
+		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			netif_wake_queue(p->dev);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 	}
-	kfree(p->tx_ring);
-	}
+	if (p->qp)
+		ib_destroy_qp(p->qp);
+	kfree(p->tx_ring);
 	kfree(p);
 }
...
@@ -267,11 +267,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	spin_lock_irqsave(&priv->tx_lock, flags);
 	++priv->tx_tail;
-	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
-	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
-		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
+	    netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
-	}
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 	if (wc->status != IB_WC_SUCCESS &&
@@ -301,15 +300,19 @@ int ipoib_poll(struct napi_struct *napi, int budget)
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = priv->ibwc + i;
-			if (wc->wr_id & IPOIB_CM_OP_SRQ) {
+			if (wc->wr_id & IPOIB_OP_RECV) {
 				++done;
-				ipoib_cm_handle_rx_wc(dev, wc);
-			} else if (wc->wr_id & IPOIB_OP_RECV) {
-				++done;
-				ipoib_ib_handle_rx_wc(dev, wc);
-			} else
-				ipoib_ib_handle_tx_wc(dev, wc);
+				if (wc->wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_rx_wc(dev, wc);
+				else
+					ipoib_ib_handle_rx_wc(dev, wc);
+			} else {
+				if (wc->wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_tx_wc(dev, wc);
+				else
+					ipoib_ib_handle_tx_wc(dev, wc);
+			}
 		}
 		if (n != t)
 			break;
@@ -401,10 +404,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
-		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+		if (++priv->tx_outstanding == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
-			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
 		}
 	}
 }
@@ -436,7 +438,8 @@ void ipoib_reap_ah(struct work_struct *work)
 	__ipoib_reap_ah(dev);
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+				   round_jiffies_relative(HZ));
 }
 int ipoib_ib_dev_open(struct net_device *dev)
@@ -472,7 +475,8 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	}
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+			   round_jiffies_relative(HZ));
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -561,13 +565,18 @@ void ipoib_drain_cq(struct net_device *dev)
 			if (priv->ibwc[i].status == IB_WC_SUCCESS)
 				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
-			if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
-				ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
-			else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
-				ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
-			else
-				ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
+				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
+				else
+					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
+			} else {
+				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
+				else
+					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+			}
 		}
 	} while (n == IPOIB_NUM_WC);
 }
@@ -612,6 +621,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 					    DMA_TO_DEVICE);
 			dev_kfree_skb_any(tx_req->skb);
 			++priv->tx_tail;
+			--priv->tx_outstanding;
 		}
 	for (i = 0; i < ipoib_recvq_size; ++i) {
...
@@ -148,8 +148,6 @@ static int ipoib_stop(struct net_device *dev)
 	netif_stop_queue(dev);
-	clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
 	/*
 	 * Now flush workqueue to make sure a scheduled task doesn't
 	 * bring our internal state back up.
@@ -902,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		goto out_rx_ring_cleanup;
 	}
-	/* priv->tx_head & tx_tail are already 0 */
+	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;
...
@@ -736,7 +736,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
-	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000);
+	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
 	if (err)
 		mlx4_err(dev, "INIT_HCA returns %d\n", err);
...
@@ -52,11 +52,6 @@
 #define MLX4_INIT_DOORBELL_LOCK(ptr) do { } while (0)
 #define MLX4_GET_DOORBELL_LOCK(ptr) (NULL)
-static inline void mlx4_write64_raw(__be64 val, void __iomem *dest)
-{
-	__raw_writeq((__force u64) val, dest);
-}
 static inline void mlx4_write64(__be32 val[2], void __iomem *dest,
 				spinlock_t *doorbell_lock)
 {
@@ -75,12 +70,6 @@ static inline void mlx4_write64(__be32 val[2], void __iomem *dest,
 #define MLX4_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
 #define MLX4_GET_DOORBELL_LOCK(ptr) (ptr)
-static inline void mlx4_write64_raw(__be64 val, void __iomem *dest)
-{
-	__raw_writel(((__force u32 *) &val)[0], dest);
-	__raw_writel(((__force u32 *) &val)[1], dest + 4);
-}
 static inline void mlx4_write64(__be32 val[2], void __iomem *dest,
 				spinlock_t *doorbell_lock)
 {
...