Commit 14f369d1 authored by Roland Dreier

Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser',...

Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser', 'misc', 'mlx4' and 'nes' into for-next
......@@ -36,11 +36,11 @@ Datagram vs Connected modes
fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
In connected mode, the IB RC (Reliable Connected) transport is used.
Connected mode is to takes advantage of the connected nature of the
IB transport and allows an MTU up to the maximal IP packet size of
64K, which reduces the number of IP packets needed for handling
large UDP datagrams, TCP segments, etc and increases the performance
for large messages.
Connected mode takes advantage of the connected nature of the IB
transport and allows an MTU up to the maximal IP packet size of 64K,
which reduces the number of IP packets needed for handling large UDP
datagrams, TCP segments, etc and increases the performance for large
messages.
In connected mode, the interface's UD QP is still used for multicast
and communication with peers that don't support connected mode. In
......
This diff is collapsed.
......@@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
union ib_gid gid;
int ret = -ENODEV;
switch (rdma_node_get_transport(dev_addr->dev_type)) {
case RDMA_TRANSPORT_IB:
ib_addr_get_sgid(dev_addr, &gid);
break;
case RDMA_TRANSPORT_IWARP:
iw_addr_get_sgid(dev_addr, &gid);
break;
default:
return -ENODEV;
}
rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
......@@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto destroy_id;
if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
} else {
ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
&rt->addr.dev_addr);
if (ret)
goto destroy_id;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
......@@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
}
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
......@@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
{
struct sockaddr_storage addr_in;
memset(&addr_in, 0, sizeof addr_in);
addr_in.ss_family = af;
return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
......@@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
ret = cma_bind_any(id, AF_INET);
((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
......@@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
......@@ -1781,7 +1771,11 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
if (ret)
goto out;
ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
id_priv->id.route.addr.dev_addr.dev_type =
(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
ARPHRD_INFINIBAND : ARPHRD_ETHER;
rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
......@@ -1839,7 +1833,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
struct sockaddr_in *src_in, *dst_in;
struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
......@@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
goto err;
}
ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
if (cma_zero_addr(src)) {
dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
if ((src->sa_family = dst->sa_family) == AF_INET) {
((struct sockaddr_in *) src)->sin_addr.s_addr =
((struct sockaddr_in *) dst)->sin_addr.s_addr;
} else {
ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
}
}
work->id = id_priv;
......@@ -1878,10 +1877,14 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
if (src_addr && src_addr->sa_family)
return rdma_bind_addr(id, src_addr);
else
return cma_bind_any(id, dst_addr->sa_family);
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
}
}
return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
......@@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
return ret;
}
/*
 * cma_check_linklocal - validate the scope of an IPv6 bind address
 * @dev_addr: device address whose bound_dev_if is taken from the scope id
 * @addr: address that is being bound
 *
 * Addresses that are not AF_INET6 are accepted without any change.  An
 * IPv6 link-local address with a zero sin6_scope_id is rejected with
 * -EINVAL.  For every other IPv6 address, bound_dev_if is set to
 * sin6_scope_id.
 *
 * The check is compiled in only when IPv6 is built in or modular.  The
 * Kconfig symbol is CONFIG_IPV6 (all upper case); testing the misspelled
 * CONFIG_IPv6 silently disabled this code for built-in IPv6.
 *
 * Returns 0 on success, -EINVAL for an unscoped link-local address.
 */
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
			       struct sockaddr *addr)
{
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	struct sockaddr_in6 *sin6;

	if (addr->sa_family != AF_INET6)
		return 0;

	sin6 = (struct sockaddr_in6 *) addr;
	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
	    !sin6->sin6_scope_id)
		return -EINVAL;

	dev_addr->bound_dev_if = sin6->sin6_scope_id;
#endif
	return 0;
}
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
......@@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
if (!cma_any_addr(addr)) {
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
if (cma_loopback_addr(addr)) {
ret = cma_bind_loopback(id_priv);
} else if (!cma_zero_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
......@@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
if (!cma_any_addr(addr)) {
if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
......@@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6)) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
......@@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
ib_addr_get_sgid(dev_addr, &rec.port_gid);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
......@@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->src_dev == ndev) &&
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
......
......@@ -604,6 +604,12 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
/*
 * ib_sa_unpack_path - unpack a raw path record attribute
 * @attribute: source buffer handed to ib_unpack()
 * @rec: destination path record filled in by ib_unpack()
 *
 * Thin exported wrapper around ib_unpack() using path_rec_table and its
 * entry count, so that code outside this file can decode a path record
 * without access to the table itself.
 */
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
......
......@@ -43,6 +43,7 @@
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
......@@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
ib_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
ib_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
rdma_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
rdma_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
......@@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
return ret;
}
/*
 * ucma_set_ib_path - install a caller-supplied IB path on a CM id
 * @ctx: context whose cm_id receives the path
 * @path_data: array of path record entries
 * @optlen: size of @path_data in bytes; must be a whole number of entries
 *
 * Scans the array for the first entry whose flags are exactly
 * IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL, unpacks it,
 * sets it as the single path of the id and then hands a
 * RDMA_CM_EVENT_ROUTE_RESOLVED event to ucma_event_handler().
 *
 * Returns -EINVAL when @optlen is not a multiple of the entry size or no
 * entry matches, the error from rdma_set_ib_paths() if that fails, and
 * otherwise the result of ucma_event_handler().
 */
static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct ib_sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data) != 0)
		return -EINVAL;

	/* Advance to the first entry carrying exactly the wanted flags. */
	while (optlen != 0 &&
	       path_data->flags != (IB_PATH_GMP | IB_PATH_PRIMARY |
				    IB_PATH_BIDIRECTIONAL)) {
		optlen -= sizeof(*path_data);
		path_data++;
	}

	/* Ran off the end of the array without a match. */
	if (optlen == 0)
		return -EINVAL;

	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
	if (ret != 0)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;

	return ucma_event_handler(ctx->cm_id, &event);
}
/*
 * ucma_set_option_ib - handle options of the RDMA_OPTION_IB level
 * @ctx: context the option applies to
 * @optname: option identifier
 * @optval: option payload
 * @optlen: payload length in bytes
 *
 * Only RDMA_OPTION_IB_PATH is recognised; it is forwarded to
 * ucma_set_ib_path().  Every other option name yields -ENOSYS.
 */
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	if (optname == RDMA_OPTION_IB_PATH)
		return ucma_set_ib_path(ctx, optval, optlen);

	return -ENOSYS;
}
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
......@@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
break;
default:
ret = -ENOSYS;
}
......
......@@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(file->ucontext);
ret = PTR_ERR(ucontext);
goto err;
}
......
......@@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -ENOMEM;
err = -ENOMEM;
goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
......@@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
if (err) {
*bad_wr = wr;
if (err)
break;
}
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
......@@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
......@@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -ENOMEM;
goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
......@@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
if (err) {
*bad_wr = wr;
if (err)
break;
}
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
......@@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
......
......@@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;
......
......@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
unsigned long flags;
u64 h_ret;
spin_lock_irqsave(&eq->spinlock, flags);
ibmebus_free_irq(eq->ist, (void *)shca);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
spin_lock_irqsave(&shca_list_lock, flags);
eq->is_initialized = 0;
spin_unlock_irqrestore(&shca_list_lock, flags);
spin_unlock_irqrestore(&eq->spinlock, flags);
tasklet_kill(&eq->interrupt_task);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");
......
......@@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
static DEFINE_SPINLOCK(shca_list_lock);
DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;
......
......@@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
......@@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
......@@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
......@@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
......@@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
return -EINVAL;
ret = -EINVAL;
goto out;
}
/* LOCK the QUEUE */
......@@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
for (cur_send_wr = send_wr; cur_send_wr != NULL;
cur_send_wr = cur_send_wr->next) {
ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
while (send_wr) {
ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
/* if one or more WQEs were successful, don't fail */
if (wqe_cnt)
ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
} /* eof for cur_send_wr */
send_wr = send_wr->next;
}
post_send_exit0:
iosync(); /* serialize GAL register access */
......@@ -503,6 +495,10 @@ int ehca_post_send(struct ib_qp *qp,
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
out:
if (ret)
*bad_send_wr = send_wr;
return ret;
}
......@@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
......@@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
return -ENODEV;
ret = -ENODEV;
goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
/* loop processes list of send reqs */
for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
cur_recv_wr = cur_recv_wr->next) {
/* loop processes list of recv reqs */
while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_recv_wr)
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
/*
......@@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
rq_map_idx);
/*
* if something failed,
......@@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->cqe_req = 1;
wqe_cnt++;
} /* eof for cur_recv_wr */
recv_wr = recv_wr->next;
} /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
......@@ -584,6 +573,11 @@ static int internal_post_recv(struct ehca_qp *my_qp,
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
out:
if (ret)
*bad_recv_wr = recv_wr;
return ret;
}
......@@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
*bad_recv_wr = recv_wr;
return -EINVAL;
}
......
......@@ -39,6 +39,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
......@@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
unsigned end, cnt = 0, next;
unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
......@@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
next = find_first_bit(dd->ipath_pioavailkernel, end);
while (next < end) {
cnt++;
next = find_next_bit(dd->ipath_pioavailkernel, end,
next + 1);
}
cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
......
......@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz)
if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
......
......@@ -54,7 +54,8 @@ enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
MLX4_IB_UD_HEADER_SIZE = 72
MLX4_IB_UD_HEADER_SIZE = 72,
MLX4_IB_LSO_HEADER_SPARE = 128,
};
struct mlx4_ib_sqp {
......@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
};
enum {
MLX4_IB_MIN_SQ_STRIDE = 6
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
static const __be32 mlx4_ib_opcode[] = {
......@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
((flags & MLX4_IB_QP_LSO) ? 64 : 0);
((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
......@@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
......@@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz)
__be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
/*
* This is a temporary limitation and will be removed in
* a forthcoming FW release:
*/
if (unlikely(halign > 64))
return -EINVAL;
if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
*blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
......@@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
__be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
......@@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
......@@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
......@@ -1687,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
......
......@@ -4,14 +4,13 @@ config INFINIBAND_NES
select LIBCRC32C
select INET_LRO
---help---
This is a low-level driver for NetEffect RDMA enabled
Network Interface Cards (RNIC).
This is the RDMA Network Interface Card (RNIC) driver for
NetEffect Ethernet Cluster Server Adapters.
config INFINIBAND_NES_DEBUG
bool "Verbose debugging output"
depends on INFINIBAND_NES
default n
---help---
This option causes the NetEffect RNIC driver to produce debug
messages. Select this if you are developing the driver
or trying to diagnose a problem.
This option enables debug messages from the NetEffect RNIC
driver. Select this if you are diagnosing a problem.
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -521,7 +521,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
spin_lock_init(&nesdev->indexed_regs_lock);
/* Remap the PCI registers in adapter BAR0 to kernel VA space */
mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
pci_resource_len(pcidev, BAR_0));
if (mmio_regs == NULL) {
printk(KERN_ERR PFX "Unable to remap BAR0\n");
ret = -EIO;
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......
This diff is collapsed.
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......@@ -47,6 +47,8 @@
#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
#define IETF_MPA_KEY_SIZE 16
#define IETF_MPA_VERSION 1
#define IETF_MAX_PRIV_DATA_LEN 512
#define IETF_MPA_FRAME_SIZE 20
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
......@@ -169,7 +171,7 @@ struct nes_timer_entry {
#define NES_CM_DEF_SEQ2 0x18ed5740
#define NES_CM_DEF_LOCAL_ID2 0xb807
#define MAX_CM_BUFFER 512
#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
typedef u32 nes_addr_t;
......@@ -198,6 +200,7 @@ enum nes_cm_node_state {
NES_CM_STATE_TIME_WAIT,
NES_CM_STATE_LAST_ACK,
NES_CM_STATE_CLOSING,
NES_CM_STATE_LISTENER_DESTROYED,
NES_CM_STATE_CLOSED
};
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......@@ -424,8 +424,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->base_pd = 1;
nesadapter->device_cap_flags =
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
......@@ -436,11 +437,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
/* mark the usual suspect QPs and CQs as in use */
/* mark the usual suspect QPs, MR and CQs as in use */
for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
set_bit(u32temp, nesadapter->allocated_qps);
set_bit(u32temp, nesadapter->allocated_cqs);
}
set_bit(0, nesadapter->allocated_mrs);
for (u32temp = 0; u32temp < 20; u32temp++)
set_bit(u32temp, nesadapter->allocated_pds);
......@@ -481,7 +483,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
nesadapter->max_sge = 4;
nesadapter->max_cqe = 32767;
nesadapter->max_cqe = 32766;
if (nes_read_eeprom_values(nesdev, nesadapter)) {
printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
......@@ -1355,6 +1357,8 @@ int nes_init_phy(struct nes_device *nesdev)
}
if ((phy_type == NES_PHY_TYPE_ARGUS) ||
(phy_type == NES_PHY_TYPE_SFP_D)) {
u32 first_time = 1;
/* Check firmware heartbeat */
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
......@@ -1362,8 +1366,13 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if (temp_phy_data != temp_phy_data2)
return 0;
if (temp_phy_data != temp_phy_data2) {
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if ((temp_phy_data & 0xff) > 0x20)
return 0;
printk(PFX "Reinitializing PHY\n");
}
/* no heartbeat, configure the PHY */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
......@@ -1399,7 +1408,7 @@ int nes_init_phy(struct nes_device *nesdev)
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
do {
if (counter++ > 150) {
nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
printk(PFX "No PHY heartbeat\n");
break;
}
mdelay(1);
......@@ -1413,11 +1422,20 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
if (counter++ > 300) {
nes_debug(NES_DBG_PHY, "PHY did not track\n");
break;
if (((temp_phy_data & 0xff) == 0x0) && first_time) {
first_time = 0;
counter = 0;
/* reset AMCC PHY and try again */
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
continue;
} else {
printk(PFX "PHY did not track\n");
break;
}
}
mdelay(10);
} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
} while ((temp_phy_data & 0xff) < 0x30);
/* setup signal integrity */
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......@@ -546,11 +546,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx {
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
};
/*
 * Flag bits named for the iWARP SQ FMR WQE.  The zero-based and page-size
 * selectors occupy bits 6-7; the access-rights enables occupy bits 16-20.
 */
enum nes_iwarp_sq_fmr_opcodes {
	NES_IWARP_SQ_FMR_WQE_ZERO_BASED			= (1 << 6),
	/* page-size selector lives in bit 7: clear = 4K, set = 2M */
	NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K		= (0 << 7),
	NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M		= (1 << 7),
	/* access-rights enable bits */
	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ	= (1 << 16),
	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE	= (1 << 17),
	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ	= (1 << 18),
	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE	= (1 << 19),
	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND	= (1 << 20),
};
/*
 * Mask selecting the low 8 bits (0xFF).  Deliberately has no trailing
 * semicolon so the macro can be used inside larger expressions.
 */
#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK	0xFF
/*
 * WQE word index constants for the SQ "locinv" WQE.
 * NOTE(review): presumably index 6 is the word that carries the STag to
 * invalidate -- inferred from the identifier, confirm against the users.
 */
enum nes_iwarp_sq_locinv_wqe_word_idx {
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
};
enum nes_iwarp_rq_wqe_word_idx {
NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
......@@ -1153,6 +1165,19 @@ struct nes_pbl {
/* TODO: need to add list for two level tables */
};
#define NES_4K_PBL_CHUNK_SIZE 4096
/*
 * Pairs a u64 pointer (kva) with a dma_addr_t (paddr).
 * NOTE(review): presumably the kernel virtual address and the DMA address
 * of the same PBL buffer -- confirm against the allocation site.
 */
struct nes_fast_mr_wqe_pbl {
u64 *kva;
dma_addr_t paddr;
};
/*
 * Driver-side container that embeds the core struct ib_fast_reg_page_list
 * together with a struct nes_fast_mr_wqe_pbl and a 64-bit pbl field.
 * NOTE(review): ibfrpl is the first member, presumably so the driver can
 * recover this structure from the ib_fast_reg_page_list pointer handed
 * back by the core -- confirm against the users.
 */
struct nes_ib_fast_reg_page_list {
struct ib_fast_reg_page_list ibfrpl;
struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
u64 pbl;
};
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
......@@ -86,6 +86,7 @@ enum iwnes_memreg_type {
IWNES_MEMREG_TYPE_CQ = 0x0002,
IWNES_MEMREG_TYPE_MW = 0x0003,
IWNES_MEMREG_TYPE_FMR = 0x0004,
IWNES_MEMREG_TYPE_FMEM = 0x0005,
};
struct nes_mem_reg_req {
......
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
......
This diff is collapsed.
/*
* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved.
* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -135,19 +135,15 @@ struct nes_qp {
struct ib_qp ibqp;
void *allocated_buffer;
struct iw_cm_id *cm_id;
struct workqueue_struct *wq;
struct nes_cq *nesscq;
struct nes_cq *nesrcq;
struct nes_pd *nespd;
void *cm_node; /* handle of the node this QP is associated with */
struct ietf_mpa_frame *ietf_frame;
dma_addr_t ietf_frame_pbase;
wait_queue_head_t state_waitq;
struct ib_mr *lsmm_mr;
unsigned long socket;
struct nes_hw_qp hwqp;
struct work_struct work;
struct work_struct ae_work;
enum ib_qp_state ibqp_state;
u32 iwarp_state;
u32 hte_index;
......@@ -165,19 +161,20 @@ struct nes_qp {
struct page *page;
struct timer_list terminate_timer;
enum ib_event_type terminate_eventtype;
wait_queue_head_t kick_waitq;
u16 in_disconnect;
u16 active_conn:1;
u16 skip_lsmm:1;
u16 user_mode:1;
u16 hte_added:1;
u16 flush_issued:1;
u16 destroyed:1;
u16 sig_all:1;
u16 rsvd:9;
u16 private_data_len;
u16 term_sq_flush_code;
u16 term_rq_flush_code;
u8 active_conn;
u8 skip_lsmm;
u8 user_mode;
u8 hte_added;
u8 hw_iwarp_state;
u8 flush_issued;
u8 hw_tcp_state;
u8 term_flags;
u8 destroyed;
u8 sq_kmapped;
};
#endif /* NES_VERBS_H */
......@@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
neigh->neighbour = neighbour;
neigh->dev = dev;
memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
ipoib_cm_set(neigh, NULL);
......
......@@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
mem_copy->copy_buf = NULL;
}
/*
 * True when @addr has no bits set outside MASK_4K.  The argument is
 * parenthesized before the cast so that compound expressions such as
 * "base + len" are converted as a whole rather than only their first operand.
 */
#define IS_4K_ALIGNED(addr)	((((unsigned long)(addr)) & ~MASK_4K) == 0)
/**
* iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
* and returns the length of resulting physical address array (may be less than
......@@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
* where --few fragments of the same page-- are present in the SG as
* consecutive elements. Also, it handles one entry SG.
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
struct iser_page_vec *page_vec,
struct ib_device *ibdev)
{
struct scatterlist *sgl = (struct scatterlist *)data->buf;
struct scatterlist *sg;
u64 first_addr, last_addr, page;
int end_aligned;
unsigned int cur_page = 0;
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
unsigned long total_sz = 0;
int i;
unsigned int dma_len;
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
start_addr = ib_sg_dma_address(ibdev, sg);
if (new_chunk)
chunk_start = start_addr;
dma_len = ib_sg_dma_len(ibdev, sg);
end_addr = start_addr + dma_len;
total_sz += dma_len;
first_addr = ib_sg_dma_address(ibdev, sg);
last_addr = first_addr + dma_len;
end_aligned = !(last_addr & ~MASK_4K);
/* continue to collect page fragments till aligned or SG ends */
while (!end_aligned && (i + 1 < data->dma_nents)) {
sg = sg_next(sg);
i++;
dma_len = ib_sg_dma_len(ibdev, sg);
total_sz += dma_len;
last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
end_aligned = !(last_addr & ~MASK_4K);
/* collect page fragments until aligned or end of SG list */
if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
new_chunk = 0;
continue;
}
/* handle the 1st page in the 1st DMA element */
if (cur_page == 0) {
page = first_addr & MASK_4K;
page_vec->pages[cur_page] = page;
cur_page++;
new_chunk = 1;
/* address of the first page in the contiguous chunk;
masking relevant for the very first SG entry,
which might be unaligned */
page = chunk_start & MASK_4K;
do {
page_vec->pages[cur_page++] = page;
page += SIZE_4K;
} else
page = first_addr;
for (; page < last_addr; page += SIZE_4K) {
page_vec->pages[cur_page] = page;
cur_page++;
}
} while (page < end_addr);
}
page_vec->data_size = total_sz;
iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
return cur_page;
}
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
/**
* iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
......@@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
* the number of entries which are aligned correctly. Supports the case where
* consecutive SG elements are actually fragments of the same physical page.
*/
static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev)
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
struct ib_device *ibdev)
{
struct scatterlist *sgl, *sg;
u64 end_addr, next_addr;
int i, cnt;
unsigned int ret_len = 0;
struct scatterlist *sgl, *sg, *next_sg = NULL;
u64 start_addr, end_addr;
int i, ret_len, start_check = 0;
if (data->dma_nents == 1)
return 1;
sgl = (struct scatterlist *)data->buf;
start_addr = ib_sg_dma_address(ibdev, sgl);
cnt = 0;
for_each_sg(sgl, sg, data->dma_nents, i) {
/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
"offset: %ld sz: %ld\n", i,
(unsigned long)sg_phys(sg),
(unsigned long)sg->offset,
(unsigned long)sg->length); */
end_addr = ib_sg_dma_address(ibdev, sg) +
ib_sg_dma_len(ibdev, sg);
/* iser_dbg("Checking sg iobuf end address "
"0x%08lX\n", end_addr); */
if (i + 1 < data->dma_nents) {
next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
/* are i, i+1 fragments of the same page? */
if (end_addr == next_addr) {
cnt++;
continue;
} else if (!IS_4K_ALIGNED(end_addr)) {
ret_len = cnt + 1;
break;
}
}
cnt++;
if (start_check && !IS_4K_ALIGNED(start_addr))
break;
next_sg = sg_next(sg);
if (!next_sg)
break;
end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
start_addr = ib_sg_dma_address(ibdev, next_sg);
if (end_addr == start_addr) {
start_check = 0;
continue;
} else
start_check = 1;
if (!IS_4K_ALIGNED(end_addr))
break;
}
if (i == data->dma_nents)
ret_len = cnt; /* loop ended */
ret_len = (next_sg) ? i : i+1;
iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
ret_len, data->dma_nents, data);
return ret_len;
......
......@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[12] = "DPDP",
[15] = "Big LSO headers",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
......@@ -235,7 +236,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
dev_cap->reserved_eqs = 1 << (field & 0xf);
dev_cap->reserved_eqs = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
......
......@@ -61,6 +61,7 @@ enum {
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
MLX4_DEV_CAP_FLAG_APM = 1 << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
......
......@@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <rdma/ib_verbs.h>
......@@ -60,8 +61,8 @@ struct rdma_dev_addr {
unsigned char src_dev_addr[MAX_ADDR_LEN];
unsigned char dst_dev_addr[MAX_ADDR_LEN];
unsigned char broadcast[MAX_ADDR_LEN];
enum rdma_node_type dev_type;
struct net_device *src_dev;
unsigned short dev_type;
int bound_dev_if;
};
/**
......@@ -121,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
}
static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
{
memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid);
return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid);
memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
}
static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
{
memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
}
static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
union ib_gid *gid)
static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
}
#endif /* IB_ADDR_H */
......@@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
struct ib_ah_attr *ah_attr);
/**
 * ib_sa_unpack_path - Convert a path record from MAD format to struct
 * ib_sa_path_rec.
 * @attribute: Path record in MAD format (the input to convert).
 *   NOTE(review): presumably the raw wire layout -- confirm against the
 *   implementation.
 * @rec: struct ib_sa_path_rec that receives the converted record.
 */
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
#endif /* IB_SA_H */
......@@ -35,6 +35,22 @@
#include <linux/types.h>
/* Path flag bits; each flag is a distinct bit so they can be OR-ed together. */
enum {
	IB_PATH_GMP		= (1 << 0),
	IB_PATH_PRIMARY		= (1 << 1),
	IB_PATH_ALTERNATE	= (1 << 2),
	IB_PATH_OUTBOUND	= (1 << 3),
	IB_PATH_INBOUND		= (1 << 4),
	IB_PATH_INBOUND_REVERSE	= (1 << 5),
	/* composite: OUTBOUND combined with INBOUND_REVERSE */
	IB_PATH_BIDIRECTIONAL	= (IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE)
};
/*
 * A flags word, a reserved word, and path_rec[] holding 16 32-bit words
 * (64 bytes).
 * NOTE(review): flags presumably carries the IB_PATH_* bits and path_rec
 * the MAD-format path record -- confirm against the consumers.
 */
struct ib_path_rec_data {
__u32 flags;
__u32 reserved;
__u32 path_rec[16];
};
struct ib_user_path_rec {
__u8 dgid[16];
__u8 sgid[16];
......
......@@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp);
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
*
* While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
* error is returned, the QP state shall not be affected,
* ib_post_send() will return an immediate error after queueing any
* earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
......
......@@ -215,12 +215,14 @@ struct rdma_ucm_event_resp {
/* Option levels */
enum {
RDMA_OPTION_ID = 0
RDMA_OPTION_ID = 0,
RDMA_OPTION_IB = 1
};
/* Option details */
enum {
RDMA_OPTION_ID_TOS = 0
RDMA_OPTION_ID_TOS = 0,
RDMA_OPTION_IB_PATH = 1
};
struct rdma_ucm_set_option {
......
......@@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;
......
......@@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn,
ic = conn->c_transport_data;
dev_addr = &ic->i_cm_id->route.addr.dev_addr;
ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
iinfo->max_send_wr = ic->i_send_ring.w_nr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment