Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (42 commits) IB/qib: Fix refcount leak in lkey/rkey validation IB/qib: Improve SERDES tunning on QMH boards IB/qib: Unnecessary delayed completions on RC connection IB/qib: Issue pre-emptive NAKs on eager buffer overflow IB/qib: RDMA lkey/rkey validation is inefficient for large MRs IB/qib: Change QPN increment IB/qib: Add fix missing from earlier patch IB/qib: Change receive queue/QPN selection IB/qib: Fix interrupt mitigation IB/qib: Avoid duplicate writes to the rcv head register IB/qib: Add a few new SERDES tunings IB/qib: Reset packet list after freeing IB/qib: New SERDES init routine and improvements to SI quality IB/qib: Clear WAIT_SEND flags when setting QP to error state IB/qib: Fix context allocation with multiple HCAs IB/qib: Fix multi-Florida HCA host panic on reboot IB/qib: Handle transitions from ACTIVE_DEFERRED to ACTIVE better IB/qib: UD send with immediate receive completion has wrong size IB/qib: Set port physical state even if other fields are invalid IB/qib: Generate completion callback on errors ...

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (42 commits) IB/qib: Fix refcount leak in lkey/rkey validation IB/qib: Improve SERDES tunning on QMH boards IB/qib: Unnecessary delayed completions on RC connection IB/qib: Issue pre-emptive NAKs on eager buffer overflow IB/qib: RDMA lkey/rkey validation is inefficient for large MRs IB/qib: Change QPN increment IB/qib: Add fix missing from earlier patch IB/qib: Change receive queue/QPN selection IB/qib: Fix interrupt mitigation IB/qib: Avoid duplicate writes to the rcv head register IB/qib: Add a few new SERDES tunings IB/qib: Reset packet list after freeing IB/qib: New SERDES init routine and improvements to SI quality IB/qib: Clear WAIT_SEND flags when setting QP to error state IB/qib: Fix context allocation with multiple HCAs IB/qib: Fix multi-Florida HCA host panic on reboot IB/qib: Handle transitions from ACTIVE_DEFERRED to ACTIVE better IB/qib: UD send with immediate receive completion has wrong size IB/qib: Set port physical state even if other fields are invalid IB/qib: Generate completion callback on errors ...
f1d6d6cd · Linus Torvalds · b9d919a4 · 2b76c057 · f1d6d6cd · f1d6d6cd
Commit f1d6d6cd authored Jan 11, 2011 by Linus Torvalds
37 changed files
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -189,6 +189,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
 	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
 }

+#ifdef notyet
 int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 {
 	struct rdma_cq_setup setup;
@@ -200,6 +201,7 @@ int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 	setup.ovfl_mode = 1;
 	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
 }
+#endif

 static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
 {

--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -335,8 +335,6 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_qp *qhp);
 int iwch_register_device(struct iwch_dev *dev);
 void iwch_unregister_device(struct iwch_dev *dev);
-int iwch_quiesce_qps(struct iwch_cq *chp);
-int iwch_resume_qps(struct iwch_cq *chp);
 void stop_read_rep_timer(struct iwch_qp *qhp);
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 		      struct iwch_mr *mhp, int shift);

--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1149,59 +1149,3 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
 	PDBG("%s exit state %d\n", __func__, qhp->attr.state);
 	return ret;
 }
-
-static int quiesce_qp(struct iwch_qp *qhp)
-{
-	spin_lock_irq(&qhp->lock);
-	iwch_quiesce_tid(qhp->ep);
-	qhp->flags |= QP_QUIESCED;
-	spin_unlock_irq(&qhp->lock);
-	return 0;
-}
-
-static int resume_qp(struct iwch_qp *qhp)
-{
-	spin_lock_irq(&qhp->lock);
-	iwch_resume_tid(qhp->ep);
-	qhp->flags &= ~QP_QUIESCED;
-	spin_unlock_irq(&qhp->lock);
-	return 0;
-}
-
-int iwch_quiesce_qps(struct iwch_cq *chp)
-{
-	int i;
-	struct iwch_qp *qhp;
-
-	for (i=0; i < T3_MAX_NUM_QP; i++) {
-		qhp = get_qhp(chp->rhp, i);
-		if (!qhp)
-			continue;
-		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
-			quiesce_qp(qhp);
-			continue;
-		}
-		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
-			quiesce_qp(qhp);
-	}
-	return 0;
-}
-
-int iwch_resume_qps(struct iwch_cq *chp)
-{
-	int i;
-	struct iwch_qp *qhp;
-
-	for (i=0; i < T3_MAX_NUM_QP; i++) {
-		qhp = get_qhp(chp->rhp, i);
-		if (!qhp)
-			continue;
-		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
-			resume_qp(qhp);
-			continue;
-		}
-		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
-			resume_qp(qhp);
-	}
-	return 0;
-}
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -760,7 +760,6 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
 int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
 int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
 u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
-int c4iw_post_zb_read(struct c4iw_qp *qhp);
 int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
 u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
 void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,

--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -892,36 +892,6 @@ static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
 	}
 }

-int c4iw_post_zb_read(struct c4iw_qp *qhp)
-{
-	union t4_wr *wqe;
-	struct sk_buff *skb;
-	u8 len16;
-
-	PDBG("%s enter\n", __func__);
-	skb = alloc_skb(40, GFP_KERNEL);
-	if (!skb) {
-		printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
-		return -ENOMEM;
-	}
-	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
-
-	wqe = (union t4_wr *)skb_put(skb, sizeof wqe->read);
-	memset(wqe, 0, sizeof wqe->read);
-	wqe->read.r2 = cpu_to_be64(0);
-	wqe->read.stag_sink = cpu_to_be32(1);
-	wqe->read.to_sink_hi = cpu_to_be32(0);
-	wqe->read.to_sink_lo = cpu_to_be32(1);
-	wqe->read.stag_src = cpu_to_be32(1);
-	wqe->read.plen = cpu_to_be32(0);
-	wqe->read.to_src_hi = cpu_to_be32(0);
-	wqe->read.to_src_lo = cpu_to_be32(1);
-	len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
-	init_wr_hdr(wqe, 0, FW_RI_RDMA_READ_WR, FW_RI_COMPLETION_FLAG, len16);
-
-	return c4iw_ofld_send(&qhp->rhp->rdev, skb);
-}
-
 static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
 			   gfp_t gfp)
 {
@@ -1029,7 +999,6 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	wqe->cookie = (unsigned long) &ep->com.wr_wait;

 	wqe->u.fini.type = FW_RI_TYPE_FINI;
-	c4iw_init_wr_wait(&ep->com.wr_wait);
 	ret = c4iw_ofld_send(&rhp->rdev, skb);
 	if (ret)
 		goto out;
@@ -1125,7 +1094,6 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	if (qhp->attr.mpa_attr.initiator)
 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);

-	c4iw_init_wr_wait(&qhp->ep->com.wr_wait);
 	ret = c4iw_ofld_send(&rhp->rdev, skb);
 	if (ret)
 		goto out;

--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -530,9 +530,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	for (j = 0; j < 6; j++) {
 		if (!pdev->resource[j].start)
 			continue;
-		ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
-			   j, (unsigned long long)pdev->resource[j].start,
-			   (unsigned long long)pdev->resource[j].end,
+		ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
+			   j, &pdev->resource[j],
 			   (unsigned long long)pci_resource_len(pdev, j));
 	}


--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -397,10 +397,14 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		cq->resize_buf = NULL;
 		cq->resize_umem = NULL;
 	} else {
+		struct mlx4_ib_cq_buf tmp_buf;
+		int tmp_cqe = 0;
+
 		spin_lock_irq(&cq->lock);
 		if (cq->resize_buf) {
 			mlx4_ib_cq_resize_copy_cqes(cq);
-			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+			tmp_buf = cq->buf;
+			tmp_cqe = cq->ibcq.cqe;
 			cq->buf      = cq->resize_buf->buf;
 			cq->ibcq.cqe = cq->resize_buf->cqe;

@@ -408,6 +412,9 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 			cq->resize_buf = NULL;
 		}
 		spin_unlock_irq(&cq->lock);
+
+		if (tmp_cqe)
+			mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe);
 	}

 	goto out;

--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -211,6 +211,8 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
 	if (agent) {
 		send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
 					      IB_MGMT_MAD_DATA, GFP_ATOMIC);
+		if (IS_ERR(send_buf))
+			return;
 		/*
 		 * We rely here on the fact that MLX QPs don't use the
 		 * address handle after the send is posted (this is

--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -171,6 +171,8 @@ static void forward_trap(struct mthca_dev *dev,
 	if (agent) {
 		send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
 					      IB_MGMT_MAD_DATA, GFP_ATOMIC);
+		if (IS_ERR(send_buf))
+			return;
 		/*
 		 * We rely here on the fact that MLX QPs don't use the
 		 * address handle after the send is posted (this is

--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -908,8 +908,8 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 					nesvnic->nic_index &&
 					mc_index < max_pft_entries_avaiable) {
 						nes_debug(NES_DBG_NIC_RX,
-					"mc_index=%d skipping nic_index=%d,\
-					used for=%d \n", mc_index,
+					"mc_index=%d skipping nic_index=%d, "
+					"used for=%d \n", mc_index,
 					nesvnic->nic_index,
 					nesadapter->pft_mcast_map[mc_index]);
 				mc_index++;

--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -766,7 +766,7 @@ struct qib_devdata {
 	void (*f_sdma_hw_start_up)(struct qib_pportdata *);
 	void (*f_sdma_init_early)(struct qib_pportdata *);
 	void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32);
-	void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32);
+	void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32);
 	u32 (*f_hdrqempty)(struct qib_ctxtdata *);
 	u64 (*f_portcntr)(struct qib_pportdata *, u32);
 	u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **,

--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -100,7 +100,8 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
 	wc->head = next;

 	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
+	    (cq->notify == IB_CQ_SOLICITED &&
+	     (solicited || entry->status != IB_WC_SUCCESS))) {
 		cq->notify = IB_CQ_NONE;
 		cq->triggered++;
 		/*

--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -71,6 +71,11 @@ MODULE_DESCRIPTION("QLogic IB driver");
 */
 #define QIB_PIO_MAXIBHDR 128

+/*
+ * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt.
+ */
+#define QIB_MAX_PKT_RECV 64
+
 struct qlogic_ib_stats qib_stats;

 const char *qib_get_unit_name(int unit)
@@ -284,14 +289,147 @@ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
 * Returns 1 if error was a CRC, else 0.
 * Needed for some chip's synthesized error counters.
 */
-static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt,
-			  u32 eflags, u32 l, u32 etail, __le32 *rhf_addr,
-			  struct qib_message_header *hdr)
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+			  u32 ctxt, u32 eflags, u32 l, u32 etail,
+			  __le32 *rhf_addr, struct qib_message_header *rhdr)
 {
 	u32 ret = 0;

 	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
 		ret = 1;
+	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+		/* For TIDERR and RC QPs preemptively schedule a NAK */
+		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+		struct qib_other_headers *ohdr = NULL;
+		struct qib_ibport *ibp = &ppd->ibport_data;
+		struct qib_qp *qp = NULL;
+		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+		u16 lid  = be16_to_cpu(hdr->lrh[1]);
+		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+		u32 qp_num;
+		u32 opcode;
+		u32 psn;
+		int diff;
+		unsigned long flags;
+
+		/* Sanity check packet */
+		if (tlen < 24)
+			goto drop;
+
+		if (lid < QIB_MULTICAST_LID_BASE) {
+			lid &= ~((1 << ppd->lmc) - 1);
+			if (unlikely(lid != ppd->lid))
+				goto drop;
+		}
+
+		/* Check for GRH */
+		if (lnh == QIB_LRH_BTH)
+			ohdr = &hdr->u.oth;
+		else if (lnh == QIB_LRH_GRH) {
+			u32 vtf;
+
+			ohdr = &hdr->u.l.oth;
+			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+				goto drop;
+			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+				goto drop;
+		} else
+			goto drop;
+
+		/* Get opcode and PSN from packet */
+		opcode = be32_to_cpu(ohdr->bth[0]);
+		opcode >>= 24;
+		psn = be32_to_cpu(ohdr->bth[2]);
+
+		/* Get the destination QP number. */
+		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		if (qp_num != QIB_MULTICAST_QPN) {
+			int ruc_res;
+			qp = qib_lookup_qpn(ibp, qp_num);
+			if (!qp)
+				goto drop;
+
+			/*
+			 * Handle only RC QPs - for other QP types drop error
+			 * packet.
+			 */
+			spin_lock(&qp->r_lock);
+
+			/* Check for valid receive state. */
+			if (!(ib_qib_state_ops[qp->state] &
+			      QIB_PROCESS_RECV_OK)) {
+				ibp->n_pkt_drops++;
+				goto unlock;
+			}
+
+			switch (qp->ibqp.qp_type) {
+			case IB_QPT_RC:
+				spin_lock_irqsave(&qp->s_lock, flags);
+				ruc_res =
+					qib_ruc_check_hdr(
+						ibp, hdr,
+						lnh == QIB_LRH_GRH,
+						qp,
+						be32_to_cpu(ohdr->bth[0]));
+				if (ruc_res) {
+					spin_unlock_irqrestore(&qp->s_lock,
+							       flags);
+					goto unlock;
+				}
+				spin_unlock_irqrestore(&qp->s_lock, flags);
+
+				/* Only deal with RDMA Writes for now */
+				if (opcode <
+				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+					diff = qib_cmp24(psn, qp->r_psn);
+					if (!qp->r_nak_state && diff >= 0) {
+						ibp->n_rc_seqnak++;
+						qp->r_nak_state =
+							IB_NAK_PSN_ERROR;
+						/* Use the expected PSN. */
+						qp->r_ack_psn = qp->r_psn;
+						/*
+						 * Wait to send the sequence
+						 * NAK until all packets
+						 * in the receive queue have
+						 * been processed.
+						 * Otherwise, we end up
+						 * propagating congestion.
+						 */
+						if (list_empty(&qp->rspwait)) {
+							qp->r_flags |=
+								QIB_R_RSP_NAK;
+							atomic_inc(
+								&qp->refcount);
+							list_add_tail(
+							 &qp->rspwait,
+							 &rcd->qp_wait_list);
+						}
+					} /* Out of sequence NAK */
+				} /* QP Request NAKs */
+				break;
+			case IB_QPT_SMI:
+			case IB_QPT_GSI:
+			case IB_QPT_UD:
+			case IB_QPT_UC:
+			default:
+				/* For now don't handle any other QP types */
+				break;
+			}
+
+unlock:
+			spin_unlock(&qp->r_lock);
+			/*
+			 * Notify qib_destroy_qp() if it is waiting
+			 * for us to finish.
+			 */
+			if (atomic_dec_and_test(&qp->refcount))
+				wake_up(&qp->wait);
+		} /* Unicast QP */
+	} /* Valid packet with TIDErr */
+
+drop:
 	return ret;
 }

@@ -335,7 +473,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 		smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 	}

-	for (last = 0, i = 1; !last && i <= 64; i += !last) {
+	for (last = 0, i = 1; !last; i += !last) {
 		hdr = dd->f_get_msgheader(dd, rhf_addr);
 		eflags = qib_hdrget_err_flags(rhf_addr);
 		etype = qib_hdrget_rcv_type(rhf_addr);
@@ -371,7 +509,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 		 * packets; only qibhdrerr should be set.
 		 */
 		if (unlikely(eflags))
-			crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l,
+			crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
 					       etail, rhf_addr, hdr);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
 			qib_ib_rcv(rcd, hdr, ebuf, tlen);
@@ -384,6 +522,9 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 		l += rsize;
 		if (l >= maxcnt)
 			l = 0;
+		if (i == QIB_MAX_PKT_RECV)
+			last = 1;
+
 		rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
 		if (dd->flags & QIB_NODMA_RTAIL) {
 			u32 seq = qib_hdrget_seq(rhf_addr);
@@ -402,7 +543,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 		 */
 		lval = l;
 		if (!last && !(i & 0xf)) {
-			dd->f_update_usrhead(rcd, lval, updegr, etail);
+			dd->f_update_usrhead(rcd, lval, updegr, etail, i);
 			updegr = 0;
 		}
 	}
@@ -444,7 +585,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 	 * if no packets were processed.
 	 */
 	lval = (u64)rcd->head | dd->rhdrhead_intr_off;
-	dd->f_update_usrhead(rcd, lval, updegr, etail);
+	dd->f_update_usrhead(rcd, lval, updegr, etail, i);
 	return crcs;
 }


--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1379,17 +1379,17 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
 		/* find device (with ACTIVE ports) with fewest ctxts in use */
 		for (ndev = 0; ndev < devmax; ndev++) {
 			struct qib_devdata *dd = qib_lookup(ndev);
-			unsigned cused = 0, cfree = 0;
+			unsigned cused = 0, cfree = 0, pusable = 0;
 			if (!dd)
 				continue;
 			if (port && port <= dd->num_pports &&
 			    usable(dd->pport + port - 1))
-				dusable = 1;
+				pusable = 1;
 			else
 				for (i = 0; i < dd->num_pports; i++)
 					if (usable(dd->pport + i))
-						dusable++;
-			if (!dusable)
+						pusable++;
+			if (!pusable)
 				continue;
 			for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
 			     ctxt++)
@@ -1397,7 +1397,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
 					cused++;
 				else
 					cfree++;
-			if (cfree && cused < inuse) {
+			if (pusable && cfree && cused < inuse) {
 				udd = dd;
 				inuse = cused;
 			}

--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2074,7 +2074,7 @@ static void qib_6120_config_ctxts(struct qib_devdata *dd)
 }

 static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
-				    u32 updegr, u32 egrhd)
+				    u32 updegr, u32 egrhd, u32 npkts)
 {
 	qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
 	if (updegr)

--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2297,7 +2297,7 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
 	nchipctxts = qib_read_kreg32(dd, kr_portcnt);
 	dd->cspec->numctxts = nchipctxts;
 	if (qib_n_krcv_queues > 1) {
-		dd->qpn_mask = 0x3f;
+		dd->qpn_mask = 0x3e;
 		dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
 		if (dd->first_user_ctxt > nchipctxts)
 			dd->first_user_ctxt = nchipctxts;
@@ -2703,7 +2703,7 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
 }

 static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd,
-				    u32 updegr, u32 egrhd)
+				    u32 updegr, u32 egrhd, u32 npkts)
 {
 	qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
 	if (updegr)

--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -92,9 +92,11 @@ unsigned long *qib_cpulist;
 /* set number of contexts we'll actually use */
 void qib_set_ctxtcnt(struct qib_devdata *dd)
 {
-	if (!qib_cfgctxts)
+	if (!qib_cfgctxts) {
 		dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
-	else if (qib_cfgctxts < dd->num_pports)
+		if (dd->cfgctxts > dd->ctxtcnt)
+			dd->cfgctxts = dd->ctxtcnt;
+	} else if (qib_cfgctxts < dd->num_pports)
 		dd->cfgctxts = dd->ctxtcnt;
 	else if (qib_cfgctxts <= dd->ctxtcnt)
 		dd->cfgctxts = qib_cfgctxts;

--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -131,7 +131,8 @@ void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs)
 			/* start a 75msec timer to clear symbol errors */
 			mod_timer(&ppd->symerr_clear_timer,
 				  msecs_to_jiffies(75));
-		} else if (ltstate == IB_PHYSPORTSTATE_LINKUP) {
+		} else if (ltstate == IB_PHYSPORTSTATE_LINKUP &&
+			   !(ppd->lflags & QIBL_LINKACTIVE)) {
 			/* active, but not active defered */
 			qib_hol_up(ppd); /* useful only for 6120 now */
 			*ppd->statusp |=

--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -136,7 +136,6 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	int ret = 0;
 	unsigned long flags;

 	/*
@@ -152,6 +151,8 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		if (!dev->dma_mr)
 			goto bail;
 		atomic_inc(&dev->dma_mr->refcount);
+		spin_unlock_irqrestore(&rkt->lock, flags);
+
 		isge->mr = dev->dma_mr;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
@@ -170,8 +171,23 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		     off + sge->length > mr->length ||
 		     (mr->access_flags & acc) != acc))
 		goto bail;
+	atomic_inc(&mr->refcount);
+	spin_unlock_irqrestore(&rkt->lock, flags);

 	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * Page sizes are a uniform power of 2, so no loop is needed:
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		 */
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off/QIB_SEGSZ;
+		n = entries_spanned_by_off%QIB_SEGSZ;
+	} else {
 		m = 0;
 		n = 0;
 		while (off >= mr->map[m]->segs[n].length) {
@@ -182,7 +198,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 				n = 0;
 			}
 		}
-	atomic_inc(&mr->refcount);
+	}
 	isge->mr = mr;
 	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
 	isge->length = mr->map[m]->segs[n].length - off;
@@ -190,10 +206,10 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	isge->m = m;
 	isge->n = n;
 ok:
-	ret = 1;
+	return 1;
 bail:
 	spin_unlock_irqrestore(&rkt->lock, flags);
-	return ret;
+	return 0;
 }

 /**
@@ -214,7 +230,6 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	int ret = 0;
 	unsigned long flags;

 	/*
@@ -231,6 +246,8 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		if (!dev->dma_mr)
 			goto bail;
 		atomic_inc(&dev->dma_mr->refcount);
+		spin_unlock_irqrestore(&rkt->lock, flags);
+
 		sge->mr = dev->dma_mr;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
@@ -248,8 +265,23 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
 		goto bail;
+	atomic_inc(&mr->refcount);
+	spin_unlock_irqrestore(&rkt->lock, flags);

 	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * Page sizes are a uniform power of 2, so no loop is needed:
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		 */
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off/QIB_SEGSZ;
+		n = entries_spanned_by_off%QIB_SEGSZ;
+	} else {
 		m = 0;
 		n = 0;
 		while (off >= mr->map[m]->segs[n].length) {
@@ -260,7 +292,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 				n = 0;
 			}
 		}
-	atomic_inc(&mr->refcount);
+	}
 	sge->mr = mr;
 	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
 	sge->length = mr->map[m]->segs[n].length - off;
@@ -268,10 +300,10 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	sge->m = m;
 	sge->n = n;
 ok:
-	ret = 1;
+	return 1;
 bail:
 	spin_unlock_irqrestore(&rkt->lock, flags);
-	return ret;
+	return 0;
 }

 /*

--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -668,8 +668,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	lid = be16_to_cpu(pip->lid);
 	/* Must be a valid unicast LID address. */
 	if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
-		goto err;
-	if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
+		smp->status |= IB_SMP_INVALID_FIELD;
+	else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
 		if (ppd->lid != lid)
 			qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT);
 		if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7))
@@ -683,8 +683,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	msl = pip->neighbormtu_mastersmsl & 0xF;
 	/* Must be a valid unicast LID address. */
 	if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
-		goto err;
-	if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+		smp->status |= IB_SMP_INVALID_FIELD;
+	else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
 		spin_lock_irqsave(&ibp->lock, flags);
 		if (ibp->sm_ah) {
 			if (smlid != ibp->sm_lid)
@@ -707,7 +707,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		if (lwe == 0xFF)
 			lwe = ppd->link_width_supported;
 		else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
-			goto err;
+			smp->status |= IB_SMP_INVALID_FIELD;
+		else if (lwe != ppd->link_width_enabled)
 			set_link_width_enabled(ppd, lwe);
 	}

@@ -721,7 +722,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		if (lse == 15)
 			lse = ppd->link_speed_supported;
 		else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
-			goto err;
+			smp->status |= IB_SMP_INVALID_FIELD;
+		else if (lse != ppd->link_speed_enabled)
 			set_link_speed_enabled(ppd, lse);
 	}

@@ -738,7 +740,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 					IB_LINKINITCMD_POLL);
 		break;
 	default:
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;
 	}

 	ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
@@ -748,14 +750,16 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,

 	mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
 	if (mtu == -1)
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;
+	else
 		qib_set_mtu(ppd, mtu);

 	/* Set operational VLs */
 	vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF;
 	if (vls) {
 		if (vls > ppd->vls_supported)
-			goto err;
+			smp->status |= IB_SMP_INVALID_FIELD;
+		else
 			(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
 	}

@@ -770,10 +774,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,

 	ore = pip->localphyerrors_overrunerrors;
 	if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;

 	if (set_overrunthreshold(ppd, (ore & 0xF)))
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;

 	ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;

@@ -792,7 +796,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	state = pip->linkspeed_portstate & 0xF;
 	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
 	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;

 	/*
 	 * Only state changes of DOWN, ARM, and ACTIVE are valid
@@ -812,8 +816,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 			lstate = QIB_IB_LINKDOWN;
 		else if (lstate == 3)
 			lstate = QIB_IB_LINKDOWN_DISABLE;
-		else
-			goto err;
+		else {
+			smp->status |= IB_SMP_INVALID_FIELD;
+			break;
+		}
 		spin_lock_irqsave(&ppd->lflags_lock, flags);
 		ppd->lflags &= ~QIBL_LINKV;
 		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
@@ -835,8 +841,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
 		break;
 	default:
-		/* XXX We have already partially updated our state! */
-		goto err;
+		smp->status |= IB_SMP_INVALID_FIELD;
 	}

 	ret = subn_get_portinfo(smp, ibdev, port);

--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
 /* Fast memory region */
 struct qib_fmr {
 	struct ib_fmr ibfmr;
-	u8 page_shift;
 	struct qib_mregion mr;        /* must be last */
 };

@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
 			goto bail;
 	}
 	mr->mr.mapsz = m;
+	mr->mr.page_shift = 0;
 	mr->mr.max_segs = count;

 	/*
@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.access_flags = mr_access_flags;
 	mr->umem = umem;

+	if (is_power_of_2(umem->page_size))
+		mr->mr.page_shift = ilog2(umem->page_size);
 	m = 0;
 	n = 0;
 	list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 	fmr->mr.offset = 0;
 	fmr->mr.access_flags = mr_access_flags;
 	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->page_shift = fmr_attr->page_shift;
+	fmr->mr.page_shift = fmr_attr->page_shift;

 	atomic_set(&fmr->mr.refcount, 0);
 	ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	spin_lock_irqsave(&rkt->lock, flags);
 	fmr->mr.user_base = iova;
 	fmr->mr.iova = iova;
-	ps = 1 << fmr->page_shift;
+	ps = 1 << fmr->mr.page_shift;
 	fmr->mr.length = list_len * ps;
 	m = 0;
 	n = 0;

--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -48,13 +48,12 @@ static inline unsigned mk_qpn(struct qib_qpn_table *qpt,

 static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
 					struct qpn_map *map, unsigned off,
-					unsigned r)
+					unsigned n)
 {
 	if (qpt->mask) {
 		off++;
-		if ((off & qpt->mask) >> 1 != r)
-			off = ((off & qpt->mask) ?
-				(off | qpt->mask) + 1 : off) | (r << 1);
+		if (((off & qpt->mask) >> 1) >= n)
+			off = (off | qpt->mask) + 2;
 	} else
 		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
 	return off;
@@ -123,7 +122,6 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 	u32 i, offset, max_scan, qpn;
 	struct qpn_map *map;
 	u32 ret;
-	int r;

 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -139,15 +137,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 		goto bail;
 	}

-	r = smp_processor_id();
-	if (r >= dd->n_krcv_queues)
-		r %= dd->n_krcv_queues;
-	qpn = qpt->last + 1;
+	qpn = qpt->last + 2;
 	if (qpn >= QPN_MAX)
 		qpn = 2;
-	if (qpt->mask && ((qpn & qpt->mask) >> 1) != r)
-		qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) |
-			(r << 1);
+	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
+		qpn = (qpn | qpt->mask) + 2;
 	offset = qpn & BITS_PER_PAGE_MASK;
 	map = &qpt->map[qpn / BITS_PER_PAGE];
 	max_scan = qpt->nmaps - !offset;
@@ -163,7 +157,8 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 				ret = qpn;
 				goto bail;
 			}
-			offset = find_next_offset(qpt, map, offset, r);
+			offset = find_next_offset(qpt, map, offset,
+				dd->n_krcv_queues);
 			qpn = mk_qpn(qpt, map, offset);
 			/*
 			 * This test differs from alloc_pidmap().
@@ -183,13 +178,13 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 			if (qpt->nmaps == QPNMAP_ENTRIES)
 				break;
 			map = &qpt->map[qpt->nmaps++];
-			offset = qpt->mask ? (r << 1) : 0;
+			offset = 0;
 		} else if (map < &qpt->map[qpt->nmaps]) {
 			++map;
-			offset = qpt->mask ? (r << 1) : 0;
+			offset = 0;
 		} else {
 			map = &qpt->map[0];
-			offset = qpt->mask ? (r << 1) : 2;
+			offset = 2;
 		}
 		qpn = mk_qpn(qpt, map, offset);
 	}
@@ -468,6 +463,10 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
 		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 	}
+
+	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
+		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
+
 	spin_lock(&dev->pending_lock);
 	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
 		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
@@ -1061,7 +1060,6 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
 		}
 		qp->ibqp.qp_num = err;
 		qp->port_num = init_attr->port_num;
-		qp->processor_id = smp_processor_id();
 		qib_reset_qp(qp, init_attr->qp_type);
 		break;


--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1407,6 +1407,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			    struct qib_ctxtdata *rcd)
 {
 	struct qib_swqe *wqe;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	enum ib_wc_status status;
 	unsigned long flags;
 	int diff;
@@ -1414,6 +1415,29 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 	u32 aeth;
 	u64 val;

+	if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
+		/*
+		 * If ACK'd PSN on SDMA busy list try to make progress to
+		 * reclaim SDMA credits.
+		 */
+		if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
+		    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
+
+			/*
+			 * If send tasklet not running attempt to progress
+			 * SDMA queue.
+			 */
+			if (!(qp->s_flags & QIB_S_BUSY)) {
+				/* Acquire SDMA Lock */
+				spin_lock_irqsave(&ppd->sdma_lock, flags);
+				/* Invoke sdma make progress */
+				qib_sdma_make_progress(ppd);
+				/* Release SDMA Lock */
+				spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+			}
+		}
+	}
+
 	spin_lock_irqsave(&qp->s_lock, flags);

 	/* Ignore invalid responses. */

--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -445,13 +445,14 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
 	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;

-	/* Get the number of bytes the message was padded by. */
+	/*
+	 * Get the number of bytes the message was padded by
+	 * and drop incomplete packets.
+	 */
 	pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-	if (unlikely(tlen < (hdrsize + pad + 4))) {
-		/* Drop incomplete packets. */
-		ibp->n_pkt_drops++;
-		goto bail;
-	}
+	if (unlikely(tlen < (hdrsize + pad + 4)))
+		goto drop;
+
 	tlen -= hdrsize + pad + 4;

 	/*
@@ -460,10 +461,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	 */
 	if (qp->ibqp.qp_num) {
 		if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-			     hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-			ibp->n_pkt_drops++;
-			goto bail;
-		}
+			     hdr->lrh[3] == IB_LID_PERMISSIVE))
+			goto drop;
 		if (qp->ibqp.qp_num > 1) {
 			u16 pkey1, pkey2;

@@ -476,7 +475,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 						0xF,
 					      src_qp, qp->ibqp.qp_num,
 					      hdr->lrh[3], hdr->lrh[1]);
-				goto bail;
+				return;
 			}
 		}
 		if (unlikely(qkey != qp->qkey)) {
@@ -484,30 +483,24 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 				      (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
 				      src_qp, qp->ibqp.qp_num,
 				      hdr->lrh[3], hdr->lrh[1]);
-			goto bail;
+			return;
 		}
 		/* Drop invalid MAD packets (see 13.5.3.1). */
 		if (unlikely(qp->ibqp.qp_num == 1 &&
 			     (tlen != 256 ||
-			      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) {
-			ibp->n_pkt_drops++;
-			goto bail;
-		}
+			      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+			goto drop;
 	} else {
 		struct ib_smp *smp;

 		/* Drop invalid MAD packets (see 13.5.3.1). */
-		if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) {
-			ibp->n_pkt_drops++;
-			goto bail;
-		}
+		if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)
+			goto drop;
 		smp = (struct ib_smp *) data;
 		if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
 		     hdr->lrh[3] == IB_LID_PERMISSIVE) &&
-		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-			ibp->n_pkt_drops++;
-			goto bail;
-		}
+		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+			goto drop;
 	}

 	/*
@@ -519,14 +512,12 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
 		wc.ex.imm_data = ohdr->u.ud.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
-		hdrsize += sizeof(u32);
+		tlen -= sizeof(u32);
 	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
 		wc.ex.imm_data = 0;
 		wc.wc_flags = 0;
-	} else {
-		ibp->n_pkt_drops++;
-		goto bail;
-	}
+	} else
+		goto drop;

 	/*
 	 * A GRH is expected to preceed the data even if not
@@ -556,8 +547,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
 		qp->r_flags |= QIB_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
-		return;
+		goto drop;
 	}
 	if (has_grh) {
 		qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
@@ -594,5 +584,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		     (ohdr->bth[0] &
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
-bail:;
+	return;
+
+drop:
+	ibp->n_pkt_drops++;
 }
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -382,6 +382,7 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,

 		kmem_cache_free(pq->pkt_slab, pkt);
 	}
+	INIT_LIST_HEAD(list);
 }

 /*

--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -301,6 +301,7 @@ struct qib_mregion {
 	int access_flags;
 	u32 max_segs;           /* number of qib_segs in all the arrays */
 	u32 mapsz;              /* size of the map array */
+	u8  page_shift;         /* 0 - non uniform/non power-of-2 sizes */
 	atomic_t refcount;
 	struct qib_segarray *map[0];    /* the segments */
 };
@@ -435,7 +436,6 @@ struct qib_qp {
 	spinlock_t r_lock;      /* used for APM */
 	spinlock_t s_lock;
 	atomic_t s_dma_busy;
-	unsigned processor_id;	/* Processor ID QP is bound to */
 	u32 s_flags;
 	u32 s_cur_size;         /* size of send packet in bytes */
 	u32 s_len;              /* total length of s_sge */
@@ -813,13 +813,8 @@ extern struct workqueue_struct *qib_cq_wq;
 */
 static inline void qib_schedule_send(struct qib_qp *qp)
 {
-	if (qib_send_ok(qp)) {
-		if (qp->processor_id == smp_processor_id())
+	if (qib_send_ok(qp))
 		queue_work(qib_wq, &qp->s_work);
-		else
-			queue_work_on(qp->processor_id,
-				      qib_wq, &qp->s_work);
-	}
 }

 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)

--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
 config INFINIBAND_IPOIB
 	tristate "IP-over-InfiniBand"
 	depends on NETDEVICES && INET && (IPV6 || IPV6=n)
-	select INET_LRO
 	---help---
 	  Support for the IP-over-InfiniBand protocol (IPoIB). This
 	  transports IP packets over InfiniBand so you can use your IB

--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -50,7 +50,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
-#include <linux/inet_lro.h>
+#include <linux/sched.h>

 /* constants */

@@ -100,9 +100,6 @@ enum {
 	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
 	IPOIB_MCAST_FLAG_ATTACHED = 3,

-	IPOIB_MAX_LRO_DESCRIPTORS = 8,
-	IPOIB_LRO_MAX_AGGR 	  = 64,
-
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
 };
@@ -262,11 +259,6 @@ struct ipoib_ethtool_st {
 	u16     max_coalesced_frames;
 };

-struct ipoib_lro {
-	struct net_lro_mgr lro_mgr;
-	struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
-};
-
 /*
 * Device private locking: network stack tx_lock protects members used
 * in TX fast path, lock protects everything else.  lock nests inside
@@ -352,8 +344,6 @@ struct ipoib_dev_priv {
 	int	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
-
-	struct ipoib_lro lro;
 };

 struct ipoib_ah {

--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1480,6 +1480,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,

 		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
 			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+			priv->dev->features |= NETIF_F_GRO;
 			if (priv->hca_caps & IB_DEVICE_UD_TSO)
 				dev->features |= NETIF_F_TSO;
 		}

--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -106,63 +106,12 @@ static int ipoib_set_coalesce(struct net_device *dev,
 	return 0;
 }

-static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
-	"LRO aggregated", "LRO flushed",
-	"LRO avg aggr", "LRO no desc"
-};
-
-static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
-{
-	switch (stringset) {
-	case ETH_SS_STATS:
-		memcpy(data, *ipoib_stats_keys,	sizeof(ipoib_stats_keys));
-		break;
-	}
-}
-
-static int ipoib_get_sset_count(struct net_device *dev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_STATS:
-		return ARRAY_SIZE(ipoib_stats_keys);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void ipoib_get_ethtool_stats(struct net_device *dev,
-				struct ethtool_stats *stats, uint64_t *data)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int index = 0;
-
-	/* Get LRO statistics */
-	data[index++] = priv->lro.lro_mgr.stats.aggregated;
-	data[index++] = priv->lro.lro_mgr.stats.flushed;
-	if (priv->lro.lro_mgr.stats.flushed)
-		data[index++] = priv->lro.lro_mgr.stats.aggregated /
-				priv->lro.lro_mgr.stats.flushed;
-	else
-		data[index++] = 0;
-	data[index++] = priv->lro.lro_mgr.stats.no_desc;
-}
-
-static int ipoib_set_flags(struct net_device *dev, u32 flags)
-{
-	return ethtool_op_set_flags(dev, flags, ETH_FLAG_LRO);
-}
-
 static const struct ethtool_ops ipoib_ethtool_ops = {
 	.get_drvinfo		= ipoib_get_drvinfo,
 	.get_rx_csum		= ipoib_get_rx_csum,
 	.set_tso		= ipoib_set_tso,
 	.get_coalesce		= ipoib_get_coalesce,
 	.set_coalesce		= ipoib_set_coalesce,
-	.get_flags		= ethtool_op_get_flags,
-	.set_flags		= ipoib_set_flags,
-	.get_strings		= ipoib_get_strings,
-	.get_sset_count		= ipoib_get_sset_count,
-	.get_ethtool_stats	= ipoib_get_ethtool_stats,
 };

 void ipoib_set_ethtool_ops(struct net_device *dev)

--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -295,10 +295,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;

-	if (dev->features & NETIF_F_LRO)
-		lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
-	else
-		netif_receive_skb(skb);
+	napi_gro_receive(&priv->napi, skb);

 repost:
 	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -450,9 +447,6 @@ int ipoib_poll(struct napi_struct *napi, int budget)
 	}

 	if (done < budget) {
-		if (dev->features & NETIF_F_LRO)
-			lro_flush_all(&priv->lro.lro_mgr);
-
 		napi_complete(napi);
 		if (unlikely(ib_req_notify_cq(priv->recv_cq,
 					      IB_CQ_NEXT_COMP |

--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -60,15 +60,6 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

-static int lro;
-module_param(lro, bool, 0444);
-MODULE_PARM_DESC(lro,  "Enable LRO (Large Receive Offload)");
-
-static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
-module_param(lro_max_aggr, int, 0644);
-MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
-		"(default = 64)");
-
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;

@@ -976,54 +967,6 @@ static const struct header_ops ipoib_header_ops = {
 	.create	= ipoib_hard_header,
 };

-static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
-		       void **tcph, u64 *hdr_flags, void *priv)
-{
-	unsigned int ip_len;
-	struct iphdr *iph;
-
-	if (unlikely(skb->protocol != htons(ETH_P_IP)))
-		return -1;
-
-	/*
-	 * In the future we may add an else clause that verifies the
-	 * checksum and allows devices which do not calculate checksum
-	 * to use LRO.
-	 */
-	if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
-		return -1;
-
-	/* Check for non-TCP packet */
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-	if (iph->protocol != IPPROTO_TCP)
-		return -1;
-
-	ip_len = ip_hdrlen(skb);
-	skb_set_transport_header(skb, ip_len);
-	*tcph = tcp_hdr(skb);
-
-	/* check if IP header and TCP header are complete */
-	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
-		return -1;
-
-	*hdr_flags = LRO_IPV4 | LRO_TCP;
-	*iphdr = iph;
-
-	return 0;
-}
-
-static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
-{
-	priv->lro.lro_mgr.max_aggr	 = lro_max_aggr;
-	priv->lro.lro_mgr.max_desc	 = IPOIB_MAX_LRO_DESCRIPTORS;
-	priv->lro.lro_mgr.lro_arr	 = priv->lro.lro_desc;
-	priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
-	priv->lro.lro_mgr.features	 = LRO_F_NAPI;
-	priv->lro.lro_mgr.dev		 = priv->dev;
-	priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
-}
-
 static const struct net_device_ops ipoib_netdev_ops = {
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
@@ -1067,8 +1010,6 @@ static void ipoib_setup(struct net_device *dev)

 	priv->dev = dev;

-	ipoib_lro_setup(priv);
-
 	spin_lock_init(&priv->lock);

 	mutex_init(&priv->vlan_mutex);
@@ -1218,8 +1159,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
 	}

-	if (lro)
-		priv->dev->features |= NETIF_F_LRO;
+	priv->dev->features |= NETIF_F_GRO;

 	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
 		priv->dev->features |= NETIF_F_TSO;

--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -59,16 +59,15 @@ enum {

 	SRP_RQ_SHIFT    	= 6,
 	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
-	SRP_RQ_MASK		= SRP_RQ_SIZE - 1,

 	SRP_SQ_SIZE		= SRP_RQ_SIZE,
-	SRP_SQ_MASK		= SRP_SQ_SIZE - 1,
 	SRP_RSP_SQ_SIZE		= 1,
 	SRP_REQ_SQ_SIZE		= SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
 	SRP_TSK_MGMT_SQ_SIZE	= 1,
 	SRP_CMD_SQ_SIZE		= SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,

-	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1),
+	SRP_TAG_NO_REQ		= ~0U,
+	SRP_TAG_TSK_MGMT	= 1U << 31,

 	SRP_FMR_SIZE		= 256,
 	SRP_FMR_POOL_SIZE	= 1024,
@@ -113,15 +112,29 @@ struct srp_request {
 	struct list_head	list;
 	struct scsi_cmnd       *scmnd;
 	struct srp_iu	       *cmd;
-	struct srp_iu	       *tsk_mgmt;
 	struct ib_pool_fmr     *fmr;
-	struct completion	done;
 	short			index;
-	u8			cmd_done;
-	u8			tsk_status;
 };

 struct srp_target_port {
+	/* These are RW in the hot path, and commonly used together */
+	struct list_head	free_tx;
+	struct list_head	free_reqs;
+	spinlock_t		lock;
+	s32			req_lim;
+
+	/* These are read-only in the hot path */
+	struct ib_cq	       *send_cq ____cacheline_aligned_in_smp;
+	struct ib_cq	       *recv_cq;
+	struct ib_qp	       *qp;
+	u32			lkey;
+	u32			rkey;
+	enum srp_target_state	state;
+
+	/* Everything above this point is used in the hot path of
+	 * command processing. Try to keep them packed into cachelines.
+	 */
+
 	__be64			id_ext;
 	__be64			ioc_guid;
 	__be64			service_id;
@@ -138,24 +151,13 @@ struct srp_target_port {
 	int			path_query_id;

 	struct ib_cm_id	       *cm_id;
-	struct ib_cq	       *recv_cq;
-	struct ib_cq	       *send_cq;
-	struct ib_qp	       *qp;

 	int			max_ti_iu_len;
-	s32			req_lim;

 	int			zero_req_lim;

-	unsigned		rx_head;
-	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
-
-	unsigned		tx_head;
-	unsigned		tx_tail;
 	struct srp_iu	       *tx_ring[SRP_SQ_SIZE];
-
-	struct list_head	free_reqs;
-	struct list_head	req_queue;
+	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
 	struct srp_request	req_ring[SRP_CMD_SQ_SIZE];

 	struct work_struct	work;
@@ -163,16 +165,18 @@ struct srp_target_port {
 	struct list_head	list;
 	struct completion	done;
 	int			status;
-	enum srp_target_state	state;
 	int			qp_in_error;
+
+	struct completion	tsk_mgmt_done;
+	u8			tsk_mgmt_status;
 };

 struct srp_iu {
+	struct list_head	list;
 	u64			dma;
 	void		       *buf;
 	size_t			size;
 	enum dma_data_direction	direction;
-	enum srp_iu_type	type;
 };

 #endif /* IB_SRP_H */
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -178,6 +178,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 	} else {
 		int i;

+		buf->direct.buf  = NULL;
 		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
@@ -229,7 +230,7 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 		dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
 				  buf->direct.map);
 	else {
-		if (BITS_PER_LONG == 64)
+		if (BITS_PER_LONG == 64 && buf->direct.buf)
 			vunmap(buf->direct.buf);

 		for (i = 0; i < buf->nbufs; ++i)

--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -289,10 +289,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
 		dev_cap->bf_reg_size = 1 << (field & 0x1f);
 		MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
-		if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) {
-			mlx4_warn(dev, "firmware bug: log2 # of blue flame regs is invalid (%d), forcing 3\n", field & 0x1f);
+		if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
 			field = 3;
-		}
 		dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
 		mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
 			 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);