Commit f605f26e authored by Bob Pearson, committed by Jason Gunthorpe

RDMA/rxe: Protect QP state with qp->state_lock

Currently the rxe driver makes little effort to keep changes to qp state
(which includes qp->attr.qp_state, qp->attr.sq_draining and qp->valid)
atomic between different client threads and IO threads. In particular, a
common pattern is for an RDMA application to call ib_modify_qp() to move
a qp to the ERR state and then wait until all the packet and work queues
have drained before calling ib_destroy_qp(). None of these state changes
is protected by locks that would ensure they execute atomically and with
the necessary memory barriers. This has been observed to lead to
incorrect behavior around qp cleanup.

This patch continues the work of the previous patches in this series and
adds locking code around qp state changes and lookups.
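
As a reference for the teardown template described above, here is a minimal
userspace sketch of that sequence using the libibverbs API (error handling
and the CQ-draining loop are elided; the helper name is invented for
illustration):

#include <infiniband/verbs.h>

/* hypothetical helper: move the qp to ERR, let outstanding work flush,
 * then destroy it -- the sequence the rxe driver must survive safely
 */
static int teardown_qp(struct ibv_qp *qp)
{
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };
	int err;

	err = ibv_modify_qp(qp, &attr, IBV_QP_STATE);
	if (err)
		return err;

	/* the application would normally poll its CQs here until every
	 * outstanding work request has completed with a flush error
	 */

	return ibv_destroy_qp(qp);
}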

Link: https://lore.kernel.org/r/20230405042611.6467-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 7b560b89
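
To make the locking pattern in the diff below easier to follow, here is a
small self-contained C analogue of what the patch does: every check or
change of qp state happens under a single per-qp lock. The toy_* names are
invented for illustration and a pthread mutex stands in for the kernel's
spin_lock_bh() on qp->state_lock; this is a sketch of the idea, not driver
code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum toy_qp_state { TOY_QPS_RESET, TOY_QPS_RTS, TOY_QPS_SQD, TOY_QPS_ERR };

struct toy_qp {
	pthread_mutex_t state_lock;	/* stands in for qp->state_lock */
	enum toy_qp_state state;	/* stands in for qp->attr.qp_state */
	bool valid;			/* stands in for qp->valid */
};

/* modify-qp path: the state is only changed while holding the lock */
static void toy_qp_error(struct toy_qp *qp)
{
	pthread_mutex_lock(&qp->state_lock);
	qp->state = TOY_QPS_ERR;
	pthread_mutex_unlock(&qp->state_lock);
	/* the real driver then schedules the req/comp/resp tasks to drain */
}

/* IO path: state and valid are sampled atomically before doing work */
static bool toy_qp_ready_to_send(struct toy_qp *qp)
{
	bool ready;

	pthread_mutex_lock(&qp->state_lock);
	ready = qp->valid && qp->state == TOY_QPS_RTS;
	pthread_mutex_unlock(&qp->state_lock);
	return ready;
}

int main(void)
{
	struct toy_qp qp = {
		.state_lock = PTHREAD_MUTEX_INITIALIZER,
		.state = TOY_QPS_RTS,
		.valid = true,
	};

	printf("ready before ERR: %d\n", toy_qp_ready_to_send(&qp));	/* 1 */
	toy_qp_error(&qp);
	printf("ready after ERR:  %d\n", toy_qp_ready_to_send(&qp));	/* 0 */
	return 0;
}
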
...@@ -118,10 +118,12 @@ void retransmit_timer(struct timer_list *t)
rxe_dbg_qp(qp, "retransmit timer fired\n");
spin_lock_bh(&qp->state_lock);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
spin_unlock_bh(&qp->state_lock);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
...@@ -479,9 +481,8 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
/* state_lock used by requester & completer */
spin_lock_bh(&qp->state_lock);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
qp->attr.sq_draining = 0;
spin_unlock_bh(&qp->state_lock);
...@@ -497,8 +498,8 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
}
return;
}
spin_unlock_bh(&qp->state_lock);
}
spin_unlock_bh(&qp->state_lock);
}
static inline enum comp_state complete_ack(struct rxe_qp *qp,
...@@ -614,6 +615,26 @@ static void free_pkt(struct rxe_pkt_info *pkt)
ib_device_put(dev);
}
/* reset the retry timer if
* - QP is type RC
* - there is a packet sent by the requester that
* might be acked (we still might get spurious
* timeouts but try to keep them as few as possible)
* - the timeout parameter is set
* - the QP is alive
*/
static void reset_retry_timer(struct rxe_qp *qp)
{
if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
spin_lock_bh(&qp->state_lock);
if (qp_state(qp) >= IB_QPS_RTS &&
psn_compare(qp->req.psn, qp->comp.psn) > 0)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
spin_unlock_bh(&qp->state_lock);
}
}
int rxe_completer(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
...@@ -623,14 +644,17 @@ int rxe_completer(struct rxe_qp *qp)
enum comp_state state;
int ret;
spin_lock_bh(&qp->state_lock);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_resp_pkts(qp);
flush_send_queue(qp, notify);
spin_unlock_bh(&qp->state_lock);
goto exit;
}
spin_unlock_bh(&qp->state_lock);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
...@@ -718,20 +742,7 @@ int rxe_completer(struct rxe_qp *qp)
break;
}
/* re reset the timeout counter if
* (1) QP is type RC
* (2) the QP is alive
* (3) there is a packet sent by the requester that
* might be acked (we still might get spurious
* timeouts but try to keep them as few as possible)
* (4) the timeout parameter is set
*/
if ((qp_type(qp) == IB_QPT_RC) &&
(qp_state(qp) >= IB_QPS_RTS) &&
(psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
reset_retry_timer(qp);
goto exit;
case COMPST_ERROR_RETRY:
...@@ -793,6 +804,7 @@ int rxe_completer(struct rxe_qp *qp)
*/
qp->req.wait_for_rnr_timer = 1;
rxe_dbg_qp(qp, "set rnr nak timer\n");
// TODO who protects from destroy_qp??
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
......
...@@ -413,11 +413,14 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
spin_lock_bh(&qp->state_lock);
if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
(!is_request && (qp_state(qp) < IB_QPS_RTR))) {
spin_unlock_bh(&qp->state_lock);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
spin_unlock_bh(&qp->state_lock);
rxe_icrc_generate(skb, pkt);
......
...@@ -325,8 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;
spin_lock_bh(&qp->state_lock);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
spin_unlock_bh(&qp->state_lock);
return 0;
...@@ -377,27 +379,9 @@ int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
return 0;
}
/* called by the modify qp verb, this routine checks all the parameters before
* making any changes
*/
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask)
{
enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
attr->cur_qp_state : qp->attr.qp_state;
enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
rxe_dbg_qp(qp, "invalid mask or state\n");
goto err1;
}
if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) {
if (qp->attr.sq_draining && new_state != IB_QPS_ERR)
goto err1;
}
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
...@@ -508,22 +492,96 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
spin_lock_bh(&qp->state_lock);
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
spin_unlock_bh(&qp->state_lock);
}
static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
spin_lock_bh(&qp->state_lock);
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
spin_unlock_bh(&qp->state_lock);
}
/* caller should hold qp->state_lock */
static int __qp_chk_state(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
enum ib_qp_state cur_state;
enum ib_qp_state new_state;
cur_state = (mask & IB_QP_CUR_STATE) ?
attr->cur_qp_state : qp->attr.qp_state;
new_state = (mask & IB_QP_STATE) ?
attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask))
return -EINVAL;
if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) {
if (qp->attr.sq_draining && new_state != IB_QPS_ERR)
return -EINVAL;
}
return 0;
}
static const char *const qps2str[] = {
[IB_QPS_RESET] = "RESET",
[IB_QPS_INIT] = "INIT",
[IB_QPS_RTR] = "RTR",
[IB_QPS_RTS] = "RTS",
[IB_QPS_SQD] = "SQD",
[IB_QPS_SQE] = "SQE",
[IB_QPS_ERR] = "ERR",
};
/* called by the modify qp verb */
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
struct ib_udata *udata)
{
enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
attr->cur_qp_state : qp->attr.qp_state;
int err;
if (mask & IB_QP_CUR_STATE)
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_STATE) {
spin_lock_bh(&qp->state_lock);
err = __qp_chk_state(qp, attr, mask);
if (!err) {
qp->attr.qp_state = attr->qp_state;
rxe_dbg_qp(qp, "state -> %s\n",
qps2str[attr->qp_state]);
}
spin_unlock_bh(&qp->state_lock);
if (err)
return err;
switch (attr->qp_state) {
case IB_QPS_RESET:
rxe_qp_reset(qp);
break;
case IB_QPS_SQD:
rxe_qp_sqd(qp, attr, mask);
break;
case IB_QPS_ERR:
rxe_qp_error(qp);
break;
default:
break;
}
}
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
int max_rd_atomic = attr->max_rd_atomic ?
roundup_pow_of_two(attr->max_rd_atomic) : 0;
...@@ -545,9 +603,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
return err;
}
if (mask & IB_QP_CUR_STATE)
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
...@@ -627,48 +682,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_DEST_QPN)
qp->attr.dest_qp_num = attr->dest_qp_num;
if (mask & IB_QP_STATE) {
qp->attr.qp_state = attr->qp_state;
switch (attr->qp_state) {
case IB_QPS_RESET:
rxe_dbg_qp(qp, "state -> RESET\n");
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
rxe_dbg_qp(qp, "state -> INIT\n");
break;
case IB_QPS_RTR:
rxe_dbg_qp(qp, "state -> RTR\n");
break;
case IB_QPS_RTS:
rxe_dbg_qp(qp, "state -> RTS\n");
break;
case IB_QPS_SQD:
rxe_dbg_qp(qp, "state -> SQD\n");
if (cur_state != IB_QPS_SQD) {
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
}
break;
case IB_QPS_SQE:
rxe_dbg_qp(qp, "state -> SQE !!?\n");
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
rxe_dbg_qp(qp, "state -> ERR\n");
rxe_qp_error(qp);
break;
}
}
return 0;
}
...@@ -695,10 +708,12 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
/* Applications that get this state typically spin on it.
* Yield the processor
*/
if (qp->attr.sq_draining)
spin_lock_bh(&qp->state_lock);
if (qp->attr.sq_draining) {
spin_unlock_bh(&qp->state_lock);
cond_resched();
}
rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
spin_unlock_bh(&qp->state_lock);
return 0;
}
...@@ -722,7 +737,9 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
spin_lock_bh(&qp->state_lock);
qp->valid = 0;
spin_unlock_bh(&qp->state_lock);
qp->qp_timeout_jiffies = 0;
if (qp_type(qp) == IB_QPT_RC) {
......
...@@ -38,13 +38,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}
spin_lock_bh(&qp->state_lock);
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp_state(qp) < IB_QPS_RTR))
if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
spin_unlock_bh(&qp->state_lock);
return -EINVAL;
}
} else {
if (unlikely(qp_state(qp) < IB_QPS_RTS))
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
spin_unlock_bh(&qp->state_lock);
return -EINVAL;
}
}
spin_unlock_bh(&qp->state_lock);
return 0;
}
......
...@@ -102,24 +102,33 @@ void rnr_nak_timer(struct timer_list *t)
rxe_dbg_qp(qp, "nak timer fired\n");
spin_lock_bh(&qp->state_lock);
if (qp->valid) {
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_sched_task(&qp->req.task);
}
spin_unlock_bh(&qp->state_lock);
}
static void req_check_sq_drain_done(struct rxe_qp *qp)
{
struct rxe_queue *q = qp->sq.queue;
struct rxe_queue *q;
unsigned int index = qp->req.wqe_index;
unsigned int index;
unsigned int cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
unsigned int cons;
struct rxe_send_wqe *wqe = queue_addr_from_index(q, cons);
struct rxe_send_wqe *wqe;
spin_lock_bh(&qp->state_lock);
if (qp_state(qp) == IB_QPS_SQD) {
q = qp->sq.queue;
index = qp->req.wqe_index;
cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
wqe = queue_addr_from_index(q, cons);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
/* check to see if we are drained;
* state_lock used by requester and completer
*/
spin_lock_bh(&qp->state_lock);
do {
if (!qp->attr.sq_draining)
/* comp just finished */
...@@ -144,28 +153,40 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
}
return;
} while (0);
spin_unlock_bh(&qp->state_lock);
}
spin_unlock_bh(&qp->state_lock);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
struct rxe_queue *q = qp->sq.queue;
unsigned int index = qp->req.wqe_index;
unsigned int prod;
req_check_sq_drain_done(qp);
prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
if (index == prod)
return NULL;
else
return queue_addr_from_index(q, index);
}
wqe = queue_addr_from_index(q, index);
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
req_check_sq_drain_done(qp);
wqe = __req_next_wqe(qp);
if (wqe == NULL)
return NULL;
spin_lock(&qp->state_lock);
if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
(wqe->state != wqe_state_processing)))
(wqe->state != wqe_state_processing))) {
spin_unlock(&qp->state_lock);
return NULL;
}
spin_unlock(&qp->state_lock);
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
...@@ -656,15 +677,16 @@ int rxe_requester(struct rxe_qp *qp)
struct rxe_ah *ah;
struct rxe_av *av;
if (unlikely(!qp->valid))
spin_lock_bh(&qp->state_lock);
if (unlikely(!qp->valid)) {
spin_unlock_bh(&qp->state_lock);
goto exit;
}
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = req_next_wqe(qp);
wqe = __req_next_wqe(qp);
spin_unlock_bh(&qp->state_lock);
if (wqe)
/*
* Generate an error completion for error qp state
*/
goto err;
else
goto exit;
...@@ -678,8 +700,10 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
spin_unlock_bh(&qp->state_lock);
goto exit;
}
spin_unlock_bh(&qp->state_lock);
/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
...@@ -839,8 +863,7 @@ int rxe_requester(struct rxe_qp *qp)
/* update wqe_index for each wqe completion */
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
qp->attr.qp_state = IB_QPS_ERR;
rxe_qp_error(qp);
rxe_sched_task(&qp->comp.task);
exit:
ret = -EAGAIN;
out:
......
...@@ -1137,8 +1137,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
if (unlikely(qp_state(qp) == IB_QPS_ERR))
spin_lock_bh(&qp->state_lock);
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
spin_unlock_bh(&qp->state_lock);
return RESPST_CHK_RESOURCE;
}
spin_unlock_bh(&qp->state_lock);
if (unlikely(!pkt))
return RESPST_DONE;
if (qp_type(qp) == IB_QPT_RC)
...@@ -1464,14 +1469,17 @@ int rxe_responder(struct rxe_qp *qp)
struct rxe_pkt_info *pkt = NULL;
int ret;
spin_lock_bh(&qp->state_lock);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_req_pkts(qp);
flush_recv_queue(qp, notify);
spin_unlock_bh(&qp->state_lock);
goto exit;
}
spin_unlock_bh(&qp->state_lock);
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
......
...@@ -660,42 +660,70 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
}
/* send wr */
/* sanity check incoming send work request */
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
unsigned int mask, unsigned int length)
unsigned int *maskp, unsigned int *lengthp)
{
int num_sge = ibwr->num_sge;
struct rxe_sq *sq = &qp->sq;
unsigned int mask = 0;
unsigned long length = 0;
int err = -EINVAL;
int i;
if (unlikely(num_sge > sq->max_sge)) {
do {
rxe_dbg_qp(qp, "num_sge > max_sge");
mask = wr_opcode_mask(ibwr->opcode, qp);
goto err_out;
if (!mask) {
rxe_err_qp(qp, "bad wr opcode for qp type");
break;
}
if (unlikely(mask & WR_ATOMIC_MASK)) {
if (num_sge > sq->max_sge) {
if (length != 8) {
rxe_err_qp(qp, "num_sge > max_sge");
rxe_dbg_qp(qp, "atomic length != 8");
break;
goto err_out;
}
length = 0;
for (i = 0; i < ibwr->num_sge; i++)
length += ibwr->sg_list[i].length;
if (length > (1UL << 31)) {
rxe_err_qp(qp, "message length too long");
break;
}
if (mask & WR_ATOMIC_MASK) {
if (length != 8) {
rxe_err_qp(qp, "atomic length != 8");
break;
}
if (atomic_wr(ibwr)->remote_addr & 0x7) {
rxe_dbg_qp(qp, "misaligned atomic address");
rxe_err_qp(qp, "misaligned atomic address");
goto err_out;
break;
}
}
if (ibwr->send_flags & IB_SEND_INLINE) {
if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
if (!(mask & WR_INLINE_MASK)) {
(length > sq->max_inline))) {
rxe_err_qp(qp, "opcode doesn't support inline data");
rxe_dbg_qp(qp, "inline length too big");
break;
goto err_out;
}
if (length > sq->max_inline) {
rxe_err_qp(qp, "inline length too big");
break;
}
}
return 0;
err = 0;
} while (0);
err_out:
*maskp = mask;
return -EINVAL;
*lengthp = (int)length;
return err;
}
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
const struct ib_send_wr *ibwr)
{
wr->wr_id = ibwr->wr_id;
...@@ -711,8 +739,18 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
if (qp_type(qp) == IB_QPT_GSI)
wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
if (wr->opcode == IB_WR_SEND_WITH_IMM)
switch (wr->opcode) {
case IB_WR_SEND_WITH_IMM:
wr->ex.imm_data = ibwr->ex.imm_data;
break;
case IB_WR_SEND:
break;
default:
rxe_err_qp(qp, "bad wr opcode %d for UD/GSI QP",
wr->opcode);
return -EINVAL;
}
} else {
switch (wr->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
...@@ -729,6 +767,11 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
case IB_WR_SEND_WITH_INV:
wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
break;
case IB_WR_RDMA_READ_WITH_INV:
wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
wr->wr.atomic.remote_addr =
...@@ -746,10 +789,20 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
wr->wr.reg.key = reg_wr(ibwr)->key;
wr->wr.reg.access = reg_wr(ibwr)->access;
break;
case IB_WR_SEND:
case IB_WR_BIND_MW:
case IB_WR_FLUSH:
case IB_WR_ATOMIC_WRITE:
break;
default:
rxe_err_qp(qp, "unsupported wr opcode %d",
wr->opcode);
return -EINVAL;
break;
}
}
return 0;
}
static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
...@@ -765,19 +818,22 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
}
}
static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
unsigned int mask, unsigned int length,
struct rxe_send_wqe *wqe)
{
int num_sge = ibwr->num_sge;
int err;
init_send_wr(qp, &wqe->wr, ibwr);
err = init_send_wr(qp, &wqe->wr, ibwr);
if (err)
return err;
/* local operation */
if (unlikely(mask & WR_LOCAL_OP_MASK)) {
wqe->mask = mask;
wqe->state = wqe_state_posted;
return;
return 0;
}
if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
...@@ -796,93 +852,62 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
wqe->dma.sge_offset = 0;
wqe->state = wqe_state_posted;
wqe->ssn = atomic_add_return(1, &qp->ssn);
return 0;
}
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr)
unsigned int mask, u32 length)
{
int err;
struct rxe_sq *sq = &qp->sq;
struct rxe_send_wqe *send_wqe;
unsigned long flags;
unsigned int mask;
unsigned int length;
int full;
err = validate_send_wr(qp, ibwr, mask, length);
err = validate_send_wr(qp, ibwr, &mask, &length);
if (err)
return err;
spin_lock_irqsave(&qp->sq.sq_lock, flags);
full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP);
if (unlikely(full)) {
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
rxe_err_qp(qp, "send queue full");
rxe_dbg_qp(qp, "queue full");
return -ENOMEM;
}
send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP);
init_send_wqe(qp, ibwr, mask, length, send_wqe);
err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
if (!err)
queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return err;
return 0;
}
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
static int rxe_post_send_kernel(struct rxe_qp *qp,
const struct ib_send_wr *ibwr,
const struct ib_send_wr **bad_wr)
{
int err = 0;
unsigned int mask;
unsigned long flags;
unsigned int length = 0;
int i;
struct ib_send_wr *next;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
if (unlikely(!mask)) {
rxe_dbg_qp(qp, "bad wr opcode for qp");
err = -EINVAL;
*bad_wr = wr;
break;
}
if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
!(mask & WR_INLINE_MASK))) {
rxe_dbg_qp(qp, "opcode doesn't support inline data");
err = -EINVAL;
*bad_wr = wr;
break;
}
next = wr->next;
length = 0;
for (i = 0; i < wr->num_sge; i++)
length += wr->sg_list[i].length;
if (length > 1<<31) {
err = -EINVAL;
rxe_dbg_qp(qp, "message length too long");
*bad_wr = wr;
break;
}
err = post_one_send(qp, wr, mask, length);
spin_lock_irqsave(&qp->sq.sq_lock, flags);
while (ibwr) {
err = post_one_send(qp, ibwr);
if (err) {
*bad_wr = wr;
*bad_wr = ibwr;
break;
}
ibwr = ibwr->next;
wr = next;
}
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
/* if we didn't post anything there's nothing to do */
if (!err)
rxe_sched_task(&qp->req.task);
if (unlikely(qp_state(qp) == IB_QPS_ERR))
spin_lock_bh(&qp->state_lock);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->comp.task);
spin_unlock_bh(&qp->state_lock);
return err;
}
...@@ -893,19 +918,21 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct rxe_qp *qp = to_rqp(ibqp);
int err;
if (unlikely(!qp->valid)) {
spin_lock_bh(&qp->state_lock);
*bad_wr = wr;
/* caller has already called destroy_qp */
err = -EINVAL;
if (WARN_ON_ONCE(!qp->valid)) {
rxe_dbg_qp(qp, "qp destroyed");
spin_unlock_bh(&qp->state_lock);
goto err_out;
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
spin_unlock_bh(&qp->state_lock);
*bad_wr = wr;
err = -EINVAL;
rxe_err_qp(qp, "qp not ready to send");
rxe_dbg_qp(qp, "qp not ready to send");
return -EINVAL;
goto err_out;
}
spin_unlock_bh(&qp->state_lock);
if (qp->is_user) {
/* Utilize process context to do protocol processing */
...@@ -913,14 +940,10 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
} else {
err = rxe_post_send_kernel(qp, wr, bad_wr);
if (err)
goto err_out;
return err;
}
return 0;
err_out:
rxe_err_qp(qp, "returned err = %d", err);
return err;
}
/* recv wr */
...@@ -985,18 +1008,27 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
struct rxe_rq *rq = &qp->rq;
unsigned long flags;
if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
spin_lock_bh(&qp->state_lock);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
spin_unlock_bh(&qp->state_lock);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
/* see C10-97.2.1 */
if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
spin_unlock_bh(&qp->state_lock);
*bad_wr = wr;
err = -EINVAL;
rxe_dbg_qp(qp, "qp not ready to post recv");
rxe_dbg_qp(qp, "qp destroyed or not ready to post recv");
return -EINVAL;
goto err_out;
}
spin_unlock_bh(&qp->state_lock);
if (unlikely(qp->srq)) {
*bad_wr = wr;
err = -EINVAL;
rxe_dbg_qp(qp, "qp has srq, use post_srq_recv instead");
rxe_dbg_qp(qp, "use post_srq_recv instead");
return -EINVAL;
goto err_out;
}
spin_lock_irqsave(&rq->producer_lock, flags);
...@@ -1012,12 +1044,10 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
spin_lock_bh(&qp->state_lock);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->resp.task);
spin_unlock_bh(&qp->state_lock);
err_out:
if (err)
rxe_err_qp(qp, "returned err = %d", err);
return err;
}
......