Commit 76b06402 authored by Doug Ledford

Merge branches 'ib_core', 'ib_ipoib', 'srpt', 'drain-cq-v4' and 'net/9p' into k.o/for-4.6

......@@ -1657,3 +1657,167 @@ int ib_sg_to_pages(struct ib_mr *mr,
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
struct ib_drain_cqe {
struct ib_cqe cqe;
struct completion done;
};
static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
cqe);
complete(&cqe->done);
}
/*
* Post a WR and block until its completion is reaped for the SQ.
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
"IB_POLL_DIRECT poll_ctx not supported for drain\n");
return;
}
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
ret = ib_post_send(qp, &swr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
wait_for_completion(&sdrain.done);
}
/*
* Post a WR and block until its completion is reaped for the RQ.
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
"IB_POLL_DIRECT poll_ctx not supported for drain\n");
return;
}
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
wait_for_completion(&rdrain.done);
}
/**
* ib_drain_sq() - Block until all SQ CQEs have been consumed by the
* application.
* @qp: queue pair to drain
*
* If the device has a provider-specific drain function, then
* call that. Otherwise call the generic drain function
* __ib_drain_sq().
*
* The caller must:
*
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
* allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_sq(struct ib_qp *qp)
{
if (qp->device->drain_sq)
qp->device->drain_sq(qp);
else
__ib_drain_sq(qp);
}
EXPORT_SYMBOL(ib_drain_sq);
/**
* ib_drain_rq() - Block until all RQ CQEs have been consumed by the
* application.
* @qp: queue pair to drain
*
* If the device has a provider-specific drain function, then
* call that. Otherwise call the generic drain function
* __ib_drain_rq().
*
* The caller must:
*
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
* allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_rq(struct ib_qp *qp)
{
if (qp->device->drain_rq)
qp->device->drain_rq(qp);
else
__ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_rq);
/**
* ib_drain_qp() - Block until all CQEs have been consumed by the
* application on both the RQ and SQ.
* @qp: queue pair to drain
*
* The caller must:
*
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
* allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
* IB_POLL_DIRECT.
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_qp(struct ib_qp *qp)
{
ib_drain_sq(qp);
ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
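A minimal consumer-side sketch of the drain API added above (the teardown helper and its name are illustrative, not part of this merge); it mirrors what the SRP initiator hunk further down does with ib_drain_rq():
/* Sketch only: assumes the QP's CQs were allocated with ib_alloc_cq()
 * and a poll context other than IB_POLL_DIRECT, as required above.
 */
static void example_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* blocks until the SQ and RQ drain completions are reaped */
	ib_destroy_qp(qp);
}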
......@@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
}
}
out:
if (wq)
if (wq) {
if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
if (t4_sq_empty(wq))
complete(&qhp->sq_drained);
if (t4_rq_empty(wq))
complete(&qhp->rq_drained);
}
spin_unlock(&qhp->lock);
}
return ret;
}
......
......@@ -476,6 +476,8 @@ struct c4iw_qp {
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
struct completion rq_drained;
struct completion sq_drained;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
......@@ -1016,6 +1018,8 @@ extern int c4iw_wr_log;
extern int db_fc_threshold;
extern int db_coalescing_threshold;
extern int use_dsgl;
void c4iw_drain_rq(struct ib_qp *qp);
void c4iw_drain_sq(struct ib_qp *qp);
#endif
......@@ -564,6 +564,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.get_protocol_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.drain_sq = c4iw_drain_sq;
dev->ibdev.drain_rq = c4iw_drain_rq;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
......
......@@ -1697,6 +1697,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.max_ird = 0;
qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
init_completion(&qhp->sq_drained);
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
......@@ -1888,3 +1890,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
return 0;
}
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
wait_for_completion(&qp->sq_drained);
}
void c4iw_drain_rq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
wait_for_completion(&qp->rq_drained);
}
......@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
unsigned tx_tail;
unsigned long flags;
u32 mtu;
unsigned max_send_sge;
};
struct ipoib_cm_rx_buf {
......@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
unsigned max_send_sge;
};
struct ipoib_ah {
......
......@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
......@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return;
}
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num);
......@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
......@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr);
}
tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp;
}
......
......@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
......@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
phead = NULL;
hlen = 0;
}
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn);
......
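A short worked illustration of the SGE accounting introduced in the two IPoIB hunks above (the HCA limit of 4 is a made-up value, not taken from this patch):
/*
 * Hypothetical numbers: an HCA reporting attrs.max_sge = 4 gets a send queue
 * created with min_t(u32, 4, MAX_SKB_FRAGS + 1) = 4 SGEs.  An skb with a
 * linear head plus 5 page fragments needs 1 + 5 = 6 SGEs, so
 * usable_sge = 4 - !!skb_headlen(skb) = 3 is smaller than nr_frags = 5 and
 * the send path falls back to skb_linearize(), dropping the packet only if
 * too many fragments remain afterwards.
 */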
......@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
init_attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
......@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
priv->max_send_sge = init_attr.cap.max_send_sge;
return 0;
out_free_send_cq:
......
......@@ -458,9 +458,6 @@ struct iser_fr_pool {
* @comp: iser completion context
* @fr_pool: connection fast registration pool
* @pi_support: Indicate device T10-PI support
* @last: last send wr to signal all flush errors were drained
* @last_cqe: cqe handler for last wr
* @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
......@@ -472,10 +469,7 @@ struct ib_conn {
struct iser_comp *comp;
struct iser_fr_pool fr_pool;
bool pi_support;
struct ib_send_wr last;
struct ib_cqe last_cqe;
struct ib_cqe reg_cqe;
struct completion last_comp;
};
/**
......@@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
......
......@@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
kmem_cache_free(ig.desc_cache, desc);
}
void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_conn *ib_conn = wc->qp->qp_context;
complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
{
......
......@@ -663,7 +663,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
int iser_conn_terminate(struct iser_conn *iser_conn)
{
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct ib_send_wr *bad_wr;
int err = 0;
/* terminate the iser conn only if the conn state is UP */
......@@ -688,14 +687,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
iser_conn, err);
/* post an indication that all flush errors were consumed */
err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
wait_for_completion(&ib_conn->last_comp);
/* block until all flush errors are consumed */
ib_drain_sq(ib_conn->qp);
}
return 1;
......@@ -954,10 +947,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
ib_conn->post_recv_buf_count = 0;
ib_conn->reg_cqe.done = iser_reg_comp;
ib_conn->last_cqe.done = iser_last_comp;
ib_conn->last.wr_cqe = &ib_conn->last_cqe;
ib_conn->last.opcode = IB_WR_SEND;
init_completion(&ib_conn->last_comp);
}
/**
......
......@@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_rdma_ch *ch = cq->cq_context;
complete(&ch->done);
}
static struct ib_cqe srp_drain_cqe = {
.done = srp_drain_done,
};
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
*
* Change a queue pair into the error state and wait until all receive
* completions have been processed before destroying it. This avoids that
* the receive completion handler can access the queue pair while it is
* Drain the qp before destroying it. This avoids that the receive
* completion handler can access the queue pair while it is
* being destroyed.
*/
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
if (ret)
goto out;
init_completion(&ch->done);
ret = ib_post_recv(ch->qp, &wr, &bad_wr);
WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
if (ret == 0)
wait_for_completion(&ch->done);
out:
ib_drain_rq(ch->qp);
ib_destroy_qp(ch->qp);
}
......@@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (!init_attr)
return -ENOMEM;
/* queue_size + 1 for ib_drain_qp */
/* queue_size + 1 for ib_drain_rq() */
recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
......
......@@ -91,76 +91,32 @@ MODULE_PARM_DESC(srpt_service_guid,
" instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static void srpt_release_cmd(struct se_cmd *se_cmd);
static void srpt_free_ch(struct kref *kref);
static int srpt_queue_status(struct se_cmd *cmd);
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_process_wait_list(struct srpt_rdma_ch *ch);
/**
* opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
*/
static inline
enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir)
{
switch (dir) {
case DMA_TO_DEVICE: return DMA_FROM_DEVICE;
case DMA_FROM_DEVICE: return DMA_TO_DEVICE;
default: return dir;
}
}
/**
* srpt_sdev_name() - Return the name associated with the HCA.
*
* Examples are ib0, ib1, ...
*/
static inline const char *srpt_sdev_name(struct srpt_device *sdev)
{
return sdev->device->name;
}
static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch)
{
unsigned long flags;
enum rdma_ch_state state;
spin_lock_irqsave(&ch->spinlock, flags);
state = ch->state;
spin_unlock_irqrestore(&ch->spinlock, flags);
return state;
}
static enum rdma_ch_state
srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state)
{
unsigned long flags;
enum rdma_ch_state prev;
spin_lock_irqsave(&ch->spinlock, flags);
prev = ch->state;
ch->state = new_state;
spin_unlock_irqrestore(&ch->spinlock, flags);
return prev;
}
/**
* srpt_test_and_set_ch_state() - Test and set the channel state.
*
* Returns true if and only if the channel state has been set to the new state.
/*
* The only allowed channel state changes are those that change the channel
* state into a state with a higher numerical value. Hence the new > prev test.
*/
static bool
srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old,
enum rdma_ch_state new)
static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
{
unsigned long flags;
enum rdma_ch_state prev;
bool changed = false;
spin_lock_irqsave(&ch->spinlock, flags);
prev = ch->state;
if (prev == old)
if (new > prev) {
ch->state = new;
changed = true;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
return prev == old;
return changed;
}
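For illustration of the new > prev rule only (numeric values follow the enum order in the ib_srpt.h hunk near the end of this diff; this snippet is not part of the patch):
/*
 * CH_CONNECTING(0) -> CH_LIVE(1)        : 1 > 0, state changes, returns true
 * CH_LIVE(1) -> CH_DISCONNECTING(2)     : 2 > 1, state changes, returns true
 * CH_DISCONNECTED(4) -> CH_DRAINING(3)  : 3 > 4 is false, state unchanged,
 *                                         returns false
 */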
/**
......@@ -182,7 +138,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
return;
pr_debug("ASYNC event= %d on device= %s\n", event->event,
srpt_sdev_name(sdev));
sdev->device->name);
switch (event->event) {
case IB_EVENT_PORT_ERR:
......@@ -220,25 +176,39 @@ static void srpt_srq_event(struct ib_event *event, void *ctx)
pr_info("SRQ event %d\n", event->event);
}
static const char *get_ch_state_name(enum rdma_ch_state s)
{
switch (s) {
case CH_CONNECTING:
return "connecting";
case CH_LIVE:
return "live";
case CH_DISCONNECTING:
return "disconnecting";
case CH_DRAINING:
return "draining";
case CH_DISCONNECTED:
return "disconnected";
}
return "???";
}
/**
* srpt_qp_event() - QP event callback function.
*/
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch));
event->event, ch->cm_id, ch->sess_name, ch->state);
switch (event->event) {
case IB_EVENT_COMM_EST:
ib_cm_notify(ch->cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
if (srpt_test_and_set_ch_state(ch, CH_DRAINING,
CH_RELEASING))
srpt_release_channel(ch);
else
pr_debug("%s: state %d - ignored LAST_WQE.\n",
ch->sess_name, srpt_get_ch_state(ch));
pr_debug("%s-%d, state %s: received Last WQE event.\n",
ch->sess_name, ch->qp->qp_num,
get_ch_state_name(ch->state));
break;
default:
pr_err("received unrecognized IB QP event %d\n", event->event);
......@@ -281,7 +251,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
struct ib_class_port_info *cif;
cif = (struct ib_class_port_info *)mad->data;
memset(cif, 0, sizeof *cif);
memset(cif, 0, sizeof(*cif));
cif->base_version = 1;
cif->class_version = 1;
cif->resp_time_value = 20;
......@@ -340,7 +310,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
return;
}
memset(iocp, 0, sizeof *iocp);
memset(iocp, 0, sizeof(*iocp));
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
......@@ -390,7 +360,7 @@ static void srpt_get_svc_entries(u64 ioc_guid,
}
svc_entries = (struct ib_dm_svc_entries *)mad->data;
memset(svc_entries, 0, sizeof *svc_entries);
memset(svc_entries, 0, sizeof(*svc_entries));
svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
snprintf(svc_entries->service_entries[0].name,
sizeof(svc_entries->service_entries[0].name),
......@@ -484,7 +454,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
rsp->ah = ah;
dm_mad = rsp->mad;
memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad));
dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
dm_mad->mad_hdr.status = 0;
......@@ -532,7 +502,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_port_attr port_attr;
int ret;
memset(&port_modify, 0, sizeof port_modify);
memset(&port_modify, 0, sizeof(port_modify));
port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
port_modify.clr_port_cap_mask = 0;
......@@ -553,7 +523,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
goto err_query_port;
if (!sport->mad_agent) {
memset(&reg_req, 0, sizeof reg_req);
memset(&reg_req, 0, sizeof(reg_req));
reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
......@@ -840,6 +810,39 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
return ret;
}
/**
* srpt_zerolength_write() - Perform a zero-length RDMA write.
*
* A quote from the InfiniBand specification: C9-88: For an HCA responder
* using Reliable Connection service, for each zero-length RDMA READ or WRITE
* request, the R_Key shall not be validated, even if the request includes
* Immediate data.
*/
static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
{
struct ib_send_wr wr, *bad_wr;
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_RDMA_WRITE;
wr.wr_cqe = &ch->zw_cqe;
wr.send_flags = IB_SEND_SIGNALED;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srpt_rdma_ch *ch = cq->cq_context;
if (wc->status == IB_WC_SUCCESS) {
srpt_process_wait_list(ch);
} else {
if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work);
else
WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
}
}
/**
* srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
* @ioctx: Pointer to the I/O context associated with the request.
......@@ -903,14 +906,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
db = (struct srp_direct_buf *)(srp_cmd->add_data
+ add_cdb_offset);
memcpy(ioctx->rbufs, db, sizeof *db);
memcpy(ioctx->rbufs, db, sizeof(*db));
*data_len = be32_to_cpu(db->len);
} else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
idb = (struct srp_indirect_buf *)(srp_cmd->add_data
+ add_cdb_offset);
ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);
if (ioctx->n_rbuf >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
......@@ -929,7 +932,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
ioctx->rbufs = &ioctx->single_rbuf;
else {
ioctx->rbufs =
kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
if (!ioctx->rbufs) {
ioctx->n_rbuf = 0;
ret = -ENOMEM;
......@@ -938,7 +941,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
}
db = idb->desc_list;
memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
*data_len = be32_to_cpu(idb->len);
}
out:
......@@ -956,7 +959,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
struct ib_qp_attr *attr;
int ret;
attr = kzalloc(sizeof *attr, GFP_KERNEL);
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
......@@ -1070,7 +1073,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
dir = ioctx->cmd.data_direction;
BUG_ON(dir == DMA_NONE);
ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
opposite_dma_dir(dir));
target_reverse_dma_direction(&ioctx->cmd));
ioctx->mapped_sg_count = 0;
}
}
......@@ -1107,7 +1110,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
opposite_dma_dir(dir));
target_reverse_dma_direction(cmd));
if (unlikely(!count))
return -EAGAIN;
......@@ -1313,10 +1316,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
/*
* If the command is in a state where the target core is waiting for
* the ib_srpt driver, change the state to the next state. Changing
* the state of the command from SRPT_STATE_NEED_DATA to
* SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
* function a second time.
* the ib_srpt driver, change the state to the next state.
*/
spin_lock_irqsave(&ioctx->spinlock, flags);
......@@ -1325,25 +1325,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
case SRPT_STATE_NEED_DATA:
ioctx->state = SRPT_STATE_DATA_IN;
break;
case SRPT_STATE_DATA_IN:
case SRPT_STATE_CMD_RSP_SENT:
case SRPT_STATE_MGMT_RSP_SENT:
ioctx->state = SRPT_STATE_DONE;
break;
default:
WARN_ONCE(true, "%s: unexpected I/O context state %d\n",
__func__, state);
break;
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
if (state == SRPT_STATE_DONE) {
struct srpt_rdma_ch *ch = ioctx->ch;
BUG_ON(ch->sess == NULL);
target_put_sess_cmd(&ioctx->cmd);
goto out;
}
pr_debug("Aborting cmd with state %d and tag %lld\n", state,
ioctx->cmd.tag);
......@@ -1351,19 +1343,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
case SRPT_STATE_NEW:
case SRPT_STATE_DATA_IN:
case SRPT_STATE_MGMT:
case SRPT_STATE_DONE:
/*
* Do nothing - defer abort processing until
* srpt_queue_response() is invoked.
*/
WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
break;
case SRPT_STATE_NEED_DATA:
/* DMA_TO_DEVICE (write) - RDMA read error. */
/* XXX(hch): this is a horrible layering violation.. */
spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag);
transport_generic_request_failure(&ioctx->cmd,
TCM_CHECK_CONDITION_ABORT_CMD);
break;
case SRPT_STATE_CMD_RSP_SENT:
/*
......@@ -1371,18 +1360,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
* not been received in time.
*/
srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
target_put_sess_cmd(&ioctx->cmd);
transport_generic_free_cmd(&ioctx->cmd, 0);
break;
case SRPT_STATE_MGMT_RSP_SENT:
srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
target_put_sess_cmd(&ioctx->cmd);
transport_generic_free_cmd(&ioctx->cmd, 0);
break;
default:
WARN(1, "Unexpected command state (%d)", state);
break;
}
out:
return state;
}
......@@ -1422,9 +1409,14 @@ static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
/*
* Note: if an RDMA write error completion is received that
* means that a SEND also has been posted. Defer further
* processing of the associated command until the send error
* completion has been received.
*/
pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
ioctx, wc->status);
srpt_abort_cmd(ioctx);
}
}
......@@ -1464,7 +1456,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
sense_data_len = ioctx->cmd.scsi_sense_length;
WARN_ON(sense_data_len > sizeof(ioctx->sense_data));
memset(srp_rsp, 0, sizeof *srp_rsp);
memset(srp_rsp, 0, sizeof(*srp_rsp));
srp_rsp->opcode = SRP_RSP;
srp_rsp->req_lim_delta =
cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
......@@ -1514,7 +1506,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
srp_rsp = ioctx->ioctx.buf;
BUG_ON(!srp_rsp);
memset(srp_rsp, 0, sizeof *srp_rsp);
memset(srp_rsp, 0, sizeof(*srp_rsp));
srp_rsp->opcode = SRP_RSP;
srp_rsp->req_lim_delta =
......@@ -1528,80 +1520,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
return resp_len;
}
#define NO_SUCH_LUN ((uint64_t)-1LL)
/*
* SCSI LUN addressing method. See also SAM-2 and the section about
* eight byte LUNs.
*/
enum scsi_lun_addr_method {
SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
SCSI_LUN_ADDR_METHOD_FLAT = 1,
SCSI_LUN_ADDR_METHOD_LUN = 2,
SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
};
/*
* srpt_unpack_lun() - Convert from network LUN to linear LUN.
*
* Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
* order (big endian) to a linear LUN. Supports three LUN addressing methods:
* peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
*/
static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
{
uint64_t res = NO_SUCH_LUN;
int addressing_method;
if (unlikely(len < 2)) {
pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
len);
goto out;
}
switch (len) {
case 8:
if ((*((__be64 *)lun) &
cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
goto out_err;
break;
case 4:
if (*((__be16 *)&lun[2]) != 0)
goto out_err;
break;
case 6:
if (*((__be32 *)&lun[2]) != 0)
goto out_err;
break;
case 2:
break;
default:
goto out_err;
}
addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
switch (addressing_method) {
case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
case SCSI_LUN_ADDR_METHOD_FLAT:
case SCSI_LUN_ADDR_METHOD_LUN:
res = *(lun + 1) | (((*lun) & 0x3f) << 8);
break;
case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
default:
pr_err("Unimplemented LUN addressing method %u\n",
addressing_method);
break;
}
out:
return res;
out_err:
pr_err("Support for multi-level LUNs has not yet been implemented\n");
goto out;
}
static int srpt_check_stop_free(struct se_cmd *cmd)
{
struct srpt_send_ioctx *ioctx = container_of(cmd,
......@@ -1613,16 +1531,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd)
/**
* srpt_handle_cmd() - Process SRP_CMD.
*/
static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
struct srpt_recv_ioctx *recv_ioctx,
struct srpt_send_ioctx *send_ioctx)
{
struct se_cmd *cmd;
struct srp_cmd *srp_cmd;
uint64_t unpacked_lun;
u64 data_len;
enum dma_data_direction dir;
sense_reason_t ret;
int rc;
BUG_ON(!send_ioctx);
......@@ -1650,65 +1566,23 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
pr_err("0x%llx: parsing SRP descriptor table failed.\n",
srp_cmd->tag);
ret = TCM_INVALID_CDB_FIELD;
goto send_sense;
goto release_ioctx;
}
unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
sizeof(srp_cmd->lun));
rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
&send_ioctx->sense_data[0], unpacked_lun, data_len,
&send_ioctx->sense_data[0],
scsilun_to_int(&srp_cmd->lun), data_len,
TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
if (rc != 0) {
ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
goto send_sense;
pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
srp_cmd->tag);
goto release_ioctx;
}
return 0;
send_sense:
transport_send_check_condition_and_sense(cmd, ret, 0);
return -1;
}
/**
* srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
* @ch: RDMA channel of the task management request.
* @fn: Task management function to perform.
* @req_tag: Tag of the SRP task management request.
* @mgmt_ioctx: I/O context of the task management request.
*
* Returns zero if the target core will process the task management
* request asynchronously.
*
* Note: It is assumed that the initiator serializes tag-based task management
* requests.
*/
static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
{
struct srpt_device *sdev;
struct srpt_rdma_ch *ch;
struct srpt_send_ioctx *target;
int ret, i;
return;
ret = -EINVAL;
ch = ioctx->ch;
BUG_ON(!ch);
BUG_ON(!ch->sport);
sdev = ch->sport->sdev;
BUG_ON(!sdev);
spin_lock_irq(&sdev->spinlock);
for (i = 0; i < ch->rq_size; ++i) {
target = ch->ioctx_ring[i];
if (target->cmd.se_lun == ioctx->cmd.se_lun &&
target->cmd.tag == tag &&
srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
ret = 0;
/* now let the target core abort &target->cmd; */
break;
}
}
spin_unlock_irq(&sdev->spinlock);
return ret;
release_ioctx:
send_ioctx->state = SRPT_STATE_DONE;
srpt_release_cmd(cmd);
}
static int srp_tmr_to_tcm(int fn)
......@@ -1744,8 +1618,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
struct srp_tsk_mgmt *srp_tsk;
struct se_cmd *cmd;
struct se_session *sess = ch->sess;
uint64_t unpacked_lun;
uint32_t tag = 0;
int tcm_tmr;
int rc;
......@@ -1761,25 +1633,9 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
send_ioctx->cmd.tag = srp_tsk->tag;
tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
if (tcm_tmr < 0) {
send_ioctx->cmd.se_tmr_req->response =
TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
goto fail;
}
unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
sizeof(srp_tsk->lun));
if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
if (rc < 0) {
send_ioctx->cmd.se_tmr_req->response =
TMR_TASK_DOES_NOT_EXIST;
goto fail;
}
tag = srp_tsk->task_tag;
}
rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
srp_tsk, tcm_tmr, GFP_KERNEL, tag,
rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL,
scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr,
GFP_KERNEL, srp_tsk->task_tag,
TARGET_SCF_ACK_KREF);
if (rc != 0) {
send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
......@@ -1800,7 +1656,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *send_ioctx)
{
struct srp_cmd *srp_cmd;
enum rdma_ch_state ch_state;
BUG_ON(!ch);
BUG_ON(!recv_ioctx);
......@@ -1809,13 +1664,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
recv_ioctx->ioctx.dma, srp_max_req_size,
DMA_FROM_DEVICE);
ch_state = srpt_get_ch_state(ch);
if (unlikely(ch_state == CH_CONNECTING)) {
if (unlikely(ch->state == CH_CONNECTING)) {
list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
goto out;
}
if (unlikely(ch_state != CH_LIVE))
if (unlikely(ch->state != CH_LIVE))
goto out;
srp_cmd = recv_ioctx->ioctx.buf;
......@@ -1878,6 +1732,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
}
/*
* This function must be called from the context in which RDMA completions are
* processed because it accesses the wait list without protection against
* access from other threads.
*/
static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
{
struct srpt_send_ioctx *ioctx;
while (!list_empty(&ch->cmd_wait_list) &&
ch->state >= CH_LIVE &&
(ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
}
/**
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
......@@ -1905,15 +1781,10 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_inc(&ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS) {
if (wc->status != IB_WC_SUCCESS)
pr_info("sending response for ioctx 0x%p failed"
" with status %d\n", ioctx, wc->status);
atomic_dec(&ch->req_lim);
srpt_abort_cmd(ioctx);
goto out;
}
if (state != SRPT_STATE_DONE) {
srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
......@@ -1922,18 +1793,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
" wr_id = %u.\n", ioctx->ioctx.index);
}
out:
while (!list_empty(&ch->cmd_wait_list) &&
srpt_get_ch_state(ch) == CH_LIVE &&
(ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
srpt_process_wait_list(ch);
}
/**
......@@ -1950,7 +1810,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
WARN_ON(ch->rq_size < 1);
ret = -ENOMEM;
qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
if (!qp_init)
goto out;
......@@ -2017,168 +1877,102 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
}
/**
* __srpt_close_ch() - Close an RDMA channel by setting the QP error state.
* srpt_close_ch() - Close an RDMA channel.
*
* Reset the QP and make sure all resources associated with the channel will
* be deallocated at an appropriate time.
* Make sure all resources associated with the channel will be deallocated at
* an appropriate time.
*
* Note: The caller must hold ch->sport->sdev->spinlock.
* Returns true if and only if the channel state has been modified into
* CH_DRAINING.
*/
static void __srpt_close_ch(struct srpt_rdma_ch *ch)
static bool srpt_close_ch(struct srpt_rdma_ch *ch)
{
enum rdma_ch_state prev_state;
unsigned long flags;
spin_lock_irqsave(&ch->spinlock, flags);
prev_state = ch->state;
switch (prev_state) {
case CH_CONNECTING:
case CH_LIVE:
ch->state = CH_DISCONNECTING;
break;
default:
break;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
int ret;
switch (prev_state) {
case CH_CONNECTING:
ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
NULL, 0);
/* fall through */
case CH_LIVE:
if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
pr_err("sending CM DREQ failed.\n");
break;
case CH_DISCONNECTING:
break;
case CH_DRAINING:
case CH_RELEASING:
break;
if (!srpt_set_ch_state(ch, CH_DRAINING)) {
pr_debug("%s-%d: already closed\n", ch->sess_name,
ch->qp->qp_num);
return false;
}
}
/**
* srpt_close_ch() - Close an RDMA channel.
*/
static void srpt_close_ch(struct srpt_rdma_ch *ch)
{
struct srpt_device *sdev;
kref_get(&ch->kref);
sdev = ch->sport->sdev;
spin_lock_irq(&sdev->spinlock);
__srpt_close_ch(ch);
spin_unlock_irq(&sdev->spinlock);
}
/**
* srpt_shutdown_session() - Whether or not a session may be shut down.
*/
static int srpt_shutdown_session(struct se_session *se_sess)
{
struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
unsigned long flags;
ret = srpt_ch_qp_err(ch);
if (ret < 0)
pr_err("%s-%d: changing queue pair into error state failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
spin_lock_irqsave(&ch->spinlock, flags);
if (ch->in_shutdown) {
spin_unlock_irqrestore(&ch->spinlock, flags);
return true;
pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
ch->qp->qp_num);
ret = srpt_zerolength_write(ch);
if (ret < 0) {
pr_err("%s-%d: queuing zero-length write failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work);
else
WARN_ON_ONCE(true);
}
ch->in_shutdown = true;
target_sess_cmd_list_set_waiting(se_sess);
spin_unlock_irqrestore(&ch->spinlock, flags);
kref_put(&ch->kref, srpt_free_ch);
return true;
}
/**
* srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
* @cm_id: Pointer to the CM ID of the channel to be drained.
*
* Note: Must be called from inside srpt_cm_handler to avoid a race between
* accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
* (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
* waits until all target sessions for the associated IB device have been
* unregistered and target session registration involves a call to
* ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
* this function has finished).
/*
* Change the channel state into CH_DISCONNECTING. If a channel has not yet
* reached the connected state, close it. If a channel is in the connected
* state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
* the responsibility of the caller to ensure that this function is not
* invoked concurrently with the code that accepts a connection. This means
* that this function must either be invoked from inside a CM callback
* function or that it must be invoked with the srpt_port.mutex held.
*/
static void srpt_drain_channel(struct ib_cm_id *cm_id)
static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
{
struct srpt_device *sdev;
struct srpt_rdma_ch *ch;
int ret;
bool do_reset = false;
WARN_ON_ONCE(irqs_disabled());
if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
return -ENOTCONN;
sdev = cm_id->context;
BUG_ON(!sdev);
spin_lock_irq(&sdev->spinlock);
list_for_each_entry(ch, &sdev->rch_list, list) {
if (ch->cm_id == cm_id) {
do_reset = srpt_test_and_set_ch_state(ch,
CH_CONNECTING, CH_DRAINING) ||
srpt_test_and_set_ch_state(ch,
CH_LIVE, CH_DRAINING) ||
srpt_test_and_set_ch_state(ch,
CH_DISCONNECTING, CH_DRAINING);
break;
}
}
spin_unlock_irq(&sdev->spinlock);
ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
if (ret < 0)
ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
if (do_reset) {
if (ch->sess)
srpt_shutdown_session(ch->sess);
if (ret < 0 && srpt_close_ch(ch))
ret = 0;
ret = srpt_ch_qp_err(ch);
if (ret < 0)
pr_err("Setting queue pair in error state"
" failed: %d\n", ret);
}
return ret;
}
/**
* srpt_find_channel() - Look up an RDMA channel.
* @cm_id: Pointer to the CM ID of the channel to be looked up.
*
* Return NULL if no matching RDMA channel has been found.
*/
static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
struct ib_cm_id *cm_id)
static void __srpt_close_all_ch(struct srpt_device *sdev)
{
struct srpt_rdma_ch *ch;
bool found;
WARN_ON_ONCE(irqs_disabled());
BUG_ON(!sdev);
lockdep_assert_held(&sdev->mutex);
found = false;
spin_lock_irq(&sdev->spinlock);
list_for_each_entry(ch, &sdev->rch_list, list) {
if (ch->cm_id == cm_id) {
found = true;
break;
}
if (srpt_disconnect_ch(ch) >= 0)
pr_info("Closing channel %s-%d because target %s has been disabled\n",
ch->sess_name, ch->qp->qp_num,
sdev->device->name);
srpt_close_ch(ch);
}
spin_unlock_irq(&sdev->spinlock);
return found ? ch : NULL;
}
/**
* srpt_release_channel() - Release channel resources.
*
* Schedules the actual release because:
* - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
* trigger a deadlock.
* - It is not safe to call TCM transport_* functions from interrupt context.
* srpt_shutdown_session() - Whether or not a session may be shut down.
*/
static void srpt_release_channel(struct srpt_rdma_ch *ch)
static int srpt_shutdown_session(struct se_session *se_sess)
{
schedule_work(&ch->release_work);
return 1;
}
static void srpt_free_ch(struct kref *kref)
{
struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
kfree(ch);
}
static void srpt_release_channel_work(struct work_struct *w)
......@@ -2188,8 +1982,8 @@ static void srpt_release_channel_work(struct work_struct *w)
struct se_session *se_sess;
ch = container_of(w, struct srpt_rdma_ch, release_work);
pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess,
ch->release_done);
pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name,
ch->qp->qp_num, ch->release_done);
sdev = ch->sport->sdev;
BUG_ON(!sdev);
......@@ -2197,6 +1991,7 @@ static void srpt_release_channel_work(struct work_struct *w)
se_sess = ch->sess;
BUG_ON(!se_sess);
target_sess_cmd_list_set_waiting(se_sess);
target_wait_for_sess_cmds(se_sess);
transport_deregister_session_configfs(se_sess);
......@@ -2211,16 +2006,15 @@ static void srpt_release_channel_work(struct work_struct *w)
ch->sport->sdev, ch->rq_size,
ch->rsp_size, DMA_TO_DEVICE);
spin_lock_irq(&sdev->spinlock);
list_del(&ch->list);
spin_unlock_irq(&sdev->spinlock);
mutex_lock(&sdev->mutex);
list_del_init(&ch->list);
if (ch->release_done)
complete(ch->release_done);
mutex_unlock(&sdev->mutex);
wake_up(&sdev->ch_releaseQ);
kfree(ch);
kref_put(&ch->kref, srpt_free_ch);
}
/**
......@@ -2266,9 +2060,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
rej = kzalloc(sizeof *rej, GFP_KERNEL);
rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
rej = kzalloc(sizeof(*rej), GFP_KERNEL);
rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
if (!rsp || !rej || !rep_param) {
ret = -ENOMEM;
......@@ -2297,7 +2091,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
spin_lock_irq(&sdev->spinlock);
mutex_lock(&sdev->mutex);
list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
......@@ -2305,26 +2099,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
&& param->port == ch->sport->port
&& param->listen_id == ch->sport->sdev->cm_id
&& ch->cm_id) {
enum rdma_ch_state ch_state;
ch_state = srpt_get_ch_state(ch);
if (ch_state != CH_CONNECTING
&& ch_state != CH_LIVE)
if (srpt_disconnect_ch(ch) < 0)
continue;
/* found an existing channel */
pr_debug("Found existing channel %s"
" cm_id= %p state= %d\n",
ch->sess_name, ch->cm_id, ch_state);
__srpt_close_ch(ch);
pr_info("Relogin - closed existing channel %s\n",
ch->sess_name);
rsp->rsp_flags =
SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
}
}
spin_unlock_irq(&sdev->spinlock);
mutex_unlock(&sdev->mutex);
} else
rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
......@@ -2340,7 +2124,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto reject;
}
ch = kzalloc(sizeof *ch, GFP_KERNEL);
ch = kzalloc(sizeof(*ch), GFP_KERNEL);
if (!ch) {
rej->reason = cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
......@@ -2349,11 +2133,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto reject;
}
kref_init(&ch->kref);
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
memcpy(ch->i_port_id, req->initiator_port_id, 16);
memcpy(ch->t_port_id, req->target_port_id, 16);
ch->sport = &sdev->port[param->port - 1];
ch->cm_id = cm_id;
cm_id->context = ch;
/*
* Avoid QUEUE_FULL conditions by limiting the number of buffers used
* for the SRP protocol to the command queue size.
......@@ -2453,7 +2240,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
/* create cm reply */
rep_param->qp_num = ch->qp->qp_num;
rep_param->private_data = (void *)rsp;
rep_param->private_data_len = sizeof *rsp;
rep_param->private_data_len = sizeof(*rsp);
rep_param->rnr_retry_count = 7;
rep_param->flow_control = 1;
rep_param->failover_accepted = 0;
......@@ -2468,14 +2255,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto release_channel;
}
spin_lock_irq(&sdev->spinlock);
mutex_lock(&sdev->mutex);
list_add_tail(&ch->list, &sdev->rch_list);
spin_unlock_irq(&sdev->spinlock);
mutex_unlock(&sdev->mutex);
goto out;
release_channel:
srpt_set_ch_state(ch, CH_RELEASING);
srpt_disconnect_ch(ch);
transport_deregister_session_configfs(ch->sess);
transport_deregister_session(ch->sess);
ch->sess = NULL;
......@@ -2497,7 +2284,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
| SRP_BUF_FORMAT_INDIRECT);
ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
(void *)rej, sizeof *rej);
(void *)rej, sizeof(*rej));
out:
kfree(rep_param);
......@@ -2507,10 +2294,23 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
return ret;
}
static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
enum ib_cm_rej_reason reason,
const u8 *private_data,
u8 private_data_len)
{
pr_info("Received IB REJ for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
char *priv = NULL;
int i;
if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1,
GFP_KERNEL))) {
for (i = 0; i < private_data_len; i++)
sprintf(priv + 3 * i, " %02x", private_data[i]);
}
pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n",
ch->sess_name, ch->qp->qp_num, reason, private_data_len ?
"; private data" : "", priv ? priv : " (?)");
kfree(priv);
}
/**
......@@ -2519,87 +2319,23 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
* An IB_CM_RTU_RECEIVED message indicates that the connection is established
* and that the recipient may begin transmitting (RTU = ready to use).
*/
static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
{
struct srpt_rdma_ch *ch;
int ret;
ch = srpt_find_channel(cm_id->context, cm_id);
BUG_ON(!ch);
if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
if (srpt_set_ch_state(ch, CH_LIVE)) {
ret = srpt_ch_qp_rts(ch, ch->qp);
list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
wait_list) {
list_del(&ioctx->wait_list);
srpt_handle_new_iu(ch, ioctx, NULL);
}
if (ret)
if (ret == 0) {
/* Trigger wait list processing. */
ret = srpt_zerolength_write(ch);
WARN_ONCE(ret < 0, "%d\n", ret);
} else {
srpt_close_ch(ch);
}
}
static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
{
pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
{
pr_info("Received IB REP error for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
/**
* srpt_cm_dreq_recv() - Process reception of a DREQ message.
*/
static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
{
struct srpt_rdma_ch *ch;
unsigned long flags;
bool send_drep = false;
ch = srpt_find_channel(cm_id->context, cm_id);
BUG_ON(!ch);
pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
spin_lock_irqsave(&ch->spinlock, flags);
switch (ch->state) {
case CH_CONNECTING:
case CH_LIVE:
send_drep = true;
ch->state = CH_DISCONNECTING;
break;
case CH_DISCONNECTING:
case CH_DRAINING:
case CH_RELEASING:
WARN(true, "unexpected channel state %d\n", ch->state);
break;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
if (send_drep) {
if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
pr_err("Sending IB DREP failed.\n");
pr_info("Received DREQ and sent DREP for session %s.\n",
ch->sess_name);
}
}
/**
* srpt_cm_drep_recv() - Process reception of a DREP message.
*/
static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
{
pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
srpt_drain_channel(cm_id);
}
/**
* srpt_cm_handler() - IB connection manager callback function.
*
......@@ -2612,6 +2348,7 @@ static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
*/
static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct srpt_rdma_ch *ch = cm_id->context;
int ret;
ret = 0;
......@@ -2621,32 +2358,39 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
event->private_data);
break;
case IB_CM_REJ_RECEIVED:
srpt_cm_rej_recv(cm_id);
srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
event->private_data,
IB_CM_REJ_PRIVATE_DATA_SIZE);
break;
case IB_CM_RTU_RECEIVED:
case IB_CM_USER_ESTABLISHED:
srpt_cm_rtu_recv(cm_id);
srpt_cm_rtu_recv(ch);
break;
case IB_CM_DREQ_RECEIVED:
srpt_cm_dreq_recv(cm_id);
srpt_disconnect_ch(ch);
break;
case IB_CM_DREP_RECEIVED:
srpt_cm_drep_recv(cm_id);
pr_info("Received CM DREP message for ch %s-%d.\n",
ch->sess_name, ch->qp->qp_num);
srpt_close_ch(ch);
break;
case IB_CM_TIMEWAIT_EXIT:
srpt_cm_timewait_exit(cm_id);
pr_info("Received CM TimeWait exit for ch %s-%d.\n",
ch->sess_name, ch->qp->qp_num);
srpt_close_ch(ch);
break;
case IB_CM_REP_ERROR:
srpt_cm_rep_error(cm_id);
pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
ch->qp->qp_num);
break;
case IB_CM_DREQ_ERROR:
pr_info("Received IB DREQ ERROR event.\n");
pr_info("Received CM DREQ ERROR event.\n");
break;
case IB_CM_MRA_RECEIVED:
pr_info("Received IB MRA event\n");
pr_info("Received CM MRA event\n");
break;
default:
pr_err("received unrecognized IB CM event %d\n", event->event);
pr_err("received unrecognized CM event %d\n", event->event);
break;
}
......@@ -2755,41 +2499,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd)
*/
static int srpt_write_pending(struct se_cmd *se_cmd)
{
struct srpt_rdma_ch *ch;
struct srpt_send_ioctx *ioctx;
struct srpt_send_ioctx *ioctx =
container_of(se_cmd, struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
enum srpt_command_state new_state;
enum rdma_ch_state ch_state;
int ret;
ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
ch = ioctx->ch;
BUG_ON(!ch);
ch_state = srpt_get_ch_state(ch);
switch (ch_state) {
case CH_CONNECTING:
WARN(true, "unexpected channel state %d\n", ch_state);
ret = -EINVAL;
goto out;
case CH_LIVE:
break;
case CH_DISCONNECTING:
case CH_DRAINING:
case CH_RELEASING:
pr_debug("cmd with tag %lld: channel disconnecting\n",
ioctx->cmd.tag);
srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
ret = -EINVAL;
goto out;
}
ret = srpt_xfer_data(ch, ioctx);
out:
return ret;
return srpt_xfer_data(ch, ioctx);
}
static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
......@@ -2920,36 +2637,25 @@ static void srpt_refresh_port_work(struct work_struct *work)
srpt_refresh_port(sport);
}
static int srpt_ch_list_empty(struct srpt_device *sdev)
{
int res;
spin_lock_irq(&sdev->spinlock);
res = list_empty(&sdev->rch_list);
spin_unlock_irq(&sdev->spinlock);
return res;
}
/**
* srpt_release_sdev() - Free the channel resources associated with a target.
*/
static int srpt_release_sdev(struct srpt_device *sdev)
{
struct srpt_rdma_ch *ch, *tmp_ch;
int res;
int i, res;
WARN_ON_ONCE(irqs_disabled());
BUG_ON(!sdev);
spin_lock_irq(&sdev->spinlock);
list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
__srpt_close_ch(ch);
spin_unlock_irq(&sdev->spinlock);
mutex_lock(&sdev->mutex);
for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
sdev->port[i].enabled = false;
__srpt_close_all_ch(sdev);
mutex_unlock(&sdev->mutex);
res = wait_event_interruptible(sdev->ch_releaseQ,
srpt_ch_list_empty(sdev));
list_empty_careful(&sdev->rch_list));
if (res)
pr_err("%s: interrupted.\n", __func__);
......@@ -3003,14 +2709,14 @@ static void srpt_add_one(struct ib_device *device)
pr_debug("device = %p, device->dma_ops = %p\n", device,
device->dma_ops);
sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
goto err;
sdev->device = device;
INIT_LIST_HEAD(&sdev->rch_list);
init_waitqueue_head(&sdev->ch_releaseQ);
spin_lock_init(&sdev->spinlock);
mutex_init(&sdev->mutex);
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
......@@ -3082,7 +2788,7 @@ static void srpt_add_one(struct ib_device *device)
if (srpt_refresh_port(sport)) {
pr_err("MAD registration failed for %s-%d.\n",
srpt_sdev_name(sdev), i);
sdev->device->name, i);
goto err_ring;
}
snprintf(sport->port_guid, sizeof(sport->port_guid),
......@@ -3231,24 +2937,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
static void srpt_close_session(struct se_session *se_sess)
{
DECLARE_COMPLETION_ONSTACK(release_done);
struct srpt_rdma_ch *ch;
struct srpt_device *sdev;
unsigned long res;
ch = se_sess->fabric_sess_ptr;
WARN_ON(ch->sess != se_sess);
struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
struct srpt_device *sdev = ch->sport->sdev;
bool wait;
pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch));
pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
ch->state);
sdev = ch->sport->sdev;
spin_lock_irq(&sdev->spinlock);
mutex_lock(&sdev->mutex);
BUG_ON(ch->release_done);
ch->release_done = &release_done;
__srpt_close_ch(ch);
spin_unlock_irq(&sdev->spinlock);
wait = !list_empty(&ch->list);
srpt_disconnect_ch(ch);
mutex_unlock(&sdev->mutex);
res = wait_for_completion_timeout(&release_done, 60 * HZ);
WARN_ON(res == 0);
if (!wait)
return;
while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
ch->sess_name, ch->qp->qp_num, ch->state);
}
/**
......@@ -3456,6 +3164,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
{
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
struct srpt_device *sdev = sport->sdev;
struct srpt_rdma_ch *ch;
unsigned long tmp;
int ret;
......@@ -3469,11 +3179,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
return -EINVAL;
}
if (tmp == 1)
sport->enabled = true;
else
sport->enabled = false;
if (sport->enabled == tmp)
goto out;
sport->enabled = tmp;
if (sport->enabled)
goto out;
mutex_lock(&sdev->mutex);
list_for_each_entry(ch, &sdev->rch_list, list) {
if (ch->sport == sport) {
pr_debug("%s: ch %p %s-%d\n", __func__, ch,
ch->sess_name, ch->qp->qp_num);
srpt_disconnect_ch(ch);
srpt_close_ch(ch);
}
}
mutex_unlock(&sdev->mutex);
out:
return count;
}
......@@ -3565,7 +3288,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
static const struct target_core_fabric_ops srpt_template = {
.module = THIS_MODULE,
.name = "srpt",
.node_acl_size = sizeof(struct srpt_node_acl),
.get_fabric_name = srpt_get_fabric_name,
.tpg_get_wwn = srpt_get_fabric_wwn,
.tpg_get_tag = srpt_get_tag,
......
......@@ -220,18 +220,18 @@ struct srpt_send_ioctx {
* enum rdma_ch_state - SRP channel state.
* @CH_CONNECTING: QP is in RTR state; waiting for RTU.
* @CH_LIVE: QP is in RTS state.
* @CH_DISCONNECTING: DREQ has been received; waiting for DREP
* or DREQ has been send and waiting for DREP
* or .
* @CH_DRAINING: QP is in ERR state; waiting for last WQE event.
* @CH_RELEASING: Last WQE event has been received; releasing resources.
* @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
* been received.
* @CH_DRAINING: DREP has been received or waiting for DREP timed out
* and last work request has been queued.
* @CH_DISCONNECTED: Last completion has been received.
*/
enum rdma_ch_state {
CH_CONNECTING,
CH_LIVE,
CH_DISCONNECTING,
CH_DRAINING,
CH_RELEASING
CH_DISCONNECTED,
};
/**
......@@ -267,6 +267,8 @@ struct srpt_rdma_ch {
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_cqe zw_cqe;
struct kref kref;
int rq_size;
u32 rsp_size;
atomic_t sq_wr_avail;
......@@ -286,7 +288,6 @@ struct srpt_rdma_ch {
u8 sess_name[36];
struct work_struct release_work;
struct completion *release_done;
bool in_shutdown;
};
/**
......@@ -343,7 +344,7 @@ struct srpt_port {
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
* @ch_releaseQ: Enables waiting for removal from rch_list.
* @spinlock: Protects rch_list and tpg.
* @mutex: Protects rch_list.
* @port: Information about the ports owned by this HCA.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
......@@ -357,18 +358,10 @@ struct srpt_device {
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
wait_queue_head_t ch_releaseQ;
spinlock_t spinlock;
struct mutex mutex;
struct srpt_port port[2];
struct ib_event_handler event_handler;
struct list_head list;
};
/**
* struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
* @nacl: Target core node ACL information.
*/
struct srpt_node_acl {
struct se_node_acl nacl;
};
#endif /* IB_SRPT_H */
......@@ -1846,6 +1846,8 @@ struct ib_device {
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
void (*drain_rq)(struct ib_qp *qp);
void (*drain_sq)(struct ib_qp *qp);
struct ib_dma_mapping_ops *dma_ops;
......@@ -3094,4 +3096,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
int sg_nents,
int (*set_page)(struct ib_mr *, u64));
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
#endif /* IB_VERBS_H */
......@@ -109,14 +109,13 @@ struct p9_trans_rdma {
/**
* p9_rdma_context - Keeps track of in-process WR
*
* @wc_op: The original WR op for when the CQE completes in error.
* @busa: Bus address to unmap when the WR completes
* @req: Keeps track of requests (send)
* @rc: Keepts track of replies (receive)
*/
struct p9_rdma_req;
struct p9_rdma_context {
enum ib_wc_opcode wc_op;
struct ib_cqe cqe;
dma_addr_t busa;
union {
struct p9_req_t *req;
......@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
}
static void
handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct p9_client *client = cq->cq_context;
struct p9_trans_rdma *rdma = client->trans;
struct p9_rdma_context *c =
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
struct p9_req_t *req;
int err = 0;
int16_t tag;
......@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
DMA_FROM_DEVICE);
if (status != IB_WC_SUCCESS)
if (wc->status != IB_WC_SUCCESS)
goto err_out;
err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
......@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
req->rc = c->rc;
p9_client_cb(client, req, REQ_STATUS_RCVD);
out:
up(&rdma->rq_sem);
kfree(c);
return;
err_out:
p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);
p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
req, err, wc->status);
rdma->state = P9_RDMA_FLUSHING;
client->status = Disconnected;
goto out;
}
static void
handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct p9_client *client = cq->cq_context;
struct p9_trans_rdma *rdma = client->trans;
struct p9_rdma_context *c =
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
ib_dma_unmap_single(rdma->cm_id->device,
c->busa, c->req->tc->size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
kfree(c);
}
static void qp_event_handler(struct ib_event *event, void *context)
......@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
event->event, context);
}
static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct p9_client *client = cq_context;
struct p9_trans_rdma *rdma = client->trans;
int ret;
struct ib_wc wc;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
switch (c->wc_op) {
case IB_WC_RECV:
handle_recv(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->rq_sem);
break;
case IB_WC_SEND:
handle_send(client, rdma, c, wc.status, wc.byte_len);
up(&rdma->sq_sem);
break;
default:
pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
c->wc_op, wc.opcode, wc.status);
break;
}
kfree(c);
}
}
static void cq_event_handler(struct ib_event *e, void *v)
{
p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
}
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
if (!rdma)
......@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
ib_dealloc_pd(rdma->pd);
if (rdma->cq && !IS_ERR(rdma->cq))
ib_destroy_cq(rdma->cq);
ib_free_cq(rdma->cq);
if (rdma->cm_id && !IS_ERR(rdma->cm_id))
rdma_destroy_id(rdma->cm_id);
......@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
c->cqe.done = recv_done;
sge.addr = c->busa;
sge.length = client->msize;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
c->wc_op = IB_WC_RECV;
wr.wr_id = (unsigned long) c;
wr.wr_cqe = &c->cqe;
wr.sg_list = &sge;
wr.num_sge = 1;
return ib_post_recv(rdma->qp, &wr, &bad_wr);
......@@ -499,13 +477,14 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
goto send_error;
}
c->cqe.done = send_done;
sge.addr = c->busa;
sge.length = c->req->tc->size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
c->wc_op = IB_WC_SEND;
wr.wr_id = (unsigned long) c;
wr.wr_cqe = &c->cqe;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
wr.sg_list = &sge;
......@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
struct p9_trans_rdma *rdma;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_cq_init_attr cq_attr = {};
/* Parse the transport specific mount options */
err = parse_opts(args, &opts);
......@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Completion Queue */
cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
cq_event_handler, client,
&cq_attr);
rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
opts.sq_depth + opts.rq_depth + 1,
0, IB_POLL_SOFTIRQ);
if (IS_ERR(rdma->cq))
goto error;
ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
/* Create the Protection Domain */
rdma->pd = ib_alloc_pd(rdma->cm_id->device);
......