Commit a264cf5e authored by Tyrel Datwyler, committed by Martin K. Petersen

scsi: ibmvfc: Fix command state accounting and stale response detection

Prior to commit 1f4a4a19 ("scsi: ibmvfc: Complete commands outside the
host/queue lock") responses to commands were completed sequentially with
the host lock held such that a command had a basic binary state of active
or free. It was therefore a simple affair of ensuring the associated
ibmvfc_event to a VIOS response was valid by testing that it was not
already free. The lock relaxation work to complete commands outside the
lock inadvertently made it a trinary command state such that a command is
either in flight, received and being completed, or completed and now
free. This breaks the stale command detection logic as a command may
still be marked active and have been placed on the delayed completion list when a
second stale response for the same command arrives. This can lead to double
completions and list corruption. This issue was exposed by a recent VIOS
regression where a missing memory barrier could occasionally result in the
ibmvfc client receiving a duplicate response for the same command.

Fix the issue by introducing the atomic ibmvfc_event.active to track the
trinary state of a command. The state is explicitly set to 1 when a command
is successfully sent. The CRQ response handlers use
atomic_dec_if_positive() to test for stale responses and correctly
transition to the completion state when an active command is received.
Finally, atomic_dec_and_test() is used to sanity check transitions when
commands are freed as a result of a completion, or moved to the purge list
as a result of error handling or adapter reset.

Link: https://lore.kernel.org/r/20210716205220.1101150-1-tyreld@linux.ibm.com
Fixes: 1f4a4a19 ("scsi: ibmvfc: Complete commands outside the host/queue lock")
Cc: stable@vger.kernel.org
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 70edd2e6
...@@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, ...@@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
for (i = 0; i < size; ++i) { for (i = 0; i < size; ++i) {
struct ibmvfc_event *evt = &pool->events[i]; struct ibmvfc_event *evt = &pool->events[i];
/*
* evt->active states
* 1 = in flight
* 0 = being completed
* -1 = free/freed
*/
atomic_set(&evt->active, -1);
atomic_set(&evt->free, 1); atomic_set(&evt->free, 1);
evt->crq.valid = 0x80; evt->crq.valid = 0x80;
evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
...@@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) ...@@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt)
BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(!ibmvfc_valid_event(pool, evt));
BUG_ON(atomic_inc_return(&evt->free) != 1); BUG_ON(atomic_inc_return(&evt->free) != 1);
BUG_ON(atomic_dec_and_test(&evt->active));
spin_lock_irqsave(&evt->queue->l_lock, flags); spin_lock_irqsave(&evt->queue->l_lock, flags);
list_add_tail(&evt->queue_list, &evt->queue->free); list_add_tail(&evt->queue_list, &evt->queue->free);
...@@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) ...@@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list)
**/ **/
static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
{ {
/*
* Anything we are failing should still be active. Otherwise, it
* implies we already got a response for the command and are doing
* something bad like double completing it.
*/
BUG_ON(!atomic_dec_and_test(&evt->active));
if (evt->cmnd) { if (evt->cmnd) {
evt->cmnd->result = (error_code << 16); evt->cmnd->result = (error_code << 16);
evt->done = ibmvfc_scsi_eh_done; evt->done = ibmvfc_scsi_eh_done;
...@@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, ...@@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
evt->done(evt); evt->done(evt);
} else { } else {
atomic_set(&evt->active, 1);
spin_unlock_irqrestore(&evt->queue->l_lock, flags); spin_unlock_irqrestore(&evt->queue->l_lock, flags);
ibmvfc_trc_start(evt); ibmvfc_trc_start(evt);
} }
...@@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, ...@@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
return; return;
} }
if (unlikely(atomic_read(&evt->free))) { if (unlikely(atomic_dec_if_positive(&evt->active))) {
dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
crq->ioba); crq->ioba);
return; return;
...@@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost ...@@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
return; return;
} }
if (unlikely(atomic_read(&evt->free))) { if (unlikely(atomic_dec_if_positive(&evt->active))) {
dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
crq->ioba); crq->ioba);
return; return;
......
...@@ -745,6 +745,7 @@ struct ibmvfc_event { ...@@ -745,6 +745,7 @@ struct ibmvfc_event {
struct ibmvfc_target *tgt; struct ibmvfc_target *tgt;
struct scsi_cmnd *cmnd; struct scsi_cmnd *cmnd;
atomic_t free; atomic_t free;
atomic_t active;
union ibmvfc_iu *xfer_iu; union ibmvfc_iu *xfer_iu;
void (*done)(struct ibmvfc_event *evt); void (*done)(struct ibmvfc_event *evt);
void (*_done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment