Commit 3e16e83a authored by Don Brace's avatar Don Brace Committed by Martin K. Petersen

scsi: hpsa: correct race condition in offload enabled

Correct race condition where ioaccel is re-enabled before the raid_map is
updated. For RAID_1, RAID_1ADM, and RAID 5/6 there is a BUG_ON called which
is bad.

 - Change event thread to disable ioaccel only. Send all requests down the
   RAID path instead.

 - Have rescan thread handle offload_enable.

 - Since there is only one rescan allowed at a time, turning
   offload_enabled on/off should not be racy. Each handler queues up a
   rescan if one is already in progress.

  - For timing diagram, offload_enabled is initially off due to a change
    (transformation: splitmirror/remirror), ...

  otbe = offload_to_be_enabled
  oe   = offload_enabled

  Time Event         Rescan              Completion     Request
       Worker        Worker              Thread         Thread
  ---- ------        ------              ----------     -------
   T0   |             |                       + UA      |
   T1   |             + rescan started        | 0x3f    |
   T2   + Event       |                       | 0x0e    |
   T3   + Ack msg     |                       |         |
   T4   |             + if (!dev[i]->oe &&    |         |
   T5   |             |     dev[i]->otbe)     |         |
   T6   |             |      get_raid_map     |         |
   T7   + otbe = 1    |                       |         |
   T8   |             |                       |         |
   T9   |             + oe = otbe             |         |
   T10  |             |                       |         + ioaccel request
   T11                                                  * BUG_ON

  T0 - I/O completion with UA 0x3f 0x0e sets rescan flag.
  T1 - rescan worker thread starts a rescan.
  T2 - event comes in
  T3 - event thread starts and issues "Acknowledge" message
  ...
  T6 - rescan thread has bypassed code to reload new raid map.
  ...
  T7 - event thread runs and sets offload_to_be_enabled
  ...
  T9 - rescan thread turns on offload_enabled.
  T10- request comes in and goes down ioaccel path.
  T11- BUG_ON.

 - After the patch is applied, ioaccel_enabled can only be re-enabled in
   the re-scan thread.

Link: https://lore.kernel.org/r/158472877894.14200.7077843399036368335.stgit@brunhildaReviewed-by: default avatarScott Teel <scott.teel@microsemi.com>
Reviewed-by: default avatarMatt Perricone <matt.perricone@microsemi.com>
Reviewed-by: default avatarScott Benesh <scott.benesh@microsemi.com>
Signed-off-by: default avatarDon Brace <don.brace@microsemi.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent fd6282af
...@@ -504,6 +504,12 @@ static ssize_t host_store_rescan(struct device *dev, ...@@ -504,6 +504,12 @@ static ssize_t host_store_rescan(struct device *dev,
return count; return count;
} }
static void hpsa_turn_off_ioaccel_for_device(struct hpsa_scsi_dev_t *device)
{
device->offload_enabled = 0;
device->offload_to_be_enabled = 0;
}
static ssize_t host_show_firmware_revision(struct device *dev, static ssize_t host_show_firmware_revision(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
...@@ -1738,8 +1744,7 @@ static void hpsa_figure_phys_disk_ptrs(struct ctlr_info *h, ...@@ -1738,8 +1744,7 @@ static void hpsa_figure_phys_disk_ptrs(struct ctlr_info *h,
__func__, __func__,
h->scsi_host->host_no, logical_drive->bus, h->scsi_host->host_no, logical_drive->bus,
logical_drive->target, logical_drive->lun); logical_drive->target, logical_drive->lun);
logical_drive->offload_enabled = 0; hpsa_turn_off_ioaccel_for_device(logical_drive);
logical_drive->offload_to_be_enabled = 0;
logical_drive->queue_depth = 8; logical_drive->queue_depth = 8;
} }
} }
...@@ -2499,8 +2504,7 @@ static void process_ioaccel2_completion(struct ctlr_info *h, ...@@ -2499,8 +2504,7 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
IOACCEL2_SERV_RESPONSE_FAILURE) { IOACCEL2_SERV_RESPONSE_FAILURE) {
if (c2->error_data.status == if (c2->error_data.status ==
IOACCEL2_STATUS_SR_IOACCEL_DISABLED) { IOACCEL2_STATUS_SR_IOACCEL_DISABLED) {
dev->offload_enabled = 0; hpsa_turn_off_ioaccel_for_device(dev);
dev->offload_to_be_enabled = 0;
} }
if (dev->in_reset) { if (dev->in_reset) {
...@@ -3670,10 +3674,17 @@ static void hpsa_get_ioaccel_status(struct ctlr_info *h, ...@@ -3670,10 +3674,17 @@ static void hpsa_get_ioaccel_status(struct ctlr_info *h,
this_device->offload_config = this_device->offload_config =
!!(ioaccel_status & OFFLOAD_CONFIGURED_BIT); !!(ioaccel_status & OFFLOAD_CONFIGURED_BIT);
if (this_device->offload_config) { if (this_device->offload_config) {
this_device->offload_to_be_enabled = bool offload_enabled =
!!(ioaccel_status & OFFLOAD_ENABLED_BIT); !!(ioaccel_status & OFFLOAD_ENABLED_BIT);
if (hpsa_get_raid_map(h, scsi3addr, this_device)) /*
this_device->offload_to_be_enabled = 0; * Check to see if offload can be enabled.
*/
if (offload_enabled) {
rc = hpsa_get_raid_map(h, scsi3addr, this_device);
if (rc) /* could not load raid_map */
goto out;
this_device->offload_to_be_enabled = 1;
}
} }
out: out:
...@@ -3996,8 +4007,7 @@ static int hpsa_update_device_info(struct ctlr_info *h, ...@@ -3996,8 +4007,7 @@ static int hpsa_update_device_info(struct ctlr_info *h,
} else { } else {
this_device->raid_level = RAID_UNKNOWN; this_device->raid_level = RAID_UNKNOWN;
this_device->offload_config = 0; this_device->offload_config = 0;
this_device->offload_enabled = 0; hpsa_turn_off_ioaccel_for_device(this_device);
this_device->offload_to_be_enabled = 0;
this_device->hba_ioaccel_enabled = 0; this_device->hba_ioaccel_enabled = 0;
this_device->volume_offline = 0; this_device->volume_offline = 0;
this_device->queue_depth = h->nr_cmds; this_device->queue_depth = h->nr_cmds;
...@@ -5230,8 +5240,12 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, ...@@ -5230,8 +5240,12 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
/* Handles load balance across RAID 1 members. /* Handles load balance across RAID 1 members.
* (2-drive R1 and R10 with even # of drives.) * (2-drive R1 and R10 with even # of drives.)
* Appropriate for SSDs, not optimal for HDDs * Appropriate for SSDs, not optimal for HDDs
* Ensure we have the correct raid_map.
*/ */
BUG_ON(le16_to_cpu(map->layout_map_count) != 2); if (le16_to_cpu(map->layout_map_count) != 2) {
hpsa_turn_off_ioaccel_for_device(dev);
return IO_ACCEL_INELIGIBLE;
}
if (dev->offload_to_mirror) if (dev->offload_to_mirror)
map_index += le16_to_cpu(map->data_disks_per_row); map_index += le16_to_cpu(map->data_disks_per_row);
dev->offload_to_mirror = !dev->offload_to_mirror; dev->offload_to_mirror = !dev->offload_to_mirror;
...@@ -5239,8 +5253,12 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, ...@@ -5239,8 +5253,12 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
case HPSA_RAID_ADM: case HPSA_RAID_ADM:
/* Handles N-way mirrors (R1-ADM) /* Handles N-way mirrors (R1-ADM)
* and R10 with # of drives divisible by 3.) * and R10 with # of drives divisible by 3.)
* Ensure we have the correct raid_map.
*/ */
BUG_ON(le16_to_cpu(map->layout_map_count) != 3); if (le16_to_cpu(map->layout_map_count) != 3) {
hpsa_turn_off_ioaccel_for_device(dev);
return IO_ACCEL_INELIGIBLE;
}
offload_to_mirror = dev->offload_to_mirror; offload_to_mirror = dev->offload_to_mirror;
raid_map_helper(map, offload_to_mirror, raid_map_helper(map, offload_to_mirror,
...@@ -5265,7 +5283,10 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h, ...@@ -5265,7 +5283,10 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
r5or6_blocks_per_row = r5or6_blocks_per_row =
le16_to_cpu(map->strip_size) * le16_to_cpu(map->strip_size) *
le16_to_cpu(map->data_disks_per_row); le16_to_cpu(map->data_disks_per_row);
BUG_ON(r5or6_blocks_per_row == 0); if (r5or6_blocks_per_row == 0) {
hpsa_turn_off_ioaccel_for_device(dev);
return IO_ACCEL_INELIGIBLE;
}
stripesize = r5or6_blocks_per_row * stripesize = r5or6_blocks_per_row *
le16_to_cpu(map->layout_map_count); le16_to_cpu(map->layout_map_count);
#if BITS_PER_LONG == 32 #if BITS_PER_LONG == 32
...@@ -8285,7 +8306,7 @@ static int detect_controller_lockup(struct ctlr_info *h) ...@@ -8285,7 +8306,7 @@ static int detect_controller_lockup(struct ctlr_info *h)
* *
* Called from monitor controller worker (hpsa_event_monitor_worker) * Called from monitor controller worker (hpsa_event_monitor_worker)
* *
* A Volume (or Volumes that comprise an Array set may be undergoing a * A Volume (or Volumes that comprise an Array set) may be undergoing a
* transformation, so we will be turning off ioaccel for all volumes that * transformation, so we will be turning off ioaccel for all volumes that
* make up the Array. * make up the Array.
*/ */
...@@ -8308,6 +8329,9 @@ static void hpsa_set_ioaccel_status(struct ctlr_info *h) ...@@ -8308,6 +8329,9 @@ static void hpsa_set_ioaccel_status(struct ctlr_info *h)
* Run through current device list used during I/O requests. * Run through current device list used during I/O requests.
*/ */
for (i = 0; i < h->ndevices; i++) { for (i = 0; i < h->ndevices; i++) {
int offload_to_be_enabled = 0;
int offload_config = 0;
device = h->dev[i]; device = h->dev[i];
if (!device) if (!device)
...@@ -8325,25 +8349,35 @@ static void hpsa_set_ioaccel_status(struct ctlr_info *h) ...@@ -8325,25 +8349,35 @@ static void hpsa_set_ioaccel_status(struct ctlr_info *h)
continue; continue;
ioaccel_status = buf[IOACCEL_STATUS_BYTE]; ioaccel_status = buf[IOACCEL_STATUS_BYTE];
device->offload_config =
/*
* Check if offload is still configured on
*/
offload_config =
!!(ioaccel_status & OFFLOAD_CONFIGURED_BIT); !!(ioaccel_status & OFFLOAD_CONFIGURED_BIT);
if (device->offload_config) /*
device->offload_to_be_enabled = * If offload is configured on, check to see if ioaccel
* needs to be enabled.
*/
if (offload_config)
offload_to_be_enabled =
!!(ioaccel_status & OFFLOAD_ENABLED_BIT); !!(ioaccel_status & OFFLOAD_ENABLED_BIT);
/*
* If ioaccel is to be re-enabled, re-enable later during the
* scan operation so the driver can get a fresh raidmap
* before turning ioaccel back on.
*/
if (offload_to_be_enabled)
continue;
/* /*
* Immediately turn off ioaccel for any volume the * Immediately turn off ioaccel for any volume the
* controller tells us to. Some of the reasons could be: * controller tells us to. Some of the reasons could be:
* transformation - change to the LVs of an Array. * transformation - change to the LVs of an Array.
* degraded volume - component failure * degraded volume - component failure
*
* If ioaccel is to be re-enabled, re-enable later during the
* scan operation so the driver can get a fresh raidmap
* before turning ioaccel back on.
*
*/ */
if (!device->offload_to_be_enabled) hpsa_turn_off_ioaccel_for_device(device);
device->offload_enabled = 0;
} }
kfree(buf); kfree(buf);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment