Commit 3c31b52f authored by Dan Williams

scsi: async sd resume

async_schedule() sd resume work to allow disks and other devices to
resume in parallel.

This moves the entirety of scsi_device resume to an async context to
ensure that scsi_device_resume() remains ordered with respect to the
completion of the start/stop command.  For the duration of the resume,
new command submissions (that do not originate from the scsi-core) will
be deferred (BLKPREP_DEFER).

It adds a new ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain) as a container
of these operations.  Like scsi_sd_probe_domain it is flushed at
sd_remove() time to ensure async ops do not continue past the
end-of-life of the sdev.  The implementation explicitly refrains from
reusing scsi_sd_probe_domain directly for this purpose as it is flushed
at the end of dpm_resume(), potentially defeating some of the benefit.
Given sdevs are quiesced it is permissible for these resume operations
to bleed past the async_synchronize_full() calls made by the driver
core.

We defer the resolution of which pm callback to call until
scsi_dev_type_{suspend|resume} time and guarantee that the callback
parameter is never NULL.  With this in place the type of resume
operation is encoded in the async function identifier.

There is a concern that async resume could trigger PSU overload.  In the
enterprise, storage enclosures enforce staggered spin-up regardless of
what the kernel does making async scanning safe by default.  Outside of
that context a user can disable asynchronous scanning via a kernel
command line or CONFIG_SCSI_SCAN_ASYNC.  Honor that setting when
deciding whether to do resume asynchronously.

Inspired by Todd's analysis and initial proposal [2]:
https://01.org/suspendresume/blogs/tebrandt/2013/hard-disk-resume-optimization-simpler-approach

Cc: Len Brown <len.brown@intel.com>
Cc: Phillip Susi <psusi@ubuntu.com>
[alan: bug fix and clean up suggestion]
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Todd Brandt <todd.e.brandt@linux.intel.com>
[djbw: kick all resume work to the async queue]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 455c6fdb
...@@ -263,6 +263,9 @@ config SCSI_SCAN_ASYNC ...@@ -263,6 +263,9 @@ config SCSI_SCAN_ASYNC
You can override this choice by specifying "scsi_mod.scan=sync" You can override this choice by specifying "scsi_mod.scan=sync"
or async on the kernel's command line. or async on the kernel's command line.
Note that this setting also affects whether resuming from
system suspend will be performed asynchronously.
menu "SCSI Transports" menu "SCSI Transports"
depends on SCSI depends on SCSI
......
...@@ -91,6 +91,15 @@ EXPORT_SYMBOL(scsi_logging_level); ...@@ -91,6 +91,15 @@ EXPORT_SYMBOL(scsi_logging_level);
ASYNC_DOMAIN(scsi_sd_probe_domain); ASYNC_DOMAIN(scsi_sd_probe_domain);
EXPORT_SYMBOL(scsi_sd_probe_domain); EXPORT_SYMBOL(scsi_sd_probe_domain);
/*
* Separate domain (from scsi_sd_probe_domain) to maximize the benefit of
* asynchronous system resume operations. It is marked 'exclusive' to avoid
* being included in the async_synchronize_full() that is invoked by
* dpm_resume()
*/
ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain);
EXPORT_SYMBOL(scsi_sd_pm_domain);
/* NB: These are exposed through /proc/scsi/scsi and form part of the ABI. /* NB: These are exposed through /proc/scsi/scsi and form part of the ABI.
* You may not alter any existing entry (although adding new ones is * You may not alter any existing entry (although adding new ones is
* encouraged once assigned by ANSI/INCITS T10 * encouraged once assigned by ANSI/INCITS T10
......
...@@ -18,35 +18,77 @@ ...@@ -18,35 +18,77 @@
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
static int scsi_dev_type_suspend(struct device *dev, int (*cb)(struct device *)) static int do_scsi_suspend(struct device *dev, const struct dev_pm_ops *pm)
{ {
return pm && pm->suspend ? pm->suspend(dev) : 0;
}
static int do_scsi_freeze(struct device *dev, const struct dev_pm_ops *pm)
{
return pm && pm->freeze ? pm->freeze(dev) : 0;
}
static int do_scsi_poweroff(struct device *dev, const struct dev_pm_ops *pm)
{
return pm && pm->poweroff ? pm->poweroff(dev) : 0;
}
static int do_scsi_resume(struct device *dev, const struct dev_pm_ops *pm)
{
return pm && pm->resume ? pm->resume(dev) : 0;
}
static int do_scsi_thaw(struct device *dev, const struct dev_pm_ops *pm)
{
return pm && pm->thaw ? pm->thaw(dev) : 0;
}
static int do_scsi_restore(struct device *dev, const struct dev_pm_ops *pm)
{
return pm && pm->restore ? pm->restore(dev) : 0;
}
static int scsi_dev_type_suspend(struct device *dev,
int (*cb)(struct device *, const struct dev_pm_ops *))
{
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
int err; int err;
/* flush pending in-flight resume operations, suspend is synchronous */
async_synchronize_full_domain(&scsi_sd_pm_domain);
err = scsi_device_quiesce(to_scsi_device(dev)); err = scsi_device_quiesce(to_scsi_device(dev));
if (err == 0) { if (err == 0) {
if (cb) { err = cb(dev, pm);
err = cb(dev);
if (err) if (err)
scsi_device_resume(to_scsi_device(dev)); scsi_device_resume(to_scsi_device(dev));
} }
}
dev_dbg(dev, "scsi suspend: %d\n", err); dev_dbg(dev, "scsi suspend: %d\n", err);
return err; return err;
} }
static int scsi_dev_type_resume(struct device *dev, int (*cb)(struct device *)) static int scsi_dev_type_resume(struct device *dev,
int (*cb)(struct device *, const struct dev_pm_ops *))
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
int err = 0; int err = 0;
if (cb) err = cb(dev, pm);
err = cb(dev);
scsi_device_resume(to_scsi_device(dev)); scsi_device_resume(to_scsi_device(dev));
dev_dbg(dev, "scsi resume: %d\n", err); dev_dbg(dev, "scsi resume: %d\n", err);
if (err == 0) {
pm_runtime_disable(dev);
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
}
return err; return err;
} }
static int static int
scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *)) scsi_bus_suspend_common(struct device *dev,
int (*cb)(struct device *, const struct dev_pm_ops *))
{ {
int err = 0; int err = 0;
...@@ -66,20 +108,54 @@ scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *)) ...@@ -66,20 +108,54 @@ scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *))
return err; return err;
} }
static int static void async_sdev_resume(void *dev, async_cookie_t cookie)
scsi_bus_resume_common(struct device *dev, int (*cb)(struct device *))
{ {
int err = 0; scsi_dev_type_resume(dev, do_scsi_resume);
}
if (scsi_is_sdev_device(dev)) static void async_sdev_thaw(void *dev, async_cookie_t cookie)
err = scsi_dev_type_resume(dev, cb); {
scsi_dev_type_resume(dev, do_scsi_thaw);
}
if (err == 0) { static void async_sdev_restore(void *dev, async_cookie_t cookie)
{
scsi_dev_type_resume(dev, do_scsi_restore);
}
static int scsi_bus_resume_common(struct device *dev,
int (*cb)(struct device *, const struct dev_pm_ops *))
{
async_func_t fn;
if (!scsi_is_sdev_device(dev))
fn = NULL;
else if (cb == do_scsi_resume)
fn = async_sdev_resume;
else if (cb == do_scsi_thaw)
fn = async_sdev_thaw;
else if (cb == do_scsi_restore)
fn = async_sdev_restore;
else
fn = NULL;
if (fn) {
async_schedule_domain(fn, dev, &scsi_sd_pm_domain);
/*
* If a user has disabled async probing a likely reason
* is due to a storage enclosure that does not inject
* staggered spin-ups. For safety, make resume
* synchronous as well in that case.
*/
if (strncmp(scsi_scan_type, "async", 5) != 0)
async_synchronize_full_domain(&scsi_sd_pm_domain);
} else {
pm_runtime_disable(dev); pm_runtime_disable(dev);
pm_runtime_set_active(dev); pm_runtime_set_active(dev);
pm_runtime_enable(dev); pm_runtime_enable(dev);
} }
return err; return 0;
} }
static int scsi_bus_prepare(struct device *dev) static int scsi_bus_prepare(struct device *dev)
...@@ -97,38 +173,32 @@ static int scsi_bus_prepare(struct device *dev) ...@@ -97,38 +173,32 @@ static int scsi_bus_prepare(struct device *dev)
static int scsi_bus_suspend(struct device *dev) static int scsi_bus_suspend(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_suspend_common(dev, do_scsi_suspend);
return scsi_bus_suspend_common(dev, pm ? pm->suspend : NULL);
} }
static int scsi_bus_resume(struct device *dev) static int scsi_bus_resume(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_resume_common(dev, do_scsi_resume);
return scsi_bus_resume_common(dev, pm ? pm->resume : NULL);
} }
static int scsi_bus_freeze(struct device *dev) static int scsi_bus_freeze(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_suspend_common(dev, do_scsi_freeze);
return scsi_bus_suspend_common(dev, pm ? pm->freeze : NULL);
} }
static int scsi_bus_thaw(struct device *dev) static int scsi_bus_thaw(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_resume_common(dev, do_scsi_thaw);
return scsi_bus_resume_common(dev, pm ? pm->thaw : NULL);
} }
static int scsi_bus_poweroff(struct device *dev) static int scsi_bus_poweroff(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_suspend_common(dev, do_scsi_poweroff);
return scsi_bus_suspend_common(dev, pm ? pm->poweroff : NULL);
} }
static int scsi_bus_restore(struct device *dev) static int scsi_bus_restore(struct device *dev)
{ {
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; return scsi_bus_resume_common(dev, do_scsi_restore);
return scsi_bus_resume_common(dev, pm ? pm->restore : NULL);
} }
#else /* CONFIG_PM_SLEEP */ #else /* CONFIG_PM_SLEEP */
......
...@@ -112,6 +112,7 @@ extern void scsi_exit_procfs(void); ...@@ -112,6 +112,7 @@ extern void scsi_exit_procfs(void);
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
/* scsi_scan.c */ /* scsi_scan.c */
extern char scsi_scan_type[];
extern int scsi_complete_async_scans(void); extern int scsi_complete_async_scans(void);
extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int,
unsigned int, unsigned int, int); unsigned int, unsigned int, int);
...@@ -166,6 +167,7 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; } ...@@ -166,6 +167,7 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
static inline void scsi_autopm_put_host(struct Scsi_Host *h) {} static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
#endif /* CONFIG_PM_RUNTIME */ #endif /* CONFIG_PM_RUNTIME */
extern struct async_domain scsi_sd_pm_domain;
extern struct async_domain scsi_sd_probe_domain; extern struct async_domain scsi_sd_probe_domain;
/* /*
......
...@@ -97,7 +97,7 @@ MODULE_PARM_DESC(max_luns, ...@@ -97,7 +97,7 @@ MODULE_PARM_DESC(max_luns,
#define SCSI_SCAN_TYPE_DEFAULT "sync" #define SCSI_SCAN_TYPE_DEFAULT "sync"
#endif #endif
static char scsi_scan_type[6] = SCSI_SCAN_TYPE_DEFAULT; char scsi_scan_type[6] = SCSI_SCAN_TYPE_DEFAULT;
module_param_string(scan, scsi_scan_type, sizeof(scsi_scan_type), S_IRUGO); module_param_string(scan, scsi_scan_type, sizeof(scsi_scan_type), S_IRUGO);
MODULE_PARM_DESC(scan, "sync, async or none"); MODULE_PARM_DESC(scan, "sync, async or none");
......
...@@ -3020,6 +3020,7 @@ static int sd_remove(struct device *dev) ...@@ -3020,6 +3020,7 @@ static int sd_remove(struct device *dev)
devt = disk_devt(sdkp->disk); devt = disk_devt(sdkp->disk);
scsi_autopm_get_device(sdkp->device); scsi_autopm_get_device(sdkp->device);
async_synchronize_full_domain(&scsi_sd_pm_domain);
async_synchronize_full_domain(&scsi_sd_probe_domain); async_synchronize_full_domain(&scsi_sd_probe_domain);
blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn); blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
blk_queue_unprep_rq(sdkp->device->request_queue, NULL); blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment