Commit d4b7332e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-6.1-2022-10-20' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Christoph:
      - fix nvme-hwmon for DMA non-cohehrent architectures (Serge Semin)
      - add a nvme-hwmong maintainer (Christoph Hellwig)
      - fix error pointer dereference in error handling (Dan Carpenter)
      - fix invalid memory reference in nvmet_subsys_attr_qid_max_show
        (Daniel Wagner)
      - don't limit the DMA segment size in nvme-apple (Russell King)
      - fix workqueue MEM_RECLAIM flushing dependency (Sagi Grimberg)
      - disable write zeroes on various Kingston SSDs (Xander Li)

 - fix a memory leak with block device tracing (Ye)

 - flexible-array fix for ublk (Yushan)

 - document the ublk recovery feature from this merge window
   (ZiyangZhang)

 - remove dead bfq variable in struct (Yuwei)

 - error handling rq clearing fix (Yu)

 - add an IRQ safety check for the cached bio freeing (Pavel)

 - drbd bio cloning fix (Christoph)

* tag 'block-6.1-2022-10-20' of git://git.kernel.dk/linux:
  blktrace: remove unnessary stop block trace in 'blk_trace_shutdown'
  blktrace: fix possible memleak in '__blk_trace_remove'
  blktrace: introduce 'blk_trace_{start,stop}' helper
  bio: safeguard REQ_ALLOC_CACHE bio put
  block, bfq: remove unused variable for bfq_queue
  drbd: only clone bio if we have a backing device
  ublk_drv: use flexible-array member instead of zero-length array
  nvmet: fix invalid memory reference in nvmet_subsys_attr_qid_max_show
  nvmet: fix workqueue MEM_RECLAIM flushing dependency
  nvme-hwmon: kmalloc the NVME SMART log buffer
  nvme-hwmon: consistently ignore errors from nvme_hwmon_init
  nvme: add Guenther as nvme-hwmon maintainer
  nvme-apple: don't limit DMA segement size
  nvme-pci: disable write zeroes on various Kingston SSD
  nvme: fix error pointer dereference in error handling
  Documentation: document ublk user recovery feature
  blk-mq: fix null pointer dereference in blk_mq_clear_rq_mapping()
parents 294e73ff 2db96217
......@@ -144,6 +144,42 @@ managing and controlling ublk devices with help of several control commands:
For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
responsibility to save IO target specific info in userspace.
- ``UBLK_CMD_START_USER_RECOVERY``
This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
command is accepted after the old process has exited, ublk device is quiesced
and ``/dev/ublkc*`` is released. User should send this command before he starts
a new process which re-opens ``/dev/ublkc*``. When this command returns, the
ublk device is ready for the new process.
- ``UBLK_CMD_END_USER_RECOVERY``
This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
command is accepted after ublk device is quiesced and a new process has
opened ``/dev/ublkc*`` and get all ublk queues be ready. When this command
returns, ublk device is unquiesced and new I/O requests are passed to the
new process.
- user recovery feature description
Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
``UBLK_F_USER_RECOVERY_REISSUE``.
With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
recovery stage and ublk device ID is kept. It is ublk server's
responsibility to recover the device context by its own knowledge.
Requests which have not been issued to userspace are requeued. Requests
which have been issued to userspace are aborted.
With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
requests which have been issued to userspace are requeued and will be
re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
double-write since the driver may issue the same I/O request twice. It
might be useful to a read-only FS or a VM backend.
Data plane
----------
......
......@@ -14713,6 +14713,12 @@ F: drivers/nvme/target/auth.c
F: drivers/nvme/target/fabrics-cmd-auth.c
F: include/linux/nvme-auth.h
NVM EXPRESS HARDWARE MONITORING SUPPORT
M: Guenter Roeck <linux@roeck-us.net>
L: linux-nvme@lists.infradead.org
S: Supported
F: drivers/nvme/host/hwmon.c
NVM EXPRESS FC TRANSPORT DRIVERS
M: James Smart <james.smart@broadcom.com>
L: linux-nvme@lists.infradead.org
......
......@@ -369,12 +369,8 @@ struct bfq_queue {
unsigned long split_time; /* time of last split */
unsigned long first_IO_time; /* time of first I/O for this queue */
unsigned long creation_time; /* when this queue is created */
/* max service rate measured so far */
u32 max_service_rate;
/*
* Pointer to the waker queue for this queue, i.e., to the
* queue Q such that this queue happens to get new I/O right
......
......@@ -741,7 +741,7 @@ void bio_put(struct bio *bio)
return;
}
if (bio->bi_opf & REQ_ALLOC_CACHE) {
if ((bio->bi_opf & REQ_ALLOC_CACHE) && !WARN_ON_ONCE(in_interrupt())) {
struct bio_alloc_cache *cache;
bio_uninit(bio);
......
......@@ -3112,8 +3112,11 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
struct page *page;
unsigned long flags;
/* There is no need to clear a driver tags own mapping */
if (drv_tags == tags)
/*
* There is no need to clear mapping if driver tags is not initialized
* or the mapping belongs to the driver tags.
*/
if (!drv_tags || drv_tags == tags)
return;
list_for_each_entry(page, &tags->page_list, lru) {
......
......@@ -30,11 +30,6 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
return NULL;
memset(req, 0, sizeof(*req));
req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
GFP_NOIO, &drbd_io_bio_set);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
| (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
| (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
......@@ -1219,9 +1214,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio)
/* Update disk stats */
req->start_jif = bio_start_io_acct(req->master_bio);
if (!get_ldev(device)) {
bio_put(req->private_bio);
req->private_bio = NULL;
if (get_ldev(device)) {
req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
bio, GFP_NOIO,
&drbd_io_bio_set);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
}
/* process discards always from our submitter thread */
......
......@@ -124,7 +124,7 @@ struct ublk_queue {
bool force_abort;
unsigned short nr_io_ready; /* how many ios setup */
struct ublk_device *dev;
struct ublk_io ios[0];
struct ublk_io ios[];
};
#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
......
......@@ -1039,6 +1039,8 @@ static void apple_nvme_reset_work(struct work_struct *work)
dma_max_mapping_size(anv->dev) >> 9);
anv->ctrl.max_segments = NVME_MAX_SEGS;
dma_set_max_seg_size(anv->dev, 0xffffffff);
/*
* Enable NVMMU and linear submission queues.
* While we could keep those disabled and pretend this is slightly
......
......@@ -3262,8 +3262,12 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
return ret;
if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
/*
* Do not return errors unless we are in a controller reset,
* the controller works perfectly fine without hwmon.
*/
ret = nvme_hwmon_init(ctrl);
if (ret < 0)
if (ret == -EINTR)
return ret;
}
......@@ -4846,7 +4850,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
return 0;
out_cleanup_admin_q:
blk_mq_destroy_queue(ctrl->fabrics_q);
blk_mq_destroy_queue(ctrl->admin_q);
out_free_tagset:
blk_mq_free_tag_set(ctrl->admin_tagset);
return ret;
......
......@@ -12,7 +12,7 @@
struct nvme_hwmon_data {
struct nvme_ctrl *ctrl;
struct nvme_smart_log log;
struct nvme_smart_log *log;
struct mutex read_lock;
};
......@@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
{
return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
NVME_CSI_NVM, data->log, sizeof(*data->log), 0);
}
static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct nvme_hwmon_data *data = dev_get_drvdata(dev);
struct nvme_smart_log *log = &data->log;
struct nvme_smart_log *log = data->log;
int temp;
int err;
......@@ -163,7 +163,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
case hwmon_temp_max:
case hwmon_temp_min:
if ((!channel && data->ctrl->wctemp) ||
(channel && data->log.temp_sensor[channel - 1])) {
(channel && data->log->temp_sensor[channel - 1])) {
if (data->ctrl->quirks &
NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
return 0444;
......@@ -176,7 +176,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
break;
case hwmon_temp_input:
case hwmon_temp_label:
if (!channel || data->log.temp_sensor[channel - 1])
if (!channel || data->log->temp_sensor[channel - 1])
return 0444;
break;
default:
......@@ -230,7 +230,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return 0;
return -ENOMEM;
data->log = kzalloc(sizeof(*data->log), GFP_KERNEL);
if (!data->log) {
err = -ENOMEM;
goto err_free_data;
}
data->ctrl = ctrl;
mutex_init(&data->read_lock);
......@@ -238,8 +244,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
err = nvme_hwmon_get_smart_log(data);
if (err) {
dev_warn(dev, "Failed to read smart log (error %d)\n", err);
kfree(data);
return err;
goto err_free_log;
}
hwmon = hwmon_device_register_with_info(dev, "nvme",
......@@ -247,11 +252,17 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
NULL);
if (IS_ERR(hwmon)) {
dev_warn(dev, "Failed to instantiate hwmon device\n");
kfree(data);
return PTR_ERR(hwmon);
err = PTR_ERR(hwmon);
goto err_free_log;
}
ctrl->hwmon_device = hwmon;
return 0;
err_free_log:
kfree(data->log);
err_free_data:
kfree(data);
return err;
}
void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
......@@ -262,6 +273,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
hwmon_device_unregister(ctrl->hwmon_device);
ctrl->hwmon_device = NULL;
kfree(data->log);
kfree(data);
}
}
......@@ -3511,6 +3511,16 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x5018), /* KINGSTON OM8SFP4xxxxP OS21012 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x5016), /* KINGSTON OM3PGP4xxxxP OS21011 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x501A), /* KINGSTON OM8PGP4xxxxP OS21005 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x501B), /* KINGSTON OM8PGP4xxxxQ OS21005 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */
......
......@@ -1290,12 +1290,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
const char *page, size_t cnt)
{
struct nvmet_port *port = to_nvmet_port(item);
u16 qid_max;
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
if (sscanf(page, "%hu\n", &qid_max) != 1)
return -EINVAL;
......
......@@ -1176,7 +1176,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
* reset the keep alive timer when the controller is enabled.
*/
if (ctrl->kato)
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
......
......@@ -346,8 +346,40 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
static int blk_trace_start(struct blk_trace *bt)
{
if (bt->trace_state != Blktrace_setup &&
bt->trace_state != Blktrace_stopped)
return -EINVAL;
blktrace_seq++;
smp_mb();
bt->trace_state = Blktrace_running;
raw_spin_lock_irq(&running_trace_lock);
list_add(&bt->running_list, &running_trace_list);
raw_spin_unlock_irq(&running_trace_lock);
trace_note_time(bt);
return 0;
}
static int blk_trace_stop(struct blk_trace *bt)
{
if (bt->trace_state != Blktrace_running)
return -EINVAL;
bt->trace_state = Blktrace_stopped;
raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
return 0;
}
static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
blk_trace_stop(bt);
synchronize_rcu();
blk_trace_free(q, bt);
put_probe_ref();
......@@ -362,7 +394,6 @@ static int __blk_trace_remove(struct request_queue *q)
if (!bt)
return -EINVAL;
if (bt->trace_state != Blktrace_running)
blk_trace_cleanup(q, bt);
return 0;
......@@ -658,7 +689,6 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
......@@ -666,36 +696,10 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
if (bt == NULL)
return -EINVAL;
/*
* For starting a trace, we can transition from a setup or stopped
* trace. For stopping a trace, the state must be running
*/
ret = -EINVAL;
if (start) {
if (bt->trace_state == Blktrace_setup ||
bt->trace_state == Blktrace_stopped) {
blktrace_seq++;
smp_mb();
bt->trace_state = Blktrace_running;
raw_spin_lock_irq(&running_trace_lock);
list_add(&bt->running_list, &running_trace_list);
raw_spin_unlock_irq(&running_trace_lock);
trace_note_time(bt);
ret = 0;
}
} else {
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
ret = 0;
}
}
return ret;
if (start)
return blk_trace_start(bt);
else
return blk_trace_stop(bt);
}
int blk_trace_startstop(struct request_queue *q, int start)
......@@ -772,10 +776,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
void blk_trace_shutdown(struct request_queue *q)
{
if (rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex))) {
__blk_trace_startstop(q, 0);
lockdep_is_held(&q->debugfs_mutex)))
__blk_trace_remove(q);
}
}
#ifdef CONFIG_BLK_CGROUP
......@@ -1614,13 +1616,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
}
blk_trace_stop(bt);
put_probe_ref();
synchronize_rcu();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment