Commit 1718de78 authored by Linus Torvalds

Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "This is mainly some late lightnvm changes that came in just before the
  merge window, as well as fixes that have been queued up since the
  initial pull request was frozen.

  This contains:

   - lightnvm changes, fixing race conditions, improving memory
     utilization, and improving pblk compatibility (Chansol, Igor,
     Marcin)

   - NVMe pull request with minor fixes all over the map (via Christoph)

   - remove redundant error print in sata_rcar (Geert)

   - struct_size() cleanup (Jackie)

   - dasd CONFIG_LBDAF warning fix (Ming)

   - brd cond_resched() improvement (Mikulas)"

* tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits)
  block/bio-integrity: use struct_size() in kmalloc()
  nvme: validate cntlid during controller initialisation
  nvme: change locking for the per-subsystem controller list
  nvme: trace all async notice events
  nvme: fix typos in nvme status code values
  nvme-fabrics: remove unused argument
  nvme-multipath: avoid crash on invalid subsystem cntlid enumeration
  nvme-fc: use separate work queue to avoid warning
  nvme-rdma: remove redundant reference between ib_device and tagset
  nvme-pci: mark expected switch fall-through
  nvme-pci: add known admin effects to augument admin effects log page
  nvme-pci: init shadow doorbell after each reset
  brd: add cond_resched to brd_free_pages
  sata_rcar: Remove ata_host_alloc() error printing
  s390/dasd: fix build warning in dasd_eckd_build_cp_raw
  lightnvm: pblk: use nvm_rq_to_ppa_list()
  lightnvm: pblk: simplify partial read path
  lightnvm: do not remove instance under global lock
  lightnvm: track inflight target creations
  lightnvm: pblk: recover only written metadata
  ...
parents 815d469d 7a102d90
@@ -43,8 +43,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
     unsigned inline_vecs;

     if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
-        bip = kmalloc(sizeof(struct bio_integrity_payload) +
-                sizeof(struct bio_vec) * nr_vecs, gfp_mask);
+        bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
         inline_vecs = nr_vecs;
     } else {
         bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
...
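The struct_size() helper used in the hunk above comes from <linux/overflow.h>: for a struct ending in a flexible array member it computes sizeof(*p) + n * sizeof(p->member[0]) with overflow checking, saturating to SIZE_MAX so the allocation fails rather than wrapping. A minimal sketch of the pattern, using a made-up struct rather than the bio-integrity one:

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct sample_set {
        unsigned int nr;
        u64 vals[];                     /* flexible array member */
    };

    static struct sample_set *sample_set_alloc(unsigned int nr, gfp_t gfp)
    {
        struct sample_set *set;

        /* was: kmalloc(sizeof(*set) + nr * sizeof(u64), gfp) */
        set = kmalloc(struct_size(set, vals, nr), gfp);
        if (set)
            set->nr = nr;
        return set;
    }
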
@@ -909,7 +909,6 @@ static int sata_rcar_probe(struct platform_device *pdev)
     host = ata_host_alloc(dev, 1);
     if (!host) {
-        dev_err(dev, "ata_host_alloc failed\n");
         ret = -ENOMEM;
         goto err_pm_put;
     }
...
@@ -152,6 +152,12 @@ static void brd_free_pages(struct brd_device *brd)
         pos++;

+        /*
+         * It takes 3.4 seconds to remove 80GiB ramdisk.
+         * So, we need cond_resched to avoid stalling the CPU.
+         */
+        cond_resched();
+
         /*
          * This assumes radix_tree_gang_lookup always returns as
          * many pages as possible. If the radix-tree code changes,
...
@@ -45,6 +45,8 @@ struct nvm_dev_map {
     int num_ch;
 };

+static void nvm_free(struct kref *ref);
+
 static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
 {
     struct nvm_target *tgt;
@@ -325,6 +327,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
     struct nvm_target *t;
     struct nvm_tgt_dev *tgt_dev;
     void *targetdata;
+    unsigned int mdts;
     int ret;

     switch (create->conf.type) {
@@ -412,8 +415,12 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
     tdisk->private_data = targetdata;
     tqueue->queuedata = targetdata;

-    blk_queue_max_hw_sectors(tqueue,
-            (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+    mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
+    if (dev->geo.mdts) {
+        mdts = min_t(u32, dev->geo.mdts,
+                (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
+    }
+    blk_queue_max_hw_sectors(tqueue, mdts);

     set_capacity(tdisk, tt->capacity(targetdata));
     add_disk(tdisk);
@@ -476,7 +483,6 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)

 /**
  * nvm_remove_tgt - Removes a target from the media manager
- * @dev:    device
  * @remove: ioctl structure with target name to remove.
  *
  * Returns:
@@ -484,18 +490,28 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
  * 1: on not found
  * <0: on error
  */
-static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
+static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
 {
     struct nvm_target *t;
+    struct nvm_dev *dev;

-    mutex_lock(&dev->mlock);
-    t = nvm_find_target(dev, remove->tgtname);
-    if (!t) {
+    down_read(&nvm_lock);
+    list_for_each_entry(dev, &nvm_devices, devices) {
+        mutex_lock(&dev->mlock);
+        t = nvm_find_target(dev, remove->tgtname);
+        if (t) {
+            mutex_unlock(&dev->mlock);
+            break;
+        }
         mutex_unlock(&dev->mlock);
-        return 1;
     }
+    up_read(&nvm_lock);
+
+    if (!t)
+        return 1;
+
     __nvm_remove_target(t, true);
-    mutex_unlock(&dev->mlock);
+    kref_put(&dev->ref, nvm_free);

     return 0;
 }
@@ -1089,15 +1105,16 @@ static int nvm_core_init(struct nvm_dev *dev)
     return ret;
 }

-static void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct kref *ref)
 {
-    if (!dev)
-        return;
+    struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);

     if (dev->dma_pool)
         dev->ops->destroy_dma_pool(dev->dma_pool);

-    nvm_unregister_map(dev);
+    if (dev->rmap)
+        nvm_unregister_map(dev);
+
     kfree(dev->lun_map);
     kfree(dev);
 }
@@ -1134,7 +1151,13 @@ static int nvm_init(struct nvm_dev *dev)

 struct nvm_dev *nvm_alloc_dev(int node)
 {
-    return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+    struct nvm_dev *dev;
+
+    dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
+    if (dev)
+        kref_init(&dev->ref);
+
+    return dev;
 }
 EXPORT_SYMBOL(nvm_alloc_dev);
@@ -1142,12 +1165,16 @@ int nvm_register(struct nvm_dev *dev)
 {
     int ret, exp_pool_size;

-    if (!dev->q || !dev->ops)
+    if (!dev->q || !dev->ops) {
+        kref_put(&dev->ref, nvm_free);
         return -EINVAL;
+    }

     ret = nvm_init(dev);
-    if (ret)
+    if (ret) {
+        kref_put(&dev->ref, nvm_free);
         return ret;
+    }

     exp_pool_size = max_t(int, PAGE_SIZE,
             (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
@@ -1157,7 +1184,7 @@ int nvm_register(struct nvm_dev *dev)
             exp_pool_size);
     if (!dev->dma_pool) {
         pr_err("nvm: could not create dma pool\n");
-        nvm_free(dev);
+        kref_put(&dev->ref, nvm_free);
         return -ENOMEM;
     }
@@ -1179,6 +1206,7 @@ void nvm_unregister(struct nvm_dev *dev)
         if (t->dev->parent != dev)
             continue;
         __nvm_remove_target(t, false);
+        kref_put(&dev->ref, nvm_free);
     }
     mutex_unlock(&dev->mlock);
@@ -1186,13 +1214,14 @@ void nvm_unregister(struct nvm_dev *dev)
     list_del(&dev->devices);
     up_write(&nvm_lock);

-    nvm_free(dev);
+    kref_put(&dev->ref, nvm_free);
 }
 EXPORT_SYMBOL(nvm_unregister);

 static int __nvm_configure_create(struct nvm_ioctl_create *create)
 {
     struct nvm_dev *dev;
+    int ret;

     down_write(&nvm_lock);
     dev = nvm_find_nvm_dev(create->dev);
@@ -1203,7 +1232,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
         return -EINVAL;
     }

-    return nvm_create_tgt(dev, create);
+    kref_get(&dev->ref);
+    ret = nvm_create_tgt(dev, create);
+    if (ret)
+        kref_put(&dev->ref, nvm_free);
+
+    return ret;
 }

 static long nvm_ioctl_info(struct file *file, void __user *arg)
@@ -1322,8 +1356,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
 static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
 {
     struct nvm_ioctl_remove remove;
-    struct nvm_dev *dev;
-    int ret = 0;

     if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
         return -EFAULT;
@@ -1335,13 +1367,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
         return -EINVAL;
     }

-    list_for_each_entry(dev, &nvm_devices, devices) {
-        ret = nvm_remove_tgt(dev, &remove);
-        if (!ret)
-            break;
-    }
-
-    return ret;
+    return nvm_remove_tgt(&remove);
 }

 /* kept for compatibility reasons */
...
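The core.c changes above all follow the standard kref pattern: the device object now starts life with one reference, each target creation takes an extra reference, and nvm_free() becomes the kref release callback that runs only when the last reference is dropped. A minimal sketch of that lifecycle with an illustrative type (not the lightnvm structures):

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct sample_dev {
        struct kref ref;
        /* ... device state ... */
    };

    static void sample_dev_release(struct kref *ref)
    {
        struct sample_dev *dev = container_of(ref, struct sample_dev, ref);

        kfree(dev);                     /* runs once, on the final put */
    }

    static struct sample_dev *sample_dev_alloc(void)
    {
        struct sample_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (dev)
            kref_init(&dev->ref);       /* refcount starts at 1 */
        return dev;
    }

    /* Each additional user pairs kref_get(&dev->ref) with
     * kref_put(&dev->ref, sample_dev_release).
     */
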
@@ -18,7 +18,8 @@

 #include "pblk.h"

-int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
+void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
+            unsigned long flags)
 {
     struct request_queue *q = pblk->dev->q;
     struct pblk_w_ctx w_ctx;
@@ -43,6 +44,7 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
         goto retry;
     case NVM_IO_ERR:
         pblk_pipeline_stop(pblk);
+        bio_io_error(bio);
         goto out;
     }
@@ -79,7 +81,9 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 out:
     generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
     pblk_write_should_kick(pblk);
-    return ret;
+
+    if (ret == NVM_IO_DONE)
+        bio_endio(bio);
 }

 /*
...
@@ -562,11 +562,9 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)

 int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
 {
-    struct ppa_addr *ppa_list;
+    struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
     int ret;

-    ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
     pblk_down_chunk(pblk, ppa_list[0]);
     ret = pblk_submit_io_sync(pblk, rqd);
     pblk_up_chunk(pblk, ppa_list[0]);
@@ -725,6 +723,7 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
     struct nvm_tgt_dev *dev = pblk->dev;
     struct pblk_line_meta *lm = &pblk->lm;
     struct bio *bio;
+    struct ppa_addr *ppa_list;
     struct nvm_rq rqd;
     u64 paddr = pblk_line_smeta_start(pblk, line);
     int i, ret;
@@ -748,9 +747,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
     rqd.opcode = NVM_OP_PREAD;
     rqd.nr_ppas = lm->smeta_sec;
     rqd.is_seq = 1;
+    ppa_list = nvm_rq_to_ppa_list(&rqd);

     for (i = 0; i < lm->smeta_sec; i++, paddr++)
-        rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+        ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);

     ret = pblk_submit_io_sync(pblk, &rqd);
     if (ret) {
@@ -761,8 +761,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)

     atomic_dec(&pblk->inflight_io);

-    if (rqd.error)
+    if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
         pblk_log_read_err(pblk, &rqd);
+        ret = -EIO;
+    }

 clear_rqd:
     pblk_free_rqd_meta(pblk, &rqd);
@@ -775,6 +777,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
     struct nvm_tgt_dev *dev = pblk->dev;
     struct pblk_line_meta *lm = &pblk->lm;
     struct bio *bio;
+    struct ppa_addr *ppa_list;
     struct nvm_rq rqd;
     __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
     __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
@@ -799,12 +802,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
     rqd.opcode = NVM_OP_PWRITE;
     rqd.nr_ppas = lm->smeta_sec;
     rqd.is_seq = 1;
+    ppa_list = nvm_rq_to_ppa_list(&rqd);

     for (i = 0; i < lm->smeta_sec; i++, paddr++) {
         struct pblk_sec_meta *meta = pblk_get_meta(pblk,
                            rqd.meta_list, i);

-        rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
+        ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
         meta->lba = lba_list[paddr] = addr_empty;
     }
@@ -834,8 +838,9 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
     struct nvm_geo *geo = &dev->geo;
     struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct pblk_line_meta *lm = &pblk->lm;
-    void *ppa_list, *meta_list;
+    void *ppa_list_buf, *meta_list;
     struct bio *bio;
+    struct ppa_addr *ppa_list;
     struct nvm_rq rqd;
     u64 paddr = line->emeta_ssec;
     dma_addr_t dma_ppa_list, dma_meta_list;
@@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
     if (!meta_list)
         return -ENOMEM;

-    ppa_list = meta_list + pblk_dma_meta_size(pblk);
+    ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
     dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);

 next_rq:
@@ -872,11 +877,12 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
     rqd.bio = bio;
     rqd.meta_list = meta_list;
-    rqd.ppa_list = ppa_list;
+    rqd.ppa_list = ppa_list_buf;
     rqd.dma_meta_list = dma_meta_list;
     rqd.dma_ppa_list = dma_ppa_list;
     rqd.opcode = NVM_OP_PREAD;
     rqd.nr_ppas = rq_ppas;
+    ppa_list = nvm_rq_to_ppa_list(&rqd);

     for (i = 0; i < rqd.nr_ppas; ) {
         struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
@@ -904,7 +910,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
         }

         for (j = 0; j < min; j++, i++, paddr++)
-            rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
+            ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
     }

     ret = pblk_submit_io_sync(pblk, &rqd);
@@ -916,8 +922,11 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,

     atomic_dec(&pblk->inflight_io);

-    if (rqd.error)
+    if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
         pblk_log_read_err(pblk, &rqd);
+        ret = -EIO;
+        goto free_rqd_dma;
+    }

     emeta_buf += rq_len;
     left_ppas -= rq_ppas;
@@ -1162,7 +1171,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
     off = bit * geo->ws_opt;
     bitmap_set(line->map_bitmap, off, lm->smeta_sec);
     line->sec_in_line -= lm->smeta_sec;
-    line->smeta_ssec = off;
     line->cur_sec = off + lm->smeta_sec;

     if (init && pblk_line_smeta_write(pblk, line, off)) {
@@ -1521,11 +1529,9 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)

 void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
 {
-    struct ppa_addr *ppa_list;
+    struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
     int i;

-    ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
-
     for (i = 0; i < rqd->nr_ppas; i++)
         pblk_ppa_to_line_put(pblk, ppa_list[i]);
 }
@@ -1699,6 +1705,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)

     spin_lock(&line->lock);
     WARN_ON(line->state != PBLK_LINESTATE_GC);
+    if (line->w_err_gc->has_gc_err) {
+        spin_unlock(&line->lock);
+        pblk_err(pblk, "line %d had errors during GC\n", line->id);
+        pblk_put_line_back(pblk, line);
+        line->w_err_gc->has_gc_err = 0;
+        return;
+    }
+
     line->state = PBLK_LINESTATE_FREE;
     trace_pblk_line_state(pblk_disk_name(pblk), line->id,
                     line->state);
@@ -2023,7 +2037,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
     struct ppa_addr ppa_l2p;

     /* logic error: lba out-of-bounds. Ignore update */
-    if (!(lba < pblk->rl.nr_secs)) {
+    if (!(lba < pblk->capacity)) {
         WARN(1, "pblk: corrupted L2P map request\n");
         return;
     }
@@ -2063,7 +2077,7 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
 #endif

     /* logic error: lba out-of-bounds. Ignore update */
-    if (!(lba < pblk->rl.nr_secs)) {
+    if (!(lba < pblk->capacity)) {
         WARN(1, "pblk: corrupted L2P map request\n");
         return 0;
     }
@@ -2109,7 +2123,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
     }

     /* logic error: lba out-of-bounds. Ignore update */
-    if (!(lba < pblk->rl.nr_secs)) {
+    if (!(lba < pblk->capacity)) {
         WARN(1, "pblk: corrupted L2P map request\n");
         return;
     }
@@ -2135,8 +2149,8 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
     spin_unlock(&pblk->trans_lock);
 }

-void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
-             sector_t blba, int nr_secs)
+int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
+             sector_t blba, int nr_secs, bool *from_cache)
 {
     int i;
@@ -2150,10 +2164,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
         if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
             struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);

+            if (i > 0 && *from_cache)
+                break;
+            *from_cache = false;
+
             kref_get(&line->ref);
+        } else {
+            if (i > 0 && !*from_cache)
+                break;
+            *from_cache = true;
         }
     }
     spin_unlock(&pblk->trans_lock);
+    return i;
 }

 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
@@ -2167,7 +2190,7 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
         lba = lba_list[i];
         if (lba != ADDR_EMPTY) {
             /* logic error: lba out-of-bounds. Ignore update */
-            if (!(lba < pblk->rl.nr_secs)) {
+            if (!(lba < pblk->capacity)) {
                 WARN(1, "pblk: corrupted L2P map request\n");
                 continue;
             }
...
@@ -59,24 +59,28 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
     wake_up_process(gc->gc_writer_ts);
 }

-static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
+void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 {
     struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct list_head *move_list;

+    spin_lock(&l_mg->gc_lock);
     spin_lock(&line->lock);
     WARN_ON(line->state != PBLK_LINESTATE_GC);
     line->state = PBLK_LINESTATE_CLOSED;
     trace_pblk_line_state(pblk_disk_name(pblk), line->id,
                     line->state);
+
+    /* We need to reset gc_group in order to ensure that
+     * pblk_line_gc_list will return proper move_list
+     * since right now current line is not on any of the
+     * gc lists.
+     */
+    line->gc_group = PBLK_LINEGC_NONE;
     move_list = pblk_line_gc_list(pblk, line);
     spin_unlock(&line->lock);
-
-    if (move_list) {
-        spin_lock(&l_mg->gc_lock);
-        list_add_tail(&line->list, move_list);
-        spin_unlock(&l_mg->gc_lock);
-    }
+    list_add_tail(&line->list, move_list);
+    spin_unlock(&l_mg->gc_lock);
 }

 static void pblk_gc_line_ws(struct work_struct *work)
@@ -84,8 +88,6 @@ static void pblk_gc_line_ws(struct work_struct *work)
     struct pblk_line_ws *gc_rq_ws = container_of(work,
                         struct pblk_line_ws, ws);
     struct pblk *pblk = gc_rq_ws->pblk;
-    struct nvm_tgt_dev *dev = pblk->dev;
-    struct nvm_geo *geo = &dev->geo;
     struct pblk_gc *gc = &pblk->gc;
     struct pblk_line *line = gc_rq_ws->line;
     struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
@@ -93,18 +95,10 @@ static void pblk_gc_line_ws(struct work_struct *work)

     up(&gc->gc_sem);

-    gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
-    if (!gc_rq->data) {
-        pblk_err(pblk, "could not GC line:%d (%d/%d)\n",
-                    line->id, *line->vsc, gc_rq->nr_secs);
-        goto out;
-    }
-
     /* Read from GC victim block */
     ret = pblk_submit_read_gc(pblk, gc_rq);
     if (ret) {
-        pblk_err(pblk, "failed GC read in line:%d (err:%d)\n",
-                    line->id, ret);
+        line->w_err_gc->has_gc_err = 1;
         goto out;
     }
@@ -189,6 +183,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
     struct pblk_line *line = line_ws->line;
     struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct pblk_line_meta *lm = &pblk->lm;
+    struct nvm_tgt_dev *dev = pblk->dev;
+    struct nvm_geo *geo = &dev->geo;
     struct pblk_gc *gc = &pblk->gc;
     struct pblk_line_ws *gc_rq_ws;
     struct pblk_gc_rq *gc_rq;
@@ -247,9 +243,13 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
         gc_rq->nr_secs = nr_secs;
         gc_rq->line = line;

+        gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
+        if (!gc_rq->data)
+            goto fail_free_gc_rq;
+
         gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
         if (!gc_rq_ws)
-            goto fail_free_gc_rq;
+            goto fail_free_gc_data;

         gc_rq_ws->pblk = pblk;
         gc_rq_ws->line = line;
@@ -281,6 +281,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)

     return;

+fail_free_gc_data:
+    vfree(gc_rq->data);
 fail_free_gc_rq:
     kfree(gc_rq);
 fail_free_lba_list:
@@ -290,8 +292,11 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
 fail_free_ws:
     kfree(line_ws);

+    /* Line goes back to closed state, so we cannot release additional
+     * reference for line, since we do that only when we want to do
+     * gc to free line state transition.
+     */
     pblk_put_line_back(pblk, line);
-    kref_put(&line->ref, pblk_line_put);
     atomic_dec(&gc->read_inflight_gc);

     pblk_err(pblk, "failed to GC line %d\n", line->id);
@@ -355,8 +360,13 @@ static int pblk_gc_read(struct pblk *pblk)

     pblk_gc_kick(pblk);

-    if (pblk_gc_line(pblk, line))
+    if (pblk_gc_line(pblk, line)) {
         pblk_err(pblk, "failed to GC line %d\n", line->id);
+        /* rollback */
+        spin_lock(&gc->r_lock);
+        list_add_tail(&line->list, &gc->r_list);
+        spin_unlock(&gc->r_lock);
+    }

     return 0;
 }
...
@@ -47,33 +47,6 @@ static struct pblk_global_caches pblk_caches = {

 struct bio_set pblk_bio_set;

-static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
-              struct bio *bio)
-{
-    int ret;
-
-    /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
-     * constraint. Writes can be of arbitrary size.
-     */
-    if (bio_data_dir(bio) == READ) {
-        blk_queue_split(q, &bio);
-        ret = pblk_submit_read(pblk, bio);
-        if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
-            bio_put(bio);
-
-        return ret;
-    }
-
-    /* Prevent deadlock in the case of a modest LUN configuration and large
-     * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
-     * available for user I/O.
-     */
-    if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
-        blk_queue_split(q, &bio);
-
-    return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
-}
-
 static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
 {
     struct pblk *pblk = q->queuedata;
@@ -86,13 +59,21 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
         }
     }

-    switch (pblk_rw_io(q, pblk, bio)) {
-    case NVM_IO_ERR:
-        bio_io_error(bio);
-        break;
-    case NVM_IO_DONE:
-        bio_endio(bio);
-        break;
+    /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
+     * constraint. Writes can be of arbitrary size.
+     */
+    if (bio_data_dir(bio) == READ) {
+        blk_queue_split(q, &bio);
+        pblk_submit_read(pblk, bio);
+    } else {
+        /* Prevent deadlock in the case of a modest LUN configuration
+         * and large user I/Os. Unless stalled, the rate limiter
+         * leaves at least 256KB available for user I/O.
+         */
+        if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
+            blk_queue_split(q, &bio);
+
+        pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
     }

     return BLK_QC_T_NONE;
@@ -105,7 +86,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
     if (pblk->addrf_len < 32)
         entry_size = 4;

-    return entry_size * pblk->rl.nr_secs;
+    return entry_size * pblk->capacity;
 }

 #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -164,13 +145,18 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
     int ret = 0;

     map_size = pblk_trans_map_size(pblk);
-    pblk->trans_map = vmalloc(map_size);
-    if (!pblk->trans_map)
+    pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN
+                    | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM,
+                    PAGE_KERNEL);
+    if (!pblk->trans_map) {
+        pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
+                map_size);
         return -ENOMEM;
+    }

     pblk_ppa_set_empty(&ppa);

-    for (i = 0; i < pblk->rl.nr_secs; i++)
+    for (i = 0; i < pblk->capacity; i++)
         pblk_trans_map_set(pblk, i, ppa);

     ret = pblk_l2p_recover(pblk, factory_init);
@@ -701,7 +687,6 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
      * on user capacity consider only provisioned blocks
      */
     pblk->rl.total_blocks = nr_free_chks;
-    pblk->rl.nr_secs = nr_free_chks * geo->clba;

     /* Consider sectors used for metadata */
     sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
@@ -1284,7 +1269,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,

     pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
             geo->all_luns, pblk->l_mg.nr_lines,
-            (unsigned long long)pblk->rl.nr_secs,
+            (unsigned long long)pblk->capacity,
             pblk->rwb.nr_entries);

     wake_up_process(pblk->writer_ts);
...
@@ -162,6 +162,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
             *erase_ppa = ppa_list[i];
             erase_ppa->a.blk = e_line->id;
+            erase_ppa->a.reserved = 0;

             spin_unlock(&e_line->lock);
...
@@ -642,7 +642,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
  * be directed to disk.
  */
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-            struct ppa_addr ppa, int bio_iter, bool advanced_bio)
+            struct ppa_addr ppa)
 {
     struct pblk *pblk = container_of(rb, struct pblk, rwb);
     struct pblk_rb_entry *entry;
@@ -673,15 +673,6 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
         ret = 0;
         goto out;
     }

-    /* Only advance the bio if it hasn't been advanced already. If advanced,
-     * this bio is at least a partial bio (i.e., it has partially been
-     * filled with data from the cache). If part of the data resides on the
-     * media, we will read later on
-     */
-    if (unlikely(!advanced_bio))
-        bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
-
     data = bio_data(bio);
     memcpy(data, entry->data, rb->seg_size);
@@ -799,8 +790,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb)
     }

 out:
-    spin_unlock(&rb->w_lock);
     spin_unlock_irq(&rb->s_lock);
+    spin_unlock(&rb->w_lock);

     return ret;
 }
...
@@ -26,8 +26,7 @@
  * issued.
  */
 static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
-                sector_t lba, struct ppa_addr ppa,
-                int bio_iter, bool advanced_bio)
+                sector_t lba, struct ppa_addr ppa)
 {
 #ifdef CONFIG_NVM_PBLK_DEBUG
     /* Callers must ensure that the ppa points to a cache address */
@@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
     BUG_ON(!pblk_addr_in_cache(ppa));
 #endif

-    return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa,
-                        bio_iter, advanced_bio);
+    return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
 }

-static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
                  struct bio *bio, sector_t blba,
-                 unsigned long *read_bitmap)
+                 bool *from_cache)
 {
     void *meta_list = rqd->meta_list;
-    struct ppa_addr ppas[NVM_MAX_VLBA];
-    int nr_secs = rqd->nr_ppas;
-    bool advanced_bio = false;
-    int i, j = 0;
+    int nr_secs, i;

-    pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+retry:
+    nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
+                    from_cache);
+
+    if (!*from_cache)
+        goto end;

     for (i = 0; i < nr_secs; i++) {
-        struct ppa_addr p = ppas[i];
         struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
         sector_t lba = blba + i;

-retry:
-        if (pblk_ppa_empty(p)) {
+        if (pblk_ppa_empty(rqd->ppa_list[i])) {
             __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

-            WARN_ON(test_and_set_bit(i, read_bitmap));
             meta->lba = addr_empty;
-
-            if (unlikely(!advanced_bio)) {
-                bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
-                advanced_bio = true;
+        } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
+            /*
+             * Try to read from write buffer. The address is later
+             * checked on the write buffer to prevent retrieving
+             * overwritten data.
+             */
+            if (!pblk_read_from_cache(pblk, bio, lba,
+                            rqd->ppa_list[i])) {
+                if (i == 0) {
+                    /*
+                     * We didn't call with bio_advance()
+                     * yet, so we can just retry.
+                     */
+                    goto retry;
+                } else {
+                    /*
+                     * We already call bio_advance()
+                     * so we cannot retry and we need
+                     * to quit that function in order
+                     * to allow caller to handle the bio
+                     * splitting in the current sector
+                     * position.
+                     */
+                    nr_secs = i;
+                    goto end;
+                }
             }
-
-            goto next;
-        }
-
-        /* Try to read from write buffer. The address is later checked
-         * on the write buffer to prevent retrieving overwritten data.
-         */
-        if (pblk_addr_in_cache(p)) {
-            if (!pblk_read_from_cache(pblk, bio, lba, p, i,
-                            advanced_bio)) {
-                pblk_lookup_l2p_seq(pblk, &p, lba, 1);
-                goto retry;
-            }
-            WARN_ON(test_and_set_bit(i, read_bitmap));
             meta->lba = cpu_to_le64(lba);
-            advanced_bio = true;
 #ifdef CONFIG_NVM_PBLK_DEBUG
             atomic_long_inc(&pblk->cache_reads);
 #endif
-        } else {
-            /* Read from media non-cached sectors */
-            rqd->ppa_list[j++] = p;
         }
-
-next:
-        if (advanced_bio)
-            bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+        bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
     }

+end:
     if (pblk_io_aligned(pblk, nr_secs))
         rqd->is_seq = 1;

#ifdef CONFIG_NVM_PBLK_DEBUG
     atomic_long_add(nr_secs, &pblk->inflight_reads);
 #endif
+
+    return nr_secs;
 }
@@ -175,12 +176,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
     WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
 }

-static void pblk_end_user_read(struct bio *bio)
+static void pblk_end_user_read(struct bio *bio, int error)
 {
-#ifdef CONFIG_NVM_PBLK_DEBUG
-    WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
-#endif
-    bio_endio(bio);
+    if (error && error != NVM_RSP_WARN_HIGHECC)
+        bio_io_error(bio);
+    else
+        bio_endio(bio);
 }

 static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -197,9 +198,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
         pblk_log_read_err(pblk, rqd);

     pblk_read_check_seq(pblk, rqd, r_ctx->lba);
-
-    if (int_bio)
-        bio_put(int_bio);
+    bio_put(int_bio);

     if (put_line)
         pblk_rq_to_line_put(pblk, rqd);
@@ -219,188 +218,17 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
     struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
     struct bio *bio = (struct bio *)r_ctx->private;

-    pblk_end_user_read(bio);
+    pblk_end_user_read(bio, rqd->error);
     __pblk_end_io_read(pblk, rqd, true);
 }

-static void pblk_end_partial_read(struct nvm_rq *rqd)
-{
-    struct pblk *pblk = rqd->private;
-    struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-    struct pblk_pr_ctx *pr_ctx = r_ctx->private;
-    struct pblk_sec_meta *meta;
-    struct bio *new_bio = rqd->bio;
-    struct bio *bio = pr_ctx->orig_bio;
-    void *meta_list = rqd->meta_list;
-    unsigned long *read_bitmap = pr_ctx->bitmap;
-    struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
-    struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
-    int nr_secs = pr_ctx->orig_nr_secs;
-    int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-    void *src_p, *dst_p;
-    int bit, i;
-
-    if (unlikely(nr_holes == 1)) {
-        struct ppa_addr ppa;
-
-        ppa = rqd->ppa_addr;
-        rqd->ppa_list = pr_ctx->ppa_ptr;
-        rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
-        rqd->ppa_list[0] = ppa;
-    }
-
-    for (i = 0; i < nr_secs; i++) {
-        meta = pblk_get_meta(pblk, meta_list, i);
-        pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba);
-        meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]);
-    }
-
-    /* Fill the holes in the original bio */
-    i = 0;
-    for (bit = 0; bit < nr_secs; bit++) {
-        if (!test_bit(bit, read_bitmap)) {
-            struct bio_vec dst_bv, src_bv;
-            struct pblk_line *line;
-
-            line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
-            kref_put(&line->ref, pblk_line_put);
-
-            meta = pblk_get_meta(pblk, meta_list, bit);
-            meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
-
-            dst_bv = bio_iter_iovec(bio, orig_iter);
-            src_bv = bio_iter_iovec(new_bio, new_iter);
-
-            src_p = kmap_atomic(src_bv.bv_page);
-            dst_p = kmap_atomic(dst_bv.bv_page);
-
-            memcpy(dst_p + dst_bv.bv_offset,
-                src_p + src_bv.bv_offset,
-                PBLK_EXPOSED_PAGE_SIZE);
-
-            kunmap_atomic(src_p);
-            kunmap_atomic(dst_p);
-
-            flush_dcache_page(dst_bv.bv_page);
-            mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
-
-            bio_advance_iter(new_bio, &new_iter,
-                    PBLK_EXPOSED_PAGE_SIZE);
-            i++;
-        }
-        bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
-    }
-
-    bio_put(new_bio);
-    kfree(pr_ctx);
-
-    /* restore original request */
-    rqd->bio = NULL;
-    rqd->nr_ppas = nr_secs;
-
-    bio_endio(bio);
-    __pblk_end_io_read(pblk, rqd, false);
-}
-
-static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
-                unsigned int bio_init_idx,
-                unsigned long *read_bitmap,
-                int nr_holes)
-{
-    void *meta_list = rqd->meta_list;
-    struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-    struct pblk_pr_ctx *pr_ctx;
-    struct bio *new_bio, *bio = r_ctx->private;
-    int nr_secs = rqd->nr_ppas;
-    int i;
-
-    new_bio = bio_alloc(GFP_KERNEL, nr_holes);
-
-    if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
-        goto fail_bio_put;
-
-    if (nr_holes != new_bio->bi_vcnt) {
-        WARN_ONCE(1, "pblk: malformed bio\n");
-        goto fail_free_pages;
-    }
-
-    pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
-    if (!pr_ctx)
-        goto fail_free_pages;
-
-    for (i = 0; i < nr_secs; i++) {
-        struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
-
-        pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba);
-    }
-
-    new_bio->bi_iter.bi_sector = 0; /* internal bio */
-    bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
-
-    rqd->bio = new_bio;
-    rqd->nr_ppas = nr_holes;
-
-    pr_ctx->orig_bio = bio;
-    bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
-    pr_ctx->bio_init_idx = bio_init_idx;
-    pr_ctx->orig_nr_secs = nr_secs;
-    r_ctx->private = pr_ctx;
-
-    if (unlikely(nr_holes == 1)) {
-        pr_ctx->ppa_ptr = rqd->ppa_list;
-        pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
-        rqd->ppa_addr = rqd->ppa_list[0];
-    }
-    return 0;
-
-fail_free_pages:
-    pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
-fail_bio_put:
-    bio_put(new_bio);
-
-    return -ENOMEM;
-}
-
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
-                 unsigned int bio_init_idx,
-                 unsigned long *read_bitmap, int nr_secs)
-{
-    int nr_holes;
-    int ret;
-
-    nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-
-    if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
-                    nr_holes))
-        return NVM_IO_ERR;
-
-    rqd->end_io = pblk_end_partial_read;
-
-    ret = pblk_submit_io(pblk, rqd);
-    if (ret) {
-        bio_put(rqd->bio);
-        pblk_err(pblk, "partial read IO submission failed\n");
-        goto err;
-    }
-
-    return NVM_IO_OK;
-
-err:
-    pblk_err(pblk, "failed to perform partial read\n");
-
-    /* Free allocated pages in new bio */
-    pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
-    __pblk_end_io_read(pblk, rqd, false);
-    return NVM_IO_ERR;
-}
-
 static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
-             sector_t lba, unsigned long *read_bitmap)
+             sector_t lba, bool *from_cache)
 {
     struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
     struct ppa_addr ppa;

-    pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+    pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);

 #ifdef CONFIG_NVM_PBLK_DEBUG
     atomic_long_inc(&pblk->inflight_reads);
@@ -410,7 +238,6 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
     if (pblk_ppa_empty(ppa)) {
         __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

-        WARN_ON(test_and_set_bit(0, read_bitmap));
         meta->lba = addr_empty;
         return;
     }
@@ -419,12 +246,11 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
      * write buffer to prevent retrieving overwritten data.
      */
     if (pblk_addr_in_cache(ppa)) {
-        if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) {
-            pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+        if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
+            pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
             goto retry;
         }

-        WARN_ON(test_and_set_bit(0, read_bitmap));
         meta->lba = cpu_to_le64(lba);

 #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -435,95 +261,92 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
     }
 }

-int pblk_submit_read(struct pblk *pblk, struct bio *bio)
+void pblk_submit_read(struct pblk *pblk, struct bio *bio)
 {
     struct nvm_tgt_dev *dev = pblk->dev;
     struct request_queue *q = dev->q;
     sector_t blba = pblk_get_lba(bio);
     unsigned int nr_secs = pblk_get_secs(bio);
+    bool from_cache;
     struct pblk_g_ctx *r_ctx;
     struct nvm_rq *rqd;
-    unsigned int bio_init_idx;
-    DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA);
-    int ret = NVM_IO_ERR;
+    struct bio *int_bio, *split_bio;

     generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
                   &pblk->disk->part0);

-    bitmap_zero(read_bitmap, nr_secs);
-
     rqd = pblk_alloc_rqd(pblk, PBLK_READ);

     rqd->opcode = NVM_OP_PREAD;
     rqd->nr_ppas = nr_secs;
-    rqd->bio = NULL; /* cloned bio if needed */
     rqd->private = pblk;
     rqd->end_io = pblk_end_io_read;

     r_ctx = nvm_rq_to_pdu(rqd);
     r_ctx->start_time = jiffies;
     r_ctx->lba = blba;
-    r_ctx->private = bio; /* original bio */

-    /* Save the index for this bio's start. This is needed in case
-     * we need to fill a partial read.
-     */
-    bio_init_idx = pblk_get_bi_idx(bio);
-
-    if (pblk_alloc_rqd_meta(pblk, rqd))
-        goto fail_rqd_free;
+    if (pblk_alloc_rqd_meta(pblk, rqd)) {
+        bio_io_error(bio);
+        pblk_free_rqd(pblk, rqd, PBLK_READ);
+        return;
+    }
+
+    /* Clone read bio to deal internally with:
+     * -read errors when reading from drive
+     * -bio_advance() calls during cache reads
+     */
+    int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);

     if (nr_secs > 1)
-        pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap);
+        nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
+                        &from_cache);
     else
-        pblk_read_rq(pblk, rqd, bio, blba, read_bitmap);
+        pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);

-    if (bitmap_full(read_bitmap, nr_secs)) {
+split_retry:
+    r_ctx->private = bio; /* original bio */
+    rqd->bio = int_bio; /* internal bio */
+
+    if (from_cache && nr_secs == rqd->nr_ppas) {
+        /* All data was read from cache, we can complete the IO. */
+        pblk_end_user_read(bio, 0);
         atomic_inc(&pblk->inflight_io);
         __pblk_end_io_read(pblk, rqd, false);
-        return NVM_IO_DONE;
-    }
-
-    /* All sectors are to be read from the device */
-    if (bitmap_empty(read_bitmap, rqd->nr_ppas)) {
-        struct bio *int_bio = NULL;
+    } else if (nr_secs != rqd->nr_ppas) {
+        /* The read bio request could be partially filled by the write
+         * buffer, but there are some holes that need to be read from
+         * the drive. In order to handle this, we will use block layer
+         * mechanism to split this request in to smaller ones and make
+         * a chain of it.
+         */
+        split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
+                    &pblk_bio_set);
+        bio_chain(split_bio, bio);
+        generic_make_request(bio);
+
+        /* New bio contains first N sectors of the previous one, so
+         * we can continue to use existing rqd, but we need to shrink
+         * the number of PPAs in it. New bio is also guaranteed that
+         * it contains only either data from cache or from drive, newer
+         * mix of them.
+         */
+        bio = split_bio;
+        rqd->nr_ppas = nr_secs;
+        if (rqd->nr_ppas == 1)
+            rqd->ppa_addr = rqd->ppa_list[0];

-        /* Clone read bio to deal with read errors internally */
+        /* Recreate int_bio - existing might have some needed internal
+         * fields modified already.
+         */
+        bio_put(int_bio);
         int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-        if (!int_bio) {
-            pblk_err(pblk, "could not clone read bio\n");
-            goto fail_end_io;
-        }
-
-        rqd->bio = int_bio;
-
-        if (pblk_submit_io(pblk, rqd)) {
-            pblk_err(pblk, "read IO submission failed\n");
-            ret = NVM_IO_ERR;
-            goto fail_end_io;
-        }
-
-        return NVM_IO_OK;
+        goto split_retry;
+    } else if (pblk_submit_io(pblk, rqd)) {
+        /* Submitting IO to drive failed, let's report an error */
+        rqd->error = -ENODEV;
+        pblk_end_io_read(rqd);
     }
-
-    /* The read bio request could be partially filled by the write buffer,
-     * but there are some holes that need to be read from the drive.
-     */
-    ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap,
-                    nr_secs);
-    if (ret)
-        goto fail_meta_free;
-
-    return NVM_IO_OK;
-
-fail_meta_free:
-    nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
-fail_rqd_free:
-    pblk_free_rqd(pblk, rqd, PBLK_READ);
-    return ret;
-fail_end_io:
-    __pblk_end_io_read(pblk, rqd, false);
-    return ret;
 }

 static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
@@ -568,7 +391,7 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
         goto out;

     /* logic error: lba out-of-bounds */
-    if (lba >= pblk->rl.nr_secs) {
+    if (lba >= pblk->capacity) {
         WARN(1, "pblk: read lba out of bounds\n");
         goto out;
     }
@@ -642,7 +465,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
     if (pblk_submit_io_sync(pblk, &rqd)) {
         ret = -EIO;
-        pblk_err(pblk, "GC read request failed\n");
         goto err_free_bio;
     }
...
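The reworked pblk_submit_read() above leans on the generic block-layer split/chain mechanism instead of the old partial-read bounce path: when only the head of a bio can be resolved one way (all cache or all media), the head is split off, the remainder is re-queued, and the two are chained so the original bio completes only after both finish. A rough sketch of that idiom under assumed driver context, with illustrative names and a driver-owned bio_set (not pblk's code):

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* Keep only the first 'sectors' of 'bio' for local handling and hand
     * the remainder back to the block layer.
     */
    static struct bio *take_head(struct bio *bio, unsigned int sectors,
                                 struct bio_set *bs)
    {
        struct bio *head;

        if (sectors >= bio_sectors(bio))
            return bio;                 /* nothing to split */

        head = bio_split(bio, sectors, GFP_NOIO, bs);
        bio_chain(head, bio);           /* remainder completes after head */
        generic_make_request(bio);      /* re-queue the remainder */
        return head;
    }
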
@@ -93,10 +93,24 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
                 u64 written_secs)
 {
+    struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     int i;

     for (i = 0; i < written_secs; i += pblk->min_write_pgs)
-        pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+        __pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+
+    spin_lock(&l_mg->free_lock);
+    if (written_secs > line->left_msecs) {
+        /*
+         * We have all data sectors written
+         * and some emeta sectors written too.
+         */
+        line->left_msecs = 0;
+    } else {
+        /* We have only some data sectors written. */
+        line->left_msecs -= written_secs;
+    }
+    spin_unlock(&l_mg->free_lock);
 }

 static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
@@ -165,6 +179,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
     struct pblk_pad_rq *pad_rq;
     struct nvm_rq *rqd;
     struct bio *bio;
+    struct ppa_addr *ppa_list;
     void *data;
     __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
     u64 w_ptr = line->cur_sec;
@@ -194,7 +209,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
     rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
     if (rq_ppas < pblk->min_write_pgs) {
         pblk_err(pblk, "corrupted pad line %d\n", line->id);
-        goto fail_free_pad;
+        goto fail_complete;
     }
     rq_len = rq_ppas * geo->csecs;
@@ -203,7 +218,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
                     PBLK_VMALLOC_META, GFP_KERNEL);
     if (IS_ERR(bio)) {
         ret = PTR_ERR(bio);
-        goto fail_free_pad;
+        goto fail_complete;
     }

     bio->bi_iter.bi_sector = 0; /* internal bio */
@@ -212,8 +227,11 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
     rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);

     ret = pblk_alloc_rqd_meta(pblk, rqd);
-    if (ret)
-        goto fail_free_rqd;
+    if (ret) {
+        pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+        bio_put(bio);
+        goto fail_complete;
+    }

     rqd->bio = bio;
     rqd->opcode = NVM_OP_PWRITE;
@@ -222,6 +240,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
     rqd->end_io = pblk_end_io_recov;
     rqd->private = pad_rq;

+    ppa_list = nvm_rq_to_ppa_list(rqd);
     meta_list = rqd->meta_list;

     for (i = 0; i < rqd->nr_ppas; ) {
@@ -249,18 +268,21 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
             lba_list[w_ptr] = addr_empty;
             meta = pblk_get_meta(pblk, meta_list, i);
             meta->lba = addr_empty;
-            rqd->ppa_list[i] = dev_ppa;
+            ppa_list[i] = dev_ppa;
         }
     }

     kref_get(&pad_rq->ref);
-    pblk_down_chunk(pblk, rqd->ppa_list[0]);
+    pblk_down_chunk(pblk, ppa_list[0]);

     ret = pblk_submit_io(pblk, rqd);
     if (ret) {
         pblk_err(pblk, "I/O submission failed: %d\n", ret);
-        pblk_up_chunk(pblk, rqd->ppa_list[0]);
-        goto fail_free_rqd;
+        pblk_up_chunk(pblk, ppa_list[0]);
+        kref_put(&pad_rq->ref, pblk_recov_complete);
+        pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+        bio_put(bio);
+        goto fail_complete;
     }

     left_line_ppas -= rq_ppas;
...@@ -268,13 +290,9 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, ...@@ -268,13 +290,9 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
if (left_ppas && left_line_ppas) if (left_ppas && left_line_ppas)
goto next_pad_rq; goto next_pad_rq;
fail_complete:
kref_put(&pad_rq->ref, pblk_recov_complete); kref_put(&pad_rq->ref, pblk_recov_complete);
wait_for_completion(&pad_rq->wait);
if (!wait_for_completion_io_timeout(&pad_rq->wait,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pblk_err(pblk, "pad write timed out\n");
ret = -ETIME;
}
if (!pblk_line_is_full(line)) if (!pblk_line_is_full(line))
pblk_err(pblk, "corrupted padded line: %d\n", line->id); pblk_err(pblk, "corrupted padded line: %d\n", line->id);
...@@ -283,14 +301,6 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, ...@@ -283,14 +301,6 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
free_rq: free_rq:
kfree(pad_rq); kfree(pad_rq);
return ret; return ret;
fail_free_rqd:
pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
bio_put(bio);
fail_free_pad:
kfree(pad_rq);
vfree(data);
return ret;
} }
static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
...@@ -412,6 +422,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, ...@@ -412,6 +422,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
rqd->ppa_list = ppa_list; rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list; rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list; rqd->dma_meta_list = dma_meta_list;
ppa_list = nvm_rq_to_ppa_list(rqd);
if (pblk_io_aligned(pblk, rq_ppas)) if (pblk_io_aligned(pblk, rq_ppas))
rqd->is_seq = 1; rqd->is_seq = 1;
...@@ -430,7 +441,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, ...@@ -430,7 +441,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
} }
for (j = 0; j < pblk->min_write_pgs; j++, i++) for (j = 0; j < pblk->min_write_pgs; j++, i++)
rqd->ppa_list[i] = ppa_list[i] =
addr_to_gen_ppa(pblk, paddr + j, line->id); addr_to_gen_ppa(pblk, paddr + j, line->id);
} }
...@@ -444,7 +455,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, ...@@ -444,7 +455,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
atomic_dec(&pblk->inflight_io); atomic_dec(&pblk->inflight_io);
/* If a read fails, do a best effort by padding the line and retrying */ /* If a read fails, do a best effort by padding the line and retrying */
if (rqd->error) { if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
int pad_distance, ret; int pad_distance, ret;
if (padded) { if (padded) {
...@@ -474,11 +485,11 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, ...@@ -474,11 +485,11 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
lba_list[paddr++] = cpu_to_le64(lba); lba_list[paddr++] = cpu_to_le64(lba);
if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) if (lba == ADDR_EMPTY || lba >= pblk->capacity)
continue; continue;
line->nr_valid_lbas++; line->nr_valid_lbas++;
pblk_update_map(pblk, lba, rqd->ppa_list[i]); pblk_update_map(pblk, lba, ppa_list[i]);
} }
left_ppas -= rq_ppas; left_ppas -= rq_ppas;
...@@ -647,10 +658,12 @@ static int pblk_line_was_written(struct pblk_line *line, ...@@ -647,10 +658,12 @@ static int pblk_line_was_written(struct pblk_line *line,
bppa = pblk->luns[smeta_blk].bppa; bppa = pblk->luns[smeta_blk].bppa;
chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
if (chunk->state & NVM_CHK_ST_FREE) if (chunk->state & NVM_CHK_ST_CLOSED ||
return 0; (chunk->state & NVM_CHK_ST_OPEN
&& chunk->wp >= lm->smeta_sec))
return 1;
return 1; return 0;
} }
static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
...@@ -844,6 +857,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) ...@@ -844,6 +857,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
} else { } else {
spin_lock(&l_mg->free_lock); spin_lock(&l_mg->free_lock);
l_mg->data_line = data_line;
/* Allocate next line for preparation */ /* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk); l_mg->data_next = pblk_line_get(pblk);
if (l_mg->data_next) { if (l_mg->data_next) {
......
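The pblk recovery hunks above funnel every padding failure into a single fail_complete label: each submitted pad request takes a reference on pad_rq, failures drop that reference on the spot, and the function now waits on the completion unconditionally since PBLK_COMMAND_TIMEOUT_MS is gone. A minimal sketch of that reference-counted completion pattern, with hypothetical names (this is not the pblk code; submit_one_chunk() stands in for the asynchronous write submission):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/completion.h>

struct pad_ctx {
	struct kref ref;
	struct completion done;
};

static void pad_ctx_release(struct kref *ref)
{
	struct pad_ctx *ctx = container_of(ref, struct pad_ctx, ref);

	complete(&ctx->done);			/* last reference gone: wake the submitter */
}

/* Stand-in for an async submission. In a real driver the end_io callback
 * drops the per-chunk reference when the device completes the write; on a
 * successful "submission" we model that here. */
static int submit_one_chunk(struct pad_ctx *ctx)
{
	kref_put(&ctx->ref, pad_ctx_release);
	return 0;
}

static int pad_all_chunks(struct pad_ctx *ctx, int nr_chunks)
{
	int ret = 0;

	kref_init(&ctx->ref);			/* submitter's base reference */
	init_completion(&ctx->done);

	while (nr_chunks--) {
		kref_get(&ctx->ref);		/* one reference per in-flight chunk */
		ret = submit_one_chunk(ctx);
		if (ret) {
			/* submission never reached the device, so the
			 * submitter drops the chunk's reference itself */
			kref_put(&ctx->ref, pad_ctx_release);
			break;			/* single error path, as above */
		}
	}

	kref_put(&ctx->ref, pad_ctx_release);	/* drop the base reference */
	wait_for_completion(&ctx->done);	/* no timeout: wait for all chunks */
	return ret;
}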
...@@ -228,6 +228,7 @@ static void pblk_submit_rec(struct work_struct *work) ...@@ -228,6 +228,7 @@ static void pblk_submit_rec(struct work_struct *work)
mempool_free(recovery, &pblk->rec_pool); mempool_free(recovery, &pblk->rec_pool);
atomic_dec(&pblk->inflight_io); atomic_dec(&pblk->inflight_io);
pblk_write_kick(pblk);
} }
......
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
/* Max 512 LUNs per device */ /* Max 512 LUNs per device */
#define PBLK_MAX_LUNS_BITMAP (4) #define PBLK_MAX_LUNS_BITMAP (4)
...@@ -123,18 +121,6 @@ struct pblk_g_ctx { ...@@ -123,18 +121,6 @@ struct pblk_g_ctx {
u64 lba; u64 lba;
}; };
/* partial read context */
struct pblk_pr_ctx {
struct bio *orig_bio;
DECLARE_BITMAP(bitmap, NVM_MAX_VLBA);
unsigned int orig_nr_secs;
unsigned int bio_init_idx;
void *ppa_ptr;
dma_addr_t dma_ppa_list;
u64 lba_list_mem[NVM_MAX_VLBA];
u64 lba_list_media[NVM_MAX_VLBA];
};
/* Pad context */ /* Pad context */
struct pblk_pad_rq { struct pblk_pad_rq {
struct pblk *pblk; struct pblk *pblk;
...@@ -305,7 +291,6 @@ struct pblk_rl { ...@@ -305,7 +291,6 @@ struct pblk_rl {
struct timer_list u_timer; struct timer_list u_timer;
unsigned long long nr_secs;
unsigned long total_blocks; unsigned long total_blocks;
atomic_t free_blocks; /* Total number of free blocks (+ OP) */ atomic_t free_blocks; /* Total number of free blocks (+ OP) */
...@@ -440,6 +425,7 @@ struct pblk_smeta { ...@@ -440,6 +425,7 @@ struct pblk_smeta {
struct pblk_w_err_gc { struct pblk_w_err_gc {
int has_write_err; int has_write_err;
int has_gc_err;
__le64 *lba_list; __le64 *lba_list;
}; };
...@@ -465,7 +451,6 @@ struct pblk_line { ...@@ -465,7 +451,6 @@ struct pblk_line {
int meta_line; /* Metadata line id */ int meta_line; /* Metadata line id */
int meta_distance; /* Distance between data and metadata */ int meta_distance; /* Distance between data and metadata */
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */ u64 emeta_ssec; /* Sector where emeta starts */
unsigned int sec_in_line; /* Number of usable secs in line */ unsigned int sec_in_line; /* Number of usable secs in line */
...@@ -762,7 +747,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, ...@@ -762,7 +747,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
unsigned int pos, unsigned int nr_entries, unsigned int pos, unsigned int nr_entries,
unsigned int count); unsigned int count);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
struct ppa_addr ppa, int bio_iter, bool advanced_bio); struct ppa_addr ppa);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
...@@ -862,15 +847,15 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, ...@@ -862,15 +847,15 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
struct pblk_line *gc_line, u64 paddr); struct pblk_line *gc_line, u64 paddr);
void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
u64 *lba_list, int nr_secs); u64 *lba_list, int nr_secs);
void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
sector_t blba, int nr_secs); sector_t blba, int nr_secs, bool *from_cache);
void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
/* /*
* pblk user I/O write path * pblk user I/O write path
*/ */
int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags); unsigned long flags);
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
...@@ -896,7 +881,7 @@ void pblk_write_kick(struct pblk *pblk); ...@@ -896,7 +881,7 @@ void pblk_write_kick(struct pblk *pblk);
* pblk read path * pblk read path
*/ */
extern struct bio_set pblk_bio_set; extern struct bio_set pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio); void pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
/* /*
* pblk recovery * pblk recovery
...@@ -921,6 +906,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk); ...@@ -921,6 +906,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active); int *gc_active);
int pblk_gc_sysfs_force(struct pblk *pblk, int force); int pblk_gc_sysfs_force(struct pblk *pblk, int force);
void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line);
/* /*
* pblk rate limiter * pblk rate limiter
......
...@@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return 0; return 0;
} }
effects |= nvme_known_admin_effects(opcode);
if (ctrl->effects) if (ctrl->effects)
effects = le32_to_cpu(ctrl->effects->acs[opcode]); effects = le32_to_cpu(ctrl->effects->acs[opcode]);
else
effects = nvme_known_admin_effects(opcode);
/* /*
* For simplicity, IO to all namespaces is quiesced even if the command * For simplicity, IO to all namespaces is quiesced even if the command
...@@ -2342,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { ...@@ -2342,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
NULL, NULL,
}; };
static int nvme_active_ctrls(struct nvme_subsystem *subsys) static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{ {
int count = 0; struct nvme_ctrl *tmp;
struct nvme_ctrl *ctrl;
lockdep_assert_held(&nvme_subsystems_lock);
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
if (ctrl->state == NVME_CTRL_DELETING ||
ctrl->state == NVME_CTRL_DEAD)
continue;
if (tmp->cntlid == ctrl->cntlid) {
dev_err(ctrl->device,
"Duplicate cntlid %u with %s, rejecting\n",
ctrl->cntlid, dev_name(tmp->device));
return false;
}
mutex_lock(&subsys->lock); if ((id->cmic & (1 << 1)) ||
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { (ctrl->opts && ctrl->opts->discovery_nqn))
if (ctrl->state != NVME_CTRL_DELETING && continue;
ctrl->state != NVME_CTRL_DEAD)
count++; dev_err(ctrl->device,
"Subsystem does not support multiple controllers\n");
return false;
} }
mutex_unlock(&subsys->lock);
return count; return true;
} }
static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
...@@ -2395,22 +2409,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ...@@ -2395,22 +2409,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
mutex_lock(&nvme_subsystems_lock); mutex_lock(&nvme_subsystems_lock);
found = __nvme_find_get_subsystem(subsys->subnqn); found = __nvme_find_get_subsystem(subsys->subnqn);
if (found) { if (found) {
/*
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
nvme_put_subsystem(found);
ret = -EINVAL;
goto out_unlock;
}
__nvme_release_subsystem(subsys); __nvme_release_subsystem(subsys);
subsys = found; subsys = found;
if (!nvme_validate_cntlid(subsys, ctrl, id)) {
ret = -EINVAL;
goto out_put_subsystem;
}
} else { } else {
ret = device_add(&subsys->dev); ret = device_add(&subsys->dev);
if (ret) { if (ret) {
...@@ -2422,23 +2427,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ...@@ -2422,23 +2427,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
list_add_tail(&subsys->entry, &nvme_subsystems); list_add_tail(&subsys->entry, &nvme_subsystems);
} }
ctrl->subsys = subsys;
mutex_unlock(&nvme_subsystems_lock);
if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
dev_name(ctrl->device))) { dev_name(ctrl->device))) {
dev_err(ctrl->device, dev_err(ctrl->device,
"failed to create sysfs link from subsystem.\n"); "failed to create sysfs link from subsystem.\n");
/* the transport driver will eventually put the subsystem */ goto out_put_subsystem;
return -EINVAL;
} }
mutex_lock(&subsys->lock); ctrl->subsys = subsys;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
mutex_unlock(&subsys->lock); mutex_unlock(&nvme_subsystems_lock);
return 0; return 0;
out_put_subsystem:
nvme_put_subsystem(subsys);
out_unlock: out_unlock:
mutex_unlock(&nvme_subsystems_lock); mutex_unlock(&nvme_subsystems_lock);
put_device(&subsys->dev); put_device(&subsys->dev);
...@@ -3605,19 +3607,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) ...@@ -3605,19 +3607,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{ {
u32 aer_notice_type = (result & 0xff00) >> 8; u32 aer_notice_type = (result & 0xff00) >> 8;
trace_nvme_async_event(ctrl, aer_notice_type);
switch (aer_notice_type) { switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED: case NVME_AER_NOTICE_NS_CHANGED:
trace_nvme_async_event(ctrl, aer_notice_type);
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
nvme_queue_scan(ctrl); nvme_queue_scan(ctrl);
break; break;
case NVME_AER_NOTICE_FW_ACT_STARTING: case NVME_AER_NOTICE_FW_ACT_STARTING:
trace_nvme_async_event(ctrl, aer_notice_type);
queue_work(nvme_wq, &ctrl->fw_act_work); queue_work(nvme_wq, &ctrl->fw_act_work);
break; break;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA: case NVME_AER_NOTICE_ANA:
trace_nvme_async_event(ctrl, aer_notice_type);
if (!ctrl->ana_log_buf) if (!ctrl->ana_log_buf)
break; break;
queue_work(nvme_wq, &ctrl->ana_work); queue_work(nvme_wq, &ctrl->ana_work);
...@@ -3696,10 +3697,10 @@ static void nvme_free_ctrl(struct device *dev) ...@@ -3696,10 +3697,10 @@ static void nvme_free_ctrl(struct device *dev)
__free_page(ctrl->discard_page); __free_page(ctrl->discard_page);
if (subsys) { if (subsys) {
mutex_lock(&subsys->lock); mutex_lock(&nvme_subsystems_lock);
list_del(&ctrl->subsys_entry); list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
mutex_unlock(&nvme_subsystems_lock);
} }
ctrl->ops->free_ctrl(ctrl); ctrl->ops->free_ctrl(ctrl);
......
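The core.c changes above move duplicate-controller detection into nvme_validate_cntlid(), which runs with nvme_subsystems_lock held and walks the subsystem's controller list before the new controller is added to it, and the controller list membership itself now changes under that same global lock. A generic sketch of a lock-annotated duplicate check of this kind, using illustrative names rather than the nvme ones:

#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/lockdep.h>

static DEFINE_MUTEX(registry_lock);

struct instance {
	u16 id;
	struct list_head entry;
};

static bool id_is_unique(struct list_head *instances, u16 new_id)
{
	struct instance *tmp;

	lockdep_assert_held(&registry_lock);	/* caller must hold the global lock */

	list_for_each_entry(tmp, instances, entry)
		if (tmp->id == new_id)
			return false;		/* reject the duplicate */
	return true;
}

static int register_instance(struct list_head *instances, struct instance *new)
{
	int ret = 0;

	mutex_lock(&registry_lock);
	if (id_is_unique(instances, new->id))
		list_add_tail(&new->entry, instances);	/* membership changes under the same lock */
	else
		ret = -EINVAL;
	mutex_unlock(&registry_lock);
	return ret;
}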
...@@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); ...@@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
NVMF_OPT_DISABLE_SQFLOW) NVMF_OPT_DISABLE_SQFLOW)
static struct nvme_ctrl * static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) nvmf_create_ctrl(struct device *dev, const char *buf)
{ {
struct nvmf_ctrl_options *opts; struct nvmf_ctrl_options *opts;
struct nvmf_transport_ops *ops; struct nvmf_transport_ops *ops;
...@@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf, ...@@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
goto out_unlock; goto out_unlock;
} }
ctrl = nvmf_create_ctrl(nvmf_device, buf, count); ctrl = nvmf_create_ctrl(nvmf_device, buf);
if (IS_ERR(ctrl)) { if (IS_ERR(ctrl)) {
ret = PTR_ERR(ctrl); ret = PTR_ERR(ctrl);
goto out_unlock; goto out_unlock;
......
...@@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list); ...@@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt);
static struct workqueue_struct *nvme_fc_wq;
/* /*
* These items are short-term. They will eventually be moved into * These items are short-term. They will eventually be moved into
...@@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) ...@@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
*/ */
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
active = atomic_xchg(&ctrl->err_work_active, 1); active = atomic_xchg(&ctrl->err_work_active, 1);
if (!active && !schedule_work(&ctrl->err_work)) { if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
atomic_set(&ctrl->err_work_active, 0); atomic_set(&ctrl->err_work_active, 0);
WARN_ON(1); WARN_ON(1);
} }
...@@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void) ...@@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void)
{ {
int ret; int ret;
nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
if (!nvme_fc_wq)
return -ENOMEM;
/* /*
* NOTE: * NOTE:
* It is expected that in the future the kernel will combine * It is expected that in the future the kernel will combine
...@@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void) ...@@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void)
ret = class_register(&fc_class); ret = class_register(&fc_class);
if (ret) { if (ret) {
pr_err("couldn't register class fc\n"); pr_err("couldn't register class fc\n");
return ret; goto out_destroy_wq;
} }
/* /*
...@@ -3440,6 +3444,9 @@ static int __init nvme_fc_init_module(void) ...@@ -3440,6 +3444,9 @@ static int __init nvme_fc_init_module(void)
device_destroy(&fc_class, MKDEV(0, 0)); device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class: out_destroy_class:
class_unregister(&fc_class); class_unregister(&fc_class);
out_destroy_wq:
destroy_workqueue(nvme_fc_wq);
return ret; return ret;
} }
...@@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void) ...@@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void)
device_destroy(&fc_class, MKDEV(0, 0)); device_destroy(&fc_class, MKDEV(0, 0));
class_unregister(&fc_class); class_unregister(&fc_class);
destroy_workqueue(nvme_fc_wq);
} }
module_init(nvme_fc_init_module); module_init(nvme_fc_init_module);
......
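nvme-fc now allocates a dedicated WQ_MEM_RECLAIM workqueue at module init and queues its error-handling work there instead of using schedule_work(); flushing work that sits on the non-reclaim system workqueue from a WQ_MEM_RECLAIM context trips a workqueue warning. A small module-style sketch of the same pattern, with illustrative names:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static void my_err_work_fn(struct work_struct *work)
{
	/* error handling that may be flushed from a reclaim-safe context */
}

static DECLARE_WORK(my_err_work, my_err_work_fn);

static int __init my_init(void)
{
	/* A WQ_MEM_RECLAIM queue has a rescuer thread, so work queued here
	 * can be flushed from another WQ_MEM_RECLAIM context without
	 * triggering the flush-dependency warning that system_wq work would. */
	my_wq = alloc_workqueue("my_wq", WQ_MEM_RECLAIM, 0);
	if (!my_wq)
		return -ENOMEM;

	queue_work(my_wq, &my_err_work);
	return 0;
}

static void __exit my_exit(void)
{
	destroy_workqueue(my_wq);	/* drains pending work, then frees the queue */
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");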
...@@ -977,6 +977,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) ...@@ -977,6 +977,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo->csecs = 1 << ns->lba_shift; geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms; geo->sos = ns->ms;
geo->ext = ns->ext; geo->ext = ns->ext;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q; dev->q = q;
memcpy(dev->name, disk_name, DISK_NAME_LEN); memcpy(dev->name, disk_name, DISK_NAME_LEN);
......
...@@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, ...@@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
} else if (ns->head->disk) { } else if (ns->head->disk) {
sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
ctrl->cntlid, ns->head->instance); ctrl->instance, ns->head->instance);
*flags = GENHD_FL_HIDDEN; *flags = GENHD_FL_HIDDEN;
} else { } else {
sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
......
...@@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) ...@@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
switch (dev->ctrl.state) { switch (dev->ctrl.state) {
case NVME_CTRL_DELETING: case NVME_CTRL_DELETING:
shutdown = true; shutdown = true;
/* fall through */
case NVME_CTRL_CONNECTING: case NVME_CTRL_CONNECTING:
case NVME_CTRL_RESETTING: case NVME_CTRL_RESETTING:
dev_warn_ratelimited(dev->ctrl.device, dev_warn_ratelimited(dev->ctrl.device,
...@@ -2280,8 +2281,6 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -2280,8 +2281,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
return ret; return ret;
} }
dev->ctrl.tagset = &dev->tagset; dev->ctrl.tagset = &dev->tagset;
nvme_dbbuf_set(dev);
} else { } else {
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
...@@ -2289,6 +2288,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -2289,6 +2288,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
nvme_free_queues(dev, dev->online_queues); nvme_free_queues(dev, dev->online_queues);
} }
nvme_dbbuf_set(dev);
return 0; return 0;
} }
......
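The /* fall through */ comment added above documents an intentional case fall-through in the timeout handler so compilers that accept the comment form of the fall-through annotation do not flag it. A generic illustration of the construct, with hypothetical state names and a stub helper (not the nvme-pci code):

#include <linux/types.h>

enum ctrl_state { ST_DELETING, ST_CONNECTING, ST_RESETTING, ST_LIVE };

/* Stand-in for the disable/shutdown path shared by the states below. */
static void disable_ctrl(bool shutdown)
{
}

static bool handle_timeout(enum ctrl_state state)
{
	bool shutdown = false;

	switch (state) {
	case ST_DELETING:
		shutdown = true;
		/* fall through */
	case ST_CONNECTING:
	case ST_RESETTING:
		/* ST_DELETING reaches here too, but with shutdown == true */
		disable_ctrl(shutdown);
		return true;
	default:
		return false;
	}
}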
...@@ -697,15 +697,6 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) ...@@ -697,15 +697,6 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
return ret; return ret;
} }
static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
blk_mq_free_tag_set(set);
nvme_rdma_dev_put(ctrl->device);
}
static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
bool admin) bool admin)
{ {
...@@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, ...@@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
ret = blk_mq_alloc_tag_set(set); ret = blk_mq_alloc_tag_set(set);
if (ret) if (ret)
goto out; return ERR_PTR(ret);
/*
* We need a reference on the device as long as the tag_set is alive,
* as the MRs in the request structures need a valid ib_device.
*/
ret = nvme_rdma_dev_get(ctrl->device);
if (!ret) {
ret = -EINVAL;
goto out_free_tagset;
}
return set; return set;
out_free_tagset:
blk_mq_free_tag_set(set);
out:
return ERR_PTR(ret);
} }
static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
...@@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, ...@@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
{ {
if (remove) { if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.admin_q);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
} }
if (ctrl->async_event_sqe.data) { if (ctrl->async_event_sqe.data) {
nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
...@@ -847,7 +823,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ...@@ -847,7 +823,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
blk_cleanup_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset: out_free_tagset:
if (new) if (new)
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
out_free_async_qe: out_free_async_qe:
nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE); sizeof(struct nvme_command), DMA_TO_DEVICE);
...@@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, ...@@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
{ {
if (remove) { if (remove) {
blk_cleanup_queue(ctrl->ctrl.connect_q); blk_cleanup_queue(ctrl->ctrl.connect_q);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); blk_mq_free_tag_set(ctrl->ctrl.tagset);
} }
nvme_rdma_free_io_queues(ctrl); nvme_rdma_free_io_queues(ctrl);
} }
...@@ -903,7 +879,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -903,7 +879,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
blk_cleanup_queue(ctrl->ctrl.connect_q); blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set: out_free_tag_set:
if (new) if (new)
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); blk_mq_free_tag_set(ctrl->ctrl.tagset);
out_free_io_queues: out_free_io_queues:
nvme_rdma_free_io_queues(ctrl); nvme_rdma_free_io_queues(ctrl);
return ret; return ret;
......
...@@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event, ...@@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event,
aer_name(NVME_AER_NOTICE_NS_CHANGED), aer_name(NVME_AER_NOTICE_NS_CHANGED),
aer_name(NVME_AER_NOTICE_ANA), aer_name(NVME_AER_NOTICE_ANA),
aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
aer_name(NVME_AER_NOTICE_DISC_CHANGED),
aer_name(NVME_AER_ERROR), aer_name(NVME_AER_ERROR),
aer_name(NVME_AER_SMART), aer_name(NVME_AER_SMART),
aer_name(NVME_AER_CSS), aer_name(NVME_AER_CSS),
......
...@@ -3827,7 +3827,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, ...@@ -3827,7 +3827,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
if ((start_padding_sectors || end_padding_sectors) && if ((start_padding_sectors || end_padding_sectors) &&
(rq_data_dir(req) == WRITE)) { (rq_data_dir(req) == WRITE)) {
DBF_DEV_EVENT(DBF_ERR, basedev, DBF_DEV_EVENT(DBF_ERR, basedev,
"raw write not track aligned (%lu,%lu) req %p", "raw write not track aligned (%llu,%llu) req %p",
start_padding_sectors, end_padding_sectors, req); start_padding_sectors, end_padding_sectors, req);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
......
...@@ -358,6 +358,7 @@ struct nvm_geo { ...@@ -358,6 +358,7 @@ struct nvm_geo {
u16 csecs; /* sector size */ u16 csecs; /* sector size */
u16 sos; /* out-of-band area size */ u16 sos; /* out-of-band area size */
bool ext; /* metadata in extended data buffer */ bool ext; /* metadata in extended data buffer */
u32 mdts; /* Max data transfer size*/
/* device write constrains */ /* device write constrains */
u32 ws_min; /* minimum write size */ u32 ws_min; /* minimum write size */
...@@ -427,6 +428,7 @@ struct nvm_dev { ...@@ -427,6 +428,7 @@ struct nvm_dev {
char name[DISK_NAME_LEN]; char name[DISK_NAME_LEN];
void *private_data; void *private_data;
struct kref ref;
void *rmap; void *rmap;
struct mutex mlock; struct mutex mlock;
......
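The struct kref added to struct nvm_dev above lets lightnvm stop tearing an instance down under the global lock and rely on reference counting for the device's lifetime instead. A generic sketch of that ownership model, again with illustrative names:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct dev_obj {
	struct kref ref;
	/* ... device state ... */
};

static void dev_obj_release(struct kref *ref)
{
	struct dev_obj *obj = container_of(ref, struct dev_obj, ref);

	kfree(obj);		/* runs only after the last user drops its reference */
}

static struct dev_obj *dev_obj_create(void)
{
	struct dev_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->ref);	/* refcount starts at 1 for the creator */
	return obj;
}

/* Users take a reference while working on the object outside any global lock... */
static void dev_obj_get(struct dev_obj *obj)
{
	kref_get(&obj->ref);
}

/* ...and the object is freed only when the final reference is dropped. */
static void dev_obj_put(struct dev_obj *obj)
{
	kref_put(&obj->ref, dev_obj_release);
}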
...@@ -1246,9 +1246,9 @@ enum { ...@@ -1246,9 +1246,9 @@ enum {
NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110,
NVME_SC_FW_NEEDS_RESET = 0x111, NVME_SC_FW_NEEDS_RESET = 0x111,
NVME_SC_FW_NEEDS_MAX_TIME = 0x112, NVME_SC_FW_NEEDS_MAX_TIME = 0x112,
NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113,
NVME_SC_OVERLAPPING_RANGE = 0x114, NVME_SC_OVERLAPPING_RANGE = 0x114,
NVME_SC_NS_INSUFFICENT_CAP = 0x115, NVME_SC_NS_INSUFFICIENT_CAP = 0x115,
NVME_SC_NS_ID_UNAVAILABLE = 0x116, NVME_SC_NS_ID_UNAVAILABLE = 0x116,
NVME_SC_NS_ALREADY_ATTACHED = 0x118, NVME_SC_NS_ALREADY_ATTACHED = 0x118,
NVME_SC_NS_IS_PRIVATE = 0x119, NVME_SC_NS_IS_PRIVATE = 0x119,
......