Commit 1d02369d authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Final round of fixes for this merge window - some of this has come up
  after the initial pull request, and some of it was put in a post-merge
  branch before the merge window.

  This contains:

   - Fix for a bad check for an error on dma mapping in the mtip32xx
     driver, from Alexey Khoroshilov.

   - A set of fixes for lightnvm, from Javier, Matias, and Wenwei.

   - An NVMe completion record corruption fix from Marta, ensuring that
     we read things in the right order.

   - Two writeback fixes from Tejun, marked for stable@ as well.

   - A blk-mq sw queue iterator fix from Thomas, fixing an oops for
     sparse CPU maps.  They hit this in the hot plug/unplug rework"

* 'for-linus' of git://git.kernel.dk/linux-block:
  nvme: avoid cqe corruption when update at the same time as read
  writeback, cgroup: fix use of the wrong bdi_writeback which mismatches the inode
  writeback, cgroup: fix premature wb_put() in locked_inode_to_wb_and_lock_list()
  blk-mq: Use proper cpumask iterator
  mtip32xx: fix checks for dma mapping errors
  lightnvm: do not load L2P table if not supported
  lightnvm: do not reserve lun on l2p loading
  nvme: lightnvm: return ppa completion status
  lightnvm: add a bitmap of luns
  lightnvm: specify target's logical address area
  null_blk: add lightnvm null_blk device to the nullb_list
parents 8f40842e d783e0bd
...@@ -416,12 +416,14 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) ...@@ -416,12 +416,14 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
static void blk_mq_sysfs_init(struct request_queue *q) static void blk_mq_sysfs_init(struct request_queue *q)
{ {
struct blk_mq_ctx *ctx; struct blk_mq_ctx *ctx;
int i; int cpu;
kobject_init(&q->mq_kobj, &blk_mq_ktype); kobject_init(&q->mq_kobj, &blk_mq_ktype);
queue_for_each_ctx(q, ctx, i) for_each_possible_cpu(cpu) {
ctx = per_cpu_ptr(q->queue_ctx, cpu);
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
} }
int blk_mq_register_disk(struct gendisk *disk) int blk_mq_register_disk(struct gendisk *disk)
......
...@@ -1798,11 +1798,12 @@ static void blk_mq_map_swqueue(struct request_queue *q, ...@@ -1798,11 +1798,12 @@ static void blk_mq_map_swqueue(struct request_queue *q,
/* /*
* Map software to hardware queues * Map software to hardware queues
*/ */
queue_for_each_ctx(q, ctx, i) { for_each_possible_cpu(i) {
/* If the cpu isn't online, the cpu is mapped to first hctx */ /* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpumask_test_cpu(i, online_mask)) if (!cpumask_test_cpu(i, online_mask))
continue; continue;
ctx = per_cpu_ptr(q->queue_ctx, i);
hctx = q->mq_ops->map_queue(q, i); hctx = q->mq_ops->map_queue(q, i);
cpumask_set_cpu(i, hctx->cpumask); cpumask_set_cpu(i, hctx->cpumask);
......
...@@ -2051,7 +2051,7 @@ static int exec_drive_taskfile(struct driver_data *dd, ...@@ -2051,7 +2051,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
outbuf, outbuf,
taskout, taskout,
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (outbuf_dma == 0) { if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) {
err = -ENOMEM; err = -ENOMEM;
goto abort; goto abort;
} }
...@@ -2068,7 +2068,7 @@ static int exec_drive_taskfile(struct driver_data *dd, ...@@ -2068,7 +2068,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
inbuf_dma = pci_map_single(dd->pdev, inbuf_dma = pci_map_single(dd->pdev,
inbuf, inbuf,
taskin, DMA_FROM_DEVICE); taskin, DMA_FROM_DEVICE);
if (inbuf_dma == 0) { if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) {
err = -ENOMEM; err = -ENOMEM;
goto abort; goto abort;
} }
......
...@@ -742,10 +742,11 @@ static int null_add_dev(void) ...@@ -742,10 +742,11 @@ static int null_add_dev(void)
add_disk(disk); add_disk(disk);
done:
mutex_lock(&lock); mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list); list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock); mutex_unlock(&lock);
done:
return 0; return 0;
out_cleanup_lightnvm: out_cleanup_lightnvm:
......
...@@ -464,8 +464,13 @@ static int nvm_core_init(struct nvm_dev *dev) ...@@ -464,8 +464,13 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls; dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
dev->total_secs = dev->nr_luns * dev->sec_per_lun; dev->total_secs = dev->nr_luns * dev->sec_per_lun;
dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
sizeof(unsigned long), GFP_KERNEL);
if (!dev->lun_map)
return -ENOMEM;
INIT_LIST_HEAD(&dev->online_targets); INIT_LIST_HEAD(&dev->online_targets);
mutex_init(&dev->mlock); mutex_init(&dev->mlock);
spin_lock_init(&dev->lock);
return 0; return 0;
} }
...@@ -585,6 +590,7 @@ int nvm_register(struct request_queue *q, char *disk_name, ...@@ -585,6 +590,7 @@ int nvm_register(struct request_queue *q, char *disk_name,
return 0; return 0;
err_init: err_init:
kfree(dev->lun_map);
kfree(dev); kfree(dev);
return ret; return ret;
} }
...@@ -607,6 +613,7 @@ void nvm_unregister(char *disk_name) ...@@ -607,6 +613,7 @@ void nvm_unregister(char *disk_name)
up_write(&nvm_lock); up_write(&nvm_lock);
nvm_exit(dev); nvm_exit(dev);
kfree(dev->lun_map);
kfree(dev); kfree(dev);
} }
EXPORT_SYMBOL(nvm_unregister); EXPORT_SYMBOL(nvm_unregister);
......
...@@ -20,6 +20,68 @@ ...@@ -20,6 +20,68 @@
#include "gennvm.h" #include "gennvm.h"
/*
 * Reserve a logical address range of @len sectors on @dev.
 *
 * The manager's area list is kept in sorted order; it is walked front to
 * back and the first gap large enough for the request is taken (first fit).
 * On success the chosen start is stored in *@lba and the new range is
 * linked into the list.
 *
 * Returns 0 on success, -EINVAL if the request does not fit within
 * max_sectors, or -ENOMEM if the bookkeeping entry cannot be allocated.
 */
static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
{
	struct gen_nvm *gn = dev->mp;
	struct gennvm_area *new_area, *cur;
	struct list_head *insert_after = &gn->area_list;
	sector_t start = 0;
	/* sec_size * total_secs scaled down by 512 (>> 9) */
	sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9;

	if (len > max_sectors)
		return -EINVAL;

	/* allocated before dev->lock is taken */
	new_area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL);
	if (!new_area)
		return -ENOMEM;

	spin_lock(&dev->lock);

	list_for_each_entry(cur, &gn->area_list, list) {
		/* the gap in front of @cur is big enough: stop searching */
		if (start + len <= cur->begin)
			break;

		/* otherwise the next candidate starts right behind @cur */
		start = cur->end;
		insert_after = &cur->list;
	}

	if (start + len > max_sectors) {
		spin_unlock(&dev->lock);
		kfree(new_area);
		return -EINVAL;
	}

	*lba = start;
	new_area->begin = start;
	new_area->end = start + len;
	/* insert behind the last area we skipped (or at the head) */
	list_add(&new_area->list, insert_after);

	spin_unlock(&dev->lock);

	return 0;
}
/*
 * Release the area that starts at @begin.
 *
 * The first list entry whose ->begin equals @begin is unlinked under
 * dev->lock and freed after the lock is dropped.  Nothing happens if no
 * entry matches.
 */
static void gennvm_put_area(struct nvm_dev *dev, sector_t begin)
{
	struct gen_nvm *gn = dev->mp;
	struct gennvm_area *cur, *victim = NULL;

	spin_lock(&dev->lock);
	list_for_each_entry(cur, &gn->area_list, list) {
		if (cur->begin == begin) {
			victim = cur;
			list_del(&victim->list);
			break;
		}
	}
	spin_unlock(&dev->lock);

	/* kfree(NULL) is a no-op, so the "not found" case needs no branch */
	kfree(victim);
}
static void gennvm_blocks_free(struct nvm_dev *dev) static void gennvm_blocks_free(struct nvm_dev *dev)
{ {
struct gen_nvm *gn = dev->mp; struct gen_nvm *gn = dev->mp;
...@@ -195,7 +257,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) ...@@ -195,7 +257,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
} }
} }
if (dev->ops->get_l2p_tbl) { if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) {
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
gennvm_block_map, dev); gennvm_block_map, dev);
if (ret) { if (ret) {
...@@ -229,6 +291,7 @@ static int gennvm_register(struct nvm_dev *dev) ...@@ -229,6 +291,7 @@ static int gennvm_register(struct nvm_dev *dev)
gn->dev = dev; gn->dev = dev;
gn->nr_luns = dev->nr_luns; gn->nr_luns = dev->nr_luns;
INIT_LIST_HEAD(&gn->area_list);
dev->mp = gn; dev->mp = gn;
ret = gennvm_luns_init(dev, gn); ret = gennvm_luns_init(dev, gn);
...@@ -419,10 +482,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, ...@@ -419,10 +482,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
return nvm_erase_ppa(dev, &addr, 1); return nvm_erase_ppa(dev, &addr, 1);
} }
static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid)
{
return test_and_set_bit(lunid, dev->lun_map);
}
/*
 * Clear the reservation bit of LUN @lunid in dev->lun_map.
 *
 * Warns if the bit was not set, i.e. the LUN was not reserved.
 */
static void gennvm_release_lun(struct nvm_dev *dev, int lunid)
{
	int was_reserved = test_and_clear_bit(lunid, dev->lun_map);

	WARN_ON(!was_reserved);
}
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid) static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
{ {
struct gen_nvm *gn = dev->mp; struct gen_nvm *gn = dev->mp;
if (unlikely(lunid >= dev->nr_luns))
return NULL;
return &gn->luns[lunid].vlun; return &gn->luns[lunid].vlun;
} }
...@@ -464,7 +540,13 @@ static struct nvmm_type gennvm = { ...@@ -464,7 +540,13 @@ static struct nvmm_type gennvm = {
.erase_blk = gennvm_erase_blk, .erase_blk = gennvm_erase_blk,
.get_lun = gennvm_get_lun, .get_lun = gennvm_get_lun,
.reserve_lun = gennvm_reserve_lun,
.release_lun = gennvm_release_lun,
.lun_info_print = gennvm_lun_info_print, .lun_info_print = gennvm_lun_info_print,
.get_area = gennvm_get_area,
.put_area = gennvm_put_area,
}; };
static int __init gennvm_module_init(void) static int __init gennvm_module_init(void)
......
...@@ -39,8 +39,14 @@ struct gen_nvm { ...@@ -39,8 +39,14 @@ struct gen_nvm {
int nr_luns; int nr_luns;
struct gen_lun *luns; struct gen_lun *luns;
struct list_head area_list;
}; };
/*
 * One reserved logical address range handed out by gennvm_get_area().
 * Entries are linked into gen_nvm->area_list, which gennvm_get_area()
 * keeps in sorted order, and are removed again by gennvm_put_area().
 */
struct gennvm_area {
/* link in gen_nvm->area_list */
struct list_head list;
/* first sector of the range; also the key gennvm_put_area() matches on */
sector_t begin;
sector_t end; /* end is excluded */
};
#define gennvm_for_each_lun(bm, lun, i) \ #define gennvm_for_each_lun(bm, lun, i) \
for ((i) = 0, lun = &(bm)->luns[0]; \ for ((i) = 0, lun = &(bm)->luns[0]; \
(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
......
...@@ -965,25 +965,11 @@ static void rrpc_requeue(struct work_struct *work) ...@@ -965,25 +965,11 @@ static void rrpc_requeue(struct work_struct *work)
static void rrpc_gc_free(struct rrpc *rrpc) static void rrpc_gc_free(struct rrpc *rrpc)
{ {
struct rrpc_lun *rlun;
int i;
if (rrpc->krqd_wq) if (rrpc->krqd_wq)
destroy_workqueue(rrpc->krqd_wq); destroy_workqueue(rrpc->krqd_wq);
if (rrpc->kgc_wq) if (rrpc->kgc_wq)
destroy_workqueue(rrpc->kgc_wq); destroy_workqueue(rrpc->kgc_wq);
if (!rrpc->luns)
return;
for (i = 0; i < rrpc->nr_luns; i++) {
rlun = &rrpc->luns[i];
if (!rlun->blocks)
break;
vfree(rlun->blocks);
}
} }
static int rrpc_gc_init(struct rrpc *rrpc) static int rrpc_gc_init(struct rrpc *rrpc)
...@@ -1053,8 +1039,11 @@ static int rrpc_map_init(struct rrpc *rrpc) ...@@ -1053,8 +1039,11 @@ static int rrpc_map_init(struct rrpc *rrpc)
{ {
struct nvm_dev *dev = rrpc->dev; struct nvm_dev *dev = rrpc->dev;
sector_t i; sector_t i;
u64 slba;
int ret; int ret;
slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9);
rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
if (!rrpc->trans_map) if (!rrpc->trans_map)
return -ENOMEM; return -ENOMEM;
...@@ -1076,7 +1065,7 @@ static int rrpc_map_init(struct rrpc *rrpc) ...@@ -1076,7 +1065,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
return 0; return 0;
/* Bring up the mapping table from device */ /* Bring up the mapping table from device */
ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update, ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update,
rrpc); rrpc);
if (ret) { if (ret) {
pr_err("nvm: rrpc: could not read L2P table.\n"); pr_err("nvm: rrpc: could not read L2P table.\n");
...@@ -1086,7 +1075,6 @@ static int rrpc_map_init(struct rrpc *rrpc) ...@@ -1086,7 +1075,6 @@ static int rrpc_map_init(struct rrpc *rrpc)
return 0; return 0;
} }
/* Minimum pages needed within a lun */ /* Minimum pages needed within a lun */
#define PAGE_POOL_SIZE 16 #define PAGE_POOL_SIZE 16
#define ADDR_POOL_SIZE 64 #define ADDR_POOL_SIZE 64
...@@ -1141,6 +1129,23 @@ static void rrpc_core_free(struct rrpc *rrpc) ...@@ -1141,6 +1129,23 @@ static void rrpc_core_free(struct rrpc *rrpc)
static void rrpc_luns_free(struct rrpc *rrpc) static void rrpc_luns_free(struct rrpc *rrpc)
{ {
struct nvm_dev *dev = rrpc->dev;
struct nvm_lun *lun;
struct rrpc_lun *rlun;
int i;
if (!rrpc->luns)
return;
for (i = 0; i < rrpc->nr_luns; i++) {
rlun = &rrpc->luns[i];
lun = rlun->parent;
if (!lun)
break;
dev->mt->release_lun(dev, lun->id);
vfree(rlun->blocks);
}
kfree(rrpc->luns); kfree(rrpc->luns);
} }
...@@ -1148,7 +1153,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) ...@@ -1148,7 +1153,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
{ {
struct nvm_dev *dev = rrpc->dev; struct nvm_dev *dev = rrpc->dev;
struct rrpc_lun *rlun; struct rrpc_lun *rlun;
int i, j; int i, j, ret = -EINVAL;
if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
pr_err("rrpc: number of pages per block too high."); pr_err("rrpc: number of pages per block too high.");
...@@ -1164,25 +1169,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) ...@@ -1164,25 +1169,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
/* 1:1 mapping */ /* 1:1 mapping */
for (i = 0; i < rrpc->nr_luns; i++) { for (i = 0; i < rrpc->nr_luns; i++) {
struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i); int lunid = lun_begin + i;
struct nvm_lun *lun;
rlun = &rrpc->luns[i];
rlun->rrpc = rrpc;
rlun->parent = lun;
INIT_LIST_HEAD(&rlun->prio_list);
INIT_LIST_HEAD(&rlun->open_list);
INIT_LIST_HEAD(&rlun->closed_list);
INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); if (dev->mt->reserve_lun(dev, lunid)) {
spin_lock_init(&rlun->lock); pr_err("rrpc: lun %u is already allocated\n", lunid);
goto err;
}
rrpc->total_blocks += dev->blks_per_lun; lun = dev->mt->get_lun(dev, lunid);
rrpc->nr_sects += dev->sec_per_lun; if (!lun)
goto err;
rlun = &rrpc->luns[i];
rlun->parent = lun;
rlun->blocks = vzalloc(sizeof(struct rrpc_block) * rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
rrpc->dev->blks_per_lun); rrpc->dev->blks_per_lun);
if (!rlun->blocks) if (!rlun->blocks) {
ret = -ENOMEM;
goto err; goto err;
}
for (j = 0; j < rrpc->dev->blks_per_lun; j++) { for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
struct rrpc_block *rblk = &rlun->blocks[j]; struct rrpc_block *rblk = &rlun->blocks[j];
...@@ -1193,11 +1199,43 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) ...@@ -1193,11 +1199,43 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
INIT_LIST_HEAD(&rblk->prio); INIT_LIST_HEAD(&rblk->prio);
spin_lock_init(&rblk->lock); spin_lock_init(&rblk->lock);
} }
rlun->rrpc = rrpc;
INIT_LIST_HEAD(&rlun->prio_list);
INIT_LIST_HEAD(&rlun->open_list);
INIT_LIST_HEAD(&rlun->closed_list);
INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
spin_lock_init(&rlun->lock);
rrpc->total_blocks += dev->blks_per_lun;
rrpc->nr_sects += dev->sec_per_lun;
} }
return 0; return 0;
err: err:
return -ENOMEM; return ret;
}
/*
 * Ask the media manager for a logical address area large enough for this
 * target.  The size is rrpc->nr_sects * dev->sec_size, scaled down by 512.
 *
 * Returns the result of mt->get_area(): 0 on success, with the start of the
 * area stored in *begin.
 *
 * NOTE(review): rrpc->nr_sects is accumulated in rrpc_luns_init(), which
 * rrpc_init() calls after this function -- confirm nr_sects is populated
 * before this runs, otherwise the requested size is 0.
 */
static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
{
	struct nvm_dev *dev = rrpc->dev;
	sector_t area_size = rrpc->nr_sects * dev->sec_size;

	return dev->mt->get_area(dev, begin, area_size >> 9);
}
/*
 * Hand the target's logical address area, identified by its start offset
 * rrpc->soffset, back to the media manager.
 */
static void rrpc_area_free(struct rrpc *rrpc)
{
	struct nvm_dev *dev = rrpc->dev;

	dev->mt->put_area(dev, rrpc->soffset);
}
static void rrpc_free(struct rrpc *rrpc) static void rrpc_free(struct rrpc *rrpc)
...@@ -1206,6 +1244,7 @@ static void rrpc_free(struct rrpc *rrpc) ...@@ -1206,6 +1244,7 @@ static void rrpc_free(struct rrpc *rrpc)
rrpc_map_free(rrpc); rrpc_map_free(rrpc);
rrpc_core_free(rrpc); rrpc_core_free(rrpc);
rrpc_luns_free(rrpc); rrpc_luns_free(rrpc);
rrpc_area_free(rrpc);
kfree(rrpc); kfree(rrpc);
} }
...@@ -1327,6 +1366,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, ...@@ -1327,6 +1366,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
struct request_queue *bqueue = dev->q; struct request_queue *bqueue = dev->q;
struct request_queue *tqueue = tdisk->queue; struct request_queue *tqueue = tdisk->queue;
struct rrpc *rrpc; struct rrpc *rrpc;
sector_t soffset;
int ret; int ret;
if (!(dev->identity.dom & NVM_RSP_L2P)) { if (!(dev->identity.dom & NVM_RSP_L2P)) {
...@@ -1352,6 +1392,13 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, ...@@ -1352,6 +1392,13 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
/* simple round-robin strategy */ /* simple round-robin strategy */
atomic_set(&rrpc->next_lun, -1); atomic_set(&rrpc->next_lun, -1);
ret = rrpc_area_init(rrpc, &soffset);
if (ret < 0) {
pr_err("nvm: rrpc: could not initialize area\n");
return ERR_PTR(ret);
}
rrpc->soffset = soffset;
ret = rrpc_luns_init(rrpc, lun_begin, lun_end); ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
if (ret) { if (ret) {
pr_err("nvm: rrpc: could not initialize luns\n"); pr_err("nvm: rrpc: could not initialize luns\n");
......
...@@ -97,6 +97,7 @@ struct rrpc { ...@@ -97,6 +97,7 @@ struct rrpc {
struct nvm_dev *dev; struct nvm_dev *dev;
struct gendisk *disk; struct gendisk *disk;
sector_t soffset; /* logical sector offset */
u64 poffset; /* physical page offset */ u64 poffset; /* physical page offset */
int lun_offset; int lun_offset;
......
...@@ -146,6 +146,14 @@ struct nvme_nvm_command { ...@@ -146,6 +146,14 @@ struct nvme_nvm_command {
}; };
}; };
/*
 * Completion entry as seen by the LightNVM glue.  nvme_nvm_submit_io()
 * allocates room for one of these directly behind the command and points
 * rq->special at it (cmd + 1); nvme_nvm_end_io() then reads ->result from
 * it to fill in nvm_rq->ppa_status.
 *
 * NOTE(review): the field order and sizes presumably have to match the
 * NVMe completion queue entry that the NVMe core memcpy()s into
 * req->special -- confirm against struct nvme_completion before changing
 * the layout.
 */
struct nvme_nvm_completion {
__le64 result; /* Used by LightNVM to return ppa completions */
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
__le16 status; /* did the command fail, and if so, why? */
};
#define NVME_NVM_LP_MLC_PAIRS 886 #define NVME_NVM_LP_MLC_PAIRS 886
struct nvme_nvm_lp_mlc { struct nvme_nvm_lp_mlc {
__u16 num_pairs; __u16 num_pairs;
...@@ -507,6 +515,10 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, ...@@ -507,6 +515,10 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
static void nvme_nvm_end_io(struct request *rq, int error) static void nvme_nvm_end_io(struct request *rq, int error)
{ {
struct nvm_rq *rqd = rq->end_io_data; struct nvm_rq *rqd = rq->end_io_data;
struct nvme_nvm_completion *cqe = rq->special;
if (cqe)
rqd->ppa_status = le64_to_cpu(cqe->result);
nvm_end_io(rqd, error); nvm_end_io(rqd, error);
...@@ -526,7 +538,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) ...@@ -526,7 +538,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
if (IS_ERR(rq)) if (IS_ERR(rq))
return -ENOMEM; return -ENOMEM;
cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); cmd = kzalloc(sizeof(struct nvme_nvm_command) +
sizeof(struct nvme_nvm_completion), GFP_KERNEL);
if (!cmd) { if (!cmd) {
blk_mq_free_request(rq); blk_mq_free_request(rq);
return -ENOMEM; return -ENOMEM;
...@@ -545,7 +558,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) ...@@ -545,7 +558,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
rq->cmd = (unsigned char *)cmd; rq->cmd = (unsigned char *)cmd;
rq->cmd_len = sizeof(struct nvme_nvm_command); rq->cmd_len = sizeof(struct nvme_nvm_command);
rq->special = (void *)0; rq->special = cmd + 1;
rq->end_io_data = rqd; rq->end_io_data = rqd;
......
...@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req) ...@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req)
blk_mq_end_request(req, error); blk_mq_end_request(req, error);
} }
/*
 * Check whether the completion queue entry at @head is valid for the
 * current pass: only the status field is read, and its lowest bit (the
 * phase) is compared with @phase.  This is done before the rest of the
 * entry is looked at.
 */
static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
		u16 phase)
{
	u16 status = le16_to_cpu(nvmeq->cqes[head].status);

	return (status & 1) == phase;
}
static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
{ {
u16 head, phase; u16 head, phase;
...@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) ...@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
head = nvmeq->cq_head; head = nvmeq->cq_head;
phase = nvmeq->cq_phase; phase = nvmeq->cq_phase;
for (;;) { while (nvme_cqe_valid(nvmeq, head, phase)) {
struct nvme_completion cqe = nvmeq->cqes[head]; struct nvme_completion cqe = nvmeq->cqes[head];
u16 status = le16_to_cpu(cqe.status);
struct request *req; struct request *req;
if ((status & 1) != phase)
break;
if (++head == nvmeq->q_depth) { if (++head == nvmeq->q_depth) {
head = 0; head = 0;
phase = !phase; phase = !phase;
...@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) ...@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
memcpy(req->special, &cqe, sizeof(cqe)); memcpy(req->special, &cqe, sizeof(cqe));
blk_mq_complete_request(req, status >> 1); blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
} }
...@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data) ...@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
static irqreturn_t nvme_irq_check(int irq, void *data) static irqreturn_t nvme_irq_check(int irq, void *data)
{ {
struct nvme_queue *nvmeq = data; struct nvme_queue *nvmeq = data;
struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) return IRQ_WAKE_THREAD;
return IRQ_NONE; return IRQ_NONE;
return IRQ_WAKE_THREAD;
} }
static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
{ {
struct nvme_queue *nvmeq = hctx->driver_data; struct nvme_queue *nvmeq = hctx->driver_data;
if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
nvmeq->cq_phase) {
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
__nvme_process_cq(nvmeq, &tag); __nvme_process_cq(nvmeq, &tag);
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
......
...@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode) ...@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode)
wb_get(wb); wb_get(wb);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
wb_put(wb); /* not gonna deref it anymore */
/* i_wb may have changed inbetween, can't use inode_to_wb() */ /* i_wb may have changed inbetween, can't use inode_to_wb() */
if (likely(wb == inode->i_wb)) if (likely(wb == inode->i_wb)) {
return wb; /* @inode already has ref */ wb_put(wb); /* @inode already has ref */
return wb;
}
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
wb_put(wb);
cpu_relax(); cpu_relax();
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
} }
...@@ -1337,10 +1339,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -1337,10 +1339,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
* and does more profound writeback list handling in writeback_sb_inodes(). * and does more profound writeback list handling in writeback_sb_inodes().
*/ */
static int static int writeback_single_inode(struct inode *inode,
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, struct writeback_control *wbc)
struct writeback_control *wbc)
{ {
struct bdi_writeback *wb;
int ret = 0; int ret = 0;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
...@@ -1378,7 +1380,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -1378,7 +1380,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
ret = __writeback_single_inode(inode, wbc); ret = __writeback_single_inode(inode, wbc);
wbc_detach_inode(wbc); wbc_detach_inode(wbc);
spin_lock(&wb->list_lock);
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
/* /*
* If inode is clean, remove it from writeback lists. Otherwise don't * If inode is clean, remove it from writeback lists. Otherwise don't
...@@ -1453,6 +1456,7 @@ static long writeback_sb_inodes(struct super_block *sb, ...@@ -1453,6 +1456,7 @@ static long writeback_sb_inodes(struct super_block *sb,
while (!list_empty(&wb->b_io)) { while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev); struct inode *inode = wb_inode(wb->b_io.prev);
struct bdi_writeback *tmp_wb;
if (inode->i_sb != sb) { if (inode->i_sb != sb) {
if (work->sb) { if (work->sb) {
...@@ -1543,15 +1547,23 @@ static long writeback_sb_inodes(struct super_block *sb, ...@@ -1543,15 +1547,23 @@ static long writeback_sb_inodes(struct super_block *sb,
cond_resched(); cond_resched();
} }
/*
spin_lock(&wb->list_lock); * Requeue @inode if still dirty. Be careful as @inode may
* have been switched to another wb in the meantime.
*/
tmp_wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL)) if (!(inode->i_state & I_DIRTY_ALL))
wrote++; wrote++;
requeue_inode(inode, wb, &wbc); requeue_inode(inode, tmp_wb, &wbc);
inode_sync_complete(inode); inode_sync_complete(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (unlikely(tmp_wb != wb)) {
spin_unlock(&tmp_wb->list_lock);
spin_lock(&wb->list_lock);
}
/* /*
* bail out to wb_writeback() often enough to check * bail out to wb_writeback() often enough to check
* background threshold and other termination conditions. * background threshold and other termination conditions.
...@@ -2338,7 +2350,6 @@ EXPORT_SYMBOL(sync_inodes_sb); ...@@ -2338,7 +2350,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
*/ */
int write_inode_now(struct inode *inode, int sync) int write_inode_now(struct inode *inode, int sync)
{ {
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
struct writeback_control wbc = { struct writeback_control wbc = {
.nr_to_write = LONG_MAX, .nr_to_write = LONG_MAX,
.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
...@@ -2350,7 +2361,7 @@ int write_inode_now(struct inode *inode, int sync) ...@@ -2350,7 +2361,7 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0; wbc.nr_to_write = 0;
might_sleep(); might_sleep();
return writeback_single_inode(inode, wb, &wbc); return writeback_single_inode(inode, &wbc);
} }
EXPORT_SYMBOL(write_inode_now); EXPORT_SYMBOL(write_inode_now);
...@@ -2367,7 +2378,7 @@ EXPORT_SYMBOL(write_inode_now); ...@@ -2367,7 +2378,7 @@ EXPORT_SYMBOL(write_inode_now);
*/ */
int sync_inode(struct inode *inode, struct writeback_control *wbc) int sync_inode(struct inode *inode, struct writeback_control *wbc)
{ {
return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc); return writeback_single_inode(inode, wbc);
} }
EXPORT_SYMBOL(sync_inode); EXPORT_SYMBOL(sync_inode);
......
...@@ -263,22 +263,8 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) ...@@ -263,22 +263,8 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
for ((i) = 0; (i) < (q)->nr_hw_queues && \ for ((i) = 0; (i) < (q)->nr_hw_queues && \
({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
#define queue_for_each_ctx(q, ctx, i) \
for ((i) = 0; (i) < (q)->nr_queues && \
({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
#define hctx_for_each_ctx(hctx, ctx, i) \ #define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \ for ((i) = 0; (i) < (hctx)->nr_ctx && \
({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
#define blk_ctx_sum(q, sum) \
({ \
struct blk_mq_ctx *__x; \
unsigned int __ret = 0, __i; \
\
queue_for_each_ctx((q), __x, __i) \
__ret += sum; \
__ret; \
})
#endif #endif
...@@ -242,6 +242,7 @@ struct nvm_rq { ...@@ -242,6 +242,7 @@ struct nvm_rq {
uint16_t nr_pages; uint16_t nr_pages;
uint16_t flags; uint16_t flags;
u64 ppa_status; /* ppa media status */
int error; int error;
}; };
...@@ -346,6 +347,7 @@ struct nvm_dev { ...@@ -346,6 +347,7 @@ struct nvm_dev {
int nr_luns; int nr_luns;
unsigned max_pages_per_blk; unsigned max_pages_per_blk;
unsigned long *lun_map;
void *ppalist_pool; void *ppalist_pool;
struct nvm_id identity; struct nvm_id identity;
...@@ -355,6 +357,7 @@ struct nvm_dev { ...@@ -355,6 +357,7 @@ struct nvm_dev {
char name[DISK_NAME_LEN]; char name[DISK_NAME_LEN];
struct mutex mlock; struct mutex mlock;
spinlock_t lock;
}; };
static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
...@@ -465,8 +468,13 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); ...@@ -465,8 +468,13 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long); unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
typedef void (nvmm_release_lun)(struct nvm_dev *, int);
typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
struct nvmm_type { struct nvmm_type {
const char *name; const char *name;
unsigned int version[3]; unsigned int version[3];
...@@ -488,9 +496,15 @@ struct nvmm_type { ...@@ -488,9 +496,15 @@ struct nvmm_type {
/* Configuration management */ /* Configuration management */
nvmm_get_lun_fn *get_lun; nvmm_get_lun_fn *get_lun;
nvmm_reserve_lun *reserve_lun;
nvmm_release_lun *release_lun;
/* Statistics */ /* Statistics */
nvmm_lun_info_print_fn *lun_info_print; nvmm_lun_info_print_fn *lun_info_print;
nvmm_get_area_fn *get_area;
nvmm_put_area_fn *put_area;
struct list_head list; struct list_head list;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment