Commit d624f371 authored by Javier González, committed by Jens Axboe

lightnvm: pblk: generalize erase path

Erase I/Os are scheduled with the following goals in mind: (i) minimize
collisions between erases and write I/Os on the LUNs, and (ii) even out the
cost of erasing across writes, instead of putting the whole burden on the
moment garbage collection runs. This works well with the current design, but
it is specific to the default mapping algorithm.

This patch generalizes the erase path so that other mapping algorithms can
select an arbitrary line to be erased instead. It also removes the erase
semaphore, since it introduces jitter for user writes.
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c2e9f5d4
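
The scheduling idea behind goal (ii) in the commit message can be pictured with
a small standalone sketch (plain userspace C, not pblk code; the LUN and write
counts are made-up numbers): the first write that maps to a LUN of the next
line carries exactly one block erase with it, so by the time the next line
becomes the data line its blocks are already erased and no erase burst is left
for garbage collection to pay.

/*
 * Minimal userspace sketch (not kernel code) of the scheduling idea in the
 * commit message: piggy-back one block erase on the first write that maps to
 * each LUN of the next line, so the erase cost is spread across user writes
 * instead of being paid in one burst at GC time. NR_LUNS and NR_WRITES are
 * invented values for illustration only.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_LUNS   4            /* hypothetical geometry */
#define NR_WRITES 16

int main(void)
{
        bool erase_bitmap[NR_LUNS] = { false }; /* per-line erase state */
        int erases_issued = 0;

        for (int w = 0; w < NR_WRITES; w++) {
                int lun = w % NR_LUNS;          /* writes striped over LUNs */

                if (!erase_bitmap[lun]) {
                        /* first write touching this LUN of the next line:
                         * schedule exactly one erase together with it */
                        erase_bitmap[lun] = true;
                        erases_issued++;
                        printf("write %2d -> LUN %d (+ erase for next line)\n",
                               w, lun);
                } else {
                        printf("write %2d -> LUN %d\n", w, lun);
                }
        }

        printf("erases issued alongside writes: %d (one per LUN)\n",
               erases_issued);
        return 0;
}
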
@@ -61,7 +61,6 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
 {
         struct pblk *pblk = rqd->private;
 
-        up(&pblk->erase_sem);
         __pblk_end_io_erase(pblk, rqd);
         mempool_free(rqd, pblk->r_rq_pool);
 }
@@ -1373,7 +1372,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
         return pblk->l_mg.data_line;
 }
 
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
 {
         return pblk->l_mg.data_next;
 }
...
@@ -545,7 +545,7 @@ static int pblk_lines_init(struct pblk *pblk)
         struct pblk_line_meta *lm = &pblk->lm;
         struct pblk_line *line;
         unsigned int smeta_len, emeta_len;
-        long nr_bad_blks, nr_meta_blks, nr_free_blks;
+        long nr_bad_blks, nr_free_blks;
         int bb_distance;
         int i;
         int ret;
@@ -591,9 +591,8 @@ static int pblk_lines_init(struct pblk *pblk)
         }
         lm->emeta_bb = geo->nr_luns - i;
 
-        nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
-                                (geo->sec_per_blk / 2)) / geo->sec_per_blk;
-        lm->min_blk_line = nr_meta_blks + 1;
+        lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec,
+                                            geo->sec_per_blk);
 
         l_mg->nr_lines = geo->blks_per_lun;
         l_mg->log_line = l_mg->data_line = NULL;
@@ -716,8 +715,6 @@ static int pblk_lines_init(struct pblk *pblk)
 
         pblk_set_provision(pblk, nr_free_blks);
 
-        sema_init(&pblk->erase_sem, 1);
-
         /* Cleanup per-LUN bad block lists - managed within lines on run-time */
         for (i = 0; i < geo->nr_luns; i++)
                 kfree(pblk->luns[i].bb_list);
...
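
The min_blk_line change above replaces nearest-block rounding with
DIV_ROUND_UP. A worked example (standalone C; smeta_sec, emeta_sec and
sec_per_blk are assumed values, not taken from any real geometry) shows why:
with the old rounding, metadata occupying less than half a block contributed
nothing to the minimum line size, whereas DIV_ROUND_UP always reserves a whole
block for it.

/*
 * Worked example (hypothetical numbers) for the min_blk_line change above.
 * The old code rounded to the nearest block, so metadata smaller than half a
 * block contributed zero; DIV_ROUND_UP always reserves a full block for any
 * non-zero amount of metadata.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        long smeta_sec = 64, emeta_sec = 32;    /* assumed metadata sizes */
        long sec_per_blk = 256;                 /* assumed geometry */

        long old_min = (smeta_sec + emeta_sec + sec_per_blk / 2)
                                                / sec_per_blk + 1;
        long new_min = 1 + DIV_ROUND_UP(smeta_sec + emeta_sec, sec_per_blk);

        printf("old min_blk_line = %ld\n", old_min); /* 1: metadata rounded away */
        printf("new min_blk_line = %ld\n", new_min); /* 2: one data + one meta block */
        return 0;
}
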
@@ -92,8 +92,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 {
         struct nvm_tgt_dev *dev = pblk->dev;
         struct nvm_geo *geo = &dev->geo;
-        struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+        struct pblk_line_meta *lm = &pblk->lm;
         struct pblk_sec_meta *meta_list = rqd->meta_list;
+        struct pblk_line *e_line, *d_line;
         unsigned int map_secs;
         int min = pblk->min_write_pgs;
         int i, erase_lun;
@@ -106,32 +107,49 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
                 erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
                                                         rqd->ppa_list[i].g.ch;
 
+                /* line can change after page map */
+                e_line = pblk_line_get_erase(pblk);
+
+                spin_lock(&e_line->lock);
                 if (!test_bit(erase_lun, e_line->erase_bitmap)) {
-                        if (down_trylock(&pblk->erase_sem))
-                                continue;
-
                         set_bit(erase_lun, e_line->erase_bitmap);
                         atomic_dec(&e_line->left_eblks);
+
                         *erase_ppa = rqd->ppa_list[i];
                         erase_ppa->g.blk = e_line->id;
 
+                        spin_unlock(&e_line->lock);
+
                         /* Avoid evaluating e_line->left_eblks */
                         return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
                                                         valid_secs, i + min);
                 }
+
+                spin_unlock(&e_line->lock);
         }
 
-        /* Erase blocks that are bad in this line but might not be in next */
-        if (unlikely(ppa_empty(*erase_ppa))) {
-                struct pblk_line_meta *lm = &pblk->lm;
-
-                i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
-                if (i == lm->blk_per_line)
+        e_line = pblk_line_get_erase(pblk);
+        d_line = pblk_line_get_data(pblk);
+
+        /* Erase blocks that are bad in this line but might not be in next */
+        if (unlikely(ppa_empty(*erase_ppa)) &&
+                        bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+                int bit = -1;
+
+retry:
+                bit = find_next_bit(d_line->blk_bitmap,
+                                        lm->blk_per_line, bit + 1);
+                if (bit >= lm->blk_per_line)
                         return;
 
-                set_bit(i, e_line->erase_bitmap);
+                spin_lock(&e_line->lock);
+                if (test_bit(bit, e_line->erase_bitmap)) {
+                        spin_unlock(&e_line->lock);
+                        goto retry;
+                }
+                spin_unlock(&e_line->lock);
+
+                set_bit(bit, e_line->erase_bitmap);
                 atomic_dec(&e_line->left_eblks);
-                *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+                *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
                 erase_ppa->g.blk = e_line->id;
         }
 }
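
The second half of pblk_map_erase_rq above walks the data line's bad-block
bitmap and claims, under the line lock, a block that has not yet been marked
for erase in the next line. The following standalone sketch (plain C; a
pthread mutex stands in for the line spinlock, the bitmap sizes and contents
are invented, and the bit is set inside the critical section for simplicity)
illustrates the same claim-or-skip pattern.

/*
 * Userspace sketch of the claim pattern used above: scan the data line's
 * bad-block bitmap and claim, under a lock, the first block not yet marked
 * for erase in the erase line; skip blocks another context claimed first.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

#define BLK_PER_LINE 8

static pthread_mutex_t e_line_lock = PTHREAD_MUTEX_INITIALIZER;
static bool d_line_bad[BLK_PER_LINE]   = { 0, 1, 0, 1, 0, 0, 0, 0 }; /* bad blocks */
static bool e_line_erase[BLK_PER_LINE] = { 0, 1, 0, 0, 0, 0, 0, 0 }; /* already claimed */

/* Returns the claimed block index, or -1 if every bad block is handled. */
static int claim_erase_block(void)
{
        for (int bit = 0; bit < BLK_PER_LINE; bit++) {
                if (!d_line_bad[bit])
                        continue;

                pthread_mutex_lock(&e_line_lock);
                if (e_line_erase[bit]) {
                        /* someone else got here first: keep scanning */
                        pthread_mutex_unlock(&e_line_lock);
                        continue;
                }
                e_line_erase[bit] = true;
                pthread_mutex_unlock(&e_line_lock);
                return bit;
        }
        return -1;
}

int main(void)
{
        int bit = claim_erase_block();

        if (bit >= 0)
                printf("claimed block %d for erase in the next line\n", bit);
        else
                printf("no bad block left to pre-erase\n");
        return 0;
}
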
@@ -521,20 +521,19 @@ unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
  * This function is used by the write thread to form the write bio that will
  * persist data on the write buffer to the media.
  */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-                                 struct pblk_c_ctx *c_ctx,
-                                 unsigned int pos,
-                                 unsigned int nr_entries,
-                                 unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+                                 struct bio *bio, unsigned int pos,
+                                 unsigned int nr_entries, unsigned int count)
 {
         struct pblk *pblk = container_of(rb, struct pblk, rwb);
+        struct request_queue *q = pblk->dev->q;
+        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
         struct pblk_rb_entry *entry;
         struct page *page;
-        unsigned int pad = 0, read = 0, to_read = nr_entries;
+        unsigned int pad = 0, to_read = nr_entries;
         unsigned int user_io = 0, gc_io = 0;
         unsigned int i;
         int flags;
-        int ret;
 
         if (count < nr_entries) {
                 pad = nr_entries - count;
@@ -570,17 +569,17 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
                         flags |= PBLK_SUBMITTED_ENTRY;
                         /* Release flags on context. Protect from writes */
                         smp_store_release(&entry->w_ctx.flags, flags);
-                        goto out;
+                        return NVM_IO_ERR;
                 }
 
-                ret = bio_add_page(bio, page, rb->seg_size, 0);
-                if (ret != rb->seg_size) {
+                if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+                                                        rb->seg_size) {
                         pr_err("pblk: could not add page to write bio\n");
                         flags &= ~PBLK_WRITTEN_DATA;
                         flags |= PBLK_SUBMITTED_ENTRY;
                         /* Release flags on context. Protect from writes */
                         smp_store_release(&entry->w_ctx.flags, flags);
-                        goto out;
+                        return NVM_IO_ERR;
                 }
 
                 if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +606,20 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
                 pos = (pos + 1) & (rb->nr_entries - 1);
         }
 
-        read = to_read;
+        if (pad) {
+                if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+                        pr_err("pblk: could not pad page in write bio\n");
+                        return NVM_IO_ERR;
+                }
+        }
+
         pblk_rl_out(&pblk->rl, user_io, gc_io);
 
 #ifdef CONFIG_NVM_DEBUG
         atomic_long_add(pad, &((struct pblk *)
                         (container_of(rb, struct pblk, rwb)))->padded_writes);
 #endif
-out:
-        return read;
+
+        return NVM_IO_OK;
 }
 
 /*
...
@@ -219,11 +219,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-                           struct pblk_c_ctx *c_ctx)
+                           struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
 {
         struct pblk_line_meta *lm = &pblk->lm;
-        struct pblk_line *e_line = pblk_line_get_data_next(pblk);
-        struct ppa_addr erase_ppa;
+        struct pblk_line *e_line = pblk_line_get_erase(pblk);
         unsigned int valid = c_ctx->nr_valid;
         unsigned int padded = c_ctx->nr_padded;
         unsigned int nr_secs = valid + padded;
@@ -231,40 +230,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
         int ret = 0;
 
         lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-        if (!lun_bitmap) {
-                ret = -ENOMEM;
-                goto out;
-        }
+        if (!lun_bitmap)
+                return -ENOMEM;
         c_ctx->lun_bitmap = lun_bitmap;
 
         ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
         if (ret) {
                 kfree(lun_bitmap);
-                goto out;
+                return ret;
         }
 
-        ppa_set_empty(&erase_ppa);
-        if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
+        if (likely(!atomic_read(&e_line->left_eblks) || !e_line))
                 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
         else
                 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
-                                                        valid, &erase_ppa);
-
-out:
-        if (unlikely(e_line && !ppa_empty(erase_ppa))) {
-                if (pblk_blk_erase_async(pblk, erase_ppa)) {
-                        struct nvm_tgt_dev *dev = pblk->dev;
-                        struct nvm_geo *geo = &dev->geo;
-                        int bit;
-
-                        atomic_inc(&e_line->left_eblks);
-                        bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
-                        WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
-                        up(&pblk->erase_sem);
-                }
-        }
-
-        return ret;
+                                                        valid, erase_ppa);
+
+        return 0;
 }
 
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -311,16 +293,60 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
         return secs_to_sync;
 }
 
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+        struct ppa_addr erase_ppa;
+        int err;
+
+        ppa_set_empty(&erase_ppa);
+
+        /* Assign lbas to ppas and populate request structure */
+        err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+        if (err) {
+                pr_err("pblk: could not setup write request: %d\n", err);
+                return NVM_IO_ERR;
+        }
+
+        /* Submit write for current data line */
+        err = pblk_submit_io(pblk, rqd);
+        if (err) {
+                pr_err("pblk: I/O submission failed: %d\n", err);
+                return NVM_IO_ERR;
+        }
+
+        /* Submit available erase for next data line */
+        if (unlikely(!ppa_empty(erase_ppa)) &&
+                        pblk_blk_erase_async(pblk, erase_ppa)) {
+                struct pblk_line *e_line = pblk_line_get_erase(pblk);
+                struct nvm_tgt_dev *dev = pblk->dev;
+                struct nvm_geo *geo = &dev->geo;
+                int bit;
+
+                atomic_inc(&e_line->left_eblks);
+                bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
+                WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+        }
+
+        return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+        struct bio *bio = rqd->bio;
+
+        if (c_ctx->nr_padded)
+                pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
 static int pblk_submit_write(struct pblk *pblk)
 {
         struct bio *bio;
         struct nvm_rq *rqd;
-        struct pblk_c_ctx *c_ctx;
-        unsigned int pgs_read;
         unsigned int secs_avail, secs_to_sync, secs_to_com;
         unsigned int secs_to_flush;
         unsigned long pos;
-        int err;
 
         /* If there are no sectors in the cache, flushes (bios without data)
          * will be cleared on the cache threads
@@ -338,7 +364,6 @@ static int pblk_submit_write(struct pblk *pblk)
                 pr_err("pblk: cannot allocate write req.\n");
                 return 1;
         }
-        c_ctx = nvm_rq_to_pdu(rqd);
 
         bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
         if (!bio) {
@@ -358,29 +383,14 @@ static int pblk_submit_write(struct pblk *pblk)
         secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
         pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 
-        pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
-                                                secs_to_sync, secs_avail);
-        if (!pgs_read) {
+        if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+                                                                secs_avail)) {
                 pr_err("pblk: corrupted write bio\n");
                 goto fail_put_bio;
         }
 
-        if (c_ctx->nr_padded)
-                if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
-                        goto fail_put_bio;
-
-        /* Assign lbas to ppas and populate request structure */
-        err = pblk_setup_w_rq(pblk, rqd, c_ctx);
-        if (err) {
-                pr_err("pblk: could not setup write request\n");
-                goto fail_free_bio;
-        }
-
-        err = pblk_submit_io(pblk, rqd);
-        if (err) {
-                pr_err("pblk: I/O submission failed: %d\n", err);
+        if (pblk_submit_io_set(pblk, rqd))
                 goto fail_free_bio;
-        }
 
 #ifdef CONFIG_NVM_DEBUG
         atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +399,7 @@ static int pblk_submit_write(struct pblk *pblk)
         return 0;
 
 fail_free_bio:
-        if (c_ctx->nr_padded)
-                pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+        pblk_free_write_rqd(pblk, rqd);
 fail_put_bio:
         bio_put(bio);
 fail_free_rqd:
...
@@ -500,7 +500,6 @@ struct pblk {
         struct pblk_rl rl;
 
         int sec_per_write;
-        struct semaphore erase_sem;
 
         unsigned char instance_uuid[16];
 #ifdef CONFIG_NVM_DEBUG
@@ -583,11 +582,9 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
 struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
 void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-                                 struct pblk_c_ctx *c_ctx,
-                                 unsigned int pos,
-                                 unsigned int nr_entries,
-                                 unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+                                 struct bio *bio, unsigned int pos,
+                                 unsigned int nr_entries, unsigned int count);
 unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
                                         struct list_head *list,
                                         unsigned int max);
@@ -633,7 +630,7 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
 struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
...