Commit a7689938 authored by Javier González, committed by Jens Axboe

lightnvm: pblk: use exact free block counter in RL

Until now, pblk's rate-limiter has used a heuristic to reserve space for
GC I/O given that the over-provision area was fixed.

In preparation for allowing the over-provision area to be defined on
target creation, define a dedicated free_block counter in the rate-limiter
to track the number of blocks being used for user data.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent aed49e19
...@@ -1145,7 +1145,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) ...@@ -1145,7 +1145,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
} }
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
pblk_rl_free_lines_dec(&pblk->rl, line); pblk_rl_free_lines_dec(&pblk->rl, line, true);
if (!pblk_line_init_bb(pblk, line, 0)) { if (!pblk_line_init_bb(pblk, line, 0)) {
list_add(&line->list, &l_mg->free_list); list_add(&line->list, &l_mg->free_list);
...@@ -1233,7 +1233,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk, ...@@ -1233,7 +1233,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
l_mg->data_line = retry_line; l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
pblk_rl_free_lines_dec(&pblk->rl, retry_line); pblk_rl_free_lines_dec(&pblk->rl, line, false);
if (pblk_line_erase(pblk, retry_line)) if (pblk_line_erase(pblk, retry_line))
goto retry; goto retry;
...@@ -1252,7 +1252,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) ...@@ -1252,7 +1252,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{ {
struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line; struct pblk_line *line;
int is_next = 0;
spin_lock(&l_mg->free_lock); spin_lock(&l_mg->free_lock);
line = pblk_line_get(pblk); line = pblk_line_get(pblk);
...@@ -1280,7 +1279,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) ...@@ -1280,7 +1279,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
} else { } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++; l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA; l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
} }
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
...@@ -1290,10 +1288,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) ...@@ -1290,10 +1288,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
return NULL; return NULL;
} }
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
retry_setup: retry_setup:
if (!pblk_line_init_metadata(pblk, line, NULL)) { if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line); line = pblk_line_retry(pblk, line);
...@@ -1311,6 +1305,8 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) ...@@ -1311,6 +1305,8 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
goto retry_setup; goto retry_setup;
} }
pblk_rl_free_lines_dec(&pblk->rl, line, true);
return line; return line;
} }
...@@ -1395,7 +1391,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) ...@@ -1395,7 +1391,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new = NULL; struct pblk_line *cur, *new = NULL;
unsigned int left_seblks; unsigned int left_seblks;
int is_next = 0;
cur = l_mg->data_line; cur = l_mg->data_line;
new = l_mg->data_next; new = l_mg->data_next;
...@@ -1444,6 +1439,8 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) ...@@ -1444,6 +1439,8 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
goto retry_setup; goto retry_setup;
} }
pblk_rl_free_lines_dec(&pblk->rl, new, true);
/* Allocate next line for preparation */ /* Allocate next line for preparation */
spin_lock(&l_mg->free_lock); spin_lock(&l_mg->free_lock);
l_mg->data_next = pblk_line_get(pblk); l_mg->data_next = pblk_line_get(pblk);
...@@ -1457,13 +1454,9 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) ...@@ -1457,13 +1454,9 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
} else { } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++; l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA; l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
} }
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
out: out:
return new; return new;
} }
......
...@@ -579,22 +579,34 @@ static unsigned int calc_emeta_len(struct pblk *pblk) ...@@ -579,22 +579,34 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{ {
struct nvm_tgt_dev *dev = pblk->dev; struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo; struct nvm_geo *geo = &dev->geo;
sector_t provisioned; sector_t provisioned;
int sec_meta, blk_meta;
pblk->over_pct = 20; pblk->op = 20;
provisioned = nr_free_blks; provisioned = nr_free_blks;
provisioned *= (100 - pblk->over_pct); provisioned *= (100 - pblk->op);
sector_div(provisioned, 100); sector_div(provisioned, 100);
pblk->op_blks = nr_free_blks - provisioned;
/* Internally pblk manages all free blocks, but all calculations based /* Internally pblk manages all free blocks, but all calculations based
* on user capacity consider only provisioned blocks * on user capacity consider only provisioned blocks
*/ */
pblk->rl.total_blocks = nr_free_blks; pblk->rl.total_blocks = nr_free_blks;
pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk; pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk;
pblk->capacity = provisioned * geo->sec_per_chk;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk;
atomic_set(&pblk->rl.free_blocks, nr_free_blks); atomic_set(&pblk->rl.free_blocks, nr_free_blks);
atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
} }
static int pblk_lines_alloc_metadata(struct pblk *pblk) static int pblk_lines_alloc_metadata(struct pblk *pblk)
......
...@@ -989,10 +989,8 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) ...@@ -989,10 +989,8 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
} }
spin_unlock(&l_mg->free_lock); spin_unlock(&l_mg->free_lock);
if (is_next) { if (is_next)
pblk_line_erase(pblk, l_mg->data_next); pblk_line_erase(pblk, l_mg->data_next);
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
out: out:
if (found_lines != recovered_lines) if (found_lines != recovered_lines)
......
...@@ -89,17 +89,15 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) ...@@ -89,17 +89,15 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
return atomic_read(&rl->free_blocks); return atomic_read(&rl->free_blocks);
} }
/* unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
* We check for (i) the number of free blocks in the current LUN and (ii) the {
* total number of free blocks in the pblk instance. This is to even out the return atomic_read(&rl->free_user_blocks);
* number of free blocks on each LUN when GC kicks in. }
*
* Only the total number of free blocks is used to configure the rate limiter. static void __pblk_rl_update_rates(struct pblk_rl *rl,
*/ unsigned long free_blocks)
void pblk_rl_update_rates(struct pblk_rl *rl)
{ {
struct pblk *pblk = container_of(rl, struct pblk, rl); struct pblk *pblk = container_of(rl, struct pblk, rl);
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
int max = rl->rb_budget; int max = rl->rb_budget;
if (free_blocks >= rl->high) { if (free_blocks >= rl->high) {
...@@ -132,20 +130,37 @@ void pblk_rl_update_rates(struct pblk_rl *rl) ...@@ -132,20 +130,37 @@ void pblk_rl_update_rates(struct pblk_rl *rl)
pblk_gc_should_stop(pblk); pblk_gc_should_stop(pblk);
} }
void pblk_rl_update_rates(struct pblk_rl *rl)
{
__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
}
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{ {
int blk_in_line = atomic_read(&line->blk_in_line); int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_add(blk_in_line, &rl->free_blocks); atomic_add(blk_in_line, &rl->free_blocks);
pblk_rl_update_rates(rl); free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
} }
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
bool used)
{ {
int blk_in_line = atomic_read(&line->blk_in_line); int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_sub(blk_in_line, &rl->free_blocks); atomic_sub(blk_in_line, &rl->free_blocks);
pblk_rl_update_rates(rl);
if (used)
free_blocks = atomic_sub_return(blk_in_line,
&rl->free_user_blocks);
else
free_blocks = atomic_read(&rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
} }
int pblk_rl_high_thrs(struct pblk_rl *rl) int pblk_rl_high_thrs(struct pblk_rl *rl)
...@@ -174,16 +189,21 @@ void pblk_rl_free(struct pblk_rl *rl) ...@@ -174,16 +189,21 @@ void pblk_rl_free(struct pblk_rl *rl)
void pblk_rl_init(struct pblk_rl *rl, int budget) void pblk_rl_init(struct pblk_rl *rl, int budget)
{ {
struct pblk *pblk = container_of(rl, struct pblk, rl); struct pblk *pblk = container_of(rl, struct pblk, rl);
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_meta *lm = &pblk->lm;
int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE; int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
int sec_meta, blk_meta;
unsigned int rb_windows; unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; /* Consider sectors used for metadata */
rl->high_pw = get_count_order(rl->high); sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
rl->low = rl->total_blocks / PBLK_USER_LOW_THRS; rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
if (rl->low < min_blocks) rl->high_pw = get_count_order(rl->high);
rl->low = min_blocks;
rl->rsv_blocks = min_blocks; rl->rsv_blocks = min_blocks;
......
...@@ -49,11 +49,12 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) ...@@ -49,11 +49,12 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{ {
int free_blocks, total_blocks; int free_blocks, free_user_blocks, total_blocks;
int rb_user_max, rb_user_cnt; int rb_user_max, rb_user_cnt;
int rb_gc_max, rb_gc_cnt, rb_budget, rb_state; int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks); free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
rb_user_max = pblk->rl.rb_user_max; rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max; rb_gc_max = pblk->rl.rb_gc_max;
...@@ -64,16 +65,16 @@ static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) ...@@ -64,16 +65,16 @@ static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
total_blocks = pblk->rl.total_blocks; total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE, return snprintf(page, PAGE_SIZE,
"u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
rb_user_cnt, rb_user_cnt,
rb_user_max, rb_user_max,
rb_gc_cnt, rb_gc_cnt,
rb_gc_max, rb_gc_max,
rb_state, rb_state,
rb_budget, rb_budget,
pblk->rl.low,
pblk->rl.high, pblk->rl.high,
free_blocks, free_blocks,
free_user_blocks,
total_blocks, total_blocks,
READ_ONCE(pblk->rl.rb_user_active)); READ_ONCE(pblk->rl.rb_user_active));
} }
......
...@@ -252,9 +252,6 @@ struct pblk_rl { ...@@ -252,9 +252,6 @@ struct pblk_rl {
unsigned int high; /* Upper threshold for rate limiter (free run - unsigned int high; /* Upper threshold for rate limiter (free run -
* user I/O rate limiter * user I/O rate limiter
*/ */
unsigned int low; /* Lower threshold for rate limiter (user I/O
* rate limiter - stall)
*/
unsigned int high_pw; /* High rounded up as a power of 2 */ unsigned int high_pw; /* High rounded up as a power of 2 */
#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */ #define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
...@@ -288,7 +285,9 @@ struct pblk_rl { ...@@ -288,7 +285,9 @@ struct pblk_rl {
unsigned long long nr_secs; unsigned long long nr_secs;
unsigned long total_blocks; unsigned long total_blocks;
atomic_t free_blocks;
atomic_t free_blocks; /* Total number of free blocks (+ OP) */
atomic_t free_user_blocks; /* Number of user free blocks (no OP) */
}; };
#define PBLK_LINE_EMPTY (~0U) #define PBLK_LINE_EMPTY (~0U)
...@@ -579,7 +578,9 @@ struct pblk { ...@@ -579,7 +578,9 @@ struct pblk {
*/ */
sector_t capacity; /* Device capacity when bad blocks are subtracted */ sector_t capacity; /* Device capacity when bad blocks are subtracted */
int over_pct; /* Percentage of device used for over-provisioning */
int op; /* Percentage of device used for over-provisioning */
int op_blks; /* Number of blocks used for over-provisioning */
/* pblk provisioning values. Used by rate limiter */ /* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl; struct pblk_rl rl;
...@@ -839,6 +840,7 @@ void pblk_rl_free(struct pblk_rl *rl); ...@@ -839,6 +840,7 @@ void pblk_rl_free(struct pblk_rl *rl);
void pblk_rl_update_rates(struct pblk_rl *rl); void pblk_rl_update_rates(struct pblk_rl *rl);
int pblk_rl_high_thrs(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
...@@ -847,7 +849,8 @@ void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); ...@@ -847,7 +849,8 @@ void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
int pblk_rl_max_io(struct pblk_rl *rl); int pblk_rl_max_io(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
bool used);
int pblk_rl_is_limit(struct pblk_rl *rl); int pblk_rl_is_limit(struct pblk_rl *rl);
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment