Commit a88a341a authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe

writeback: move bandwidth related fields from backing_dev_info into bdi_writeback

Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bandwidth related fields from backing_dev_info into
bdi_writeback.

* The moved fields are: bw_time_stamp, dirtied_stamp, written_stamp,
  write_bandwidth, avg_write_bandwidth, dirty_ratelimit,
  balanced_dirty_ratelimit, completions and dirty_exceeded.

* writeback_chunk_size() and over_bground_thresh() now take @wb
  instead of @bdi.

* bdi_writeout_fraction(bdi, ...)	-> wb_writeout_fraction(wb, ...)
  bdi_dirty_limit(bdi, ...)		-> wb_dirty_limit(wb, ...)
  bdi_position_ration(bdi, ...)		-> wb_position_ratio(wb, ...)
  bdi_update_writebandwidth(bdi, ...)	-> wb_update_write_bandwidth(wb, ...)
  [__]bdi_update_bandwidth(bdi, ...)	-> [__]wb_update_bandwidth(wb, ...)
  bdi_{max|min}_pause(bdi, ...)		-> wb_{max|min}_pause(wb, ...)
  bdi_dirty_limits(bdi, ...)		-> wb_dirty_limits(wb, ...)

* Init/exits of the relocated fields are moved to bdi_wb_init/exit()
  respectively.  Note that explicit zeroing is dropped in the process
  as wb's are cleared in entirety anyway.

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[]
  introducing no behavior changes.

v2: Typo in description fixed as suggested by Jan.
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent 93f78d88
......@@ -53,7 +53,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->dirty_exceeded)
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
......@@ -70,7 +70,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (sbi->sb->s_bdi->dirty_exceeded)
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return false;
}
return res;
......
......@@ -713,7 +713,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
if (sbi->sb->s_bdi->dirty_exceeded)
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return 0;
if (type == DATA)
......
......@@ -624,7 +624,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
return ret;
}
static long writeback_chunk_size(struct backing_dev_info *bdi,
static long writeback_chunk_size(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
long pages;
......@@ -645,7 +645,7 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
pages = LONG_MAX;
else {
pages = min(bdi->avg_write_bandwidth / 2,
pages = min(wb->avg_write_bandwidth / 2,
global_dirty_limit / DIRTY_SCOPE);
pages = min(pages, work->nr_pages);
pages = round_down(pages + MIN_WRITEBACK_PAGES,
......@@ -743,7 +743,7 @@ static long writeback_sb_inodes(struct super_block *sb,
inode->i_state |= I_SYNC;
spin_unlock(&inode->i_lock);
write_chunk = writeback_chunk_size(wb->bdi, work);
write_chunk = writeback_chunk_size(wb, work);
wbc.nr_to_write = write_chunk;
wbc.pages_skipped = 0;
......@@ -830,7 +830,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
return nr_pages - work.nr_pages;
}
static bool over_bground_thresh(struct backing_dev_info *bdi)
static bool over_bground_thresh(struct bdi_writeback *wb)
{
unsigned long background_thresh, dirty_thresh;
......@@ -840,8 +840,7 @@ static bool over_bground_thresh(struct backing_dev_info *bdi)
global_page_state(NR_UNSTABLE_NFS) > background_thresh)
return true;
if (wb_stat(&bdi->wb, WB_RECLAIMABLE) >
bdi_dirty_limit(bdi, background_thresh))
if (wb_stat(wb, WB_RECLAIMABLE) > wb_dirty_limit(wb, background_thresh))
return true;
return false;
......@@ -854,7 +853,7 @@ static bool over_bground_thresh(struct backing_dev_info *bdi)
static void wb_update_bandwidth(struct bdi_writeback *wb,
unsigned long start_time)
{
__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
__wb_update_bandwidth(wb, 0, 0, 0, 0, 0, start_time);
}
/*
......@@ -906,7 +905,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* For background writeout, stop when we are below the
* background dirty threshold
*/
if (work->for_background && !over_bground_thresh(wb->bdi))
if (work->for_background && !over_bground_thresh(wb))
break;
/*
......@@ -998,7 +997,7 @@ static unsigned long get_nr_dirty_pages(void)
static long wb_check_background_flush(struct bdi_writeback *wb)
{
if (over_bground_thresh(wb->bdi)) {
if (over_bground_thresh(wb)) {
struct wb_writeback_work work = {
.nr_pages = LONG_MAX,
......
......@@ -748,7 +748,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL)
gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH);
if (bdi->dirty_exceeded)
if (bdi->wb.dirty_exceeded)
gfs2_ail1_flush(sdp, wbc);
else
filemap_fdatawrite(metamapping);
......
......@@ -60,16 +60,6 @@ struct bdi_writeback {
spinlock_t list_lock; /* protects the b_* lists */
struct percpu_counter stat[NR_WB_STAT_ITEMS];
};
struct backing_dev_info {
struct list_head bdi_list;
unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
unsigned int capabilities; /* Device capabilities */
congested_fn *congested_fn; /* Function pointer if device is md/dm */
void *congested_data; /* Pointer to aux data for congested func */
char *name;
unsigned long bw_time_stamp; /* last time write bw is updated */
unsigned long dirtied_stamp;
......@@ -88,6 +78,16 @@ struct backing_dev_info {
struct fprop_local_percpu completions;
int dirty_exceeded;
};
struct backing_dev_info {
struct list_head bdi_list;
unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
unsigned int capabilities; /* Device capabilities */
congested_fn *congested_fn; /* Function pointer if device is md/dm */
void *congested_data; /* Pointer to aux data for congested func */
char *name;
unsigned int min_ratio;
unsigned int max_ratio, max_prop_frac;
......
......@@ -155,10 +155,9 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
unsigned long dirty);
unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty);
void __bdi_update_bandwidth(struct backing_dev_info *bdi,
void __wb_update_bandwidth(struct bdi_writeback *wb,
unsigned long thresh,
unsigned long bg_thresh,
unsigned long dirty,
......
......@@ -400,13 +400,13 @@ TRACE_EVENT(bdi_dirty_ratelimit,
TP_fast_assign(
strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
__entry->write_bw = KBps(bdi->write_bandwidth);
__entry->avg_write_bw = KBps(bdi->avg_write_bandwidth);
__entry->write_bw = KBps(bdi->wb.write_bandwidth);
__entry->avg_write_bw = KBps(bdi->wb.avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
__entry->dirty_ratelimit = KBps(bdi->dirty_ratelimit);
__entry->dirty_ratelimit = KBps(bdi->wb.dirty_ratelimit);
__entry->task_ratelimit = KBps(task_ratelimit);
__entry->balanced_dirty_ratelimit =
KBps(bdi->balanced_dirty_ratelimit);
KBps(bdi->wb.balanced_dirty_ratelimit);
),
TP_printk("bdi %s: "
......
......@@ -66,7 +66,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
bdi_thresh = wb_dirty_limit(wb, dirty_thresh);
#define K(x) ((x) << (PAGE_SHIFT - 10))
seq_printf(m,
......@@ -91,7 +91,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
K(background_thresh),
(unsigned long) K(wb_stat(wb, WB_DIRTIED)),
(unsigned long) K(wb_stat(wb, WB_WRITTEN)),
(unsigned long) K(bdi->write_bandwidth),
(unsigned long) K(wb->write_bandwidth),
nr_dirty,
nr_io,
nr_more_io,
......@@ -376,6 +376,11 @@ void bdi_unregister(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL(bdi_unregister);
/*
* Initial write bandwidth: 100 MB/s
*/
#define INIT_BW (100 << (20 - PAGE_SHIFT))
static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
int i, err;
......@@ -391,11 +396,22 @@ static int bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
spin_lock_init(&wb->list_lock);
INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
wb->bw_time_stamp = jiffies;
wb->balanced_dirty_ratelimit = INIT_BW;
wb->dirty_ratelimit = INIT_BW;
wb->write_bandwidth = INIT_BW;
wb->avg_write_bandwidth = INIT_BW;
err = fprop_local_init_percpu(&wb->completions, GFP_KERNEL);
if (err)
return err;
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, GFP_KERNEL);
if (err) {
while (--i)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
return err;
}
}
......@@ -411,12 +427,9 @@ static void bdi_wb_exit(struct bdi_writeback *wb)
for (i = 0; i < NR_WB_STAT_ITEMS; i++)
percpu_counter_destroy(&wb->stat[i]);
}
/*
* Initial write bandwidth: 100 MB/s
*/
#define INIT_BW (100 << (20 - PAGE_SHIFT))
fprop_local_destroy_percpu(&wb->completions);
}
int bdi_init(struct backing_dev_info *bdi)
{
......@@ -435,22 +448,6 @@ int bdi_init(struct backing_dev_info *bdi)
if (err)
return err;
bdi->dirty_exceeded = 0;
bdi->bw_time_stamp = jiffies;
bdi->written_stamp = 0;
bdi->balanced_dirty_ratelimit = INIT_BW;
bdi->dirty_ratelimit = INIT_BW;
bdi->write_bandwidth = INIT_BW;
bdi->avg_write_bandwidth = INIT_BW;
err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
if (err) {
bdi_wb_exit(&bdi->wb);
return err;
}
return 0;
}
EXPORT_SYMBOL(bdi_init);
......@@ -468,8 +465,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
}
bdi_wb_exit(&bdi->wb);
fprop_local_destroy_percpu(&bdi->completions);
}
EXPORT_SYMBOL(bdi_destroy);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment