Commit c8462cc9 authored by Wu Fengguang

writeback: limit max dirty pause time

Apply two policies to scale down the max pause time for

1) a small number of concurrent dirtiers
2) small memory systems (compared to the storage bandwidth)

MAX_PAUSE=200ms may only be suitable for high-end servers with lots of
concurrent dirtiers, where the large pause time amortizes the CPU overhead
of dirty throttling over many pages.

Otherwise, a smaller pause time is desirable whenever possible, so as to
get good responsiveness and a smooth user experience. It is actually
required for good disk utilization in the case where all the dirty pages
can be synced to disk within MAX_PAUSE=200ms.
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
parent 143dfe86
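
For concreteness, here is a minimal userspace sketch of the heuristic this patch introduces (the real implementation is in the diff below). It assumes HZ=1000 and that avg_write_bandwidth and dirty_ratelimit are accounted in pages per second, as in the kernel's dirty throttling code; the helper names and sample numbers are illustrative only and not part of the patch.

#include <stdio.h>

#define HZ		1000		/* assumed timer frequency */
#define MAX_PAUSE	(HZ / 5)	/* the 200ms cap cited in the changelog */

/* integer log2, standing in for the kernel's ilog2() */
static unsigned long ilog2_ul(unsigned long v)
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/*
 * Model of bdi_max_pause(): bw and ratelimit in pages/s, bdi_dirty in pages,
 * result in jiffies.
 */
static unsigned long max_pause_model(unsigned long bw, unsigned long ratelimit,
				     unsigned long bdi_dirty)
{
	unsigned long hi = ilog2_ul(bw);
	unsigned long lo = ilog2_ul(ratelimit);
	unsigned long t = HZ / 50;	/* 20ms base for the 1-dd case */

	if (hi > lo)			/* hi - lo roughly log2(nr of dirtiers) */
		t += (hi - lo) * (20 * HZ) / 1024;

	if (bdi_dirty)			/* keep the pause well below the drain time */
		t = min_ul(t, bdi_dirty * HZ / (8 * bw + 1));

	return clamp_ul(t, 4, MAX_PAUSE);
}

int main(void)
{
	/* 1 dirtier at 100MB/s (25600 pages/s), lots of dirty pages: 20 jiffies */
	printf("%lu\n", max_pause_model(25600, 25600, 1 << 20));
	/* ~8 dirtiers sharing the same disk, ratelimit ~ bw/8: ~78 jiffies */
	printf("%lu\n", max_pause_model(25600, 3200, 1 << 20));
	/* small dirty pool (4096 pages = 16MB at 4KB pages): ~19 jiffies */
	printf("%lu\n", max_pause_model(25600, 3200, 4096));
	return 0;
}

With one dirtier the model yields the 20ms base pause; with eight dirtiers sharing the disk, dirty_ratelimit drops to roughly bw/8, so ilog2(bw) - ilog2(ratelimit) is about 3 and the max pause grows to about 78ms; and when the bdi only has a few thousand dirty pages, the bdi_dirty/(8*bw) term pulls the pause back down so the dirty pool cannot drain and leave the disk idle.
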
@@ -939,6 +939,43 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+				   unsigned long bdi_dirty)
+{
+	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long hi = ilog2(bw);
+	unsigned long lo = ilog2(bdi->dirty_ratelimit);
+	unsigned long t;
+
+	/* target for 20ms max pause on 1-dd case */
+	t = HZ / 50;
+
+	/*
+	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
+	 * overheads.
+	 *
+	 * (N * 20ms) on 2^N concurrent tasks.
+	 */
+	if (hi > lo)
+		t += (hi - lo) * (20 * HZ) / 1024;
+
+	/*
+	 * Limit pause time for small memory systems. If sleeping for too long
+	 * time, a small pool of dirty/writeback pages may go empty and disk go
+	 * idle.
+	 *
+	 * 8 serves as the safety ratio.
+	 */
+	if (bdi_dirty)
+		t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+
+	/*
+	 * The pause time will be settled within range (max_pause/4, max_pause).
+	 * Apply a minimal value of 4 to get a non-zero max_pause/4.
+	 */
+	return clamp_val(t, 4, MAX_PAUSE);
+}
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data. It looks at the number of dirty pages in the machine and will force
@@ -958,6 +995,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
 	long pause = 0;
+	long max_pause;
 	bool dirty_exceeded = false;
 	unsigned long task_ratelimit;
 	unsigned long dirty_ratelimit;
@@ -1035,18 +1073,20 @@ static void balance_dirty_pages(struct address_space *mapping,
 				    nr_dirty, bdi_thresh, bdi_dirty,
 				    start_time);
 
+		max_pause = bdi_max_pause(bdi, bdi_dirty);
+
 		dirty_ratelimit = bdi->dirty_ratelimit;
 		pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
 					       background_thresh, nr_dirty,
 					       bdi_thresh, bdi_dirty);
 		if (unlikely(pos_ratio == 0)) {
-			pause = MAX_PAUSE;
+			pause = max_pause;
 			goto pause;
 		}
 		task_ratelimit = (u64)dirty_ratelimit *
 					pos_ratio >> RATELIMIT_CALC_SHIFT;
 		pause = (HZ * pages_dirtied) / (task_ratelimit | 1);
-		pause = min_t(long, pause, MAX_PAUSE);
+		pause = min(pause, max_pause);
 
 pause:
 		__set_current_state(TASK_UNINTERRUPTIBLE);
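
The net effect in balance_dirty_pages() is that each throttled sleep is now capped by the per-bdi max_pause rather than the global MAX_PAUSE: a lone dirtier on a desktop sleeps in roughly 20ms slices for responsiveness, while a server with many concurrent dirtiers still gets pauses long enough to amortize the CPU overhead of repeated wakeups. The clamp to at least 4 jiffies keeps max_pause/4 non-zero, since (per the new comment) the settled pause time is expected to fall within (max_pause/4, max_pause).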