Commit 3a1bfe87 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] small-machine writer throttling fix

The current writer throttling in balance_dirty_pages() assumes that the
writer will be effectively throttled on request queues.

That works fine when the amount of data which can be placed into a
queue is "much less than" total memory.

But if the machine has a small amount of memory, or many disks, or has
large request queues, or large requests, it can go wrong.

For example, with mem=96m and dirty_async_ratio=15, we want to be able
to clamp dirty+writeback memory at 15 megabytes.  But it doesn't work,
because a single SCSI request queue can hold 40 megs or more.  The
heavy writer keeps on dirtying memory until that queue fills up.
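
To make the numbers concrete, here is that arithmetic as a standalone
sketch (assuming 4KB pages; the names mirror the kernel's, but this is
an illustration, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long total_pages = (96UL << 20) / 4096;  /* mem=96m, 4KB pages */
	int dirty_async_ratio = 15;
	unsigned long async_thresh;

	async_thresh = (dirty_async_ratio * total_pages) / 100;
	printf("async_thresh = %lu pages (~%lu MB)\n",
	       async_thresh, (async_thresh * 4096) >> 20);
	/*
	 * Prints 3686 pages, ~14 MB: well under the 40+ megabytes a
	 * single SCSI request queue can hold, so throttling on queue
	 * fullness alone never kicks in before the limit is blown.
	 */
	return 0;
}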

So add a test for that: if we did some writeback and we're *still*
over the dirty+writeback threshold, make the caller take an explicit
nap, waiting for some writes to terminate.  Keep doing that until the
dirty+writeback memory subsides.  (The resulting loop is re-assembled
after the diff below.)
parent 8ead40f5
@@ -327,6 +327,7 @@ writeback_inodes(struct writeback_control *wbc)
 	}
 	spin_unlock(&sb_lock);
 	spin_unlock(&inode_lock);
+	blk_run_queues();
 }
 
 /*
@@ -95,32 +95,34 @@ void balance_dirty_pages(struct address_space *mapping)
 	struct page_state ps;
 	long background_thresh;
 	long async_thresh;
-	unsigned long dirty_and_writeback;
-	struct backing_dev_info *bdi;
-
-	get_page_state(&ps);
-	dirty_and_writeback = ps.nr_dirty + ps.nr_writeback;
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
 
 	background_thresh = (dirty_background_ratio * total_pages) / 100;
 	async_thresh = (dirty_async_ratio * total_pages) / 100;
-	bdi = mapping->backing_dev_info;
 
-	if (dirty_and_writeback > async_thresh) {
+	get_page_state(&ps);
+	while (ps.nr_dirty + ps.nr_writeback > async_thresh) {
 		struct writeback_control wbc = {
 			.bdi		= bdi,
 			.sync_mode	= WB_SYNC_NONE,
 			.older_than_this = NULL,
 			.nr_to_write	= sync_writeback_pages(),
 		};
 
-		if (!dirty_exceeded)
-			dirty_exceeded = 1;
-		if (ps.nr_dirty)
-			writeback_inodes(&wbc);
+		dirty_exceeded = 1;
+		writeback_inodes(&wbc);
 		get_page_state(&ps);
-	} else {
-		if (dirty_exceeded)
-			dirty_exceeded = 0;
+		if (ps.nr_dirty + ps.nr_writeback <= async_thresh)
+			break;
+		blk_congestion_wait(WRITE, HZ/10);
 	}
+	dirty_exceeded = 0;
 
 	if (!writeback_in_progress(bdi) && ps.nr_dirty > background_thresh)
 		pdflush_operation(background_writeout, 0);
 }
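
For readability, here is the patched balance_dirty_pages() as it reads
with the hunks above applied (re-assembled from the diff; the comments
are added annotation, not part of the patch):

void balance_dirty_pages(struct address_space *mapping)
{
	struct page_state ps;
	long background_thresh;
	long async_thresh;
	struct backing_dev_info *bdi = mapping->backing_dev_info;

	background_thresh = (dirty_background_ratio * total_pages) / 100;
	async_thresh = (dirty_async_ratio * total_pages) / 100;

	get_page_state(&ps);
	while (ps.nr_dirty + ps.nr_writeback > async_thresh) {
		struct writeback_control wbc = {
			.bdi		= bdi,
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= sync_writeback_pages(),
		};

		dirty_exceeded = 1;
		writeback_inodes(&wbc);		/* push some writeback */
		get_page_state(&ps);
		if (ps.nr_dirty + ps.nr_writeback <= async_thresh)
			break;
		/*
		 * Still over the threshold: take an explicit nap until
		 * some writes terminate, then re-check.
		 */
		blk_congestion_wait(WRITE, HZ/10);
	}
	dirty_exceeded = 0;

	if (!writeback_in_progress(bdi) && ps.nr_dirty > background_thresh)
		pdflush_operation(background_writeout, 0);
}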