Commit 55b50278, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] real-time enhanced page allocator and throttling

From: Robert Love <rml@tech9.net>

- Let real-time tasks dip further into the reserves than usual in
  __alloc_pages().  There are a lot of ways to special case this.  This
  patch just cuts z->pages_low in half, before doing the incremental min
  thing, for real-time tasks.  I do not do anything in the low memory slow
  path.  We can be a _lot_ more aggressive if we want.  Right now, we just
  give real-time tasks a little help.

- Never ever call balance_dirty_pages() on a real-time task.  Where and
  how exactly we handle this is up for debate.  We could, for example,
  special case real-time tasks inside balance_dirty_pages().  This would
  allow us to perform some of the work (say, waking up pdflush) but not
  other work (say, the active throttling).  As it stands now, we do the
  per-processor accounting in balance_dirty_pages_ratelimited() but we
  never call balance_dirty_pages().  Lots of approaches work.  What we want
  to do is never engage the real-time task in forced writeback.
parent 5fc4d839
...@@ -282,6 +282,8 @@ struct signal_struct { ...@@ -282,6 +282,8 @@ struct signal_struct {
#define MAX_PRIO (MAX_RT_PRIO + 40) #define MAX_PRIO (MAX_RT_PRIO + 40)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/* /*
* Some day this will be a full-fledged user tracking system.. * Some day this will be a full-fledged user tracking system..
*/ */
......
...@@ -84,7 +84,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, ...@@ -84,7 +84,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *); void __user *, size_t *);
void page_writeback_init(void); void page_writeback_init(void);
void balance_dirty_pages(struct address_space *mapping);
void balance_dirty_pages_ratelimited(struct address_space *mapping); void balance_dirty_pages_ratelimited(struct address_space *mapping);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
......
...@@ -179,7 +179,6 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); ...@@ -179,7 +179,6 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
#define this_rq() (&__get_cpu_var(runqueues)) #define this_rq() (&__get_cpu_var(runqueues))
#define task_rq(p) cpu_rq(task_cpu(p)) #define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
/* /*
* Default context-switch locking: * Default context-switch locking:
......
...@@ -111,6 +111,7 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty) ...@@ -111,6 +111,7 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
int unmapped_ratio; int unmapped_ratio;
long background; long background;
long dirty; long dirty;
struct task_struct *tsk;
get_page_state(ps); get_page_state(ps);
...@@ -129,7 +130,8 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty) ...@@ -129,7 +130,8 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
background = (background_ratio * total_pages) / 100; background = (background_ratio * total_pages) / 100;
dirty = (dirty_ratio * total_pages) / 100; dirty = (dirty_ratio * total_pages) / 100;
if (current->flags & PF_LESS_THROTTLE) { tsk = current;
if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
background += background / 4; background += background / 4;
dirty += dirty / 4; dirty += dirty / 4;
} }
...@@ -144,7 +146,7 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty) ...@@ -144,7 +146,7 @@ get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
* If we're over `background_thresh' then pdflush is woken to perform some * If we're over `background_thresh' then pdflush is woken to perform some
* writeout. * writeout.
*/ */
void balance_dirty_pages(struct address_space *mapping) static void balance_dirty_pages(struct address_space *mapping)
{ {
struct page_state ps; struct page_state ps;
long nr_reclaimable; long nr_reclaimable;
...@@ -219,6 +221,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) ...@@ -219,6 +221,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
if (dirty_exceeded) if (dirty_exceeded)
ratelimit = 8; ratelimit = 8;
/*
* Check the rate limiting. Also, we do not want to throttle real-time
* tasks in balance_dirty_pages(). Period.
*/
if (get_cpu_var(ratelimits)++ >= ratelimit) { if (get_cpu_var(ratelimits)++ >= ratelimit) {
__get_cpu_var(ratelimits) = 0; __get_cpu_var(ratelimits) = 0;
put_cpu_var(ratelimits); put_cpu_var(ratelimits);
......
...@@ -520,7 +520,8 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold) ...@@ -520,7 +520,8 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
* *
* Herein lies the mysterious "incremental min". That's the * Herein lies the mysterious "incremental min". That's the
* *
* min += z->pages_low; * local_low = z->pages_low;
* min += local_low;
* *
* thing. The intent here is to provide additional protection to low zones for * thing. The intent here is to provide additional protection to low zones for
* allocation requests which _could_ use higher zones. So a GFP_HIGHMEM * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
...@@ -538,10 +539,11 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -538,10 +539,11 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
unsigned long min; unsigned long min;
struct zone **zones, *classzone; struct zone **zones, *classzone;
struct page *page; struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
int i; int i;
int cold; int cold;
int do_retry; int do_retry;
struct reclaim_state reclaim_state;
might_sleep_if(wait); might_sleep_if(wait);
...@@ -558,8 +560,17 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -558,8 +560,17 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
min = 1UL << order; min = 1UL << order;
for (i = 0; zones[i] != NULL; i++) { for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i]; struct zone *z = zones[i];
unsigned long local_low;
/*
* This is the fabled 'incremental min'. We let real-time tasks
* dip their real-time paws a little deeper into reserves.
*/
local_low = z->pages_low;
if (rt_task(p))
local_low >>= 1;
min += local_low;
min += z->pages_low;
if (z->free_pages >= min || if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) { (!wait && z->free_pages >= z->pages_high)) {
page = buffered_rmqueue(z, order, cold); page = buffered_rmqueue(z, order, cold);
...@@ -582,6 +593,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -582,6 +593,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
local_min = z->pages_min; local_min = z->pages_min;
if (gfp_mask & __GFP_HIGH) if (gfp_mask & __GFP_HIGH)
local_min >>= 2; local_min >>= 2;
if (rt_task(p))
local_min >>= 1;
min += local_min; min += local_min;
if (z->free_pages >= min || if (z->free_pages >= min ||
(!wait && z->free_pages >= z->pages_high)) { (!wait && z->free_pages >= z->pages_high)) {
...@@ -595,7 +608,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -595,7 +608,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
/* here we're in the low on memory slow path */ /* here we're in the low on memory slow path */
rebalance: rebalance:
if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) { if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
/* go through the zonelist yet again, ignoring mins */ /* go through the zonelist yet again, ignoring mins */
for (i = 0; zones[i] != NULL; i++) { for (i = 0; zones[i] != NULL; i++) {
struct zone *z = zones[i]; struct zone *z = zones[i];
...@@ -611,14 +624,14 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -611,14 +624,14 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
if (!wait) if (!wait)
goto nopage; goto nopage;
current->flags |= PF_MEMALLOC; p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state; p->reclaim_state = &reclaim_state;
try_to_free_pages(classzone, gfp_mask, order); try_to_free_pages(classzone, gfp_mask, order);
current->reclaim_state = NULL; p->reclaim_state = NULL;
current->flags &= ~PF_MEMALLOC; p->flags &= ~PF_MEMALLOC;
/* go through the zonelist yet one more time */ /* go through the zonelist yet one more time */
min = 1UL << order; min = 1UL << order;
...@@ -658,7 +671,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, ...@@ -658,7 +671,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
if (!(gfp_mask & __GFP_NOWARN)) { if (!(gfp_mask & __GFP_NOWARN)) {
printk("%s: page allocation failure." printk("%s: page allocation failure."
" order:%d, mode:0x%x\n", " order:%d, mode:0x%x\n",
current->comm, order, gfp_mask); p->comm, order, gfp_mask);
} }
return NULL; return NULL;
got_pg: got_pg:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment