Commit 0e1e7c7a authored by Christoph Lameter, committed by Linus Torvalds

Memoryless nodes: Use N_HIGH_MEMORY for cpusets

cpusets try to ensure that any node added to a cpuset's mems_allowed is
on-line and contains memory.  The assumption was that online nodes contained
memory.  Thus, it is possible to add memoryless nodes to a cpuset and then add
tasks to this cpuset.  This results in a continuous series of oom-kills and
an apparent system hang.

Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a.  node_memory_map] in
place of node_online_map when vetting memories.  Return error if admin
attempts to write a non-empty mems_allowed node mask containing only
memoryless-nodes.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 523b9458
...@@ -35,7 +35,8 @@ CONTENTS: ...@@ -35,7 +35,8 @@ CONTENTS:
---------------------- ----------------------
Cpusets provide a mechanism for assigning a set of CPUs and Memory Cpusets provide a mechanism for assigning a set of CPUs and Memory
Nodes to a set of tasks. Nodes to a set of tasks. In this document "Memory Node" refers to
an on-line node that contains memory.
Cpusets constrain the CPU and Memory placement of tasks to only Cpusets constrain the CPU and Memory placement of tasks to only
the resources within a task's current cpuset. They form a nested the resources within a task's current cpuset. They form a nested
...@@ -220,8 +221,8 @@ and name space for cpusets, with a minimum of additional kernel code. ...@@ -220,8 +221,8 @@ and name space for cpusets, with a minimum of additional kernel code.
The cpus and mems files in the root (top_cpuset) cpuset are The cpus and mems files in the root (top_cpuset) cpuset are
read-only. The cpus file automatically tracks the value of read-only. The cpus file automatically tracks the value of
cpu_online_map using a CPU hotplug notifier, and the mems file cpu_online_map using a CPU hotplug notifier, and the mems file
automatically tracks the value of node_online_map using the automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
cpuset_track_online_nodes() hook. nodes with memory--using the cpuset_track_online_nodes() hook.
1.4 What are exclusive cpusets ? 1.4 What are exclusive cpusets ?
......
...@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) ...@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
return node_possible_map; return node_possible_map;
} }
#define cpuset_current_mems_allowed (node_online_map) #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_init_current_mems_allowed(void) {}
static inline void cpuset_update_task_memory_state(void) {} static inline void cpuset_update_task_memory_state(void) {}
#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) #define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
......
...@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) ...@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
/* /*
* Return in *pmask the portion of a cpuset's mems_allowed that * Return in *pmask the portion of a cpuset's mems_allowed that
* are online. If none are online, walk up the cpuset hierarchy * are online, with memory. If none are online with memory, walk
* until we find one that does have some online mems. If we get * up the cpuset hierarchy until we find one that does have some
* all the way to the top and still haven't found any online mems, * online mems. If we get all the way to the top and still haven't
* return node_online_map. * found any online mems, return node_states[N_HIGH_MEMORY].
* *
* One way or another, we guarantee to return some non-empty subset * One way or another, we guarantee to return some non-empty subset
* of node_online_map. * of node_states[N_HIGH_MEMORY].
* *
* Call with callback_mutex held. * Call with callback_mutex held.
*/ */
static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{ {
while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) while (cs && !nodes_intersects(cs->mems_allowed,
node_states[N_HIGH_MEMORY]))
cs = cs->parent; cs = cs->parent;
if (cs) if (cs)
nodes_and(*pmask, cs->mems_allowed, node_online_map); nodes_and(*pmask, cs->mems_allowed,
node_states[N_HIGH_MEMORY]);
else else
*pmask = node_online_map; *pmask = node_states[N_HIGH_MEMORY];
BUG_ON(!nodes_intersects(*pmask, node_online_map)); BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
} }
/** /**
...@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf) ...@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
int fudge; int fudge;
int retval; int retval;
/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ /*
* top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
* it's read-only
*/
if (cs == &top_cpuset) if (cs == &top_cpuset)
return -EACCES; return -EACCES;
...@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf) ...@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
retval = nodelist_parse(buf, trialcs.mems_allowed); retval = nodelist_parse(buf, trialcs.mems_allowed);
if (retval < 0) if (retval < 0)
goto done; goto done;
if (!nodes_intersects(trialcs.mems_allowed,
node_states[N_HIGH_MEMORY])) {
/*
* error if only memoryless nodes specified.
*/
retval = -ENOSPC;
goto done;
}
} }
nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); /*
* Exclude memoryless nodes. We know that trialcs.mems_allowed
* contains at least one node with memory.
*/
nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
node_states[N_HIGH_MEMORY]);
oldmem = cs->mems_allowed; oldmem = cs->mems_allowed;
if (nodes_equal(oldmem, trialcs.mems_allowed)) { if (nodes_equal(oldmem, trialcs.mems_allowed)) {
retval = 0; /* Too easy - nothing to do */ retval = 0; /* Too easy - nothing to do */
...@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) ...@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
/* /*
* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
* cpu_online_map and node_online_map. Force the top cpuset to track * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
* whats online after any CPU or memory node hotplug or unplug event. * track what's online after any CPU or memory node hotplug or unplug
* event.
* *
* To ensure that we don't remove a CPU or node from the top cpuset * To ensure that we don't remove a CPU or node from the top cpuset
* that is currently in use by a child cpuset (which would violate * that is currently in use by a child cpuset (which would violate
...@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void) ...@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void)
guarantee_online_cpus_mems_in_subtree(&top_cpuset); guarantee_online_cpus_mems_in_subtree(&top_cpuset);
top_cpuset.cpus_allowed = cpu_online_map; top_cpuset.cpus_allowed = cpu_online_map;
top_cpuset.mems_allowed = node_online_map; top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
mutex_unlock(&manage_mutex); mutex_unlock(&manage_mutex);
...@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, ...@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
/* /*
* Keep top_cpuset.mems_allowed tracking node_online_map. * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
* Call this routine anytime after you change node_online_map. * Call this routine anytime after you change
* node_states[N_HIGH_MEMORY].
* See also the previous routine cpuset_handle_cpuhp(). * See also the previous routine cpuset_handle_cpuhp().
*/ */
...@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void) ...@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void)
void __init cpuset_init_smp(void) void __init cpuset_init_smp(void)
{ {
top_cpuset.cpus_allowed = cpu_online_map; top_cpuset.cpus_allowed = cpu_online_map;
top_cpuset.mems_allowed = node_online_map; top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_handle_cpuhp, 0); hotcpu_notifier(cpuset_handle_cpuhp, 0);
} }
...@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void) ...@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void)
* *
* Description: Returns the nodemask_t mems_allowed of the cpuset * Description: Returns the nodemask_t mems_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty * attached to the specified @tsk. Guaranteed to return some non-empty
* subset of node_online_map, even if this means going outside the * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
* task's cpuset. * task's cpuset.
**/ **/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment