Commit 2756d373 authored by Linus Torvalds

Merge branch 'for-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup update from Tejun Heo:
 "cpuset got simplified a bit.  cgroup core got a fix on unified
  hierarchy and grew some effective css related interfaces which will be
  used for blkio support for writeback IO traffic which is currently
  being worked on"

* 'for-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: implement cgroup_get_e_css()
  cgroup: add cgroup_subsys->css_e_css_changed()
  cgroup: add cgroup_subsys->css_released()
  cgroup: fix the async css offline wait logic in cgroup_subtree_control_write()
  cgroup: restructure child_subsys_mask handling in cgroup_subtree_control_write()
  cgroup: separate out cgroup_calc_child_subsys_mask() from cgroup_refresh_child_subsys_mask()
  cpuset: lock vs unlock typo
  cpuset: simplify cpuset_node_allowed API
  cpuset: convert callback_mutex to a spinlock
parents 4e8790f7 eeecbd19
...@@ -638,8 +638,10 @@ struct cgroup_subsys { ...@@ -638,8 +638,10 @@ struct cgroup_subsys {
struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
int (*css_online)(struct cgroup_subsys_state *css); int (*css_online)(struct cgroup_subsys_state *css);
void (*css_offline)(struct cgroup_subsys_state *css); void (*css_offline)(struct cgroup_subsys_state *css);
void (*css_released)(struct cgroup_subsys_state *css);
void (*css_free)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css);
void (*css_reset)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css);
void (*css_e_css_changed)(struct cgroup_subsys_state *css);
int (*can_attach)(struct cgroup_subsys_state *css, int (*can_attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset); struct cgroup_taskset *tset);
...@@ -934,6 +936,8 @@ void css_task_iter_end(struct css_task_iter *it); ...@@ -934,6 +936,8 @@ void css_task_iter_end(struct css_task_iter *it);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss); struct cgroup_subsys *ss);
......
...@@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); ...@@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void); void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask); extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
{ {
return nr_cpusets() <= 1 || return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
__cpuset_node_allowed_softwall(node, gfp_mask);
} }
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{ {
return nr_cpusets() <= 1 || return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
__cpuset_node_allowed_hardwall(node, gfp_mask);
}
static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{
return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask);
}
static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
{
return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
} }
extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
...@@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) ...@@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
return 1; return 1;
} }
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
{
return 1;
}
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
return 1;
}
static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{ {
return 1; return 1;
} }
static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{ {
return 1; return 1;
} }
......
...@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, ...@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
if (!(cgrp->root->subsys_mask & (1 << ss->id))) if (!(cgrp->root->subsys_mask & (1 << ss->id)))
return NULL; return NULL;
/*
* This function is used while updating css associations and thus
* can't test the csses directly. Use ->child_subsys_mask.
*/
while (cgroup_parent(cgrp) && while (cgroup_parent(cgrp) &&
!(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
cgrp = cgroup_parent(cgrp); cgrp = cgroup_parent(cgrp);
...@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, ...@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
return cgroup_css(cgrp, ss); return cgroup_css(cgrp, ss);
} }
/**
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest
*
* Find and get the effective css of @cgrp for @ss. The effective css is
* defined as the matching css of the nearest ancestor including self which
* has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
* the root css is returned, so this function always returns a valid css.
* The returned css must be put using css_put().
*/
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
rcu_read_lock();
do {
css = cgroup_css(cgrp, ss);
if (css && css_tryget_online(css))
goto out_unlock;
cgrp = cgroup_parent(cgrp);
} while (cgrp);
css = init_css_set.subsys[ss->id];
css_get(css);
out_unlock:
rcu_read_unlock();
return css;
}
/* convenient tests for these bits */ /* convenient tests for these bits */
static inline bool cgroup_is_dead(const struct cgroup *cgrp) static inline bool cgroup_is_dead(const struct cgroup *cgrp)
{ {
...@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp) ...@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp)
} }
/** /**
* cgroup_refresh_child_subsys_mask - update child_subsys_mask * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
* @cgrp: the target cgroup * @cgrp: the target cgroup
* @subtree_control: the new subtree_control mask to consider
* *
* On the default hierarchy, a subsystem may request other subsystems to be * On the default hierarchy, a subsystem may request other subsystems to be
* enabled together through its ->depends_on mask. In such cases, more * enabled together through its ->depends_on mask. In such cases, more
* subsystems than specified in "cgroup.subtree_control" may be enabled. * subsystems than specified in "cgroup.subtree_control" may be enabled.
* *
* This function determines which subsystems need to be enabled given the * This function calculates which subsystems need to be enabled if
* current @cgrp->subtree_control and records it in * @subtree_control is to be applied to @cgrp. The returned mask is always
* @cgrp->child_subsys_mask. The resulting mask is always a superset of * a superset of @subtree_control and follows the usual hierarchy rules.
* @cgrp->subtree_control and follows the usual hierarchy rules.
*/ */
static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
unsigned int subtree_control)
{ {
struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *parent = cgroup_parent(cgrp);
unsigned int cur_ss_mask = cgrp->subtree_control; unsigned int cur_ss_mask = subtree_control;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
int ssid; int ssid;
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
if (!cgroup_on_dfl(cgrp)) { if (!cgroup_on_dfl(cgrp))
cgrp->child_subsys_mask = cur_ss_mask; return cur_ss_mask;
return;
}
while (true) { while (true) {
unsigned int new_ss_mask = cur_ss_mask; unsigned int new_ss_mask = cur_ss_mask;
...@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) ...@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
cur_ss_mask = new_ss_mask; cur_ss_mask = new_ss_mask;
} }
cgrp->child_subsys_mask = cur_ss_mask; return cur_ss_mask;
}
/**
* cgroup_refresh_child_subsys_mask - update child_subsys_mask
* @cgrp: the target cgroup
*
* Update @cgrp->child_subsys_mask according to the current
* @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
*/
static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
{
cgrp->child_subsys_mask =
cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
} }
/** /**
...@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ...@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
loff_t off) loff_t off)
{ {
unsigned int enable = 0, disable = 0; unsigned int enable = 0, disable = 0;
unsigned int css_enable, css_disable, old_ctrl, new_ctrl; unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
struct cgroup *cgrp, *child; struct cgroup *cgrp, *child;
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
char *tok; char *tok;
...@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ...@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
ret = -ENOENT; ret = -ENOENT;
goto out_unlock; goto out_unlock;
} }
/*
* @ss is already enabled through dependency and
* we'll just make it visible. Skip draining.
*/
if (cgrp->child_subsys_mask & (1 << ssid))
continue;
/*
* Because css offlining is asynchronous, userland
* might try to re-enable the same controller while
* the previous instance is still around. In such
* cases, wait till it's gone using offline_waitq.
*/
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
if (!cgroup_css(child, ss))
continue;
cgroup_get(child);
prepare_to_wait(&child->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
cgroup_kn_unlock(of->kn);
schedule();
finish_wait(&child->offline_waitq, &wait);
cgroup_put(child);
return restart_syscall();
}
} else if (disable & (1 << ssid)) { } else if (disable & (1 << ssid)) {
if (!(cgrp->subtree_control & (1 << ssid))) { if (!(cgrp->subtree_control & (1 << ssid))) {
disable &= ~(1 << ssid); disable &= ~(1 << ssid);
...@@ -2758,18 +2777,47 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ...@@ -2758,18 +2777,47 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
* subsystems than specified may need to be enabled or disabled * subsystems than specified may need to be enabled or disabled
* depending on subsystem dependencies. * depending on subsystem dependencies.
*/ */
cgrp->subtree_control |= enable; old_sc = cgrp->subtree_control;
cgrp->subtree_control &= ~disable; old_ss = cgrp->child_subsys_mask;
new_sc = (old_sc | enable) & ~disable;
new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
old_ctrl = cgrp->child_subsys_mask; css_enable = ~old_ss & new_ss;
cgroup_refresh_child_subsys_mask(cgrp); css_disable = old_ss & ~new_ss;
new_ctrl = cgrp->child_subsys_mask;
css_enable = ~old_ctrl & new_ctrl;
css_disable = old_ctrl & ~new_ctrl;
enable |= css_enable; enable |= css_enable;
disable |= css_disable; disable |= css_disable;
/*
* Because css offlining is asynchronous, userland might try to
* re-enable the same controller while the previous instance is
* still around. In such cases, wait till it's gone using
* offline_waitq.
*/
for_each_subsys(ss, ssid) {
if (!(css_enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
if (!cgroup_css(child, ss))
continue;
cgroup_get(child);
prepare_to_wait(&child->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
cgroup_kn_unlock(of->kn);
schedule();
finish_wait(&child->offline_waitq, &wait);
cgroup_put(child);
return restart_syscall();
}
}
cgrp->subtree_control = new_sc;
cgrp->child_subsys_mask = new_ss;
/* /*
* Create new csses or make the existing ones visible. A css is * Create new csses or make the existing ones visible. A css is
* created invisible if it's being implicitly enabled through * created invisible if it's being implicitly enabled through
...@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ...@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
} }
} }
/*
* The effective csses of all the descendants (excluding @cgrp) may
* have changed. Subsystems can optionally subscribe to this event
* by implementing ->css_e_css_changed() which is invoked if any of
* the effective csses seen from the css's cgroup may have changed.
*/
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
struct cgroup_subsys_state *css;
if (!ss->css_e_css_changed || !this_css)
continue;
css_for_each_descendant_pre(css, this_css)
if (css != this_css)
ss->css_e_css_changed(css);
}
kernfs_activate(cgrp->kn); kernfs_activate(cgrp->kn);
ret = 0; ret = 0;
out_unlock: out_unlock:
...@@ -2832,9 +2898,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ...@@ -2832,9 +2898,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
return ret ?: nbytes; return ret ?: nbytes;
err_undo_css: err_undo_css:
cgrp->subtree_control &= ~enable; cgrp->subtree_control = old_sc;
cgrp->subtree_control |= disable; cgrp->child_subsys_mask = old_ss;
cgroup_refresh_child_subsys_mask(cgrp);
for_each_subsys(ss, ssid) { for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid))) if (!(enable & (1 << ssid)))
...@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work) ...@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work)
if (ss) { if (ss) {
/* css release path */ /* css release path */
cgroup_idr_remove(&ss->css_idr, css->id); cgroup_idr_remove(&ss->css_idr, css->id);
if (ss->css_released)
ss->css_released(css);
} else { } else {
/* cgroup release path */ /* cgroup release path */
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
......
...@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = { ...@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
/* /*
* There are two global mutexes guarding cpuset structures - cpuset_mutex * There are two global locks guarding cpuset structures - cpuset_mutex and
* and callback_mutex. The latter may nest inside the former. We also * callback_lock. We also require taking task_lock() when dereferencing a
* require taking task_lock() when dereferencing a task's cpuset pointer. * task's cpuset pointer. See "The task_lock() exception", at the end of this
* See "The task_lock() exception", at the end of this comment. * comment.
* *
* A task must hold both mutexes to modify cpusets. If a task holds * A task must hold both locks to modify cpusets. If a task holds
* cpuset_mutex, then it blocks others wanting that mutex, ensuring that it * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
* is the only task able to also acquire callback_mutex and be able to * is the only task able to also acquire callback_lock and be able to
* modify cpusets. It can perform various checks on the cpuset structure * modify cpusets. It can perform various checks on the cpuset structure
* first, knowing nothing will change. It can also allocate memory while * first, knowing nothing will change. It can also allocate memory while
* just holding cpuset_mutex. While it is performing these checks, various * just holding cpuset_mutex. While it is performing these checks, various
* callback routines can briefly acquire callback_mutex to query cpusets. * callback routines can briefly acquire callback_lock to query cpusets.
* Once it is ready to make the changes, it takes callback_mutex, blocking * Once it is ready to make the changes, it takes callback_lock, blocking
* everyone else. * everyone else.
* *
* Calls to the kernel memory allocator can not be made while holding * Calls to the kernel memory allocator can not be made while holding
* callback_mutex, as that would risk double tripping on callback_mutex * callback_lock, as that would risk double tripping on callback_lock
* from one of the callbacks into the cpuset code from within * from one of the callbacks into the cpuset code from within
* __alloc_pages(). * __alloc_pages().
* *
* If a task is only holding callback_mutex, then it has read-only * If a task is only holding callback_lock, then it has read-only
* access to cpusets. * access to cpusets.
* *
* Now, the task_struct fields mems_allowed and mempolicy may be changed * Now, the task_struct fields mems_allowed and mempolicy may be changed
* by other task, we use alloc_lock in the task_struct fields to protect * by other task, we use alloc_lock in the task_struct fields to protect
* them. * them.
* *
* The cpuset_common_file_read() handlers only hold callback_mutex across * The cpuset_common_file_read() handlers only hold callback_lock across
* small pieces of code, such as when reading out possibly multi-word * small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks. * cpumasks and nodemasks.
* *
...@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = { ...@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
*/ */
static DEFINE_MUTEX(cpuset_mutex); static DEFINE_MUTEX(cpuset_mutex);
static DEFINE_MUTEX(callback_mutex); static DEFINE_SPINLOCK(callback_lock);
/* /*
* CPU / memory hotplug is handled asynchronously. * CPU / memory hotplug is handled asynchronously.
...@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = { ...@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
* One way or another, we guarantee to return some non-empty subset * One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask. * of cpu_online_mask.
* *
* Call with callback_mutex held. * Call with callback_lock or cpuset_mutex held.
*/ */
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{ {
...@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) ...@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* One way or another, we guarantee to return some non-empty subset * One way or another, we guarantee to return some non-empty subset
* of node_states[N_MEMORY]. * of node_states[N_MEMORY].
* *
* Call with callback_mutex held. * Call with callback_lock or cpuset_mutex held.
*/ */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{ {
...@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) ...@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/* /*
* update task's spread flag if cpuset's page/slab spread flag is set * update task's spread flag if cpuset's page/slab spread flag is set
* *
* Called with callback_mutex/cpuset_mutex held * Call with callback_lock or cpuset_mutex held.
*/ */
static void cpuset_update_task_spread_flag(struct cpuset *cs, static void cpuset_update_task_spread_flag(struct cpuset *cs,
struct task_struct *tsk) struct task_struct *tsk)
...@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) ...@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cpumask_copy(cp->effective_cpus, new_cpus); cpumask_copy(cp->effective_cpus, new_cpus);
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
...@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (retval < 0) if (retval < 0)
return retval; return retval;
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
/* use trialcs->cpus_allowed as a temp variable */ /* use trialcs->cpus_allowed as a temp variable */
update_cpumasks_hier(cs, trialcs->cpus_allowed); update_cpumasks_hier(cs, trialcs->cpus_allowed);
...@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) ...@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cp->effective_mems = *new_mems; cp->effective_mems = *new_mems;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
!nodes_equal(cp->mems_allowed, cp->effective_mems)); !nodes_equal(cp->mems_allowed, cp->effective_mems));
...@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) ...@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
* mempolicies and if the cpuset is marked 'memory_migrate', * mempolicies and if the cpuset is marked 'memory_migrate',
* migrate the tasks pages to the new memory. * migrate the tasks pages to the new memory.
* *
* Call with cpuset_mutex held. May take callback_mutex during call. * Call with cpuset_mutex held. May take callback_lock during call.
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
* lock each such tasks mm->mmap_sem, scan its vma's and rebind * lock each such tasks mm->mmap_sem, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed. * their mempolicies to the cpusets new mems_allowed.
...@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
if (retval < 0) if (retval < 0)
goto done; goto done;
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cs->mems_allowed = trialcs->mems_allowed; cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
/* use trialcs->mems_allowed as a temp variable */ /* use trialcs->mems_allowed as a temp variable */
update_nodemasks_hier(cs, &cs->mems_allowed); update_nodemasks_hier(cs, &cs->mems_allowed);
...@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, ...@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs))); || (is_spread_page(cs) != is_spread_page(trialcs)));
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cs->flags = trialcs->flags; cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
rebuild_sched_domains_locked(); rebuild_sched_domains_locked();
...@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) ...@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
count = seq_get_buf(sf, &buf); count = seq_get_buf(sf, &buf);
s = buf; s = buf;
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
switch (type) { switch (type) {
case FILE_CPULIST: case FILE_CPULIST:
...@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) ...@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
seq_commit(sf, -1); seq_commit(sf, -1);
} }
out_unlock: out_unlock:
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
return ret; return ret;
} }
...@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) ...@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cpuset_inc(); cpuset_inc();
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
if (cgroup_on_dfl(cs->css.cgroup)) { if (cgroup_on_dfl(cs->css.cgroup)) {
cpumask_copy(cs->effective_cpus, parent->effective_cpus); cpumask_copy(cs->effective_cpus, parent->effective_cpus);
cs->effective_mems = parent->effective_mems; cs->effective_mems = parent->effective_mems;
} }
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
goto out_unlock; goto out_unlock;
...@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) ...@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
} }
rcu_read_unlock(); rcu_read_unlock();
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cs->mems_allowed = parent->mems_allowed; cs->mems_allowed = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
out_unlock: out_unlock:
mutex_unlock(&cpuset_mutex); mutex_unlock(&cpuset_mutex);
return 0; return 0;
...@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) ...@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
static void cpuset_bind(struct cgroup_subsys_state *root_css) static void cpuset_bind(struct cgroup_subsys_state *root_css)
{ {
mutex_lock(&cpuset_mutex); mutex_lock(&cpuset_mutex);
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
if (cgroup_on_dfl(root_css->cgroup)) { if (cgroup_on_dfl(root_css->cgroup)) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
...@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) ...@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
top_cpuset.mems_allowed = top_cpuset.effective_mems; top_cpuset.mems_allowed = top_cpuset.effective_mems;
} }
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
mutex_unlock(&cpuset_mutex); mutex_unlock(&cpuset_mutex);
} }
...@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, ...@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
{ {
bool is_empty; bool is_empty;
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, new_cpus); cpumask_copy(cs->cpus_allowed, new_cpus);
cpumask_copy(cs->effective_cpus, new_cpus); cpumask_copy(cs->effective_cpus, new_cpus);
cs->mems_allowed = *new_mems; cs->mems_allowed = *new_mems;
cs->effective_mems = *new_mems; cs->effective_mems = *new_mems;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
/* /*
* Don't call update_tasks_cpumask() if the cpuset becomes empty, * Don't call update_tasks_cpumask() if the cpuset becomes empty,
...@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs, ...@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs,
if (nodes_empty(*new_mems)) if (nodes_empty(*new_mems))
*new_mems = parent_cs(cs)->effective_mems; *new_mems = parent_cs(cs)->effective_mems;
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
cpumask_copy(cs->effective_cpus, new_cpus); cpumask_copy(cs->effective_cpus, new_cpus);
cs->effective_mems = *new_mems; cs->effective_mems = *new_mems;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
if (cpus_updated) if (cpus_updated)
update_tasks_cpumask(cs); update_tasks_cpumask(cs);
...@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* synchronize cpus_allowed to cpu_active_mask */ /* synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) { if (cpus_updated) {
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
if (!on_dfl) if (!on_dfl)
cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
cpumask_copy(top_cpuset.effective_cpus, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
/* we don't mess with cpumasks of tasks in top_cpuset */ /* we don't mess with cpumasks of tasks in top_cpuset */
} }
/* synchronize mems_allowed to N_MEMORY */ /* synchronize mems_allowed to N_MEMORY */
if (mems_updated) { if (mems_updated) {
mutex_lock(&callback_mutex); spin_lock_irq(&callback_lock);
if (!on_dfl) if (!on_dfl)
top_cpuset.mems_allowed = new_mems; top_cpuset.mems_allowed = new_mems;
top_cpuset.effective_mems = new_mems; top_cpuset.effective_mems = new_mems;
mutex_unlock(&callback_mutex); spin_unlock_irq(&callback_lock);
update_tasks_nodemask(&top_cpuset); update_tasks_nodemask(&top_cpuset);
} }
...@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void) ...@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void)
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{ {
mutex_lock(&callback_mutex); unsigned long flags;
spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock(); rcu_read_lock();
guarantee_online_cpus(task_cs(tsk), pmask); guarantee_online_cpus(task_cs(tsk), pmask);
rcu_read_unlock(); rcu_read_unlock();
mutex_unlock(&callback_mutex); spin_unlock_irqrestore(&callback_lock, flags);
} }
void cpuset_cpus_allowed_fallback(struct task_struct *tsk) void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
...@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void) ...@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void)
nodemask_t cpuset_mems_allowed(struct task_struct *tsk) nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
{ {
nodemask_t mask; nodemask_t mask;
unsigned long flags;
mutex_lock(&callback_mutex); spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock(); rcu_read_lock();
guarantee_online_mems(task_cs(tsk), &mask); guarantee_online_mems(task_cs(tsk), &mask);
rcu_read_unlock(); rcu_read_unlock();
mutex_unlock(&callback_mutex); spin_unlock_irqrestore(&callback_lock, flags);
return mask; return mask;
} }
...@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) ...@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
/* /*
* nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
* mem_hardwall ancestor to the specified cpuset. Call holding * mem_hardwall ancestor to the specified cpuset. Call holding
* callback_mutex. If no ancestor is mem_exclusive or mem_hardwall * callback_lock. If no ancestor is mem_exclusive or mem_hardwall
* (an unusual configuration), then returns the root cpuset. * (an unusual configuration), then returns the root cpuset.
*/ */
static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
...@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) ...@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
} }
/** /**
* cpuset_node_allowed_softwall - Can we allocate on a memory node? * cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node? * @node: is this an allowed node?
* @gfp_mask: memory allocation flags * @gfp_mask: memory allocation flags
* *
...@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) ...@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* flag, yes. * flag, yes.
* Otherwise, no. * Otherwise, no.
* *
* If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
* cpuset_node_allowed_hardwall(). Otherwise, cpuset_node_allowed_softwall()
* might sleep, and might allow a node from an enclosing cpuset.
*
* cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
* cpusets, and never sleeps.
*
* The __GFP_THISNODE placement logic is really handled elsewhere, * The __GFP_THISNODE placement logic is really handled elsewhere,
* by forcibly using a zonelist starting at a specified node, and by * by forcibly using a zonelist starting at a specified node, and by
* (in get_page_from_freelist()) refusing to consider the zones for * (in get_page_from_freelist()) refusing to consider the zones for
...@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) ...@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* GFP_KERNEL allocations are not so marked, so can escape to the * GFP_KERNEL allocations are not so marked, so can escape to the
* nearest enclosing hardwalled ancestor cpuset. * nearest enclosing hardwalled ancestor cpuset.
* *
* Scanning up parent cpusets requires callback_mutex. The * Scanning up parent cpusets requires callback_lock. The
* __alloc_pages() routine only calls here with __GFP_HARDWALL bit * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
* _not_ set if it's a GFP_KERNEL allocation, and all nodes in the * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
* current tasks mems_allowed came up empty on the first pass over * current tasks mems_allowed came up empty on the first pass over
* the zonelist. So only GFP_KERNEL allocations, if all nodes in the * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
* cpuset are short of memory, might require taking the callback_mutex * cpuset are short of memory, might require taking the callback_lock.
* mutex.
* *
* The first call here from mm/page_alloc:get_page_from_freelist() * The first call here from mm/page_alloc:get_page_from_freelist()
* has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
...@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) ...@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* TIF_MEMDIE - any node ok * TIF_MEMDIE - any node ok
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok. * GFP_USER - only nodes in current tasks mems allowed ok.
*
* Rule:
* Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
* pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
* the code that might scan up ancestor cpusets and sleep.
*/ */
int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) int __cpuset_node_allowed(int node, gfp_t gfp_mask)
{ {
struct cpuset *cs; /* current cpuset ancestors */ struct cpuset *cs; /* current cpuset ancestors */
int allowed; /* is allocation in zone z allowed? */ int allowed; /* is allocation in zone z allowed? */
unsigned long flags;
if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1; return 1;
might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed)) if (node_isset(node, current->mems_allowed))
return 1; return 1;
/* /*
...@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) ...@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
return 1; return 1;
/* Not hardwall and node outside mems_allowed: scan up cpusets */ /* Not hardwall and node outside mems_allowed: scan up cpusets */
mutex_lock(&callback_mutex); spin_lock_irqsave(&callback_lock, flags);
rcu_read_lock(); rcu_read_lock();
cs = nearest_hardwall_ancestor(task_cs(current)); cs = nearest_hardwall_ancestor(task_cs(current));
allowed = node_isset(node, cs->mems_allowed); allowed = node_isset(node, cs->mems_allowed);
rcu_read_unlock(); rcu_read_unlock();
mutex_unlock(&callback_mutex); spin_unlock_irqrestore(&callback_lock, flags);
return allowed; return allowed;
} }
/*
* cpuset_node_allowed_hardwall - Can we allocate on a memory node?
* @node: is this an allowed node?
* @gfp_mask: memory allocation flags
*
* If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is
* set, yes, we can always allocate. If node is in our task's mems_allowed,
* yes. If the task has been OOM killed and has access to memory reserves as
* specified by the TIF_MEMDIE flag, yes.
* Otherwise, no.
*
* The __GFP_THISNODE placement logic is really handled elsewhere,
* by forcibly using a zonelist starting at a specified node, and by
* (in get_page_from_freelist()) refusing to consider the zones for
* any node on the zonelist except the first. By the time any such
* calls get to this routine, we should just shut up and say 'yes'.
*
* Unlike the cpuset_node_allowed_softwall() variant, above,
* this variant requires that the node be in the current task's
* mems_allowed or that we're in interrupt. It does not scan up the
* cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
* It never sleeps.
*/
int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
return 1;
if (node_isset(node, current->mems_allowed))
return 1;
/*
* Allow tasks that have access to memory reserves because they have
* been OOM killed to get memory anywhere.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)))
return 1;
return 0;
}
/** /**
* cpuset_mem_spread_node() - On which node to begin search for a file page * cpuset_mem_spread_node() - On which node to begin search for a file page
* cpuset_slab_spread_node() - On which node to begin search for a slab page * cpuset_slab_spread_node() - On which node to begin search for a slab page
......
...@@ -582,7 +582,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, ...@@ -582,7 +582,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
for_each_zone_zonelist_nodemask(zone, z, zonelist, for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) { MAX_NR_ZONES - 1, nodemask) {
if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) { if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
page = dequeue_huge_page_node(h, zone_to_nid(zone)); page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) { if (page) {
if (avoid_reserve) if (avoid_reserve)
......
...@@ -233,7 +233,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist, ...@@ -233,7 +233,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
/* Check this allocation failure is caused by cpuset's wall function */ /* Check this allocation failure is caused by cpuset's wall function */
for_each_zone_zonelist_nodemask(zone, z, zonelist, for_each_zone_zonelist_nodemask(zone, z, zonelist,
high_zoneidx, nodemask) high_zoneidx, nodemask)
if (!cpuset_zone_allowed_softwall(zone, gfp_mask)) if (!cpuset_zone_allowed(zone, gfp_mask))
cpuset_limited = true; cpuset_limited = true;
if (cpuset_limited) { if (cpuset_limited) {
......
...@@ -1990,7 +1990,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, ...@@ -1990,7 +1990,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
/* /*
* Scan zonelist, looking for a zone with enough free. * Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
*/ */
for_each_zone_zonelist_nodemask(zone, z, zonelist, for_each_zone_zonelist_nodemask(zone, z, zonelist,
high_zoneidx, nodemask) { high_zoneidx, nodemask) {
...@@ -2001,7 +2001,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, ...@@ -2001,7 +2001,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
continue; continue;
if (cpusets_enabled() && if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) && (alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask)) !cpuset_zone_allowed(zone, gfp_mask))
continue; continue;
/* /*
* Distribute pages in proportion to the individual * Distribute pages in proportion to the individual
...@@ -2529,7 +2529,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) ...@@ -2529,7 +2529,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
alloc_flags |= ALLOC_HARDER; alloc_flags |= ALLOC_HARDER;
/* /*
* Ignore cpuset mems for GFP_ATOMIC rather than fail, see the * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
* comment for __cpuset_node_allowed_softwall(). * comment for __cpuset_node_allowed().
*/ */
alloc_flags &= ~ALLOC_CPUSET; alloc_flags &= ~ALLOC_CPUSET;
} else if (unlikely(rt_task(current)) && !in_interrupt()) } else if (unlikely(rt_task(current)) && !in_interrupt())
......
...@@ -3015,7 +3015,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ...@@ -3015,7 +3015,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
nid = zone_to_nid(zone); nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) && if (cpuset_zone_allowed(zone, flags | __GFP_HARDWALL) &&
get_node(cache, nid) && get_node(cache, nid) &&
get_node(cache, nid)->free_objects) { get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache, obj = ____cache_alloc_node(cache,
......
...@@ -1665,7 +1665,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, ...@@ -1665,7 +1665,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
n = get_node(s, zone_to_nid(zone)); n = get_node(s, zone_to_nid(zone));
if (n && cpuset_zone_allowed_hardwall(zone, flags) && if (n && cpuset_zone_allowed(zone,
flags | __GFP_HARDWALL) &&
n->nr_partial > s->min_partial) { n->nr_partial > s->min_partial) {
object = get_partial_node(s, n, c, flags); object = get_partial_node(s, n, c, flags);
if (object) { if (object) {
......
...@@ -2405,7 +2405,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) ...@@ -2405,7 +2405,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
* to global LRU. * to global LRU.
*/ */
if (global_reclaim(sc)) { if (global_reclaim(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) if (!cpuset_zone_allowed(zone,
GFP_KERNEL | __GFP_HARDWALL))
continue; continue;
lru_pages += zone_reclaimable_pages(zone); lru_pages += zone_reclaimable_pages(zone);
...@@ -3388,7 +3389,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) ...@@ -3388,7 +3389,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
if (!populated_zone(zone)) if (!populated_zone(zone))
return; return;
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
return; return;
pgdat = zone->zone_pgdat; pgdat = zone->zone_pgdat;
if (pgdat->kswapd_max_order < order) { if (pgdat->kswapd_max_order < order) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment