Commit a86ce680, authored by Waiman Long, committed by Tejun Heo

cgroup/cpuset: Extract out CS_CPU_EXCLUSIVE & CS_SCHED_LOAD_BALANCE handling

Extract out the setting of CS_CPU_EXCLUSIVE and CS_SCHED_LOAD_BALANCE
flags as well as the rebuilding of scheduling domains into the new
update_partition_exclusive() and update_partition_sd_lb() helper
functions to simplify the logic. The update_partition_exclusive()
helper is called mainly at the beginning of the caller, but it may be
called at the end too. The update_partition_sd_lb() helper is called
at the end of the caller.

This patch should reduce the chance that a cpuset partition will end up
in an incorrect state.
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent c8c92620
...@@ -1255,7 +1255,7 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) ...@@ -1255,7 +1255,7 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
static void compute_effective_cpumask(struct cpumask *new_cpus, static void compute_effective_cpumask(struct cpumask *new_cpus,
struct cpuset *cs, struct cpuset *parent) struct cpuset *cs, struct cpuset *parent)
{ {
if (parent->nr_subparts_cpus) { if (parent->nr_subparts_cpus && is_partition_valid(cs)) {
cpumask_or(new_cpus, parent->effective_cpus, cpumask_or(new_cpus, parent->effective_cpus,
parent->subparts_cpus); parent->subparts_cpus);
cpumask_and(new_cpus, new_cpus, cs->cpus_allowed); cpumask_and(new_cpus, new_cpus, cs->cpus_allowed);
...@@ -1277,6 +1277,50 @@ enum subparts_cmd { ...@@ -1277,6 +1277,50 @@ enum subparts_cmd {
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on); int turning_on);
/*
 * update_partition_exclusive - bring CS_CPU_EXCLUSIVE in line with a
 *				partition root state
 * @cs: cpuset whose CS_CPU_EXCLUSIVE flag is to be synchronized
 * @new_prs: partition root state being applied; a value greater than 0
 *	     requires the flag to be set, any other value requires it clear
 *
 * Nothing is done when the flag already matches what @new_prs requires.
 *
 * Return: 0 if successful, PERR_NOTEXCL if the flag could not be turned on
 */
static int update_partition_exclusive(struct cpuset *cs, int new_prs)
{
	bool want_exclusive = (new_prs > 0);
	bool has_exclusive = !!is_cpu_exclusive(cs);

	/* Flag already reflects the requested state */
	if (want_exclusive == has_exclusive)
		return 0;

	if (!want_exclusive) {
		/* Turning off CS_CPU_EXCLUSIVE will not return error */
		update_flag(CS_CPU_EXCLUSIVE, cs, 0);
		return 0;
	}

	/* Any failure to set the flag is reported as PERR_NOTEXCL */
	return update_flag(CS_CPU_EXCLUSIVE, cs, 1) ? PERR_NOTEXCL : 0;
}
/*
 * update_partition_sd_lb - update the load balance flag and/or rebuild the
 *			    scheduling domains
 * @cs: cpuset whose partition root state may have changed
 * @old_prs: partition root state of @cs before the change
 *
 * The wanted CS_SCHED_LOAD_BALANCE state depends on the current partition
 * root state of @cs:
 *  - valid partition root (state > 0): off for PRS_ISOLATED, on otherwise;
 *  - anything else (member or invalid partition): same as the parent, so
 *    that a cpuset sitting under a parent with load balancing disabled
 *    does not get load balancing switched back on behind its back.
 *
 * The scheduling domains are rebuilt when the flag is changed here, or
 * when either the old or the current state is a valid partition root.
 */
static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
{
	int new_prs = cs->partition_root_state;
	bool rebuild_domains = (new_prs > 0) || (old_prs > 0);
	bool new_lb;

	if (new_prs > 0) {
		new_lb = (new_prs != PRS_ISOLATED);
	} else {
		struct cpuset *parent = parent_cs(cs);

		/*
		 * NOTE(review): parent_cs() presumably returns NULL only for
		 * the top cpuset; keep load balancing on in that case as
		 * there is nothing to inherit from.
		 */
		new_lb = parent ? !!is_sched_load_balance(parent) : true;
	}

	if (new_lb != !!is_sched_load_balance(cs)) {
		/* A flag change always requires new scheduling domains */
		rebuild_domains = true;
		if (new_lb)
			set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
		else
			clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
	}

	if (rebuild_domains)
		rebuild_sched_domains_locked();
}
/** /**
* update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset
* @cs: The cpuset that requests change in partition root state * @cs: The cpuset that requests change in partition root state
...@@ -1336,8 +1380,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1336,8 +1380,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
return is_partition_invalid(parent) return is_partition_invalid(parent)
? PERR_INVPARENT : PERR_NOTPART; ? PERR_INVPARENT : PERR_NOTPART;
} }
if ((newmask && cpumask_empty(newmask)) || if (!newmask && cpumask_empty(cs->cpus_allowed))
(!newmask && cpumask_empty(cs->cpus_allowed)))
return PERR_CPUSEMPTY; return PERR_CPUSEMPTY;
/* /*
...@@ -1403,11 +1446,16 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1403,11 +1446,16 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
cpumask_and(tmp->addmask, newmask, parent->cpus_allowed); cpumask_and(tmp->addmask, newmask, parent->cpus_allowed);
adding = cpumask_andnot(tmp->addmask, tmp->addmask, adding = cpumask_andnot(tmp->addmask, tmp->addmask,
parent->subparts_cpus); parent->subparts_cpus);
/*
* Empty cpumask is not allowed
*/
if (cpumask_empty(newmask)) {
part_error = PERR_CPUSEMPTY;
/* /*
* Make partition invalid if parent's effective_cpus could * Make partition invalid if parent's effective_cpus could
* become empty and there are tasks in the parent. * become empty and there are tasks in the parent.
*/ */
if (adding && } else if (adding &&
cpumask_subset(parent->effective_cpus, tmp->addmask) && cpumask_subset(parent->effective_cpus, tmp->addmask) &&
!cpumask_intersects(tmp->delmask, cpu_active_mask) && !cpumask_intersects(tmp->delmask, cpu_active_mask) &&
partition_is_populated(parent, cs)) { partition_is_populated(parent, cs)) {
...@@ -1480,14 +1528,13 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1480,14 +1528,13 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
/* /*
* Transitioning between invalid to valid or vice versa may require * Transitioning between invalid to valid or vice versa may require
* changing CS_CPU_EXCLUSIVE and CS_SCHED_LOAD_BALANCE. * changing CS_CPU_EXCLUSIVE.
*/ */
if (old_prs != new_prs) { if (old_prs != new_prs) {
if (is_prs_invalid(old_prs) && !is_cpu_exclusive(cs) && int err = update_partition_exclusive(cs, new_prs);
(update_flag(CS_CPU_EXCLUSIVE, cs, 1) < 0))
return PERR_NOTEXCL; if (err)
if (is_prs_invalid(new_prs) && is_cpu_exclusive(cs)) return err;
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
} }
/* /*
...@@ -1524,15 +1571,16 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1524,15 +1571,16 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
update_tasks_cpumask(parent, tmp->addmask); update_tasks_cpumask(parent, tmp->addmask);
/* /*
* Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary. * For partcmd_update without newmask, it is being called from
* rebuild_sched_domains_locked() may be called. * cpuset_hotplug_workfn() where cpus_read_lock() wasn't taken.
* Update the load balance flag and scheduling domain if
* cpus_read_trylock() is successful.
*/ */
if (old_prs != new_prs) { if ((cmd == partcmd_update) && !newmask && cpus_read_trylock()) {
if (old_prs == PRS_ISOLATED) update_partition_sd_lb(cs, old_prs);
update_flag(CS_SCHED_LOAD_BALANCE, cs, 1); cpus_read_unlock();
else if (new_prs == PRS_ISOLATED)
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
} }
notify_partition_change(cs, old_prs); notify_partition_change(cs, old_prs);
return 0; return 0;
} }
...@@ -1766,6 +1814,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1766,6 +1814,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
int retval; int retval;
struct tmpmasks tmp; struct tmpmasks tmp;
bool invalidate = false; bool invalidate = false;
int old_prs = cs->partition_root_state;
/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
if (cs == &top_cpuset) if (cs == &top_cpuset)
...@@ -1885,6 +1934,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1885,6 +1934,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
*/ */
if (parent->child_ecpus_count) if (parent->child_ecpus_count)
update_sibling_cpumasks(parent, cs, &tmp); update_sibling_cpumasks(parent, cs, &tmp);
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains */
update_partition_sd_lb(cs, old_prs);
} }
return 0; return 0;
} }
...@@ -2261,7 +2313,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, ...@@ -2261,7 +2313,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
static int update_prstate(struct cpuset *cs, int new_prs) static int update_prstate(struct cpuset *cs, int new_prs)
{ {
int err = PERR_NONE, old_prs = cs->partition_root_state; int err = PERR_NONE, old_prs = cs->partition_root_state;
bool sched_domain_rebuilt = false;
struct cpuset *parent = parent_cs(cs); struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask; struct tmpmasks tmpmask;
...@@ -2280,45 +2331,28 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2280,45 +2331,28 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (alloc_cpumasks(NULL, &tmpmask)) if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM; return -ENOMEM;
err = update_partition_exclusive(cs, new_prs);
if (err)
goto out;
if (!old_prs) { if (!old_prs) {
/* /*
* Turning on partition root requires setting the * cpus_allowed cannot be empty.
* CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
* cannot be empty.
*/ */
if (cpumask_empty(cs->cpus_allowed)) { if (cpumask_empty(cs->cpus_allowed)) {
err = PERR_CPUSEMPTY; err = PERR_CPUSEMPTY;
goto out; goto out;
} }
err = update_flag(CS_CPU_EXCLUSIVE, cs, 1);
if (err) {
err = PERR_NOTEXCL;
goto out;
}
err = update_parent_subparts_cpumask(cs, partcmd_enable, err = update_parent_subparts_cpumask(cs, partcmd_enable,
NULL, &tmpmask); NULL, &tmpmask);
if (err) { if (err)
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
goto out; goto out;
}
if (new_prs == PRS_ISOLATED) {
/*
* Disable the load balance flag should not return an
* error unless the system is running out of memory.
*/
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
sched_domain_rebuilt = true;
}
} else if (old_prs && new_prs) { } else if (old_prs && new_prs) {
/* /*
* A change in load balance state only, no change in cpumasks. * A change in load balance state only, no change in cpumasks.
*/ */
update_flag(CS_SCHED_LOAD_BALANCE, cs, (new_prs != PRS_ISOLATED)); goto out;
sched_domain_rebuilt = true;
goto out; /* Sched domain is rebuilt in update_flag() */
} else { } else {
/* /*
* Switching back to member is always allowed even if it * Switching back to member is always allowed even if it
...@@ -2337,15 +2371,6 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2337,15 +2371,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
compute_effective_cpumask(cs->effective_cpus, cs, parent); compute_effective_cpumask(cs->effective_cpus, cs, parent);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
} }
/* Turning off CS_CPU_EXCLUSIVE will not return error */
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
if (!is_sched_load_balance(cs)) {
/* Make sure load balance is on */
update_flag(CS_SCHED_LOAD_BALANCE, cs, 1);
sched_domain_rebuilt = true;
}
} }
update_tasks_cpumask(parent, tmpmask.new_cpus); update_tasks_cpumask(parent, tmpmask.new_cpus);
...@@ -2353,18 +2378,21 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2353,18 +2378,21 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (parent->child_ecpus_count) if (parent->child_ecpus_count)
update_sibling_cpumasks(parent, cs, &tmpmask); update_sibling_cpumasks(parent, cs, &tmpmask);
if (!sched_domain_rebuilt)
rebuild_sched_domains_locked();
out: out:
/* /*
* Make partition invalid if an error happen * Make partition invalid & disable CS_CPU_EXCLUSIVE if an error
* happens.
*/ */
if (err) if (err) {
new_prs = -new_prs; new_prs = -new_prs;
update_partition_exclusive(cs, new_prs);
}
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
cs->partition_root_state = new_prs; cs->partition_root_state = new_prs;
WRITE_ONCE(cs->prs_err, err); WRITE_ONCE(cs->prs_err, err);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* /*
* Update child cpusets, if present. * Update child cpusets, if present.
* Force update if switching back to member. * Force update if switching back to member.
...@@ -2372,6 +2400,9 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2372,6 +2400,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (!list_empty(&cs->css.children)) if (!list_empty(&cs->css.children))
update_cpumasks_hier(cs, &tmpmask, !new_prs); update_cpumasks_hier(cs, &tmpmask, !new_prs);
/* Update sched domains and load balance flag */
update_partition_sd_lb(cs, old_prs);
notify_partition_change(cs, old_prs); notify_partition_change(cs, old_prs);
free_cpumasks(NULL, &tmpmask); free_cpumasks(NULL, &tmpmask);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment