Commit 2069dd75 authored by Peter Zijlstra, committed by Ingo Molnar

sched: Rewrite tg_shares_up()

By tracking a per-cpu load-avg for each cfs_rq and folding it into a
global task_group load on each tick we can rework tg_shares_up to be
strictly per-cpu.

This should improve cpu-cgroup performance for smp systems
significantly.

[ Paul: changed to use queueing cfs_rq + bug fixes ]
Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101115234937.580480400@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 48c5ccae
...@@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { } ...@@ -1885,8 +1885,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_shares_ratelimit;
extern unsigned int sysctl_sched_shares_thresh;
extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_child_runs_first;
enum sched_tunable_scaling { enum sched_tunable_scaling {
......
...@@ -253,6 +253,8 @@ struct task_group { ...@@ -253,6 +253,8 @@ struct task_group {
/* runqueue "owned" by this group on each cpu */ /* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq; struct cfs_rq **cfs_rq;
unsigned long shares; unsigned long shares;
atomic_t load_weight;
#endif #endif
#ifdef CONFIG_RT_GROUP_SCHED #ifdef CONFIG_RT_GROUP_SCHED
...@@ -359,15 +361,11 @@ struct cfs_rq { ...@@ -359,15 +361,11 @@ struct cfs_rq {
*/ */
unsigned long h_load; unsigned long h_load;
/* u64 load_avg;
* this cpu's part of tg->shares u64 load_period;
*/ u64 load_stamp;
unsigned long shares;
/* unsigned long load_contribution;
* load.weight at the time we set shares
*/
unsigned long rq_weight;
#endif #endif
#endif #endif
}; };
...@@ -806,20 +804,6 @@ late_initcall(sched_init_debug); ...@@ -806,20 +804,6 @@ late_initcall(sched_init_debug);
*/ */
const_debug unsigned int sysctl_sched_nr_migrate = 32; const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
* ratelimit for updating the group shares.
* default: 0.25ms
*/
unsigned int sysctl_sched_shares_ratelimit = 250000;
unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
/*
* Inject some fuzzyness into changing the per-cpu group shares
* this avoids remote rq-locks at the expense of fairness.
* default: 4
*/
unsigned int sysctl_sched_shares_thresh = 4;
/* /*
* period over which we average the RT time consumption, measured * period over which we average the RT time consumption, measured
* in ms. * in ms.
...@@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) ...@@ -1369,6 +1353,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
lw->inv_weight = 0; lw->inv_weight = 0;
} }
static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
lw->weight = w;
lw->inv_weight = 0;
}
/* /*
* To aid in avoiding the subversion of "niceness" due to uneven distribution * To aid in avoiding the subversion of "niceness" due to uneven distribution
* of tasks with abnormal "nice" values across CPUs the contribution that * of tasks with abnormal "nice" values across CPUs the contribution that
...@@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu) ...@@ -1557,97 +1547,44 @@ static unsigned long cpu_avg_load_per_task(int cpu)
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
static __read_mostly unsigned long __percpu *update_shares_data; static void update_cfs_load(struct cfs_rq *cfs_rq);
static void update_cfs_shares(struct cfs_rq *cfs_rq);
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
/*
* Calculate and set the cpu's group shares.
*/
static void update_group_shares_cpu(struct task_group *tg, int cpu,
unsigned long sd_shares,
unsigned long sd_rq_weight,
unsigned long *usd_rq_weight)
{
unsigned long shares, rq_weight;
int boost = 0;
rq_weight = usd_rq_weight[cpu];
if (!rq_weight) {
boost = 1;
rq_weight = NICE_0_LOAD;
}
/*
* \Sum_j shares_j * rq_weight_i
* shares_i = -----------------------------
* \Sum_j rq_weight_j
*/
shares = (sd_shares * rq_weight) / sd_rq_weight;
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
if (abs(shares - tg->se[cpu]->load.weight) >
sysctl_sched_shares_thresh) {
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
__set_se_shares(tg->se[cpu], shares);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
}
/* /*
* Re-compute the task group their per cpu shares over the given domain. * update tg->load_weight by folding this cpu's load_avg
* This needs to be done in a bottom-up fashion because the rq weight of a
* parent group depends on the shares of its child groups.
*/ */
static int tg_shares_up(struct task_group *tg, void *data) static int tg_shares_up(struct task_group *tg, void *data)
{ {
unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; long load_avg;
unsigned long *usd_rq_weight; struct cfs_rq *cfs_rq;
struct sched_domain *sd = data;
unsigned long flags; unsigned long flags;
int i; int cpu = (long)data;
struct rq *rq;
if (!tg->se[0]) if (!tg->se[cpu])
return 0; return 0;
local_irq_save(flags); rq = cpu_rq(cpu);
usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); cfs_rq = tg->cfs_rq[cpu];
for_each_cpu(i, sched_domain_span(sd)) {
weight = tg->cfs_rq[i]->load.weight;
usd_rq_weight[i] = weight;
rq_weight += weight;
/*
* If there are currently no tasks on the cpu pretend there
* is one of average load so that when a new task gets to
* run here it will not get delayed by group starvation.
*/
if (!weight)
weight = NICE_0_LOAD;
sum_weight += weight; raw_spin_lock_irqsave(&rq->lock, flags);
shares += tg->cfs_rq[i]->shares;
}
if (!rq_weight) update_rq_clock(rq);
rq_weight = sum_weight; update_cfs_load(cfs_rq);
if ((!shares && rq_weight) || shares > tg->shares) load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
shares = tg->shares; load_avg -= cfs_rq->load_contribution;
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) atomic_add(load_avg, &tg->load_weight);
shares = tg->shares; cfs_rq->load_contribution += load_avg;
for_each_cpu(i, sched_domain_span(sd)) /*
update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); * We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
update_cfs_shares(cfs_rq);
local_irq_restore(flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
return 0; return 0;
} }
...@@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data) ...@@ -1666,7 +1603,7 @@ static int tg_load_down(struct task_group *tg, void *data)
load = cpu_rq(cpu)->load.weight; load = cpu_rq(cpu)->load.weight;
} else { } else {
load = tg->parent->cfs_rq[cpu]->h_load; load = tg->parent->cfs_rq[cpu]->h_load;
load *= tg->cfs_rq[cpu]->shares; load *= tg->se[cpu]->load.weight;
load /= tg->parent->cfs_rq[cpu]->load.weight + 1; load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
} }
...@@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data) ...@@ -1675,21 +1612,16 @@ static int tg_load_down(struct task_group *tg, void *data)
return 0; return 0;
} }
static void update_shares(struct sched_domain *sd) static void update_shares(long cpu)
{ {
s64 elapsed;
u64 now;
if (root_task_group_empty()) if (root_task_group_empty())
return; return;
now = local_clock(); /*
elapsed = now - sd->last_update; * XXX: replace with an on-demand list
*/
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
sd->last_update = now;
walk_tg_tree(tg_nop, tg_shares_up, sd);
}
} }
static void update_h_load(long cpu) static void update_h_load(long cpu)
...@@ -1699,7 +1631,7 @@ static void update_h_load(long cpu) ...@@ -1699,7 +1631,7 @@ static void update_h_load(long cpu)
#else #else
static inline void update_shares(struct sched_domain *sd) static inline void update_shares(int cpu)
{ {
} }
...@@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) ...@@ -1824,15 +1756,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
#endif #endif
#ifdef CONFIG_FAIR_GROUP_SCHED
static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
{
#ifdef CONFIG_SMP
cfs_rq->shares = shares;
#endif
}
#endif
static void calc_load_account_idle(struct rq *this_rq); static void calc_load_account_idle(struct rq *this_rq);
static void update_sysctl(void); static void update_sysctl(void);
static int get_update_sysctl_factor(void); static int get_update_sysctl_factor(void);
...@@ -5551,7 +5474,6 @@ static void update_sysctl(void) ...@@ -5551,7 +5474,6 @@ static void update_sysctl(void)
SET_SYSCTL(sched_min_granularity); SET_SYSCTL(sched_min_granularity);
SET_SYSCTL(sched_latency); SET_SYSCTL(sched_latency);
SET_SYSCTL(sched_wakeup_granularity); SET_SYSCTL(sched_wakeup_granularity);
SET_SYSCTL(sched_shares_ratelimit);
#undef SET_SYSCTL #undef SET_SYSCTL
} }
...@@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, ...@@ -7787,8 +7709,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
se->cfs_rq = parent->my_q; se->cfs_rq = parent->my_q;
se->my_q = cfs_rq; se->my_q = cfs_rq;
se->load.weight = tg->shares; update_load_set(&se->load, tg->shares);
se->load.inv_weight = 0;
se->parent = parent; se->parent = parent;
} }
#endif #endif
...@@ -7881,10 +7802,6 @@ void __init sched_init(void) ...@@ -7881,10 +7802,6 @@ void __init sched_init(void)
#endif /* CONFIG_CGROUP_SCHED */ #endif /* CONFIG_CGROUP_SCHED */
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
__alignof__(unsigned long));
#endif
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
struct rq *rq; struct rq *rq;
...@@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) ...@@ -8452,8 +8369,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
if (on_rq) if (on_rq)
dequeue_entity(cfs_rq, se, 0); dequeue_entity(cfs_rq, se, 0);
se->load.weight = shares; update_load_set(&se->load, shares);
se->load.inv_weight = 0;
if (on_rq) if (on_rq)
enqueue_entity(cfs_rq, se, 0); enqueue_entity(cfs_rq, se, 0);
...@@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) ...@@ -8510,7 +8426,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
/* /*
* force a rebalance * force a rebalance
*/ */
cfs_rq_set_shares(tg->cfs_rq[i], 0);
set_se_shares(tg->se[i], shares); set_se_shares(tg->se[i], shares);
} }
......
...@@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) ...@@ -202,15 +202,22 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
spread0 = min_vruntime - rq0_min_vruntime; spread0 = min_vruntime - rq0_min_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
SPLIT_NS(spread0)); SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over); cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
SPLIT_NS(cfs_rq->load_avg));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
SPLIT_NS(cfs_rq->load_period));
SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
cfs_rq->load_contribution);
SEQ_printf(m, " .%-30s: %d\n", "load_tg",
atomic_read(&tg->load_weight));
#endif #endif
print_cfs_group_stats(m, cpu, cfs_rq->tg); print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif #endif
} }
......
...@@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write, ...@@ -417,7 +417,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
WRT_SYSCTL(sched_min_granularity); WRT_SYSCTL(sched_min_granularity);
WRT_SYSCTL(sched_latency); WRT_SYSCTL(sched_latency);
WRT_SYSCTL(sched_wakeup_granularity); WRT_SYSCTL(sched_wakeup_granularity);
WRT_SYSCTL(sched_shares_ratelimit);
#undef WRT_SYSCTL #undef WRT_SYSCTL
return 0; return 0;
...@@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) ...@@ -633,7 +632,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
list_add(&se->group_node, &cfs_rq->tasks); list_add(&se->group_node, &cfs_rq->tasks);
} }
cfs_rq->nr_running++; cfs_rq->nr_running++;
se->on_rq = 1;
} }
static void static void
...@@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) ...@@ -647,9 +645,89 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
list_del_init(&se->group_node); list_del_init(&se->group_node);
} }
cfs_rq->nr_running--; cfs_rq->nr_running--;
se->on_rq = 0;
} }
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
static void update_cfs_load(struct cfs_rq *cfs_rq)
{
u64 period = sched_avg_period();
u64 now, delta;
if (!cfs_rq)
return;
now = rq_of(cfs_rq)->clock;
delta = now - cfs_rq->load_stamp;
cfs_rq->load_stamp = now;
cfs_rq->load_period += delta;
cfs_rq->load_avg += delta * cfs_rq->load.weight;
while (cfs_rq->load_period > period) {
/*
* Inline assembly required to prevent the compiler
* optimising this loop into a divmod call.
* See __iter_div_u64_rem() for another example of this.
*/
asm("" : "+rm" (cfs_rq->load_period));
cfs_rq->load_period /= 2;
cfs_rq->load_avg /= 2;
}
}
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
if (se->on_rq)
account_entity_dequeue(cfs_rq, se);
update_load_set(&se->load, weight);
if (se->on_rq)
account_entity_enqueue(cfs_rq, se);
}
static void update_cfs_shares(struct cfs_rq *cfs_rq)
{
struct task_group *tg;
struct sched_entity *se;
long load_weight, load, shares;
if (!cfs_rq)
return;
tg = cfs_rq->tg;
se = tg->se[cpu_of(rq_of(cfs_rq))];
if (!se)
return;
load = cfs_rq->load.weight;
load_weight = atomic_read(&tg->load_weight);
load_weight -= cfs_rq->load_contribution;
load_weight += load;
shares = (tg->shares * load);
if (load_weight)
shares /= load_weight;
if (shares < MIN_SHARES)
shares = MIN_SHARES;
if (shares > tg->shares)
shares = tg->shares;
reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline void update_cfs_load(struct cfs_rq *cfs_rq)
{
}
static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{ {
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
...@@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) ...@@ -771,7 +849,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* Update run-time statistics of the 'current'. * Update run-time statistics of the 'current'.
*/ */
update_curr(cfs_rq); update_curr(cfs_rq);
update_cfs_load(cfs_rq);
account_entity_enqueue(cfs_rq, se); account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) { if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0); place_entity(cfs_rq, se, 0);
...@@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) ...@@ -782,6 +862,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
check_spread(cfs_rq, se); check_spread(cfs_rq, se);
if (se != cfs_rq->curr) if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se); __enqueue_entity(cfs_rq, se);
se->on_rq = 1;
} }
static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
...@@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) ...@@ -825,8 +906,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (se != cfs_rq->curr) if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se); __dequeue_entity(cfs_rq, se);
se->on_rq = 0;
update_cfs_load(cfs_rq);
account_entity_dequeue(cfs_rq, se); account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq); update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq);
/* /*
* Normalize the entity after updating the min_vruntime because the * Normalize the entity after updating the min_vruntime because the
...@@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -1055,6 +1139,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
flags = ENQUEUE_WAKEUP; flags = ENQUEUE_WAKEUP;
} }
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq);
update_cfs_shares(cfs_rq);
}
hrtick_update(rq); hrtick_update(rq);
} }
...@@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) ...@@ -1071,12 +1162,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) { for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se); cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags); dequeue_entity(cfs_rq, se, flags);
/* Don't dequeue parent if it has other entities besides us */ /* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) if (cfs_rq->load.weight)
break; break;
flags |= DEQUEUE_SLEEP; flags |= DEQUEUE_SLEEP;
} }
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq);
update_cfs_shares(cfs_rq);
}
hrtick_update(rq); hrtick_update(rq);
} }
...@@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) ...@@ -1143,51 +1242,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
* Adding load to a group doesn't make a group heavier, but can cause movement * Adding load to a group doesn't make a group heavier, but can cause movement
* of group shares between cpus. Assuming the shares were perfectly aligned one * of group shares between cpus. Assuming the shares were perfectly aligned one
* can calculate the shift in shares. * can calculate the shift in shares.
*
* The problem is that perfectly aligning the shares is rather expensive, hence
* we try to avoid doing that too often - see update_shares(), which ratelimits
* this change.
*
* We compensate this by not only taking the current delta into account, but
* also considering the delta between when the shares were last adjusted and
* now.
*
* We still saw a performance dip, some tracing learned us that between
* cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
* significantly. Therefore try to bias the error in direction of failing
* the affine wakeup.
*
*/ */
static long effective_load(struct task_group *tg, int cpu, static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
long wl, long wg)
{ {
struct sched_entity *se = tg->se[cpu]; struct sched_entity *se = tg->se[cpu];
if (!tg->parent) if (!tg->parent)
return wl; return wl;
/*
* By not taking the decrease of shares on the other cpu into
* account our error leans towards reducing the affine wakeups.
*/
if (!wl && sched_feat(ASYM_EFF_LOAD))
return wl;
for_each_sched_entity(se) { for_each_sched_entity(se) {
long S, rw, s, a, b; long S, rw, s, a, b;
long more_w;
/*
* Instead of using this increment, also add the difference
* between when the shares were last updated and now.
*/
more_w = se->my_q->load.weight - se->my_q->rq_weight;
wl += more_w;
wg += more_w;
S = se->my_q->tg->shares; S = se->my_q->tg->shares;
s = se->my_q->shares; s = se->load.weight;
rw = se->my_q->rq_weight; rw = se->my_q->load.weight;
a = S*(rw + wl); a = S*(rw + wl);
b = S*rw + s*wg; b = S*rw + s*wg;
...@@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ ...@@ -1508,23 +1576,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
sd = tmp; sd = tmp;
} }
#ifdef CONFIG_FAIR_GROUP_SCHED
if (sched_feat(LB_SHARES_UPDATE)) {
/*
* Pick the largest domain to update shares over
*/
tmp = sd;
if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
tmp = affine_sd;
if (tmp) {
raw_spin_unlock(&rq->lock);
update_shares(tmp);
raw_spin_lock(&rq->lock);
}
}
#endif
if (affine_sd) { if (affine_sd) {
if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
return select_idle_sibling(p, cpu); return select_idle_sibling(p, cpu);
...@@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -3014,7 +3065,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_inc(sd, lb_count[idle]); schedstat_inc(sd, lb_count[idle]);
redo: redo:
update_shares(sd);
group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
cpus, balance); cpus, balance);
...@@ -3156,8 +3206,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -3156,8 +3206,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
else else
ld_moved = 0; ld_moved = 0;
out: out:
if (ld_moved)
update_shares(sd);
return ld_moved; return ld_moved;
} }
...@@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) ...@@ -3549,6 +3597,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
int update_next_balance = 0; int update_next_balance = 0;
int need_serialize; int need_serialize;
update_shares(cpu);
for_each_domain(cpu, sd) { for_each_domain(cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE)) if (!(sd->flags & SD_LOAD_BALANCE))
continue; continue;
......
...@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0) ...@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
SCHED_FEAT(HRTICK, 0) SCHED_FEAT(HRTICK, 0)
SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(DOUBLE_TICK, 0)
SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_SHARES_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
/* /*
* Spin-wait on mutex acquisition when the mutex owner is running on * Spin-wait on mutex acquisition when the mutex owner is running on
......
...@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */ ...@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
static int min_sched_shares_ratelimit = 100000; /* 100 usec */
static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
#endif #endif
#ifdef CONFIG_COMPACTION #ifdef CONFIG_COMPACTION
...@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = { ...@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_wakeup_granularity_ns, .extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns,
}, },
{
.procname = "sched_shares_ratelimit",
.data = &sysctl_sched_shares_ratelimit,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_shares_ratelimit,
.extra2 = &max_sched_shares_ratelimit,
},
{ {
.procname = "sched_tunable_scaling", .procname = "sched_tunable_scaling",
.data = &sysctl_sched_tunable_scaling, .data = &sysctl_sched_tunable_scaling,
...@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = { ...@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_sched_tunable_scaling, .extra1 = &min_sched_tunable_scaling,
.extra2 = &max_sched_tunable_scaling, .extra2 = &max_sched_tunable_scaling,
}, },
{
.procname = "sched_shares_thresh",
.data = &sysctl_sched_shares_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
{ {
.procname = "sched_migration_cost", .procname = "sched_migration_cost",
.data = &sysctl_sched_migration_cost, .data = &sysctl_sched_migration_cost,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment