Commit 26762423 authored by Peng Liu, committed by Peter Zijlstra

sched/deadline: Optimize sched_dl_global_validate()

Under CONFIG_SMP, dl_bw is per root domain, not per CPU. When checking or
updating dl_bw, iterating over every CPU is therefore overkill; it is enough
to visit each root domain once.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Peng Liu <iwtbavbm@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lkml.kernel.org/r/78d21ee792cc48ff79e8cd62a5f26208463684d6.1602171061.git.iwtbavbm@gmail.com
parent b9c88f75
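
For illustration, a minimal standalone sketch of the generation-counter
pattern the patch relies on (the names demo_domain, cpu_dom, dom_visited and
NR_CPUS_DEMO are invented for this example and are not part of the patch):
several CPUs share one domain, so bumping a global generation once per scan
and stamping each domain on its first visit lets a per-CPU loop do the real
work only once per domain.

	/* Illustrative sketch only; not kernel code. */
	#include <stdbool.h>
	#include <stdio.h>

	#define NR_CPUS_DEMO 8

	struct demo_domain {
		unsigned long long visit_gen;	/* last generation that visited this domain */
	};

	static struct demo_domain doms[2];			/* two shared domains */
	static struct demo_domain *cpu_dom[NR_CPUS_DEMO];	/* per-CPU domain pointer */
	static unsigned long long generation;			/* bumped once per scan */

	static bool dom_visited(int cpu, unsigned long long gen)
	{
		struct demo_domain *d = cpu_dom[cpu];

		if (d->visit_gen == gen)
			return true;		/* already handled in this scan */

		d->visit_gen = gen;		/* mark first visit of this scan */
		return false;
	}

	int main(void)
	{
		unsigned long long gen;
		int cpu;

		/* CPUs 0-3 share doms[0], CPUs 4-7 share doms[1]. */
		for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
			cpu_dom[cpu] = &doms[cpu / 4];

		gen = ++generation;
		for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
			if (dom_visited(cpu, gen))
				continue;	/* domain already processed */
			printf("cpu %d: first visit of its domain this scan\n", cpu);
		}
		return 0;
	}

Only CPUs 0 and 4 print, i.e. the per-domain work runs twice instead of
eight times, which is the effect the patch achieves per root domain.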
@@ -97,6 +97,17 @@ static inline unsigned long dl_bw_capacity(int i)
 		return __dl_bw_capacity(i);
 	}
 }
+
+static inline bool dl_bw_visited(int cpu, u64 gen)
+{
+	struct root_domain *rd = cpu_rq(cpu)->rd;
+
+	if (rd->visit_gen == gen)
+		return true;
+
+	rd->visit_gen = gen;
+	return false;
+}
 #else
 static inline struct dl_bw *dl_bw_of(int i)
 {
@@ -112,6 +123,11 @@ static inline unsigned long dl_bw_capacity(int i)
 {
 	return SCHED_CAPACITY_SCALE;
 }
+
+static inline bool dl_bw_visited(int cpu, u64 gen)
+{
+	return false;
+}
 #endif

 static inline
@@ -2535,11 +2551,15 @@ const struct sched_class dl_sched_class
 	.update_curr		= update_curr_dl,
 };

+/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
+static u64 dl_generation;
+
 int sched_dl_global_validate(void)
 {
 	u64 runtime = global_rt_runtime();
 	u64 period = global_rt_period();
 	u64 new_bw = to_ratio(period, runtime);
+	u64 gen = ++dl_generation;
 	struct dl_bw *dl_b;
 	int cpu, ret = 0;
 	unsigned long flags;
@@ -2548,13 +2568,13 @@ int sched_dl_global_validate(void)
 	 * Here we want to check the bandwidth not being set to some
 	 * value smaller than the currently allocated bandwidth in
 	 * any of the root_domains.
-	 *
-	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than
-	 * cycling on root_domains... Discussion on different/better
-	 * solutions is welcome!
 	 */
 	for_each_possible_cpu(cpu) {
 		rcu_read_lock_sched();
+
+		if (dl_bw_visited(cpu, gen))
+			goto next;
+
 		dl_b = dl_bw_of(cpu);
 		raw_spin_lock_irqsave(&dl_b->lock, flags);
@@ -2562,6 +2582,7 @@ int sched_dl_global_validate(void)
 			ret = -EBUSY;
 		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

+next:
 		rcu_read_unlock_sched();

 		if (ret)
@@ -2587,6 +2608,7 @@ static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
 void sched_dl_do_global(void)
 {
 	u64 new_bw = -1;
+	u64 gen = ++dl_generation;
 	struct dl_bw *dl_b;
 	int cpu;
 	unsigned long flags;
@@ -2597,11 +2619,14 @@ void sched_dl_do_global(void)
 	if (global_rt_runtime() != RUNTIME_INF)
 		new_bw = to_ratio(global_rt_period(), global_rt_runtime());

-	/*
-	 * FIXME: As above...
-	 */
 	for_each_possible_cpu(cpu) {
 		rcu_read_lock_sched();
+
+		if (dl_bw_visited(cpu, gen)) {
+			rcu_read_unlock_sched();
+			continue;
+		}
+
 		dl_b = dl_bw_of(cpu);
 		raw_spin_lock_irqsave(&dl_b->lock, flags);
......
@@ -801,6 +801,15 @@ struct root_domain {
 	struct dl_bw		dl_bw;
 	struct cpudl		cpudl;

+	/*
+	 * Indicates whether a root_domain's dl_bw has been checked or
+	 * updated. It is a monotonically increasing value.
+	 *
+	 * Corner cases such as wrap-around could be dangerous, but a u64
+	 * is big enough that this is not a practical concern.
+	 */
+	u64 visit_gen;
+
 #ifdef HAVE_RT_PUSH_IPI
 	/*
 	 * For IPI pull requests, loop across the rto_mask.
......
@@ -516,6 +516,7 @@ static int init_rootdomain(struct root_domain *rd)
 	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
 #endif

+	rd->visit_gen = 0;
 	init_dl_bw(&rd->dl_bw);
 	if (cpudl_init(&rd->cpudl) != 0)
 		goto free_rto_mask;
......