Commit 36534698 authored by Linus Torvalds

Merge tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

 - Fix a performance regression when measuring the CPU time of a thread
   (clock_gettime(CLOCK_THREAD_CPUTIME_ID,...)) due to the addition of
   PSI IRQ time accounting in the hotpath

 - Fix a task_struct leak caused by a missing refcount decrement when
   the task is enqueued before the timer that is supposed to drop the
   reference expires

 - Revert an attempt to expedite detaching of movable tasks, as finding
   those could become very costly. Turns out the original issue wasn't
   even hit by anyone

* tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
  sched/deadline: Fix task_struct reference leak
  Revert "sched/fair: Make sure to try to detach at least one movable task"
parents 35ce4632 ddae0ca2
...@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) ...@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta; rq->prev_irq_time += irq_delta;
delta -= irq_delta; delta -= irq_delta;
psi_account_irqtime(rq->curr, irq_delta);
delayacct_irq(rq->curr, irq_delta); delayacct_irq(rq->curr, irq_delta);
#endif #endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
...@@ -5665,7 +5664,7 @@ void sched_tick(void) ...@@ -5665,7 +5664,7 @@ void sched_tick(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu); struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr; struct task_struct *curr;
struct rq_flags rf; struct rq_flags rf;
unsigned long hw_pressure; unsigned long hw_pressure;
u64 resched_latency; u64 resched_latency;
...@@ -5677,6 +5676,9 @@ void sched_tick(void) ...@@ -5677,6 +5676,9 @@ void sched_tick(void)
rq_lock(rq, &rf); rq_lock(rq, &rf);
curr = rq->curr;
psi_account_irqtime(rq, curr, NULL);
update_rq_clock(rq); update_rq_clock(rq);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
...@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) ...@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
++*switch_count; ++*switch_count;
migrate_disable_switch(rq, prev); migrate_disable_switch(rq, prev);
psi_account_irqtime(rq, prev, next);
psi_sched_switch(prev, next, !task_on_rq_queued(prev)); psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
......
...@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) ...@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* The replenish timer needs to be canceled. No * The replenish timer needs to be canceled. No
* problem if it fires concurrently: boosted threads * problem if it fires concurrently: boosted threads
* are ignored in dl_task_timer(). * are ignored in dl_task_timer().
*
* If the timer callback was running (hrtimer_try_to_cancel == -1),
* it will eventually call put_task_struct().
*/ */
hrtimer_try_to_cancel(&p->dl.dl_timer); if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
!dl_server(&p->dl))
put_task_struct(p);
p->dl.dl_throttled = 0; p->dl.dl_throttled = 0;
} }
} else if (!dl_prio(p->normal_prio)) { } else if (!dl_prio(p->normal_prio)) {
......
...@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env) ...@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env)
break; break;
env->loop++; env->loop++;
/* /* We've more or less seen every task there is, call it quits */
* We've more or less seen every task there is, call it quits if (env->loop > env->loop_max)
* unless we haven't found any movable task yet.
*/
if (env->loop > env->loop_max &&
!(env->flags & LBF_ALL_PINNED))
break; break;
/* take a breather every nr_migrate tasks */ /* take a breather every nr_migrate tasks */
...@@ -11393,9 +11389,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq, ...@@ -11393,9 +11389,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
if (env.flags & LBF_NEED_BREAK) { if (env.flags & LBF_NEED_BREAK) {
env.flags &= ~LBF_NEED_BREAK; env.flags &= ~LBF_NEED_BREAK;
/* Stop if we tried all running tasks */ goto more_balance;
if (env.loop < busiest->nr_running)
goto more_balance;
} }
/* /*
......
...@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu, ...@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
enum psi_states s; enum psi_states s;
u32 state_mask; u32 state_mask;
lockdep_assert_rq_held(cpu_rq(cpu));
groupc = per_cpu_ptr(group->pcpu, cpu); groupc = per_cpu_ptr(group->pcpu, cpu);
/* /*
...@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, ...@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
} }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
void psi_account_irqtime(struct task_struct *task, u32 delta) void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
{ {
int cpu = task_cpu(task); int cpu = task_cpu(curr);
struct psi_group *group; struct psi_group *group;
struct psi_group_cpu *groupc; struct psi_group_cpu *groupc;
u64 now; u64 now, irq;
s64 delta;
if (static_branch_likely(&psi_disabled)) if (static_branch_likely(&psi_disabled))
return; return;
if (!task->pid) if (!curr->pid)
return;
lockdep_assert_rq_held(rq);
group = task_psi_group(curr);
if (prev && task_psi_group(prev) == group)
return; return;
now = cpu_clock(cpu); now = cpu_clock(cpu);
irq = irq_time_read(cpu);
delta = (s64)(irq - rq->psi_irq_time);
if (delta < 0)
return;
rq->psi_irq_time = irq;
group = task_psi_group(task);
do { do {
if (!group->enabled) if (!group->enabled)
continue; continue;
......
...@@ -1126,6 +1126,7 @@ struct rq { ...@@ -1126,6 +1126,7 @@ struct rq {
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time; u64 prev_irq_time;
u64 psi_irq_time;
#endif #endif
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
u64 prev_steal_time; u64 prev_steal_time;
......
...@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se) ...@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se)
void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next, void psi_task_switch(struct task_struct *prev, struct task_struct *next,
bool sleep); bool sleep);
void psi_account_irqtime(struct task_struct *task, u32 delta); #ifdef CONFIG_IRQ_TIME_ACCOUNTING
void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
#else
static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
struct task_struct *prev) {}
#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
/* /*
* PSI tracks state that persists across sleeps, such as iowaits and * PSI tracks state that persists across sleeps, such as iowaits and
* memory stalls. As a result, it has to distinguish between sleeps, * memory stalls. As a result, it has to distinguish between sleeps,
...@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {} ...@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev, static inline void psi_sched_switch(struct task_struct *prev,
struct task_struct *next, struct task_struct *next,
bool sleep) {} bool sleep) {}
static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {} static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
struct task_struct *prev) {}
#endif /* CONFIG_PSI */ #endif /* CONFIG_PSI */
#ifdef CONFIG_SCHED_INFO #ifdef CONFIG_SCHED_INFO
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment