Commit 3ccfebed authored by Mathieu Desnoyers, committed by Ingo Molnar

powerpc, membarrier: Skip memory barrier in switch_mm()

Allow PowerPC to skip the full memory barrier in switch_mm(), and
only issue the barrier when scheduling into a task belonging to a
process that has registered to use expedited private.

Threads targeting the same VM but belonging to different thread
groups are a tricky case. This has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
instead to skip the synchronize_sched() for cases where the VM only has
a single user, and that user only has a single thread.

It also turns out that we cannot use for_each_thread() to set
thread flags in all threads using a VM, as it only iterates on the
thread group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and
only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY which allows
private expedited membarrier commands to succeed.
membarrier_arch_switch_mm() now tests for the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 667ca1ec
...@@ -8944,6 +8944,7 @@ L: linux-kernel@vger.kernel.org ...@@ -8944,6 +8944,7 @@ L: linux-kernel@vger.kernel.org
S: Supported S: Supported
F: kernel/sched/membarrier.c F: kernel/sched/membarrier.c
F: include/uapi/linux/membarrier.h F: include/uapi/linux/membarrier.h
F: arch/powerpc/include/asm/membarrier.h
MEMORY MANAGEMENT MEMORY MANAGEMENT
L: linux-mm@kvack.org L: linux-mm@kvack.org
......
...@@ -140,6 +140,7 @@ config PPC ...@@ -140,6 +140,7 @@ config PPC
select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
......
#ifndef _ASM_POWERPC_MEMBARRIER_H
#define _ASM_POWERPC_MEMBARRIER_H
/*
 * membarrier_arch_switch_mm() - powerpc membarrier hook for switch_mm().
 * @prev: mm being switched away from; when NULL no barrier is issued.
 * @next: mm being switched to; its membarrier_state is tested.
 * @tsk:  task being scheduled in (not used by this implementation).
 *
 * Issues smp_mb() only when @prev is non-NULL and @next has the
 * MEMBARRIER_STATE_PRIVATE_EXPEDITED bit set in its membarrier_state.
 * In every other case the function returns without a barrier.
 */
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
	int state = atomic_read(&next->membarrier_state);

	/*
	 * The full barrier is only required when switching between
	 * processes:
	 *  - kernel -> userspace: the barrier is implied by mmdrop(),
	 *    so none is needed here;
	 *  - userspace -> kernel: no barrier is needed after the store
	 *    to rq->curr.
	 * Not needing the barrier is the expected (fast) path.
	 */
	if (likely(!(state & MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
		return;

	/*
	 * membarrier() needs a full memory barrier after the store to
	 * rq->curr and before returning to user-space.
	 */
	smp_mb();
}
#endif /* _ASM_POWERPC_MEMBARRIER_H */
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/sched/mm.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
...@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* *
* On the read side the barrier is in pte_xchg(), which orders * On the read side the barrier is in pte_xchg(), which orders
* the store to the PTE vs the load of mm_cpumask. * the store to the PTE vs the load of mm_cpumask.
*
* This full barrier is needed by membarrier when switching
* between processes after store to rq->curr, before user-space
* memory accesses.
*/ */
smp_mb(); smp_mb();
...@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (new_on_cpu) if (new_on_cpu)
radix_kvm_prefetch_workaround(next); radix_kvm_prefetch_workaround(next);
else
membarrier_arch_switch_mm(prev, next, tsk);
/* /*
* The actual HW switching method differs between the various * The actual HW switching method differs between the various
......
...@@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) ...@@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
#ifdef CONFIG_MEMBARRIER #ifdef CONFIG_MEMBARRIER
enum { enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
MEMBARRIER_STATE_SWITCH_MM = (1U << 1), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1),
}; };
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif
static inline void membarrier_execve(struct task_struct *t) static inline void membarrier_execve(struct task_struct *t)
{ {
atomic_set(&t->mm->membarrier_state, 0); atomic_set(&t->mm->membarrier_state, 0);
} }
#else #else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
/*
 * Stub used when CONFIG_MEMBARRIER is disabled but the architecture
 * selects ARCH_HAS_MEMBARRIER_CALLBACKS: callers of the hook still
 * compile, and the call does nothing.
 */
static inline void
membarrier_arch_switch_mm(struct mm_struct *prev, struct mm_struct *next,
			  struct task_struct *tsk)
{
	/* Intentionally empty: no membarrier state to honour. */
}
#endif
static inline void membarrier_execve(struct task_struct *t) static inline void membarrier_execve(struct task_struct *t)
{ {
} }
......
...@@ -1412,6 +1412,9 @@ config USERFAULTFD ...@@ -1412,6 +1412,9 @@ config USERFAULTFD
Enable the userfaultfd() system call that allows to intercept and Enable the userfaultfd() system call that allows to intercept and
handle page faults in userland. handle page faults in userland.
config ARCH_HAS_MEMBARRIER_CALLBACKS
bool
config EMBEDDED config EMBEDDED
bool "Embedded system" bool "Embedded system"
option allnoconfig_y option allnoconfig_y
......
...@@ -2698,16 +2698,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -2698,16 +2698,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
prev_state = prev->state; prev_state = prev->state;
vtime_task_switch(prev); vtime_task_switch(prev);
perf_event_task_sched_in(prev, current); perf_event_task_sched_in(prev, current);
/*
* The membarrier system call requires a full memory barrier
* after storing to rq->curr, before going back to user-space.
*
* TODO: This smp_mb__after_unlock_lock can go away if PPC end
* up adding a full barrier to switch_mm(), or we should figure
* out if a smp_mb__after_unlock_lock is really the proper API
* to use.
*/
smp_mb__after_unlock_lock();
finish_task(prev); finish_task(prev);
finish_lock_switch(rq); finish_lock_switch(rq);
finish_arch_post_lock_switch(); finish_arch_post_lock_switch();
......
...@@ -118,6 +118,14 @@ static void membarrier_register_private_expedited(void) ...@@ -118,6 +118,14 @@ static void membarrier_register_private_expedited(void)
if (atomic_read(&mm->membarrier_state) if (atomic_read(&mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
return; return;
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
/*
* Ensure all future scheduler executions will observe the
* new thread flag state for this process.
*/
synchronize_sched();
}
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
&mm->membarrier_state); &mm->membarrier_state);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment