Commit 60815cf2 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux

Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger:
 "kernel: Provide READ_ONCE and ASSIGN_ONCE

  As discussed on LKML http://marc.info/?i=54611D86.4040306%40de.ibm.com
  ACCESS_ONCE might fail with specific compilers for non-scalar
  accesses.

  Here is a set of patches to tackle that problem.

  The first patch introduces READ_ONCE and ASSIGN_ONCE.  If the data
  structure is larger than the machine word size memcpy is used and a
  warning is emitted.  The next patches fix up several in-tree users of
  ACCESS_ONCE on non-scalar types.

  This does not yet contain a patch that forces ACCESS_ONCE to work only
  on scalar types.  This is targeted for the next merge window as Linux
  next already contains new offenders regarding ACCESS_ONCE vs.
  non-scalar types"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
  s390/kvm: REPLACE barrier fixup with READ_ONCE
  arm/spinlock: Replace ACCESS_ONCE with READ_ONCE
  arm64/spinlock: Replace ACCESS_ONCE READ_ONCE
  mips/gup: Replace ACCESS_ONCE with READ_ONCE
  x86/gup: Replace ACCESS_ONCE with READ_ONCE
  x86/spinlock: Replace ACCESS_ONCE with READ_ONCE
  mm: replace ACCESS_ONCE with READ_ONCE or barriers
  kernel: Provide READ_ONCE and ASSIGN_ONCE
parents bfc7249c 5de72a22
...@@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) ...@@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{ {
return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); return !arch_spin_value_unlocked(READ_ONCE(*lock));
} }
static inline int arch_spin_is_contended(arch_spinlock_t *lock) static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{ {
struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); struct __raw_tickets tickets = READ_ONCE(lock->tickets);
return (tickets.next - tickets.owner) > 1; return (tickets.next - tickets.owner) > 1;
} }
#define arch_spin_is_contended arch_spin_is_contended #define arch_spin_is_contended arch_spin_is_contended
......
...@@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) ...@@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{ {
return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); return !arch_spin_value_unlocked(READ_ONCE(*lock));
} }
static inline int arch_spin_is_contended(arch_spinlock_t *lock) static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{ {
arch_spinlock_t lockval = ACCESS_ONCE(*lock); arch_spinlock_t lockval = READ_ONCE(*lock);
return (lockval.next - lockval.owner) > 1; return (lockval.next - lockval.owner) > 1;
} }
#define arch_spin_is_contended arch_spin_is_contended #define arch_spin_is_contended arch_spin_is_contended
......
...@@ -30,7 +30,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) ...@@ -30,7 +30,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
return pte; return pte;
#else #else
return ACCESS_ONCE(*ptep); return READ_ONCE(*ptep);
#endif #endif
} }
......
...@@ -227,12 +227,10 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) ...@@ -227,12 +227,10 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
goto out; goto out;
ic = &vcpu->kvm->arch.sca->ipte_control; ic = &vcpu->kvm->arch.sca->ipte_control;
do { do {
old = *ic; old = READ_ONCE(*ic);
barrier();
while (old.k) { while (old.k) {
cond_resched(); cond_resched();
old = *ic; old = READ_ONCE(*ic);
barrier();
} }
new = old; new = old;
new.k = 1; new.k = 1;
...@@ -251,8 +249,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) ...@@ -251,8 +249,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
goto out; goto out;
ic = &vcpu->kvm->arch.sca->ipte_control; ic = &vcpu->kvm->arch.sca->ipte_control;
do { do {
old = *ic; old = READ_ONCE(*ic);
barrier();
new = old; new = old;
new.k = 0; new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val); } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
...@@ -267,12 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ...@@ -267,12 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
ic = &vcpu->kvm->arch.sca->ipte_control; ic = &vcpu->kvm->arch.sca->ipte_control;
do { do {
old = *ic; old = READ_ONCE(*ic);
barrier();
while (old.kg) { while (old.kg) {
cond_resched(); cond_resched();
old = *ic; old = READ_ONCE(*ic);
barrier();
} }
new = old; new = old;
new.k = 1; new.k = 1;
...@@ -286,8 +281,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ...@@ -286,8 +281,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
ic = &vcpu->kvm->arch.sca->ipte_control; ic = &vcpu->kvm->arch.sca->ipte_control;
do { do {
old = *ic; old = READ_ONCE(*ic);
barrier();
new = old; new = old;
new.kh--; new.kh--;
if (!new.kh) if (!new.kh)
......
...@@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
unsigned count = SPIN_THRESHOLD; unsigned count = SPIN_THRESHOLD;
do { do {
if (ACCESS_ONCE(lock->tickets.head) == inc.tail) if (READ_ONCE(lock->tickets.head) == inc.tail)
goto out; goto out;
cpu_relax(); cpu_relax();
} while (--count); } while (--count);
...@@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) ...@@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{ {
arch_spinlock_t old, new; arch_spinlock_t old, new;
old.tickets = ACCESS_ONCE(lock->tickets); old.tickets = READ_ONCE(lock->tickets);
if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
return 0; return 0;
...@@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) ...@@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{ {
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); struct __raw_tickets tmp = READ_ONCE(lock->tickets);
return tmp.tail != tmp.head; return tmp.tail != tmp.head;
} }
static inline int arch_spin_is_contended(arch_spinlock_t *lock) static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{ {
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); struct __raw_tickets tmp = READ_ONCE(lock->tickets);
return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
} }
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
static inline pte_t gup_get_pte(pte_t *ptep) static inline pte_t gup_get_pte(pte_t *ptep)
{ {
#ifndef CONFIG_X86_PAE #ifndef CONFIG_X86_PAE
return ACCESS_ONCE(*ptep); return READ_ONCE(*ptep);
#else #else
/* /*
* With get_user_pages_fast, we walk down the pagetables without taking * With get_user_pages_fast, we walk down the pagetables without taking
......
...@@ -186,6 +186,80 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); ...@@ -186,6 +186,80 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif #endif
#include <uapi/linux/types.h>
/*
 * data_access_exceeds_word_size() is an empty marker function.
 *
 * When __compiletime_warning is available, the forward declaration carries
 * that attribute with the message below; otherwise it is a plain forward
 * declaration of the same function.
 */
#ifdef __compiletime_warning
static __always_inline void data_access_exceeds_word_size(void)
	__compiletime_warning("data access exceeds word size and won't be atomic");
#else
static __always_inline void data_access_exceeds_word_size(void);
#endif

/* The body is intentionally empty. */
static __always_inline void data_access_exceeds_word_size(void)
{
}
/*
 * __read_once_size - copy @size bytes from @p into @res
 * @p:    location to read from; it is never written through, so it is
 *        const-qualified and the address of a const object can be passed
 *        without discarding a qualifier
 * @res:  destination buffer, at least @size bytes
 * @size: number of bytes to copy
 *
 * Sizes 1, 2 and 4 (and 8 when CONFIG_64BIT is defined) are loaded through
 * a volatile-qualified pointer of the matching fixed-width type.  Every
 * other size is copied with __builtin_memcpy() bracketed by barrier(), and
 * data_access_exceeds_word_size() is invoked on that path.
 */
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(__u8 *)res = *(const volatile __u8 *)p; break;
	case 2: *(__u16 *)res = *(const volatile __u16 *)p; break;
	case 4: *(__u32 *)res = *(const volatile __u32 *)p; break;
#ifdef CONFIG_64BIT
	case 8: *(__u64 *)res = *(const volatile __u64 *)p; break;
#endif
	default:
		barrier();
		/* The cast drops volatile because memcpy takes a plain pointer. */
		__builtin_memcpy(res, (const void *)p, size);
		data_access_exceeds_word_size();
		barrier();
	}
}
/*
 * __assign_once_size - copy @size bytes from @res into @p
 * @p:    location to write to
 * @res:  source buffer holding the value to store; it is only read, so it
 *        is const-qualified
 * @size: number of bytes to copy
 *
 * Sizes 1, 2 and 4 (and 8 when CONFIG_64BIT is defined) are stored through
 * a volatile-qualified pointer of the matching fixed-width type.  Every
 * other size is copied with __builtin_memcpy() bracketed by barrier(), and
 * data_access_exceeds_word_size() is invoked on that path.
 */
static __always_inline void __assign_once_size(volatile void *p, const void *res, int size)
{
	switch (size) {
	case 1: *(volatile __u8 *)p = *(const __u8 *)res; break;
	case 2: *(volatile __u16 *)p = *(const __u16 *)res; break;
	case 4: *(volatile __u32 *)p = *(const __u32 *)res; break;
#ifdef CONFIG_64BIT
	case 8: *(volatile __u64 *)p = *(const __u64 *)res; break;
#endif
	default:
		barrier();
		/* The cast drops volatile because memcpy takes a plain pointer. */
		__builtin_memcpy((void *)p, res, size);
		data_access_exceeds_word_size();
		barrier();
	}
}
/*
* Prevent the compiler from merging or refetching reads or writes. The
* compiler is also forbidden from reordering successive instances of
* READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
* compiler is aware of some particular ordering. One way to make the
* compiler aware of ordering is to put the two invocations of READ_ONCE,
* ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
*
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
* data types like structs or unions. If the size of the accessed data
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
* READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
* compile-time warning.
*
* Their two major use cases are: (1) Mediating communication between
* process-level code and irq/NMI handlers, all running on the same CPU,
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
* mutilate accesses that either do not require ordering or that interact
* with an explicit memory barrier or atomic instruction that provides the
* required ordering.
*/
/*
 * READ_ONCE(x) - read the lvalue @x through __read_once_size() and yield
 * the value read.
 *
 * The temporary is a union of typeof(x) and a char array: the helper fills
 * it through the char member, so the storage stays writable even when
 * typeof(x) is const-qualified.  @x is parenthesised so that any lvalue
 * expression can be passed.
 */
#define READ_ONCE(x) \
	({ union { typeof(x) __val; char __c[1]; } __u; \
	   __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
/*
 * ASSIGN_ONCE(val, x) - store @val into the lvalue @x through
 * __assign_once_size(); the whole expression evaluates to the stored value.
 *
 * @val is first converted to typeof(x) in a local temporary.  Both macro
 * arguments are parenthesised so that arbitrary expressions expand safely.
 */
#define ASSIGN_ONCE(val, x) \
	({ typeof(x) __val = (val); \
	   __assign_once_size(&(x), &__val, sizeof(__val)); __val; })
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
......
...@@ -968,7 +968,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, ...@@ -968,7 +968,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
pudp = pud_offset(&pgd, addr); pudp = pud_offset(&pgd, addr);
do { do {
pud_t pud = ACCESS_ONCE(*pudp); pud_t pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (pud_none(pud)) if (pud_none(pud))
......
...@@ -3195,7 +3195,16 @@ static int handle_pte_fault(struct mm_struct *mm, ...@@ -3195,7 +3195,16 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry; pte_t entry;
spinlock_t *ptl; spinlock_t *ptl;
entry = ACCESS_ONCE(*pte); /*
* some architectures can have larger ptes than wordsize,
* e.g. ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
* so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
* The code below just needs a consistent view for the ifs and
* we later double check anyway with the ptl lock held. So here
* a barrier will do.
*/
entry = *pte;
barrier();
if (!pte_present(entry)) { if (!pte_present(entry)) {
if (pte_none(entry)) { if (pte_none(entry)) {
if (vma->vm_ops) { if (vma->vm_ops) {
......
...@@ -583,7 +583,8 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) ...@@ -583,7 +583,8 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
* without holding anon_vma lock for write. So when looking for a * without holding anon_vma lock for write. So when looking for a
* genuine pmde (in which to find pte), test present and !THP together. * genuine pmde (in which to find pte), test present and !THP together.
*/ */
pmde = ACCESS_ONCE(*pmd); pmde = *pmd;
barrier();
if (!pmd_present(pmde) || pmd_trans_huge(pmde)) if (!pmd_present(pmde) || pmd_trans_huge(pmde))
pmd = NULL; pmd = NULL;
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment