Commit 207fb8c3 authored by Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - a big round of FUTEX_UNLOCK_PI improvements, fixes, cleanups and
     general restructuring

   - lockdep updates such as new checks for lock_downgrade()

   - introduce the new atomic_try_cmpxchg() locking API and use it to
     optimize refcount code generation

   - ... plus misc fixes, updates and cleanups"
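
Background note on the atomic_try_cmpxchg() bullet above: the new primitive returns a bool and, on failure, writes the value it actually observed back through its 'old' pointer, so the traditional cmpxchg() retry loop collapses into a do/while. A minimal sketch of the two shapes (the helper names are illustrative, not code from this merge; the pattern itself is what the x86 atomic and refcount hunks below apply):

	/* Old shape: open-coded cmpxchg() retry loop, returns the previous value. */
	static inline int fetch_add_one_old(atomic_t *v)
	{
		int old, val = atomic_read(v);

		for (;;) {
			old = atomic_cmpxchg(v, val, val + 1);
			if (old == val)
				break;
			val = old;
		}
		return old;
	}

	/* New shape: atomic_try_cmpxchg() reloads 'val' on failure by itself. */
	static inline int fetch_add_one_new(atomic_t *v)
	{
		int val = atomic_read(v);

		do {
			/* on failure, try_cmpxchg wrote the current value back into val */
		} while (!atomic_try_cmpxchg(v, &val, val + 1));

		return val;	/* the value that was successfully replaced */
	}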

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
  MAINTAINERS: Add FUTEX SUBSYSTEM
  futex: Clarify mark_wake_futex memory barrier usage
  futex: Fix small (and harmless looking) inconsistencies
  futex: Avoid freeing an active timer
  rtmutex: Plug preempt count leak in rt_mutex_futex_unlock()
  rtmutex: Fix more prio comparisons
  rtmutex: Fix PI chain order integrity
  sched,tracing: Update trace_sched_pi_setprio()
  sched/rtmutex: Refactor rt_mutex_setprio()
  rtmutex: Clean up
  sched/deadline/rtmutex: Dont miss the dl_runtime/dl_period update
  sched/rtmutex/deadline: Fix a PI crash for deadline tasks
  rtmutex: Deboost before waking up the top waiter
  locking/ww-mutex: Limit stress test to 2 seconds
  locking/atomic: Fix atomic_try_cmpxchg() semantics
  lockdep: Fix per-cpu static objects
  futex: Drop hb->lock before enqueueing on the rtmutex
  futex: Futex_unlock_pi() determinism
  futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()
  futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()
  ...
parents 3527d3e9 59cd42c2
@@ -5415,6 +5415,23 @@ F: fs/fuse/
F: include/uapi/linux/fuse.h
F: Documentation/filesystems/fuse.txt
FUTEX SUBSYSTEM
M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@redhat.com>
R: Peter Zijlstra <peterz@infradead.org>
R: Darren Hart <dvhart@infradead.org>
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
S: Maintained
F: kernel/futex.c
F: kernel/futex_compat.c
F: include/asm-generic/futex.h
F: include/linux/futex.h
F: include/uapi/linux/futex.h
F: tools/testing/selftests/futex/
F: tools/perf/bench/futex*
F: Documentation/*futex*
FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
M: Rik Faith <faith@cs.unc.edu>
L: linux-scsi@vger.kernel.org
...
@@ -186,6 +186,12 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
	return cmpxchg(&v->counter, old, new);
}
#define atomic_try_cmpxchg atomic_try_cmpxchg
static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
return try_cmpxchg(&v->counter, old, new);
}
static inline int atomic_xchg(atomic_t *v, int new)
{
	return xchg(&v->counter, new);
@@ -201,16 +207,12 @@ static inline void atomic_##op(int i, atomic_t *v) \
}

#define ATOMIC_FETCH_OP(op, c_op) \
static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
-	int old, val = atomic_read(v); \
-	for (;;) { \
-		old = atomic_cmpxchg(v, val, val c_op i); \
-		if (old == val) \
-			break; \
-		val = old; \
-	} \
-	return old; \
+	int val = atomic_read(v); \
+	do { \
+	} while (!atomic_try_cmpxchg(v, &val, val c_op i)); \
+	return val; \
}

#define ATOMIC_OPS(op, c_op) \
@@ -236,16 +238,11 @@ ATOMIC_OPS(xor, ^)
 */
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
+	int c = atomic_read(v);
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic_try_cmpxchg(v, &c, c + a));
	return c;
}
...
@@ -176,6 +176,12 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
	return cmpxchg(&v->counter, old, new);
}
#define atomic64_try_cmpxchg atomic64_try_cmpxchg
static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
{
return try_cmpxchg(&v->counter, old, new);
}
static inline long atomic64_xchg(atomic64_t *v, long new)
{
	return xchg(&v->counter, new);
@@ -192,17 +198,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
 */
static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
+	long c = atomic64_read(v);
+	do {
+		if (unlikely(c == u))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+	return true;
}

#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
@@ -216,17 +217,12 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 */
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
-	long c, old, dec;
-	c = atomic64_read(v);
-	for (;;) {
+	long dec, c = atomic64_read(v);
+	do {
		dec = c - 1;
		if (unlikely(dec < 0))
			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
+	} while (!atomic64_try_cmpxchg(v, &c, dec));
	return dec;
}
@@ -242,14 +238,10 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
#define ATOMIC64_FETCH_OP(op, c_op) \
static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
{ \
-	long old, val = atomic64_read(v); \
-	for (;;) { \
-		old = atomic64_cmpxchg(v, val, val c_op i); \
-		if (old == val) \
-			break; \
-		val = old; \
-	} \
-	return old; \
+	long val = atomic64_read(v); \
+	do { \
+	} while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \
+	return val; \
}

#define ATOMIC64_OPS(op, c_op) \
...
@@ -153,6 +153,76 @@ extern void __add_wrong_size(void)
#define cmpxchg_local(ptr, old, new) \
	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
/*
 * xadd() adds "inc" to "*ptr" and atomically returns the previous
 * value of "*ptr".
...
@@ -423,6 +423,29 @@
#endif
#endif /* atomic_cmpxchg_relaxed */
#ifndef atomic_try_cmpxchg
#define __atomic_try_cmpxchg(type, _p, _po, _n) \
({ \
typeof(_po) __po = (_po); \
typeof(*(_po)) __r, __o = *__po; \
__r = atomic_cmpxchg##type((_p), __o, (_n)); \
if (unlikely(__r != __o)) \
*__po = __r; \
likely(__r == __o); \
})
#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n)
#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n)
#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n)
#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n)
#else /* atomic_try_cmpxchg */
#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg
#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg
#define atomic_try_cmpxchg_release atomic_try_cmpxchg
#endif /* atomic_try_cmpxchg */
/* cmpxchg_relaxed */
#ifndef cmpxchg_relaxed
#define cmpxchg_relaxed cmpxchg
@@ -996,6 +1019,29 @@ static inline int atomic_dec_if_positive(atomic_t *v)
#endif
#endif /* atomic64_cmpxchg_relaxed */
#ifndef atomic64_try_cmpxchg
#define __atomic64_try_cmpxchg(type, _p, _po, _n) \
({ \
typeof(_po) __po = (_po); \
typeof(*(_po)) __r, __o = *__po; \
__r = atomic64_cmpxchg##type((_p), __o, (_n)); \
if (unlikely(__r != __o)) \
*__po = __r; \
likely(__r == __o); \
})
#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n)
#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n)
#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n)
#else /* atomic64_try_cmpxchg */
#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg
#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg
#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg
#endif /* atomic64_try_cmpxchg */
#ifndef atomic64_andnot
static inline void atomic64_andnot(long long i, atomic64_t *v)
{
...
@@ -181,6 +181,7 @@ extern struct cred init_cred;
#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
	.pi_waiters = RB_ROOT, \
.pi_top_task = NULL, \
	.pi_waiters_leftmost = NULL,
#else
# define INIT_RT_MUTEXES(tsk)
...
@@ -361,6 +361,8 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
	lock_set_class(lock, lock->name, lock->key, subclass, ip);
}
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);

@@ -411,6 +413,7 @@ static inline void lockdep_on(void)
# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
# define lock_release(l, n, i) do { } while (0)
# define lock_downgrade(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_set_current_reclaim_state(g) do { } while (0)
...
@@ -493,6 +493,7 @@ static inline int module_is_live(struct module *mod)
struct module *__module_text_address(unsigned long addr);
struct module *__module_address(unsigned long addr);
bool is_module_address(unsigned long addr);
bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
bool is_module_percpu_address(unsigned long addr);
bool is_module_text_address(unsigned long addr);

@@ -660,6 +661,11 @@ static inline bool is_module_percpu_address(unsigned long addr)
	return false;
}
static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
{
return false;
}
static inline bool is_module_text_address(unsigned long addr)
{
	return false;
...
@@ -110,6 +110,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
#endif

extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
extern bool is_kernel_percpu_address(unsigned long addr);

#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
...
@@ -6,17 +6,36 @@
#include <linux/spinlock.h>
#include <linux/kernel.h>
/**
* refcount_t - variant of atomic_t specialized for reference counts
* @refs: atomic_t counter field
*
* The counter saturates at UINT_MAX and will not move once
* there. This avoids wrapping the counter and causing 'spurious'
* use-after-free bugs.
*/
typedef struct refcount_struct {
	atomic_t refs;
} refcount_t;

#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
/**
* refcount_set - set a refcount's value
* @r: the refcount
* @n: value to which the refcount will be set
*/
static inline void refcount_set(refcount_t *r, unsigned int n)
{
	atomic_set(&r->refs, n);
}
/**
* refcount_read - get a refcount's value
* @r: the refcount
*
* Return: the refcount's value
*/
static inline unsigned int refcount_read(const refcount_t *r)
{
	return atomic_read(&r->refs);
...
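
For orientation on the refcount_t API documented above (and implemented in the lib/refcount.c hunks further down), a typical get/put pair looks roughly like this sketch; the struct and function names are made up for illustration, the refcount_* calls are the real interface:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct foo {
		refcount_t ref;
		/* payload ... */
	};

	static struct foo *foo_create(void)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (f)
			refcount_set(&f->ref, 1);	/* caller holds the first reference */
		return f;
	}

	static void foo_get(struct foo *f)
	{
		refcount_inc(&f->ref);			/* WARNs and saturates instead of wrapping */
	}

	static void foo_put(struct foo *f)
	{
		if (refcount_dec_and_test(&f->ref))
			kfree(f);
	}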
@@ -779,6 +779,8 @@ struct task_struct {
	/* PI waiters blocked on a rt_mutex held by this task: */
	struct rb_root pi_waiters;
	struct rb_node *pi_waiters_leftmost;
/* Updated under owner's pi_lock and rq lock */
struct task_struct *pi_top_task;
	/* Deadlock detection and priority inheritance handling: */
	struct rt_mutex_waiter *pi_blocked_on;
#endif
...
@@ -18,27 +18,20 @@ static inline int rt_task(struct task_struct *p)
}

#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+/*
+ * Must hold either p->pi_lock or task_rq(p)->lock.
+ */
+static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
+{
+	return p->pi_top_task;
+}
+extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
extern void rt_mutex_adjust_pi(struct task_struct *p);

static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return tsk->pi_blocked_on != NULL;
}

#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
-					      int newprio)
-{
-	return newprio;
-}
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	return NULL;
...
@@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus;
extern void __init setup_nr_cpu_ids(void);
extern void __init smp_init(void);
extern int __boot_cpu_id;
static inline int get_boot_cpu_id(void)
{
return __boot_cpu_id;
}
#else /* !SMP */

static inline void smp_send_stop(void) { }

@@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); }
static inline void smp_init(void) { }
#endif
static inline int get_boot_cpu_id(void)
{
return 0;
}
#endif /* !SMP */

/*
...
@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
-		__entry->prio = p->prio;
+		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
		__entry->success = 1; /* rudiment, kill when possible */
		__entry->target_cpu = task_cpu(p);
	),

@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch,
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid = next->pid;
		__entry->next_prio = next->prio;
+		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",

@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task,
	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
-		__entry->prio = p->prio;
+		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu = task_cpu(p);
		__entry->dest_cpu = dest_cpu;
	),

@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template,
	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
-		__entry->prio = p->prio;
+		__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",

@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait,
	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid = pid_nr(pid);
-		__entry->prio = current->prio;
+		__entry->prio = current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",

@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
 */
TRACE_EVENT(sched_pi_setprio,

-	TP_PROTO(struct task_struct *tsk, int newprio),
+	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

-	TP_ARGS(tsk, newprio),
+	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )

@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio,
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
		__entry->oldprio = tsk->prio;
-		__entry->newprio = newprio;
+		__entry->newprio = pi_task ? pi_task->prio : tsk->prio;
+		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
...
@@ -1125,6 +1125,8 @@ core_initcall(cpu_hotplug_pm_sync_init);
#endif /* CONFIG_PM_SLEEP_SMP */
int __boot_cpu_id;
#endif /* CONFIG_SMP */

/* Boot processor state steps */

@@ -1815,6 +1817,10 @@ void __init boot_cpu_init(void)
	set_cpu_active(cpu, true);
	set_cpu_present(cpu, true);
	set_cpu_possible(cpu, true);
#ifdef CONFIG_SMP
__boot_cpu_id = cpu;
#endif
}

/*
...
@@ -1438,6 +1438,7 @@ static void rt_mutex_init_task(struct task_struct *p)
#ifdef CONFIG_RT_MUTEXES
	p->pi_waiters = RB_ROOT;
	p->pi_waiters_leftmost = NULL;
p->pi_top_task = NULL;
	p->pi_blocked_on = NULL;
#endif
}
...
@@ -174,12 +174,3 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
	lock->name = name;
}

-void
-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
-{
-}
-
-void rt_mutex_deadlock_account_unlock(struct task_struct *task)
-{
-}
@@ -9,9 +9,6 @@
 * This file contains macros used solely by rtmutex.c. Debug version.
 */

-extern void
-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
-extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
...
@@ -11,8 +11,6 @@
 */

#define rt_mutex_deadlock_check(l) (0)
-#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
-#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
#define debug_rt_mutex_init_waiter(w) do { } while (0)
#define debug_rt_mutex_free_waiter(w) do { } while (0)
#define debug_rt_mutex_lock(l) do { } while (0)
...
@@ -34,6 +34,7 @@ struct rt_mutex_waiter {
	struct rt_mutex *deadlock_lock;
#endif
	int prio;
+	u64 deadline;
};

/*
@@ -103,16 +104,26 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				       struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
				  struct task_struct *proxy_owner);
+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+				       struct rt_mutex_waiter *waiter,
+				       struct task_struct *task);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				     struct rt_mutex_waiter *waiter,
				     struct task_struct *task);
-extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
				    struct hrtimer_sleeper *to,
				    struct rt_mutex_waiter *waiter);
-extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
-extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
-				  struct wake_q_head *wqh);
-extern void rt_mutex_adjust_prio(struct task_struct *task);
+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+					struct rt_mutex_waiter *waiter);
+
+extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+
+extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
+extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+				    struct wake_q_head *wqh);
+
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q);

#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
...
@@ -124,10 +124,8 @@ EXPORT_SYMBOL(up_write);
 */
void downgrade_write(struct rw_semaphore *sem)
{
-	/*
-	 * lockdep: a downgraded write will live on as a write
-	 * dependency.
-	 */
+	lock_downgrade(&sem->dep_map, _RET_IP_);
+
	rwsem_set_reader_owned(sem);
	__downgrade_write(sem);
}
...
@@ -353,8 +353,8 @@ static int test_cycle(unsigned int ncpus)
struct stress {
	struct work_struct work;
	struct ww_mutex *locks;
+	unsigned long timeout;
	int nlocks;
-	int nloops;
};

static int *get_random_order(int count)

@@ -398,12 +398,11 @@ static void stress_inorder_work(struct work_struct *work)
	if (!order)
		return;

-	ww_acquire_init(&ctx, &ww_class);
	do {
		int contended = -1;
		int n, err;

+		ww_acquire_init(&ctx, &ww_class);
retry:
		err = 0;
		for (n = 0; n < nlocks; n++) {

@@ -433,9 +432,9 @@ static void stress_inorder_work(struct work_struct *work)
			       __func__, err);
			break;
		}
-	} while (--stress->nloops);
		ww_acquire_fini(&ctx);
+	} while (!time_after(jiffies, stress->timeout));

	kfree(order);
	kfree(stress);

@@ -470,9 +469,9 @@ static void stress_reorder_work(struct work_struct *work)
	kfree(order);
	order = NULL;

-	ww_acquire_init(&ctx, &ww_class);
	do {
+		ww_acquire_init(&ctx, &ww_class);
		list_for_each_entry(ll, &locks, link) {
			err = ww_mutex_lock(ll->lock, &ctx);
			if (!err)

@@ -495,9 +494,9 @@ static void stress_reorder_work(struct work_struct *work)
		dummy_load(stress);
		list_for_each_entry(ll, &locks, link)
			ww_mutex_unlock(ll->lock);
-	} while (--stress->nloops);
		ww_acquire_fini(&ctx);
+	} while (!time_after(jiffies, stress->timeout));

out:
	list_for_each_entry_safe(ll, ln, &locks, link)

@@ -523,7 +522,7 @@ static void stress_one_work(struct work_struct *work)
			       __func__, err);
			break;
		}
-	} while (--stress->nloops);
+	} while (!time_after(jiffies, stress->timeout));

	kfree(stress);
}

@@ -533,7 +532,7 @@ static void stress_one_work(struct work_struct *work)
#define STRESS_ONE BIT(2)
#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE)

-static int stress(int nlocks, int nthreads, int nloops, unsigned int flags)
+static int stress(int nlocks, int nthreads, unsigned int flags)
{
	struct ww_mutex *locks;
	int n;

@@ -575,7 +574,7 @@ static int stress(int nlocks, int nthreads, int nloops, unsigned int flags)
		INIT_WORK(&stress->work, fn);
		stress->locks = locks;
		stress->nlocks = nlocks;
-		stress->nloops = nloops;
+		stress->timeout = jiffies + 2*HZ;

		queue_work(wq, &stress->work);
		nthreads--;

@@ -619,15 +618,15 @@ static int __init test_ww_mutex_init(void)
	if (ret)
		return ret;

-	ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER);
+	ret = stress(16, 2*ncpus, STRESS_INORDER);
	if (ret)
		return ret;

-	ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER);
+	ret = stress(16, 2*ncpus, STRESS_REORDER);
	if (ret)
		return ret;

-	ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
+	ret = stress(4095, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
	if (ret)
		return ret;
...
@@ -665,16 +665,7 @@ static void percpu_modcopy(struct module *mod,
		memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
}

-/**
- * is_module_percpu_address - test whether address is from module static percpu
- * @addr: address to test
- *
- * Test whether @addr belongs to module static percpu area.
- *
- * RETURNS:
- * %true if @addr is from module static percpu area
- */
-bool is_module_percpu_address(unsigned long addr)
+bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
{
	struct module *mod;
	unsigned int cpu;

@@ -688,9 +679,15 @@ bool is_module_percpu_address(unsigned long addr)
			continue;
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(mod->percpu, cpu);
+			void *va = (void *)addr;

-			if ((void *)addr >= start &&
-			    (void *)addr < start + mod->percpu_size) {
+			if (va >= start && va < start + mod->percpu_size) {
+				if (can_addr) {
+					*can_addr = (unsigned long) (va - start);
+					*can_addr += (unsigned long)
+						per_cpu_ptr(mod->percpu,
+							    get_boot_cpu_id());
+				}
				preempt_enable();
				return true;
			}

@@ -701,6 +698,20 @@ bool is_module_percpu_address(unsigned long addr)
	return false;
}
/**
* is_module_percpu_address - test whether address is from module static percpu
* @addr: address to test
*
* Test whether @addr belongs to module static percpu area.
*
* RETURNS:
* %true if @addr is from module static percpu area
*/
bool is_module_percpu_address(unsigned long addr)
{
return __is_module_percpu_address(addr, NULL);
}
#else /* ... !CONFIG_SMP */

static inline void __percpu *mod_percpu(struct module *mod)

@@ -732,6 +743,11 @@ bool is_module_percpu_address(unsigned long addr)
	return false;
}
bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
{
return false;
}
#endif /* CONFIG_SMP */

#define MODINFO_ATTR(field) \
...
@@ -3671,10 +3671,25 @@ EXPORT_SYMBOL(default_wake_function);

#ifdef CONFIG_RT_MUTEXES
static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
{
if (pi_task)
prio = min(prio, pi_task->prio);
return prio;
}
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
return __rt_effective_prio(pi_task, prio);
}
/*
 * rt_mutex_setprio - set the current priority of a task
- * @p: task
- * @prio: prio value (kernel-internal form)
+ * @p: task to boost
+ * @pi_task: donor task
 *
 * This function changes the 'effective' priority of a task. It does
 * not touch ->normal_prio like __setscheduler().

@@ -3682,18 +3697,42 @@ EXPORT_SYMBOL(default_wake_function);
 * Used by the rt_mutex code to implement priority inheritance
 * logic. Call site only calls if the priority of the task changed.
 */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
-	int oldprio, queued, running, queue_flag =
+	int prio, oldprio, queued, running, queue_flag =
		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
	const struct sched_class *prev_class;
	struct rq_flags rf;
	struct rq *rq;

-	BUG_ON(prio > MAX_PRIO);
+	/* XXX used to be waiter->prio, not waiter->task->prio */
+	prio = __rt_effective_prio(pi_task, p->normal_prio);
+
+	/*
+	 * If nothing changed; bail early.
+	 */
+	if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
+		return;

	rq = __task_rq_lock(p, &rf);
	update_rq_clock(rq);
/*
* Set under pi_lock && rq->lock, such that the value can be used under
* either lock.
*
* Note that there is loads of tricky to make this pointer cache work
* right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
* ensure a task is de-boosted (pi_task is set to NULL) before the
* task is allowed to run again (and can exit). This ensures the pointer
* points to a blocked task -- which guaratees the task is present.
*/
p->pi_top_task = pi_task;
/*
* For FIFO/RR we only need to set prio, if that matches we're done.
*/
if (prio == p->prio && !dl_prio(prio))
goto out_unlock;
	/*
	 * Idle task boosting is a nono in general. There is one

@@ -3713,7 +3752,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
		goto out_unlock;
	}

-	trace_sched_pi_setprio(p, prio);
+	trace_sched_pi_setprio(p, pi_task);
	oldprio = p->prio;

	if (oldprio == prio)

@@ -3737,7 +3776,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
	 * running task
	 */
	if (dl_prio(prio)) {
-		struct task_struct *pi_task = rt_mutex_get_top_task(p);
		if (!dl_prio(p->normal_prio) ||
		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
			p->dl.dl_boosted = 1;

@@ -3775,6 +3813,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
	balance_callback(rq);
	preempt_enable();
}
#else
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
return prio;
}
#endif

void set_user_nice(struct task_struct *p, long nice)

@@ -4021,10 +4064,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
	 * Keep a potential priority boosting if called from
	 * sched_setscheduler().
	 */
+	p->prio = normal_prio(p);
	if (keep_boost)
-		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
-	else
-		p->prio = normal_prio(p);
+		p->prio = rt_effective_prio(p, p->prio);

	if (dl_prio(p->prio))
		p->sched_class = &dl_sched_class;

@@ -4311,7 +4353,7 @@ static int __sched_setscheduler(struct task_struct *p,
		 * the runqueue. This will be done when the task deboost
		 * itself.
		 */
-		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		new_effective_prio = rt_effective_prio(p, newprio);
		if (new_effective_prio == oldprio)
			queue_flags &= ~DEQUEUE_MOVE;
	}
...
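
Worked example for the rt_effective_prio() helper introduced in this file: kernel-internal priorities are lower-is-higher (RT range roughly 0-99, a plain SCHED_NORMAL task sits around 120), so __rt_effective_prio() is simply a min(). A task with normal_prio 120 whose top pi waiter runs at kernel prio 10 therefore gets an effective prio of min(120, 10) = 10; once the donor goes away, rt_mutex_setprio() is called again with pi_task == NULL and the task drops back to 120.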
@@ -37,11 +37,29 @@
#include <linux/refcount.h>
#include <linux/bug.h>
/**
* refcount_add_not_zero - add a value to a refcount unless it is 0
* @i: the value to add to the refcount
* @r: the refcount
*
* Will saturate at UINT_MAX and WARN.
*
* Provides no memory ordering, it is assumed the caller has guaranteed the
* object memory to be stable (RCU, etc.). It does provide a control dependency
* and thereby orders future stores. See the comment on top.
*
* Use of this function is not recommended for the normal reference counting
* use case in which references are taken and released one at a time. In these
* cases, refcount_inc(), or one of its variants, should instead be used to
* increment a reference count.
*
* Return: false if the passed refcount is 0, true otherwise
*/
bool refcount_add_not_zero(unsigned int i, refcount_t *r)
{
-	unsigned int old, new, val = atomic_read(&r->refs);
+	unsigned int new, val = atomic_read(&r->refs);

-	for (;;) {
+	do {
		if (!val)
			return false;

@@ -51,12 +69,8 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
		new = val + i;
		if (new < val)
			new = UINT_MAX;
-		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-		if (old == val)
-			break;
-		val = old;
-	}
+	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));

	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");

@@ -64,24 +78,45 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
}
EXPORT_SYMBOL_GPL(refcount_add_not_zero);
/**
* refcount_add - add a value to a refcount
* @i: the value to add to the refcount
* @r: the refcount
*
* Similar to atomic_add(), but will saturate at UINT_MAX and WARN.
*
* Provides no memory ordering, it is assumed the caller has guaranteed the
* object memory to be stable (RCU, etc.). It does provide a control dependency
* and thereby orders future stores. See the comment on top.
*
* Use of this function is not recommended for the normal reference counting
* use case in which references are taken and released one at a time. In these
* cases, refcount_inc(), or one of its variants, should instead be used to
* increment a reference count.
*/
void refcount_add(unsigned int i, refcount_t *r)
{
	WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
}
EXPORT_SYMBOL_GPL(refcount_add);

-/*
- * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+/**
+ * refcount_inc_not_zero - increment a refcount unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
 *
 * Provides no memory ordering, it is assumed the caller has guaranteed the
 * object memory to be stable (RCU, etc.). It does provide a control dependency
 * and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
 */
bool refcount_inc_not_zero(refcount_t *r)
{
-	unsigned int old, new, val = atomic_read(&r->refs);
+	unsigned int new, val = atomic_read(&r->refs);

-	for (;;) {
+	do {
		new = val + 1;

		if (!val)

@@ -90,12 +125,7 @@ bool refcount_inc_not_zero(refcount_t *r)
		if (unlikely(!new))
			return true;

-		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-		if (old == val)
-			break;
-		val = old;
-	}
+	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));

	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");

@@ -103,11 +133,17 @@ bool refcount_inc_not_zero(refcount_t *r)
}
EXPORT_SYMBOL_GPL(refcount_inc_not_zero);

-/*
- * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+/**
+ * refcount_inc - increment a refcount
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
 *
 * Provides no memory ordering, it is assumed the caller already has a
- * reference on the object, will WARN when this is not so.
+ * reference on the object.
+ *
+ * Will WARN if the refcount is 0, as this represents a possible use-after-free
+ * condition.
 */
void refcount_inc(refcount_t *r)
{

@@ -115,11 +151,31 @@ void refcount_inc(refcount_t *r)
}
EXPORT_SYMBOL_GPL(refcount_inc);
/**
* refcount_sub_and_test - subtract from a refcount and test if it is 0
* @i: amount to subtract from the refcount
* @r: the refcount
*
* Similar to atomic_dec_and_test(), but it will WARN, return false and
* ultimately leak on underflow and will fail to decrement when saturated
* at UINT_MAX.
*
* Provides release memory ordering, such that prior loads and stores are done
* before, and provides a control dependency such that free() must come after.
* See the comment on top.
*
* Use of this function is not recommended for the normal reference counting
* use case in which references are taken and released one at a time. In these
* cases, refcount_dec(), or one of its variants, should instead be used to
* decrement a reference count.
*
* Return: true if the resulting refcount is 0, false otherwise
*/
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
-	unsigned int old, new, val = atomic_read(&r->refs);
+	unsigned int new, val = atomic_read(&r->refs);

-	for (;;) {
+	do {
		if (unlikely(val == UINT_MAX))
			return false;

@@ -129,24 +185,24 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
			return false;
		}

-		old = atomic_cmpxchg_release(&r->refs, val, new);
-		if (old == val)
-			break;
-		val = old;
-	}
+	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));

	return !new;
}
EXPORT_SYMBOL_GPL(refcount_sub_and_test);

-/*
+/**
+ * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * @r: the refcount
+ *
 * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
 * decrement when saturated at UINT_MAX.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides a control dependency such that free() must come after.
 * See the comment on top.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
 */
bool refcount_dec_and_test(refcount_t *r)
{

@@ -154,21 +210,26 @@ bool refcount_dec_and_test(refcount_t *r)
}
EXPORT_SYMBOL_GPL(refcount_dec_and_test);

-/*
+/**
+ * refcount_dec - decrement a refcount
+ * @r: the refcount
+ *
 * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
 * when saturated at UINT_MAX.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before.
 */
void refcount_dec(refcount_t *r)
{
	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
}
EXPORT_SYMBOL_GPL(refcount_dec);

-/*
+/**
+ * refcount_dec_if_one - decrement a refcount if it is 1
+ * @r: the refcount
+ *
 * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
 * success thereof.
 *

@@ -178,24 +239,33 @@ EXPORT_SYMBOL_GPL(refcount_dec);
 * It can be used like a try-delete operator; this explicit case is provided
 * and not cmpxchg in generic, because that would allow implementing unsafe
 * operations.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
 */
bool refcount_dec_if_one(refcount_t *r)
{
-	return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
+	int val = 1;
+
+	return atomic_try_cmpxchg_release(&r->refs, &val, 0);
}
EXPORT_SYMBOL_GPL(refcount_dec_if_one);

-/*
+/**
+ * refcount_dec_not_one - decrement a refcount if it is not 1
+ * @r: the refcount
+ *
 * No atomic_t counterpart, it decrements unless the value is 1, in which case
 * it will return false.
 *
 * Was often done like: atomic_add_unless(&var, -1, 1)
+ *
+ * Return: true if the decrement operation was successful, false otherwise
 */
bool refcount_dec_not_one(refcount_t *r)
{
-	unsigned int old, new, val = atomic_read(&r->refs);
+	unsigned int new, val = atomic_read(&r->refs);

-	for (;;) {
+	do {
		if (unlikely(val == UINT_MAX))
			return true;

@@ -208,24 +278,27 @@ bool refcount_dec_not_one(refcount_t *r)
			return true;
		}

-		old = atomic_cmpxchg_release(&r->refs, val, new);
-		if (old == val)
-			break;
-		val = old;
-	}
+	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));

	return true;
}
EXPORT_SYMBOL_GPL(refcount_dec_not_one);

-/*
+/**
+ * refcount_dec_and_mutex_lock - return holding mutex if able to decrement
+ *                               refcount to 0
+ * @r: the refcount
+ * @lock: the mutex to be locked
+ *
 * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
 * to decrement when saturated at UINT_MAX.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides a control dependency such that free() must come after.
 * See the comment on top.
+ *
+ * Return: true and hold mutex if able to decrement refcount to 0, false
+ *         otherwise
 */
bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
{

@@ -242,13 +315,21 @@ bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
}
EXPORT_SYMBOL_GPL(refcount_dec_and_mutex_lock);

-/*
+/**
+ * refcount_dec_and_lock - return holding spinlock if able to decrement
+ *                         refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ *
 * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
 * decrement when saturated at UINT_MAX.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides a control dependency such that free() must come after.
 * See the comment on top.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ *         otherwise
 */
bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
{
...
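
A sketch of how the refcount_dec_and_lock() contract documented above is typically consumed; the obj struct, list and lock names are hypothetical, and the point is only that the lock is held on return solely when this put dropped the count to zero:

	#include <linux/list.h>
	#include <linux/refcount.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(obj_list_lock);

	struct obj {
		refcount_t ref;
		struct list_head node;
	};

	static void obj_put(struct obj *o)
	{
		/* true, with obj_list_lock held, only on the final put */
		if (refcount_dec_and_lock(&o->ref, &obj_list_lock)) {
			list_del(&o->node);
			spin_unlock(&obj_list_lock);
			kfree(o);
		}
	}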
@@ -1284,18 +1284,7 @@ void free_percpu(void __percpu *ptr)
}
EXPORT_SYMBOL_GPL(free_percpu);

-/**
- * is_kernel_percpu_address - test whether address is from static percpu area
- * @addr: address to test
- *
- * Test whether @addr belongs to in-kernel static percpu area. Module
- * static percpu areas are not considered. For those, use
- * is_module_percpu_address().
- *
- * RETURNS:
- * %true if @addr is from in-kernel static percpu area, %false otherwise.
- */
-bool is_kernel_percpu_address(unsigned long addr)
+bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
{
#ifdef CONFIG_SMP
	const size_t static_size = __per_cpu_end - __per_cpu_start;

@@ -1304,15 +1293,38 @@ bool is_kernel_percpu_address(unsigned long addr)
	for_each_possible_cpu(cpu) {
		void *start = per_cpu_ptr(base, cpu);
+		void *va = (void *)addr;

-		if ((void *)addr >= start && (void *)addr < start + static_size)
+		if (va >= start && va < start + static_size) {
+			if (can_addr) {
+				*can_addr = (unsigned long) (va - start);
+				*can_addr += (unsigned long)
+					per_cpu_ptr(base, get_boot_cpu_id());
+			}
			return true;
		}
+	}
#endif
	/* on UP, can't distinguish from other static vars, always false */
	return false;
}
/**
* is_kernel_percpu_address - test whether address is from static percpu area
* @addr: address to test
*
* Test whether @addr belongs to in-kernel static percpu area. Module
* static percpu areas are not considered. For those, use
* is_module_percpu_address().
*
* RETURNS:
* %true if @addr is from in-kernel static percpu area, %false otherwise.
*/
bool is_kernel_percpu_address(unsigned long addr)
{
return __is_kernel_percpu_address(addr, NULL);
}
/**
 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
 * @addr: the address to be converted to physical address
...