Commit f2a84170 authored by Linus Torvalds

Merge branch 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

Pull percpu updates from Tejun Heo:

 - Major reorganization of percpu header files which I think makes
   things a lot more readable and logical than before.

 - percpu-refcount is updated so that it requires explicit destruction
   and can be reinitialized if necessary.  This was pulled into the
   block tree to replace the custom percpu refcnting implemented in
   blk-mq.

 - In the process, percpu and percpu-refcount got cleaned up a bit.

* 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (21 commits)
  percpu-refcount: implement percpu_ref_reinit() and percpu_ref_is_zero()
  percpu-refcount: require percpu_ref to be exited explicitly
  percpu-refcount: use unsigned long for pcpu_count pointer
  percpu-refcount: add helpers for ->percpu_count accesses
  percpu-refcount: one bit is enough for REF_STATUS
  percpu-refcount, aio: use percpu_ref_cancel_init() in ioctx_alloc()
  workqueue: stronger test in process_one_work()
  workqueue: clear POOL_DISASSOCIATED in rebind_workers()
  percpu: Use ALIGN macro instead of hand coding alignment calculation
  percpu: invoke __verify_pcpu_ptr() from the generic part of accessors and operations
  percpu: preffity percpu header files
  percpu: use raw_cpu_*() to define __this_cpu_*()
  percpu: reorder macros in percpu header files
  percpu: move {raw|this}_cpu_*() definitions to include/linux/percpu-defs.h
  percpu: move generic {raw|this}_cpu_*_N() definitions to include/asm-generic/percpu.h
  percpu: only allow sized arch overrides for {raw|this}_cpu_*() ops
  percpu: reorganize include/linux/percpu-defs.h
  percpu: move accessors from include/linux/percpu.h to percpu-defs.h
  percpu: include/asm-generic/percpu.h should contain only arch-overridable parts
  percpu: introduce arch_raw_cpu_ptr()
  ...
parents c4c3f5fb 2d722782
...@@ -52,10 +52,9 @@ ...@@ -52,10 +52,9 @@
* Compared to the generic __my_cpu_offset version, the following * Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register. * saves one instruction and avoids clobbering a temp register.
*/ */
#define raw_cpu_ptr(ptr) \ #define arch_raw_cpu_ptr(ptr) \
({ \ ({ \
unsigned long tcp_ptr__; \ unsigned long tcp_ptr__; \
__verify_pcpu_ptr(ptr); \
asm volatile("add " __percpu_arg(1) ", %0" \ asm volatile("add " __percpu_arg(1) ", %0" \
: "=r" (tcp_ptr__) \ : "=r" (tcp_ptr__) \
: "m" (this_cpu_off), "0" (ptr)); \ : "m" (this_cpu_off), "0" (ptr)); \
......
...@@ -825,7 +825,7 @@ int core_tpg_add_lun( ...@@ -825,7 +825,7 @@ int core_tpg_add_lun(
ret = core_dev_export(dev, tpg, lun); ret = core_dev_export(dev, tpg, lun);
if (ret < 0) { if (ret < 0) {
percpu_ref_cancel_init(&lun->lun_ref); percpu_ref_exit(&lun->lun_ref);
return ret; return ret;
} }
...@@ -880,5 +880,7 @@ int core_tpg_post_dellun( ...@@ -880,5 +880,7 @@ int core_tpg_post_dellun(
lun->lun_status = TRANSPORT_LUN_STATUS_FREE; lun->lun_status = TRANSPORT_LUN_STATUS_FREE;
spin_unlock(&tpg->tpg_lun_lock); spin_unlock(&tpg->tpg_lun_lock);
percpu_ref_exit(&lun->lun_ref);
return 0; return 0;
} }
...@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work) ...@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
aio_free_ring(ctx); aio_free_ring(ctx);
free_percpu(ctx->cpu); free_percpu(ctx->cpu);
percpu_ref_exit(&ctx->reqs);
percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx); kmem_cache_free(kioctx_cachep, ctx);
} }
...@@ -715,8 +717,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ...@@ -715,8 +717,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err: err:
mutex_unlock(&ctx->ring_lock); mutex_unlock(&ctx->ring_lock);
free_percpu(ctx->cpu); free_percpu(ctx->cpu);
free_percpu(ctx->reqs.pcpu_count); percpu_ref_exit(&ctx->reqs);
free_percpu(ctx->users.pcpu_count); percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx); kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err); pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err); return ERR_PTR(err);
......
This diff is collapsed.
This diff is collapsed.
...@@ -57,11 +57,9 @@ struct percpu_ref { ...@@ -57,11 +57,9 @@ struct percpu_ref {
atomic_t count; atomic_t count;
/* /*
* The low bit of the pointer indicates whether the ref is in percpu * The low bit of the pointer indicates whether the ref is in percpu
* mode; if set, then get/put will manipulate the atomic_t (this is a * mode; if set, then get/put will manipulate the atomic_t.
* hack because we need to keep the pointer around for
* percpu_ref_kill_rcu())
*/ */
unsigned __percpu *pcpu_count; unsigned long pcpu_count_ptr;
percpu_ref_func_t *release; percpu_ref_func_t *release;
percpu_ref_func_t *confirm_kill; percpu_ref_func_t *confirm_kill;
struct rcu_head rcu; struct rcu_head rcu;
...@@ -69,7 +67,8 @@ struct percpu_ref { ...@@ -69,7 +67,8 @@ struct percpu_ref {
int __must_check percpu_ref_init(struct percpu_ref *ref, int __must_check percpu_ref_init(struct percpu_ref *ref,
percpu_ref_func_t *release); percpu_ref_func_t *release);
void percpu_ref_cancel_init(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref);
void percpu_ref_exit(struct percpu_ref *ref);
void percpu_ref_kill_and_confirm(struct percpu_ref *ref, void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill); percpu_ref_func_t *confirm_kill);
...@@ -88,12 +87,28 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) ...@@ -88,12 +87,28 @@ static inline void percpu_ref_kill(struct percpu_ref *ref)
return percpu_ref_kill_and_confirm(ref, NULL); return percpu_ref_kill_and_confirm(ref, NULL);
} }
#define PCPU_STATUS_BITS 2
#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1)
#define PCPU_REF_PTR 0
#define PCPU_REF_DEAD 1 #define PCPU_REF_DEAD 1
#define REF_STATUS(count) (((unsigned long) count) & PCPU_STATUS_MASK) /*
* Internal helper. Don't use outside percpu-refcount proper. The
* function reports liveness as a bool and passes the pointer back through
* @pcpu_countp instead of returning the pointer and letting the caller
* test it for NULL, because the latter forces the compiler to generate two
* conditional branches as it can't assume that @ref->pcpu_count is not NULL.
*/
static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
				    unsigned __percpu **pcpu_countp)
{
	/* read the pointer word once; the flag test and the result share it */
	unsigned long word = ACCESS_ONCE(ref->pcpu_count_ptr);

	/* paired with smp_store_release() in percpu_ref_reinit() */
	smp_read_barrier_depends();

	if (likely(!(word & PCPU_REF_DEAD))) {
		/* flag bit clear: the word is the percpu counter pointer */
		*pcpu_countp = (unsigned __percpu *)word;
		return true;
	}

	/* flag bit set: *pcpu_countp is left untouched */
	return false;
}
/** /**
* percpu_ref_get - increment a percpu refcount * percpu_ref_get - increment a percpu refcount
...@@ -107,9 +122,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) ...@@ -107,9 +122,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
rcu_read_lock_sched(); rcu_read_lock_sched();
pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (__pcpu_ref_alive(ref, &pcpu_count))
if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
this_cpu_inc(*pcpu_count); this_cpu_inc(*pcpu_count);
else else
atomic_inc(&ref->count); atomic_inc(&ref->count);
...@@ -133,9 +146,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) ...@@ -133,9 +146,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
rcu_read_lock_sched(); rcu_read_lock_sched();
pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (__pcpu_ref_alive(ref, &pcpu_count)) {
if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
this_cpu_inc(*pcpu_count); this_cpu_inc(*pcpu_count);
ret = true; ret = true;
} else { } else {
...@@ -168,9 +179,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) ...@@ -168,9 +179,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
rcu_read_lock_sched(); rcu_read_lock_sched();
pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (__pcpu_ref_alive(ref, &pcpu_count)) {
if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
this_cpu_inc(*pcpu_count); this_cpu_inc(*pcpu_count);
ret = true; ret = true;
} }
...@@ -193,9 +202,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) ...@@ -193,9 +202,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
rcu_read_lock_sched(); rcu_read_lock_sched();
pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (__pcpu_ref_alive(ref, &pcpu_count))
if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
this_cpu_dec(*pcpu_count); this_cpu_dec(*pcpu_count);
else if (unlikely(atomic_dec_and_test(&ref->count))) else if (unlikely(atomic_dec_and_test(&ref->count)))
ref->release(ref); ref->release(ref);
...@@ -203,4 +210,19 @@ static inline void percpu_ref_put(struct percpu_ref *ref) ...@@ -203,4 +210,19 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
rcu_read_unlock_sched(); rcu_read_unlock_sched();
} }
/**
* percpu_ref_is_zero - test whether a percpu refcount reached zero
* @ref: percpu_ref to test
*
* Returns %true if @ref reached zero.
*/
static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
{
unsigned __percpu *pcpu_count;
if (__pcpu_ref_alive(ref, &pcpu_count))
return false;
return !atomic_read(&ref->count);
}
#endif #endif
This diff is collapsed.
...@@ -1638,7 +1638,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) ...@@ -1638,7 +1638,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
exit_root_id: exit_root_id:
cgroup_exit_root_id(root); cgroup_exit_root_id(root);
cancel_ref: cancel_ref:
percpu_ref_cancel_init(&root_cgrp->self.refcnt); percpu_ref_exit(&root_cgrp->self.refcnt);
out: out:
free_cgrp_cset_links(&tmp_links); free_cgrp_cset_links(&tmp_links);
return ret; return ret;
...@@ -4175,6 +4175,8 @@ static void css_free_work_fn(struct work_struct *work) ...@@ -4175,6 +4175,8 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work); container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup *cgrp = css->cgroup; struct cgroup *cgrp = css->cgroup;
percpu_ref_exit(&css->refcnt);
if (css->ss) { if (css->ss) {
/* css free path */ /* css free path */
if (css->parent) if (css->parent)
...@@ -4372,7 +4374,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) ...@@ -4372,7 +4374,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
err_free_id: err_free_id:
cgroup_idr_remove(&ss->css_idr, css->id); cgroup_idr_remove(&ss->css_idr, css->id);
err_free_percpu_ref: err_free_percpu_ref:
percpu_ref_cancel_init(&css->refcnt); percpu_ref_exit(&css->refcnt);
err_free_css: err_free_css:
call_rcu(&css->rcu_head, css_free_rcu_fn); call_rcu(&css->rcu_head, css_free_rcu_fn);
return err; return err;
...@@ -4483,7 +4485,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, ...@@ -4483,7 +4485,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
out_free_id: out_free_id:
cgroup_idr_remove(&root->cgroup_idr, cgrp->id); cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_cancel_ref: out_cancel_ref:
percpu_ref_cancel_init(&cgrp->self.refcnt); percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp: out_free_cgrp:
kfree(cgrp); kfree(cgrp);
out_unlock: out_unlock:
......
...@@ -1962,6 +1962,7 @@ __acquires(&pool->lock) ...@@ -1962,6 +1962,7 @@ __acquires(&pool->lock)
lockdep_copy_map(&lockdep_map, &work->lockdep_map); lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif #endif
/* ensure we're on the correct CPU */
WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
raw_smp_processor_id() != pool->cpu); raw_smp_processor_id() != pool->cpu);
...@@ -4574,11 +4575,10 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, ...@@ -4574,11 +4575,10 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
for_each_pool(pool, pi) { for_each_pool(pool, pi) {
mutex_lock(&pool->attach_mutex); mutex_lock(&pool->attach_mutex);
if (pool->cpu == cpu) { if (pool->cpu == cpu)
rebind_workers(pool); rebind_workers(pool);
} else if (pool->cpu < 0) { else if (pool->cpu < 0)
restore_unbound_workers_cpumask(pool, cpu); restore_unbound_workers_cpumask(pool, cpu);
}
mutex_unlock(&pool->attach_mutex); mutex_unlock(&pool->attach_mutex);
} }
......
...@@ -31,6 +31,11 @@ ...@@ -31,6 +31,11 @@
#define PCPU_COUNT_BIAS (1U << 31) #define PCPU_COUNT_BIAS (1U << 31)
/*
 * Return @ref's percpu counter pointer with the PCPU_REF_DEAD flag bit
 * masked off, whether or not the flag is currently set.
 */
static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
{
	unsigned long ptr_bits = ref->pcpu_count_ptr;

	ptr_bits &= ~PCPU_REF_DEAD;
	return (unsigned __percpu *)ptr_bits;
}
/** /**
* percpu_ref_init - initialize a percpu refcount * percpu_ref_init - initialize a percpu refcount
* @ref: percpu_ref to initialize * @ref: percpu_ref to initialize
...@@ -46,8 +51,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) ...@@ -46,8 +51,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
{ {
atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
ref->pcpu_count = alloc_percpu(unsigned); ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned);
if (!ref->pcpu_count) if (!ref->pcpu_count_ptr)
return -ENOMEM; return -ENOMEM;
ref->release = release; ref->release = release;
...@@ -56,53 +61,71 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) ...@@ -56,53 +61,71 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
EXPORT_SYMBOL_GPL(percpu_ref_init); EXPORT_SYMBOL_GPL(percpu_ref_init);
/** /**
* percpu_ref_cancel_init - cancel percpu_ref_init() * percpu_ref_reinit - re-initialize a percpu refcount
* @ref: percpu_ref to cancel init for * @ref: percpu_ref to re-initialize
* *
* Once a percpu_ref is initialized, its destruction is initiated by * Re-initialize @ref so that it's in the same state as when it finished
* percpu_ref_kill() and completes asynchronously, which can be painful to * percpu_ref_init(). @ref must have been initialized successfully, killed
* do when destroying a half-constructed object in init failure path. * and reached 0 but not exited.
* *
* This function destroys @ref without invoking @ref->release and the * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
* memory area containing it can be freed immediately on return. To * this function is in progress.
* prevent accidental misuse, it's required that @ref has finished
* percpu_ref_init(), whether successful or not, but never used.
*
* The weird name and usage restriction are to prevent people from using
* this function by mistake for normal shutdown instead of
* percpu_ref_kill().
*/ */
void percpu_ref_cancel_init(struct percpu_ref *ref) void percpu_ref_reinit(struct percpu_ref *ref)
{ {
unsigned __percpu *pcpu_count = ref->pcpu_count; unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
int cpu; int cpu;
WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); BUG_ON(!pcpu_count);
WARN_ON(!percpu_ref_is_zero(ref));
atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
/*
* Restore per-cpu operation. smp_store_release() is paired with
* smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
* that the zeroing is visible to all percpu accesses which can see
* the following PCPU_REF_DEAD clearing.
*/
for_each_possible_cpu(cpu)
*per_cpu_ptr(pcpu_count, cpu) = 0;
smp_store_release(&ref->pcpu_count_ptr,
ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);
/**
* percpu_ref_exit - undo percpu_ref_init()
* @ref: percpu_ref to exit
*
* This function exits @ref. The caller is responsible for ensuring that
* @ref is no longer in active use. The usual places to invoke this
* function from are the @ref->release() callback or in init failure path
* where percpu_ref_init() succeeded but other parts of the initialization
* of the embedding object failed.
*/
void percpu_ref_exit(struct percpu_ref *ref)
{
unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
if (pcpu_count) { if (pcpu_count) {
for_each_possible_cpu(cpu) free_percpu(pcpu_count);
WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); ref->pcpu_count_ptr = PCPU_REF_DEAD;
free_percpu(ref->pcpu_count);
} }
} }
EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); EXPORT_SYMBOL_GPL(percpu_ref_exit);
static void percpu_ref_kill_rcu(struct rcu_head *rcu) static void percpu_ref_kill_rcu(struct rcu_head *rcu)
{ {
struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
unsigned __percpu *pcpu_count = ref->pcpu_count; unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
unsigned count = 0; unsigned count = 0;
int cpu; int cpu;
/* Mask out PCPU_REF_DEAD */
pcpu_count = (unsigned __percpu *)
(((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK);
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
count += *per_cpu_ptr(pcpu_count, cpu); count += *per_cpu_ptr(pcpu_count, cpu);
free_percpu(pcpu_count);
pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count);
/* /*
...@@ -152,11 +175,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) ...@@ -152,11 +175,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
void percpu_ref_kill_and_confirm(struct percpu_ref *ref, void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill) percpu_ref_func_t *confirm_kill)
{ {
WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
"percpu_ref_kill() called more than once!\n"); "percpu_ref_kill() called more than once!\n");
ref->pcpu_count = (unsigned __percpu *) ref->pcpu_count_ptr |= PCPU_REF_DEAD;
(((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD);
ref->confirm_kill = confirm_kill; ref->confirm_kill = confirm_kill;
call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
......
...@@ -720,8 +720,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) ...@@ -720,8 +720,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
if (unlikely(align < 2)) if (unlikely(align < 2))
align = 2; align = 2;
if (unlikely(size & 1)) size = ALIGN(size, 2);
size++;
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
WARN(true, "illegal size (%zu) or align (%zu) for " WARN(true, "illegal size (%zu) or align (%zu) for "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment