Commit 1eaad053 authored by David Mosberger

ia64: Change per-CPU implementation so that __get_cpu_var() returns the

	canonical address (l-value).  To get the virtually mapped
	alias (which is more efficient), use __ia64_per_cpu_var().  The
	latter is safe only if the address of the l-value is never passed
	to another CPU (i.e., not stored in any global place).
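	To illustrate the difference (a sketch only, not part of this
	patch; "nr_widgets" is a made-up per-CPU variable, and the
	caller is assumed to have preemption disabled):

		#include <linux/percpu.h>

		DEFINE_PER_CPU(unsigned long, nr_widgets);

		void count_widget (void)
		{
			/* fast virtually mapped alias; use only on the local CPU */
			__ia64_per_cpu_var(nr_widgets)++;
		}

		unsigned long *widget_count_address (void)
		{
			/* canonical address; safe to store or hand to another CPU */
			return &__get_cpu_var(nr_widgets);
		}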
	For extremely efficient, portable per-CPU variables, there is
	now a new API local.h which was introduced by Rusty Russell.
	To use this, declare a variable of type local_t as a per-CPU
	variable and then use {__,}cpu_local_FOO() to manipulate such
	variables.  This patch also extends the atomic interface with
	a 64-bit counter type (atomic64_t) and matching atomic64_*
	operations.
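	A minimal local.h usage sketch (illustrative only; the counter
	name is hypothetical):

		#include <linux/percpu.h>
		#include <asm/local.h>

		static DEFINE_PER_CPU(local_t, nr_events);

		void note_event (void)
		{
			/* atomic on the local CPU; safe against interrupts */
			cpu_local_inc(nr_events);
		}

		void note_event_unlocked (void)
		{
			/* non-atomic variant: caller handles preemption/interrupts */
			__cpu_local_inc(nr_events);
		}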
parent ec26ea39
@@ -64,9 +64,10 @@ EXPORT_SYMBOL(ia64_pfn_valid);
 #endif

 #include <asm/processor.h>
-EXPORT_SYMBOL(cpu_info__per_cpu);
+EXPORT_SYMBOL(per_cpu__cpu_info);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__per_cpu_offset);
+EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
 #endif
 EXPORT_SYMBOL(kernel_thread);
...
@@ -566,7 +566,7 @@ static struct vm_operations_struct pfm_vm_ops={
 #define pfm_wait_task_inactive(t)	wait_task_inactive(t)
-#define pfm_get_cpu_var(v)		__get_cpu_var(v)
+#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
 #define pfm_get_cpu_data(a,b)		per_cpu(a, b)
 typedef irqreturn_t pfm_irq_handler_t;
 #define PFM_IRQ_HANDLER_RET(v)	do {	\
...
@@ -56,6 +56,7 @@ unsigned long __per_cpu_offset[NR_CPUS];
 #endif

 DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
 DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
@@ -709,6 +710,8 @@ cpu_init (void)
 			memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
 			__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
 			cpu_data += PERCPU_PAGE_SIZE;
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 		}
 	}
 	cpu_data = __per_cpu_start + __per_cpu_offset[smp_processor_id()];
@@ -716,19 +719,18 @@ cpu_init (void)
 	cpu_data = __phys_per_cpu_start;
 #endif /* !CONFIG_SMP */

-	cpu_info = cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
-#ifdef CONFIG_NUMA
-	cpu_info->node_data = get_node_data_ptr();
-#endif
-
 	get_max_cacheline_size();

 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
 	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
 	 * depends on the data returned by identify_cpu().  We break the dependency by
-	 * accessing cpu_data() the old way, through identity mapped space.
+	 * accessing cpu_data() through the canonical per-CPU address.
 	 */
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+#ifdef CONFIG_NUMA
+	cpu_info->node_data = get_node_data_ptr();
+#endif
 	identify_cpu(cpu_info);

 #ifdef CONFIG_MCKINLEY
...
@@ -72,7 +72,7 @@ static volatile struct call_data_struct *call_data;
 #define IPI_CPU_STOP		1

 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(__u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;

 static void
 stop_this_cpu (void)
@@ -91,7 +91,7 @@ irqreturn_t
 handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
 {
 	int this_cpu = get_cpu();
-	unsigned long *pending_ipis = &__get_cpu_var(ipi_operation);
+	unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
 	unsigned long ops;

 	/* Count this now; we may make a call that never returns. */
...
#ifndef _ASM_IA64_LOCAL_H
#define _ASM_IA64_LOCAL_H
/*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/percpu.h>
typedef struct {
atomic64_t val;
} local_t;
#define LOCAL_INIT(i) ((local_t) { { (i) } })
#define local_read(l) atomic64_read(&(l)->val)
#define local_set(l, i) atomic64_set(&(l)->val, i)
#define local_inc(l) atomic64_inc(&(l)->val)
#define local_dec(l) atomic64_dec(&(l)->val)
#define local_add(i, l)	atomic64_add((i), &(l)->val)
#define local_sub(i, l)	atomic64_sub((i), &(l)->val)
/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc. */
#define __local_inc(l) (++(l)->val.counter)
#define __local_dec(l) (--(l)->val.counter)
#define __local_add(i,l) ((l)->val.counter += (i))
#define __local_sub(i,l) ((l)->val.counter -= (i))
/*
* Use these for per-cpu local_t variables. Note they take a variable (eg. mystruct.foo),
* not an address.
*/
#define cpu_local_read(v) local_read(&__ia64_per_cpu_var(v))
#define cpu_local_set(v, i) local_set(&__ia64_per_cpu_var(v), (i))
#define cpu_local_inc(v) local_inc(&__ia64_per_cpu_var(v))
#define cpu_local_dec(v) local_dec(&__ia64_per_cpu_var(v))
#define cpu_local_add(i, v) local_add((i), &__ia64_per_cpu_var(v))
#define cpu_local_sub(i, v) local_sub((i), &__ia64_per_cpu_var(v))
/*
* Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt,
* etc.
*/
#define __cpu_local_inc(v) __local_inc(&__ia64_per_cpu_var(v))
#define __cpu_local_dec(v) __local_dec(&__ia64_per_cpu_var(v))
#define __cpu_local_add(i, v) __local_add((i), &__ia64_per_cpu_var(v))
#define __cpu_local_sub(i, v) __local_sub((i), &__ia64_per_cpu_var(v))
#endif /* _ASM_IA64_LOCAL_H */
@@ -9,7 +9,7 @@
  * "int" types were carefully placed so as to ensure proper operation
  * of the macros.
  *
- * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */

 #include <linux/types.h>
@@ -21,11 +21,16 @@
  * memory accesses are ordered.
  */
 typedef struct { volatile __s32 counter; } atomic_t;
+typedef struct { volatile __s64 counter; } atomic64_t;

 #define ATOMIC_INIT(i)		((atomic_t) { (i) })
+#define ATOMIC64_INIT(i)	((atomic64_t) { (i) })

 #define atomic_read(v)		((v)->counter)
+#define atomic64_read(v)	((v)->counter)

 #define atomic_set(v,i)		(((v)->counter) = (i))
+#define atomic64_set(v,i)	(((v)->counter) = (i))

 static __inline__ int
 ia64_atomic_add (int i, atomic_t *v)
@@ -37,7 +42,21 @@ ia64_atomic_add (int i, atomic_t *v)
 		CMPXCHG_BUGCHECK(v);
 		old = atomic_read(v);
 		new = old + i;
-	} while (ia64_cmpxchg("acq", v, old, old + i, sizeof(atomic_t)) != old);
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic_t)) != old);
+	return new;
+}
+
+static __inline__ long
+ia64_atomic64_add (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old + i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic64_t)) != old);
 	return new;
 }
@@ -55,6 +74,20 @@ ia64_atomic_sub (int i, atomic_t *v)
 	return new;
 }

+static __inline__ long
+ia64_atomic64_sub (__s64 i, atomic64_t *v)
+{
+	__s64 old, new;
+	CMPXCHG_BUGCHECK_DECL
+
+	do {
+		CMPXCHG_BUGCHECK(v);
+		old = atomic64_read(v);
+		new = old - i;
+	} while (ia64_cmpxchg("acq", v, old, new, sizeof(atomic64_t)) != old);
+	return new;
+}
+
 #define atomic_add_return(i,v)					\
 ({								\
 	int __ia64_aar_i = (i);					\
@@ -67,6 +100,18 @@ ia64_atomic_sub (int i, atomic_t *v)
 	 : ia64_atomic_add(__ia64_aar_i, v);			\
 })

+#define atomic64_add_return(i,v)				\
+({								\
+	long __ia64_aar_i = (i);				\
+	(__builtin_constant_p(i)				\
+	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)	\
+	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)	\
+	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)	\
+	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))	\
+		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
+		: ia64_atomic64_add(__ia64_aar_i, v);		\
+})
+
 /*
  * Atomically add I to V and return TRUE if the resulting value is
  * negative.
@@ -77,6 +122,12 @@ atomic_add_negative (int i, atomic_t *v)
 	return atomic_add_return(i, v) < 0;
 }

+static __inline__ int
+atomic64_add_negative (int i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
 #define atomic_sub_return(i,v)					\
 ({								\
 	int __ia64_asr_i = (i);					\
@@ -89,18 +140,40 @@ atomic_add_negative (int i, atomic_t *v)
 	 : ia64_atomic_sub(__ia64_asr_i, v);			\
 })

+#define atomic64_sub_return(i,v)				\
+({								\
+	long __ia64_asr_i = (i);				\
+	(__builtin_constant_p(i)				\
+	 && (   (__ia64_asr_i ==  1) || (__ia64_asr_i ==   4)	\
+	     || (__ia64_asr_i ==  8) || (__ia64_asr_i ==  16)	\
+	     || (__ia64_asr_i == -1) || (__ia64_asr_i ==  -4)	\
+	     || (__ia64_asr_i == -8) || (__ia64_asr_i == -16)))	\
+		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
+		: ia64_atomic64_sub(__ia64_asr_i, v);		\
+})
+
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))

 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) != 0)
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) != 0)

 #define atomic_add(i,v)			atomic_add_return((i), (v))
 #define atomic_sub(i,v)			atomic_sub_return((i), (v))
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))

+#define atomic64_add(i,v)		atomic64_add_return((i), (v))
+#define atomic64_sub(i,v)		atomic64_sub_return((i), (v))
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
...
@@ -86,9 +86,9 @@ delayed_tlb_flush (void)
 {
 	extern void local_flush_tlb_all (void);

-	if (unlikely(__get_cpu_var(ia64_need_tlb_flush))) {
+	if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
 		local_flush_tlb_all();
-		__get_cpu_var(ia64_need_tlb_flush) = 0;
+		__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
 	}
 }
...
 #ifndef _ASM_IA64_PERCPU_H
 #define _ASM_IA64_PERCPU_H

-#include <linux/config.h>
-#include <linux/compiler.h>
-
 /*
  * Copyright (C) 2002-2003 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */

 #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE

 #ifdef __ASSEMBLY__
-#define THIS_CPU(var)	(var##__per_cpu)	/* use this to mark accesses to per-CPU variables... */
+# define THIS_CPU(var)	(per_cpu__##var)	/* use this to mark accesses to per-CPU variables... */
 #else /* !__ASSEMBLY__ */

+#include <linux/config.h>
 #include <linux/threads.h>

+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+
+/*
+ * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
+ * external routine, to avoid include-hell.
+ */
+#ifdef CONFIG_SMP
+
 extern unsigned long __per_cpu_offset[NR_CPUS];

+/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
+DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
+
+/* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
-	__attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
+	__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

-#define __get_cpu_var(var)	(var##__per_cpu)
-#ifdef CONFIG_SMP
-# define per_cpu(var, cpu)	(*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu)	(*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define __get_cpu_var(var)	(*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))

 extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
-#else
-# define per_cpu(var, cpu)	((void)cpu, __get_cpu_var(var))
-#endif

-#define EXPORT_PER_CPU_SYMBOL(var)	EXPORT_SYMBOL(var##__per_cpu)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var)	EXPORT_SYMBOL_GPL(var##__per_cpu)
+#else /* ! SMP */
+
+#define DEFINE_PER_CPU(type, name)	__typeof__(type) per_cpu__##name
+#define per_cpu(var, cpu)		((void)cpu, per_cpu__##var)
+#define __get_cpu_var(var)		per_cpu__##var
+
+#endif /* SMP */
+
+#define EXPORT_PER_CPU_SYMBOL(var)	EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var)	EXPORT_SYMBOL_GPL(per_cpu__##var)
+
+/* ia64-specific part: */

 extern void setup_per_cpu_areas (void);

+/*
+ * Be extremely careful when taking the address of this variable!  Due to virtual
+ * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * more efficient.
+ */
+#define __ia64_per_cpu_var(var)	(per_cpu__##var)
+
 #endif /* !__ASSEMBLY__ */

 #endif /* _ASM_IA64_PERCPU_H */
@@ -191,10 +191,12 @@ struct cpuinfo_ia64 {
 DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);

 /*
- * The "local" data pointer.  It points to the per-CPU data of the currently executing
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
  * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
  */
-#define local_cpu_data		(&__get_cpu_var(cpu_info))
+#define local_cpu_data		(&__ia64_per_cpu_var(cpu_info))
 #define cpu_data(cpu)		(&per_cpu(cpu_info, cpu))

 extern void identify_cpu (struct cpuinfo_ia64 *);
...
@@ -126,7 +126,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 static inline struct mmu_gather *
 tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+	struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);

 	tlb->mm = mm;

 	/*
...