Commit cbd379b1 authored by Russell King's avatar Russell King

Merge branches 'fixes', 'mcpm', 'misc' and 'mmci' into for-next

......@@ -16,6 +16,9 @@ Required properties:
performs the same operation).
"marvell,"aurora-outer-cache: Marvell Controller designed to be
compatible with the ARM one with outer cache mode.
"bcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
offset needs to be added to the address before passing down to the L2
cache controller
- cache-unified : Specifies the cache is a unified cache.
- cache-level : Should be set to 2 for a level 2 cache.
- reg : Physical base address and size of cache controller's memory mapped
......
......@@ -175,6 +175,9 @@ config ARCH_HAS_CPUFREQ
and that the relevant menu configurations are displayed for
it.
config ARCH_HAS_BANDGAP
bool
config GENERIC_HWEIGHT
bool
default y
......
......@@ -59,37 +59,43 @@ comma = ,
# Note that GCC does not numerically define an architecture version
# macro, but instead defines a whole series of macros which makes
# testing for a specific architecture or later rather impossible.
arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
# Only override the compiler option if ARMv6. The ARMv6K extensions are
# always available in ARMv7
ifeq ($(CONFIG_CPU_32v6),y)
arch-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
endif
arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t
arch-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -march=armv4
arch-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 -march=armv3
arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t
arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4
arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3
# Evaluate arch cc-option calls now
arch-y := $(arch-y)
# This selects how we optimise for the processor.
tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM946E) :=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi
tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110
tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100
tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
tune-$(CONFIG_CPU_XSC3) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
tune-$(CONFIG_CPU_FEROCEON) :=$(call cc-option,-mtune=marvell-f,-mtune=xscale)
tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
tune-$(CONFIG_CPU_V6K) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
tune-$(CONFIG_CPU_ARM7TDMI) =-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi
tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi
tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110
tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100
tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale)
tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
# Evaluate tune cc-option calls now
tune-y := $(tune-y)
ifeq ($(CONFIG_AEABI),y)
CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork
......@@ -289,9 +295,10 @@ zImage Image xipImage bootpImage uImage: vmlinux
zinstall uinstall install: vmlinux
$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
%.dtb: scripts
%.dtb: | scripts
$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@
PHONY += dtbs
dtbs: scripts
$(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) dtbs
......
......@@ -53,6 +53,17 @@ static const void *getprop(const void *fdt, const char *node_path,
return fdt_getprop(fdt, offset, property, len);
}
static uint32_t get_cell_size(const void *fdt)
{
int len;
uint32_t cell_size = 1;
const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len);
if (size_len)
cell_size = fdt32_to_cpu(*size_len);
return cell_size;
}
static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
{
char cmdline[COMMAND_LINE_SIZE];
......@@ -95,9 +106,11 @@ static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
int atags_to_fdt(void *atag_list, void *fdt, int total_space)
{
struct tag *atag = atag_list;
uint32_t mem_reg_property[2 * NR_BANKS];
/* In the case of 64 bits memory size, need to reserve 2 cells for
* address and size for each bank */
uint32_t mem_reg_property[2 * 2 * NR_BANKS];
int memcount = 0;
int ret;
int ret, memsize;
/* make sure we've got an aligned pointer */
if ((u32)atag_list & 0x3)
......@@ -137,8 +150,25 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
continue;
if (!atag->u.mem.size)
continue;
mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
memsize = get_cell_size(fdt);
if (memsize == 2) {
/* if memsize is 2, that means that
* each data needs 2 cells of 32 bits,
* so the data are 64 bits */
uint64_t *mem_reg_prop64 =
(uint64_t *)mem_reg_property;
mem_reg_prop64[memcount++] =
cpu_to_fdt64(atag->u.mem.start);
mem_reg_prop64[memcount++] =
cpu_to_fdt64(atag->u.mem.size);
} else {
mem_reg_property[memcount++] =
cpu_to_fdt32(atag->u.mem.start);
mem_reg_property[memcount++] =
cpu_to_fdt32(atag->u.mem.size);
}
} else if (atag->hdr.tag == ATAG_INITRD2) {
uint32_t initrd_start, initrd_size;
initrd_start = atag->u.initrd.start;
......@@ -150,8 +180,10 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
}
}
if (memcount)
setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
if (memcount) {
setprop(fdt, "/memory", "reg", mem_reg_property,
4 * memcount * memsize);
}
return fdt_pack(fdt);
}
......@@ -142,7 +142,6 @@ start:
mov r7, r1 @ save architecture ID
mov r8, r2 @ save atags pointer
#ifndef __ARM_ARCH_2__
/*
* Booting from Angel - need to enter SVC mode and disable
* FIQs/IRQs (numeric definitions from angel arm.h source).
......@@ -158,10 +157,6 @@ not_angel:
safe_svcmode_maskall r0
msr spsr_cxsf, r9 @ Save the CPU boot mode in
@ SPSR
#else
teqp pc, #0x0c000003 @ turn off interrupts
#endif
/*
* Note that some cache flushing and other stuff may
* be needed here - is there an Angel SWI call for this?
......@@ -183,7 +178,19 @@ not_angel:
ldr r4, =zreladdr
#endif
bl cache_on
/*
* Set up a page table only if it won't overwrite ourself.
* That means r4 < pc && r4 - 16k page directory > &_end.
* Given that r4 > &_end is most unfrequent, we add a rough
* additional 1MB of room for a possible appended DTB.
*/
mov r0, pc
cmp r0, r4
ldrcc r0, LC0+32
addcc r0, r0, pc
cmpcc r4, r0
orrcc r4, r4, #1 @ remember we skipped cache_on
blcs cache_on
restart: adr r0, LC0
ldmia r0, {r1, r2, r3, r6, r10, r11, r12}
......@@ -229,7 +236,7 @@ restart: adr r0, LC0
* r0 = delta
* r2 = BSS start
* r3 = BSS end
* r4 = final kernel address
* r4 = final kernel address (possibly with LSB set)
* r5 = appended dtb size (still unknown)
* r6 = _edata
* r7 = architecture ID
......@@ -277,6 +284,7 @@ restart: adr r0, LC0
*/
cmp r0, #1
sub r0, r4, #TEXT_OFFSET
bic r0, r0, #1
add r0, r0, #0x100
mov r1, r6
sub r2, sp, r6
......@@ -323,12 +331,13 @@ dtb_check_done:
/*
* Check to see if we will overwrite ourselves.
* r4 = final kernel address
* r4 = final kernel address (possibly with LSB set)
* r9 = size of decompressed image
* r10 = end of this image, including bss/stack/malloc space if non XIP
* We basically want:
* r4 - 16k page directory >= r10 -> OK
* r4 + image length <= address of wont_overwrite -> OK
* Note: the possible LSB in r4 is harmless here.
*/
add r10, r10, #16384
cmp r4, r10
......@@ -390,7 +399,8 @@ dtb_check_done:
add sp, sp, r6
#endif
bl cache_clean_flush
tst r4, #1
bleq cache_clean_flush
adr r0, BSYM(restart)
add r0, r0, r6
......@@ -402,7 +412,7 @@ wont_overwrite:
* r0 = delta
* r2 = BSS start
* r3 = BSS end
* r4 = kernel execution address
* r4 = kernel execution address (possibly with LSB set)
* r5 = appended dtb size (0 if not present)
* r7 = architecture ID
* r8 = atags pointer
......@@ -465,6 +475,15 @@ not_relocated: mov r0, #0
cmp r2, r3
blo 1b
/*
* Did we skip the cache setup earlier?
* That is indicated by the LSB in r4.
* Do it now if so.
*/
tst r4, #1
bic r4, r4, #1
blne cache_on
/*
* The C runtime environment should now be setup sufficiently.
* Set up some pointers, and start decompressing.
......@@ -513,6 +532,7 @@ LC0: .word LC0 @ r1
.word _got_start @ r11
.word _got_end @ ip
.word .L_user_stack_end @ sp
.word _end - restart + 16384 + 1024*1024
.size LC0, . - LC0
#ifdef CONFIG_ARCH_RPC
......
......@@ -47,10 +47,10 @@ uart@3e000000 {
};
L2: l2-cache {
compatible = "arm,pl310-cache";
reg = <0x3ff20000 0x1000>;
cache-unified;
cache-level = <2>;
compatible = "bcm,bcm11351-a2-pl310-cache";
reg = <0x3ff20000 0x1000>;
cache-unified;
cache-level = <2>;
};
timer@35006000 {
......
......@@ -32,11 +32,11 @@
1901: adr r0, 1902b
bl printascii
mov r0, r9
bl printhex8
bl printhex2
adr r0, 1903b
bl printascii
mov r0, r10
bl printhex8
bl printhex2
adr r0, 1904b
bl printascii
#endif
......
......@@ -19,10 +19,6 @@
#include <asm/smp.h>
#include <asm/smp_plat.h>
static void __init simple_smp_init_cpus(void)
{
}
static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
unsigned int mpidr, pcpu, pcluster, ret;
......@@ -74,7 +70,6 @@ static void mcpm_cpu_die(unsigned int cpu)
#endif
static struct smp_operations __initdata mcpm_smp_ops = {
.smp_init_cpus = simple_smp_init_cpus,
.smp_boot_secondary = mcpm_boot_secondary,
.smp_secondary_init = mcpm_secondary_init,
#ifdef CONFIG_HOTPLUG_CPU
......
......@@ -46,7 +46,7 @@
__rem; \
})
#if __GNUC__ < 4
#if __GNUC__ < 4 || !defined(CONFIG_AEABI)
/*
* gcc versions earlier than 4.0 are simply too problematic for the
......
......@@ -130,16 +130,16 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
*/
extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
size_t, unsigned int, void *);
extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int,
extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
void *);
extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int);
extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached);
extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
extern void __iounmap(volatile void __iomem *addr);
extern void __arm_iounmap(volatile void __iomem *addr);
extern void __iomem * (*arch_ioremap_caller)(unsigned long, size_t,
extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
unsigned int, void *);
extern void (*arch_iounmap)(volatile void __iomem *);
......
......@@ -18,6 +18,7 @@
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/proc-fns.h>
#include <asm/smp_plat.h>
#include <asm-generic/mm_hooks.h>
void __check_vmalloc_seq(struct mm_struct *mm);
......@@ -27,7 +28,15 @@ void __check_vmalloc_seq(struct mm_struct *mm);
void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; })
DECLARE_PER_CPU(atomic64_t, active_asids);
#ifdef CONFIG_ARM_ERRATA_798181
void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
cpumask_t *mask);
#else /* !CONFIG_ARM_ERRATA_798181 */
static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
cpumask_t *mask)
{
}
#endif /* CONFIG_ARM_ERRATA_798181 */
#else /* !CONFIG_CPU_HAS_ASID */
......@@ -98,12 +107,16 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#ifdef CONFIG_MMU
unsigned int cpu = smp_processor_id();
#ifdef CONFIG_SMP
/* check for possible thread migration */
if (!cpumask_empty(mm_cpumask(next)) &&
/*
* __sync_icache_dcache doesn't broadcast the I-cache invalidation,
* so check for possible thread migration and invalidate the I-cache
* if we're new to this CPU.
*/
if (cache_ops_need_broadcast() &&
!cpumask_empty(mm_cpumask(next)) &&
!cpumask_test_cpu(cpu, mm_cpumask(next)))
__flush_icache_all();
#endif
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
check_and_switch_context(next, tsk);
if (cache_is_vivt())
......
......@@ -97,19 +97,22 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long tmp;
unsigned long contended, res;
u32 slock;
__asm__ __volatile__(
" ldrex %0, [%2]\n"
" subs %1, %0, %0, ror #16\n"
" addeq %0, %0, %3\n"
" strexeq %1, %0, [%2]"
: "=&r" (slock), "=&r" (tmp)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
if (tmp == 0) {
do {
__asm__ __volatile__(
" ldrex %0, [%3]\n"
" mov %2, #0\n"
" subs %1, %0, %0, ror #16\n"
" addeq %0, %0, %4\n"
" strexeq %2, %0, [%3]"
: "=&r" (slock), "=&r" (contended), "=r" (res)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
} while (res);
if (!contended) {
smp_mb();
return 1;
} else {
......
......@@ -58,7 +58,7 @@ struct thread_info {
struct cpu_context_save cpu_context; /* cpu context */
__u32 syscall; /* syscall number */
__u8 used_cp[16]; /* thread used copro */
unsigned long tp_value;
unsigned long tp_value[2]; /* TLS registers */
#ifdef CONFIG_CRUNCH
struct crunch_state crunchstate;
#endif
......
......@@ -2,27 +2,30 @@
#define __ASMARM_TLS_H
#ifdef __ASSEMBLY__
.macro set_tls_none, tp, tmp1, tmp2
#include <asm/asm-offsets.h>
.macro switch_tls_none, base, tp, tpuser, tmp1, tmp2
.endm
.macro set_tls_v6k, tp, tmp1, tmp2
.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
mrc p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register
mcr p15, 0, \tp, c13, c0, 3 @ set TLS register
mov \tmp1, #0
mcr p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register
mcr p15, 0, \tpuser, c13, c0, 2 @ and the user r/w register
str \tmp2, [\base, #TI_TP_VALUE + 4] @ save it
.endm
.macro set_tls_v6, tp, tmp1, tmp2
.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
ldr \tmp1, =elf_hwcap
ldr \tmp1, [\tmp1, #0]
mov \tmp2, #0xffff0fff
tst \tmp1, #HWCAP_TLS @ hardware TLS available?
mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register
movne \tmp1, #0
mcrne p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register
streq \tp, [\tmp2, #-15] @ set TLS value at 0xffff0ff0
mrcne p15, 0, \tmp2, c13, c0, 2 @ get the user r/w register
mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register
mcrne p15, 0, \tpuser, c13, c0, 2 @ set user r/w register
strne \tmp2, [\base, #TI_TP_VALUE + 4] @ save it
.endm
.macro set_tls_software, tp, tmp1, tmp2
.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
mov \tmp1, #0xffff0fff
str \tp, [\tmp1, #-15] @ set TLS value at 0xffff0ff0
.endm
......@@ -31,19 +34,30 @@
#ifdef CONFIG_TLS_REG_EMUL
#define tls_emu 1
#define has_tls_reg 1
#define set_tls set_tls_none
#define switch_tls switch_tls_none
#elif defined(CONFIG_CPU_V6)
#define tls_emu 0
#define has_tls_reg (elf_hwcap & HWCAP_TLS)
#define set_tls set_tls_v6
#define switch_tls switch_tls_v6
#elif defined(CONFIG_CPU_32v6K)
#define tls_emu 0
#define has_tls_reg 1
#define set_tls set_tls_v6k
#define switch_tls switch_tls_v6k
#else
#define tls_emu 0
#define has_tls_reg 0
#define set_tls set_tls_software
#define switch_tls switch_tls_software
#endif
#ifndef __ASSEMBLY__
static inline unsigned long get_tpuser(void)
{
unsigned long reg = 0;
if (has_tls_reg && !tls_emu)
__asm__("mrc p15, 0, %0, c13, c0, 2" : "=r" (reg));
return reg;
}
#endif
#endif /* __ASMARM_TLS_H */
......@@ -685,15 +685,16 @@ ENTRY(__switch_to)
UNWIND(.fnstart )
UNWIND(.cantunwind )
add ip, r1, #TI_CPU_SAVE
ldr r3, [r2, #TI_TP_VALUE]
ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack
THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
THUMB( str sp, [ip], #4 )
THUMB( str lr, [ip], #4 )
ldr r4, [r2, #TI_TP_VALUE]
ldr r5, [r2, #TI_TP_VALUE + 4]
#ifdef CONFIG_CPU_USE_DOMAINS
ldr r6, [r2, #TI_CPU_DOMAIN]
#endif
set_tls r3, r4, r5
switch_tls r1, r4, r5, r3, r7
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
ldr r7, [r2, #TI_TASK]
ldr r8, =__stack_chk_guard
......
......@@ -362,6 +362,16 @@ ENTRY(vector_swi)
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
zero_fp
#ifdef CONFIG_ALIGNMENT_TRAP
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
enable_irq
ct_user_exit
get_thread_info tsk
/*
* Get the system call number.
*/
......@@ -375,9 +385,9 @@ ENTRY(vector_swi)
#ifdef CONFIG_ARM_THUMB
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
ldreq r10, [lr, #-4] @ get SWI instruction
USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
#else
ldr r10, [lr, #-4] @ get SWI instruction
USER( ldr r10, [lr, #-4] ) @ get SWI instruction
#endif
#ifdef CONFIG_CPU_ENDIAN_BE8
rev r10, r10 @ little endian instruction
......@@ -392,22 +402,13 @@ ENTRY(vector_swi)
/* Legacy ABI only, possibly thumb mode. */
tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs
addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in
ldreq scno, [lr, #-4]
USER( ldreq scno, [lr, #-4] )
#else
/* Legacy ABI only. */
ldr scno, [lr, #-4] @ get SWI instruction
USER( ldr scno, [lr, #-4] ) @ get SWI instruction
#endif
#ifdef CONFIG_ALIGNMENT_TRAP
ldr ip, __cr_alignment
ldr ip, [ip]
mcr p15, 0, ip, c1, c0 @ update control register
#endif
enable_irq
ct_user_exit
get_thread_info tsk
adr tbl, sys_call_table @ load syscall table pointer
#if defined(CONFIG_OABI_COMPAT)
......@@ -442,6 +443,21 @@ local_restart:
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
b sys_ni_syscall @ not private func
#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
/*
* We failed to handle a fault trying to access the page
* containing the swi instruction, but we're not really in a
* position to return -EFAULT. Instead, return back to the
* instruction and re-enter the user fault handling path trying
* to page it in. This will likely result in sending SEGV to the
* current task.
*/
9001:
sub lr, lr, #4
str lr, [sp, #S_PC]
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
/*
......
......@@ -569,6 +569,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
perf_callchain_store(entry, regs->ARM_pc);
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
......
......@@ -39,6 +39,7 @@
#include <asm/thread_notify.h>
#include <asm/stacktrace.h>
#include <asm/mach/time.h>
#include <asm/tls.h>
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
......@@ -374,7 +375,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
clear_ptrace_hw_breakpoint(p);
if (clone_flags & CLONE_SETTLS)
thread->tp_value = childregs->ARM_r3;
thread->tp_value[0] = childregs->ARM_r3;
thread->tp_value[1] = get_tpuser();
thread_notify(THREAD_NOTIFY_COPY, thread);
......
......@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
#endif
case PTRACE_GET_THREAD_AREA:
ret = put_user(task_thread_info(child)->tp_value,
ret = put_user(task_thread_info(child)->tp_value[0],
datap);
break;
......
......@@ -456,6 +456,13 @@ void __init smp_setup_processor_id(void)
for (i = 1; i < nr_cpu_ids; ++i)
cpu_logical_map(i) = i == cpu ? 0 : i;
/*
* clear __my_cpu_offset on boot CPU to avoid hang caused by
* using percpu variable early, for example, lockdep will
* access percpu variable inside lock_release
*/
set_my_cpu_offset(0);
printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr);
}
......
......@@ -103,7 +103,7 @@ static void broadcast_tlb_a15_erratum(void)
static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
{
int cpu, this_cpu;
int this_cpu;
cpumask_t mask = { CPU_BITS_NONE };
if (!erratum_a15_798181())
......@@ -111,21 +111,7 @@ static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
dummy_flush_tlb_a15_erratum();
this_cpu = get_cpu();
for_each_online_cpu(cpu) {
if (cpu == this_cpu)
continue;
/*
* We only need to send an IPI if the other CPUs are running
* the same ASID as the one being invalidated. There is no
* need for locking around the active_asids check since the
* switch_mm() function has at least one dmb() (as required by
* this workaround) in case a context switch happens on
* another CPU after the condition below.
*/
if (atomic64_read(&mm->context.id) ==
atomic64_read(&per_cpu(active_asids, cpu)))
cpumask_set_cpu(cpu, &mask);
}
a15_erratum_get_cpumask(this_cpu, mm, &mask);
smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
put_cpu();
}
......
......@@ -581,7 +581,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
return regs->ARM_r0;
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
thread->tp_value[0] = regs->ARM_r0;
if (tls_emu)
return 0;
if (has_tls_reg) {
......@@ -699,7 +699,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
int reg = (instr >> 12) & 15;
if (reg == 15)
return 1;
regs->uregs[reg] = current_thread_info()->tp_value;
regs->uregs[reg] = current_thread_info()->tp_value[0];
regs->ARM_pc += 4;
return 0;
}
......
......@@ -116,7 +116,7 @@ static void __init ebsa110_map_io(void)
iotable_init(ebsa110_io_desc, ARRAY_SIZE(ebsa110_io_desc));
}
static void __iomem *ebsa110_ioremap_caller(unsigned long cookie, size_t size,
static void __iomem *ebsa110_ioremap_caller(phys_addr_t cookie, size_t size,
unsigned int flags, void *caller)
{
return (void __iomem *)cookie;
......
......@@ -65,7 +65,7 @@ static void imx3_idle(void)
: "=r" (reg));
}
static void __iomem *imx3_ioremap_caller(unsigned long phys_addr, size_t size,
static void __iomem *imx3_ioremap_caller(phys_addr_t phys_addr, size_t size,
unsigned int mtype, void *caller)
{
if (mtype == MT_DEVICE) {
......
......@@ -23,7 +23,7 @@
#include "pci.h"
static void __iomem *__iop13xx_ioremap_caller(unsigned long cookie,
static void __iomem *__iop13xx_ioremap_caller(phys_addr_t cookie,
size_t size, unsigned int mtype, void *caller)
{
void __iomem * retval;
......
......@@ -559,7 +559,7 @@ void ixp4xx_restart(char mode, const char *cmd)
* fallback to the default.
*/
static void __iomem *ixp4xx_ioremap_caller(unsigned long addr, size_t size,
static void __iomem *ixp4xx_ioremap_caller(phys_addr_t addr, size_t size,
unsigned int mtype, void *caller)
{
if (!is_pci_memory(addr))
......
......@@ -23,7 +23,7 @@ extern void msm_map_msm8x60_io(void);
extern void msm_map_msm8960_io(void);
extern void msm_map_qsd8x50_io(void);
extern void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size,
extern void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size,
unsigned int mtype, void *caller);
extern struct smp_operations msm_smp_ops;
......
......@@ -172,7 +172,7 @@ void __init msm_map_msm7x30_io(void)
}
#endif /* CONFIG_ARCH_MSM7X30 */
void __iomem *__msm_ioremap_caller(unsigned long phys_addr, size_t size,
void __iomem *__msm_ioremap_caller(phys_addr_t phys_addr, size_t size,
unsigned int mtype, void *caller)
{
if (mtype == MT_DEVICE) {
......
......@@ -4,6 +4,7 @@ config ARCH_OMAP
config ARCH_OMAP2PLUS
bool "TI OMAP2/3/4/5 SoCs with device tree support" if (ARCH_MULTI_V6 || ARCH_MULTI_V7)
select ARCH_HAS_CPUFREQ
select ARCH_HAS_BANDGAP
select ARCH_HAS_HOLES_MEMORYMODEL
select ARCH_OMAP
select ARCH_REQUIRE_GPIOLIB
......
......@@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end)
}
}
/*
* For certain Broadcom SoCs, depending on the address range, different offsets
* need to be added to the address before passing it to L2 for
* invalidation/clean/flush
*
* Section Address Range Offset EMI
* 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC
* 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS
* 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC
*
* When the start and end addresses have crossed two different sections, we
* need to break the L2 operation into two, each within its own section.
* For example, if we need to invalidate addresses starts at 0xBFFF0000 and
* ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2)
* 0xC0000000 - 0xC0001000
*
* Note 1:
* By breaking a single L2 operation into two, we may potentially suffer some
* performance hit, but keep in mind the cross section case is very rare
*
* Note 2:
* We do not need to handle the case when the start address is in
* Section 1 and the end address is in Section 3, since it is not a valid use
* case
*
* Note 3:
* Section 1 in practical terms can no longer be used on rev A2. Because of
* that the code does not need to handle section 1 at all.
*
*/
#define BCM_SYS_EMI_START_ADDR 0x40000000UL
#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL
#define BCM_SYS_EMI_OFFSET 0x40000000UL
#define BCM_VC_EMI_OFFSET 0x80000000UL
static inline int bcm_addr_is_sys_emi(unsigned long addr)
{
return (addr >= BCM_SYS_EMI_START_ADDR) &&
(addr < BCM_VC_EMI_SEC3_START_ADDR);
}
static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
{
if (bcm_addr_is_sys_emi(addr))
return addr + BCM_SYS_EMI_OFFSET;
else
return addr + BCM_VC_EMI_OFFSET;
}
static void bcm_inv_range(unsigned long start, unsigned long end)
{
unsigned long new_start, new_end;
BUG_ON(start < BCM_SYS_EMI_START_ADDR);
if (unlikely(end <= start))
return;
new_start = bcm_l2_phys_addr(start);
new_end = bcm_l2_phys_addr(end);
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
l2x0_inv_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
l2x0_inv_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
static void bcm_clean_range(unsigned long start, unsigned long end)
{
unsigned long new_start, new_end;
BUG_ON(start < BCM_SYS_EMI_START_ADDR);
if (unlikely(end <= start))
return;
if ((end - start) >= l2x0_size) {
l2x0_clean_all();
return;
}
new_start = bcm_l2_phys_addr(start);
new_end = bcm_l2_phys_addr(end);
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
l2x0_clean_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
l2x0_clean_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
static void bcm_flush_range(unsigned long start, unsigned long end)
{
unsigned long new_start, new_end;
BUG_ON(start < BCM_SYS_EMI_START_ADDR);
if (unlikely(end <= start))
return;
if ((end - start) >= l2x0_size) {
l2x0_flush_all();
return;
}
new_start = bcm_l2_phys_addr(start);
new_end = bcm_l2_phys_addr(end);
/* normal case, no cross section between start and end */
if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
l2x0_flush_range(new_start, new_end);
return;
}
/* They cross sections, so it can only be a cross from section
* 2 to section 3
*/
l2x0_flush_range(new_start,
bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
new_end);
}
static void __init l2x0_of_setup(const struct device_node *np,
u32 *aux_val, u32 *aux_mask)
{
......@@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = {
},
};
static const struct l2x0_of_data bcm_l2x0_data = {
.setup = pl310_of_setup,
.save = pl310_save,
.outer_cache = {
.resume = pl310_resume,
.inv_range = bcm_inv_range,
.clean_range = bcm_clean_range,
.flush_range = bcm_flush_range,
.sync = l2x0_cache_sync,
.flush_all = l2x0_flush_all,
.inv_all = l2x0_inv_all,
.disable = l2x0_disable,
},
};
static const struct of_device_id l2x0_ids[] __initconst = {
{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
......@@ -773,6 +929,8 @@ static const struct of_device_id l2x0_ids[] __initconst = {
.data = (void *)&aurora_no_outer_data},
{ .compatible = "marvell,aurora-outer-cache",
.data = (void *)&aurora_with_outer_data},
{ .compatible = "bcm,bcm11351-a2-pl310-cache",
.data = (void *)&bcm_l2x0_data},
{}
};
......
......@@ -39,19 +39,43 @@
* non 64-bit operations.
*/
#define ASID_FIRST_VERSION (1ULL << ASID_BITS)
#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1)
#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1)
#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
#ifdef CONFIG_ARM_ERRATA_798181
void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
cpumask_t *mask)
{
int cpu;
unsigned long flags;
u64 context_id, asid;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
context_id = mm->context.id.counter;
for_each_online_cpu(cpu) {
if (cpu == this_cpu)
continue;
/*
* We only need to send an IPI if the other CPUs are
* running the same ASID as the one being invalidated.
*/
asid = per_cpu(active_asids, cpu).counter;
if (asid == 0)
asid = per_cpu(reserved_asids, cpu);
if (context_id == asid)
cpumask_set_cpu(cpu, mask);
}
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
}
#endif
#ifdef CONFIG_ARM_LPAE
static void cpu_set_reserved_ttbr0(void)
{
......@@ -128,7 +152,16 @@ static void flush_context(unsigned int cpu)
asid = 0;
} else {
asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
__set_bit(ASID_TO_IDX(asid), asid_map);
/*
* If this CPU has already been through a
* rollover, but hasn't run another task in
* the meantime, we must preserve its reserved
* ASID, as this is the only trace we have of
* the process it is still running.
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
__set_bit(asid & ~ASID_MASK, asid_map);
}
per_cpu(reserved_asids, i) = asid;
}
......@@ -167,17 +200,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
/*
* Allocate a free ASID. If we can't find one, take a
* note of the currently active ASIDs and mark the TLBs
* as requiring flushes.
* as requiring flushes. We always count from ASID #1,
* as we reserve ASID #0 to switch via TTBR0 and indicate
* rollover events.
*/
asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
if (asid == NUM_USER_ASIDS) {
generation = atomic64_add_return(ASID_FIRST_VERSION,
&asid_generation);
flush_context(cpu);
asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS);
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
}
__set_bit(asid, asid_map);
asid = generation | IDX_TO_ASID(asid);
asid |= generation;
cpumask_clear(mm_cpumask(mm));
}
......
......@@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
/*
* Mark the D-cache clean for this page to avoid extra flushing.
* Mark the D-cache clean for these pages to avoid extra flushing.
*/
if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
set_bit(PG_dcache_clean, &page->flags);
if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
unsigned long pfn;
size_t left = size;
pfn = page_to_pfn(page) + off / PAGE_SIZE;
off %= PAGE_SIZE;
if (off) {
pfn++;
left -= PAGE_SIZE - off;
}
while (left >= PAGE_SIZE) {
page = pfn_to_page(pfn++);
set_bit(PG_dcache_clean, &page->flags);
left -= PAGE_SIZE;
}
}
}
/**
......
......@@ -287,7 +287,7 @@ void flush_dcache_page(struct page *page)
mapping = page_mapping(page);
if (!cache_ops_need_broadcast() &&
mapping && !mapping_mapped(mapping))
mapping && !page_mapped(page))
clear_bit(PG_dcache_clean, &page->flags);
else {
__flush_dcache_page(mapping, page);
......
......@@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
return (void __iomem *) (offset + addr);
}
void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
unsigned int mtype, void *caller)
{
unsigned long last_addr;
phys_addr_t last_addr;
unsigned long offset = phys_addr & ~PAGE_MASK;
unsigned long pfn = __phys_to_pfn(phys_addr);
......@@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
}
EXPORT_SYMBOL(__arm_ioremap_pfn);
void __iomem * (*arch_ioremap_caller)(unsigned long, size_t,
void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
unsigned int, void *) =
__arm_ioremap_caller;
void __iomem *
__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype)
{
return arch_ioremap_caller(phys_addr, size, mtype,
__builtin_return_address(0));
......@@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap);
* CONFIG_GENERIC_ALLOCATOR for allocating external memory.
*/
void __iomem *
__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached)
__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached)
{
unsigned int mtype;
......
......@@ -87,16 +87,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset,
return __arm_ioremap_pfn(pfn, offset, size, mtype);
}
void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size,
void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size,
unsigned int mtype)
{
return (void __iomem *)phys_addr;
}
EXPORT_SYMBOL(__arm_ioremap);
void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *);
void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
unsigned int mtype, void *caller)
{
return __arm_ioremap(phys_addr, size, mtype);
......
......@@ -11,8 +11,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
__INIT
/*
* Realview/Versatile Express specific entry point for secondary CPUs.
* This provides a "holding pen" into which all secondary cores are held
......
This diff is collapsed.
......@@ -94,6 +94,7 @@
/* Extended status bits for the ST Micro variants */
#define MCI_ST_SDIOIT (1 << 22)
#define MCI_ST_CEATAEND (1 << 23)
#define MCI_ST_CARDBUSY (1 << 24)
#define MMCICLEAR 0x038
#define MCI_CMDCRCFAILCLR (1 << 0)
......@@ -110,6 +111,7 @@
/* Extended status bits for the ST Micro variants */
#define MCI_ST_SDIOITC (1 << 22)
#define MCI_ST_CEATAENDC (1 << 23)
#define MCI_ST_BUSYENDC (1 << 24)
#define MMCIMASK0 0x03c
#define MCI_CMDCRCFAILMASK (1 << 0)
......@@ -183,6 +185,8 @@ struct mmci_host {
unsigned int cclk;
u32 pwr_reg;
u32 clk_reg;
u32 datactrl_reg;
bool vqmmc_enabled;
struct mmci_platform_data *plat;
struct variant_data *variant;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment