Commit 28dce7c7 authored by Linus Torvalds

Merge tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC architecture updates from Vineet Gupta:
 "ARC updates for 4.3:

   - perf support for ARCv2 based cores (sampling interrupt, SMP)
   - leftovers for ARCv2 support
   - futex fixes"

* tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: entry: Fix reserved handler
  ARCv2: perf: Finally introduce HS perf unit
  ARCv2: perf: SMP support
  ARCv2: perf: implement exclusion of event counting in user or kernel mode
  ARCv2: perf: Support sampling events using overflow interrupts
  ARCv2: perf: implement "event_set_period"
  ARC: perf: cap the number of counters to hardware max of 32
  ARC: Eliminate some ARCv2 specific code for ARCompact build
  ARC: add/fix some comments in code - no functional change
  ARC: change some branchs to jumps to resolve linkage errors
  ARC: ensure futex ops are atomic in !LLSC config
  ARC: Enable HAVE_FUTEX_CMPXCHG
  ARC: make futex_atomic_cmpxchg_inatomic() return bimodal
  ARC: futex cosmetics
  ARC: add barriers to futex code
  ARCv2: IOC: Allow boot time disable
  ARCv2: SLC: Allow boot time disable
  ARCv2: Support IO Coherency and permutations involving L1 and L2 caches
  ARC: Enable optimistic spinning for LLSC config
  MAINTAINERS: add git tree for the arc architecture
parents 361f7d17 3d592659
* ARC HS Performance Counters

The ARC HS can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. As with the conventional ARC
PCT, there are 100+ hardware conditions dynamically mapped to up to 32
counters. It also supports overflow interrupts.

Required properties:

- compatible : should contain
    "snps,archs-pct"

Example:

pmu {
    compatible = "snps,archs-pct";
};
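For orientation only: a kernel-side consumer of this binding would typically look up the node by the compatible string documented above. The sketch below is illustrative and not part of the binding or of this series; the function name is invented.

#include <linux/errno.h>
#include <linux/of.h>
#include <linux/printk.h>

/* Illustrative sketch: locate the PCT node described by this binding */
static int __init archs_pct_find_node_sketch(void)
{
    struct device_node *np;

    np = of_find_compatible_node(NULL, NULL, "snps,archs-pct");
    if (!np)
        return -ENODEV;    /* no perf counters described in the DT */

    pr_info("ARC HS PCT node found in device tree\n");
    of_node_put(np);
    return 0;
}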
@@ -9911,8 +9911,9 @@ SYNOPSYS ARC ARCHITECTURE
 M: Vineet Gupta <vgupta@synopsys.com>
 S: Supported
 F: arch/arc/
-F: Documentation/devicetree/bindings/arc/
+F: Documentation/devicetree/bindings/arc/*
 F: drivers/tty/serial/arc_uart.c
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git

 SYNOPSYS ARC SDP platform support
 M: Alexey Brodkin <abrodkin@synopsys.com>
...
@@ -8,6 +8,7 @@
 config ARC
     def_bool y
+    select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
     select BUILDTIME_EXTABLE_SORT
     select COMMON_CLK
     select CLONE_BACKWARDS
@@ -22,6 +23,7 @@ config ARC
     select GENERIC_SMP_IDLE_THREAD
     select HAVE_ARCH_KGDB
     select HAVE_ARCH_TRACEHOOK
+    select HAVE_FUTEX_CMPXCHG
     select HAVE_IOREMAP_PROT
     select HAVE_KPROBES
     select HAVE_KRETPROBES
...
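The HAVE_FUTEX_CMPXCHG select added above lets the generic futex code skip its boot-time probe of futex_atomic_cmpxchg_inatomic(). Roughly, the generic logic (kernel/futex.c of that era) looks like the sketch below; this is a paraphrase included for context, not part of the ARC patch.

/*
 * Paraphrased sketch of the generic futex init logic: with
 * CONFIG_HAVE_FUTEX_CMPXCHG the runtime probe is compiled out entirely
 * and the futex core assumes a working futex_atomic_cmpxchg_inatomic().
 */
static void __init futex_detect_cmpxchg_sketch(void)
{
#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
    u32 curval;

    /*
     * Probe with a NULL user pointer: an -EFAULT return proves the arch
     * helper actually ran, i.e. it is implemented and usable at runtime.
     */
    if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
        futex_cmpxchg_enabled = 1;
#endif
}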
@@ -72,12 +72,13 @@ arcpct0: pct {
 };

 /*
- * This INTC is actually connected to DW APB GPIO
- * which acts as a wire between MB INTC and CPU INTC.
- * GPIO INTC is configured in platform init code
- * and here we mimic direct connection from MB INTC to
- * CPU INTC, thus we set "interrupts = <7>" instead of
- * "interrupts = <12>"
+ * The DW APB ICTL intc on MB is connected to CPU intc via a
+ * DT "invisible" DW APB GPIO block, configured to simply pass thru
+ * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c)
+ *
+ * So here we mimic a direct connection betwen them, ignoring the
+ * ABPG GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core)
+ * instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO)
  *
  * This intc actually resides on MB, but we move it here to
  * avoid duplicating the MB dtsi file given that IRQ from
...
@@ -35,6 +35,7 @@
 #define ARC_REG_RTT_BCR 0xF2
 #define ARC_REG_IRQ_BCR 0xF3
 #define ARC_REG_SMART_BCR 0xFF
+#define ARC_REG_CLUSTER_BCR 0xcf

 /* status32 Bits Positions */
 #define STATUS_AE_BIT 5 /* Exception active */
...
@@ -53,6 +53,8 @@ extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void read_decode_cache_bcr(void);

+extern int ioc_exists;
+
 #endif /* !__ASSEMBLY__ */

 /* Instruction cache related Auxiliary registers */
@@ -94,4 +96,10 @@ extern void read_decode_cache_bcr(void);
 #define SLC_CTRL_BUSY 0x100
 #define SLC_CTRL_RGN_OP_INV 0x200

+/* IO coherency related Auxiliary registers */
+#define ARC_REG_IO_COH_ENABLE 0x500
+#define ARC_REG_IO_COH_PARTIAL 0x501
+#define ARC_REG_IO_COH_AP0_BASE 0x508
+#define ARC_REG_IO_COH_AP0_SIZE 0x509
+
 #endif /* _ASM_CACHE_H */
@@ -110,18 +110,18 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
     sizeof(*(ptr))))

 /*
- * On ARC700, EX insn is inherently atomic, so by default "vanilla" xchg() need
- * not require any locking. However there's a quirk.
- * ARC lacks native CMPXCHG, thus emulated (see above), using external locking -
- * incidently it "reuses" the same atomic_ops_lock used by atomic APIs.
- * Now, llist code uses cmpxchg() and xchg() on same data, so xchg() needs to
- * abide by same serializing rules, thus ends up using atomic_ops_lock as well.
+ * xchg() maps directly to ARC EX instruction which guarantees atomicity.
+ * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
+ * due to a subtle reason:
+ * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
+ *   of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
+ *   Hence xchg() needs to follow same locking rules.
  *
- * This however is only relevant if SMP and/or ARC lacks LLSC
- *   if (UP or LLSC)
- *      xchg doesn't need serialization
- *   else <==> !(UP or LLSC) <==> (!UP and !LLSC) <==> (SMP and !LLSC)
- *      xchg needs serialization
+ * Technically the lock is also needed for UP (boils down to irq save/restore)
+ * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
+ * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * Other way around, xchg is one instruction anyways, so can't be interrupted
+ * as such
  */
 #if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
...
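To make the serialization rule in the rewritten comment concrete, here is a minimal sketch of the two cases. The macro name is invented; atomic_ops_lock()/atomic_ops_unlock() and __xchg() are the existing arch helpers the comment refers to, so this sketch assumes it sits alongside them in the same header.

#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
/* SMP && !LLSC: take the same lock cmpxchg() uses, so code mixing
 * xchg()/cmpxchg() on one variable (e.g. llist.h) stays serialized.
 */
#define xchg_sketch(ptr, val)                                   \
({                                                              \
    unsigned long flags, ret = (unsigned long)(val);            \
    atomic_ops_lock(flags);                                     \
    ret = __xchg(ret, (ptr), sizeof(*(ptr)));                   \
    atomic_ops_unlock(flags);                                   \
    ret;                                                        \
})
#else
/* UP or LLSC: the single EX instruction is already atomic enough */
#define xchg_sketch(ptr, val)                                   \
    __xchg((unsigned long)(val), (ptr), sizeof(*(ptr)))
#endif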
@@ -20,6 +20,7 @@
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 \
+    smp_mb(); \
     __asm__ __volatile__( \
     "1: llock %1, [%2] \n" \
     insn "\n" \
@@ -30,7 +31,7 @@
     " .section .fixup,\"ax\" \n" \
     " .align 4 \n" \
     "4: mov %0, %4 \n" \
-    " b 3b \n" \
+    " j 3b \n" \
     " .previous \n" \
     " .section __ex_table,\"a\" \n" \
     " .align 4 \n" \
@@ -40,12 +41,14 @@
 \
     : "=&r" (ret), "=&r" (oldval) \
     : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \
-    : "cc", "memory")
+    : "cc", "memory"); \
+    smp_mb() \

 #else /* !CONFIG_ARC_HAS_LLSC */

 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 \
+    smp_mb(); \
     __asm__ __volatile__( \
     "1: ld %1, [%2] \n" \
     insn "\n" \
@@ -55,7 +58,7 @@
     " .section .fixup,\"ax\" \n" \
     " .align 4 \n" \
     "4: mov %0, %4 \n" \
-    " b 3b \n" \
+    " j 3b \n" \
     " .previous \n" \
     " .section __ex_table,\"a\" \n" \
     " .align 4 \n" \
@@ -65,7 +68,8 @@
 \
     : "=&r" (ret), "=&r" (oldval) \
     : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \
-    : "cc", "memory")
+    : "cc", "memory"); \
+    smp_mb() \

 #endif

@@ -83,6 +87,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
     if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
         return -EFAULT;

+#ifndef CONFIG_ARC_HAS_LLSC
+    preempt_disable();    /* to guarantee atomic r-m-w of futex op */
+#endif
     pagefault_disable();

     switch (op) {
@@ -90,6 +97,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
         __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg);
         break;
     case FUTEX_OP_ADD:
+        /* oldval = *uaddr; *uaddr += oparg ; ret = *uaddr */
         __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg);
         break;
     case FUTEX_OP_OR:
@@ -106,6 +114,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
     }

     pagefault_enable();
+#ifndef CONFIG_ARC_HAS_LLSC
+    preempt_enable();
+#endif

     if (!ret) {
         switch (cmp) {
@@ -134,54 +145,57 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
     return ret;
 }

-/* Compare-xchg with pagefaults disabled.
- *  Notes:
- *      -Best-Effort: Exchg happens only if compare succeeds.
- *          If compare fails, returns; leaving retry/looping to upper layers
- *      -successful cmp-xchg: return orig value in @addr (same as cmp val)
- *      -Compare fails: return orig value in @addr
- *      -user access r/w fails: return -EFAULT
+/*
+ * cmpxchg of futex (pagefaults disabled by caller)
+ * Return 0 for success, -EFAULT otherwise
  */
 static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 expval,
                               u32 newval)
 {
-    u32 val;
+    int ret = 0;
+    u32 existval;

-    if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+    if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
         return -EFAULT;

-    pagefault_disable();
+#ifndef CONFIG_ARC_HAS_LLSC
+    preempt_disable();    /* to guarantee atomic r-m-w of futex op */
+#endif
+    smp_mb();

     __asm__ __volatile__(
 #ifdef CONFIG_ARC_HAS_LLSC
-    "1: llock %0, [%3] \n"
-    " brne %0, %1, 3f \n"
-    "2: scond %2, [%3] \n"
+    "1: llock %1, [%4] \n"
+    " brne %1, %2, 3f \n"
+    "2: scond %3, [%4] \n"
     " bnz 1b \n"
 #else
-    "1: ld %0, [%3] \n"
-    " brne %0, %1, 3f \n"
-    "2: st %2, [%3] \n"
+    "1: ld %1, [%4] \n"
+    " brne %1, %2, 3f \n"
+    "2: st %3, [%4] \n"
 #endif
     "3: \n"
     " .section .fixup,\"ax\" \n"
-    "4: mov %0, %4 \n"
-    " b 3b \n"
+    "4: mov %0, %5 \n"
+    " j 3b \n"
     " .previous \n"
     " .section __ex_table,\"a\" \n"
     " .align 4 \n"
     " .word 1b, 4b \n"
     " .word 2b, 4b \n"
     " .previous\n"
-    : "=&r"(val)
-    : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
+    : "+&r"(ret), "=&r"(existval)
+    : "r"(expval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
     : "cc", "memory");

-    pagefault_enable();
+    smp_mb();

-    *uval = val;
-    return val;
+#ifndef CONFIG_ARC_HAS_LLSC
+    preempt_enable();
+#endif
+    *uval = existval;
+    return ret;
 }

 #endif
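The "bimodal" change above means futex_atomic_cmpxchg_inatomic() now reports the user-access status in its return value and the observed old value through *uval, rather than conflating the two. A caller-side sketch (not from the patch; the function name is invented, and asm/futex.h is assumed to be included):

/* Sketch of how a caller separates "fault" from "lost the race" */
static int futex_trylock_sketch(u32 __user *uaddr, u32 expected, u32 newval)
{
    u32 curval;
    int ret;

    ret = futex_atomic_cmpxchg_inatomic(&curval, uaddr, expected, newval);
    if (ret)
        return ret;          /* -EFAULT: user access failed */

    if (curval != expected)
        return -EAGAIN;      /* compare failed; caller may retry */

    return 0;                /* exchange happened */
}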
 /*
  * Linux performance counter support for ARC
  *
+ * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify
@@ -12,8 +13,8 @@
 #ifndef __ASM_PERF_EVENT_H
 #define __ASM_PERF_EVENT_H

-/* real maximum varies per CPU, this is the maximum supported by the driver */
-#define ARC_PMU_MAX_HWEVENTS 64
+/* Max number of counters that PCT block may ever have */
+#define ARC_PERF_MAX_COUNTERS 32

 #define ARC_REG_CC_BUILD 0xF6
 #define ARC_REG_CC_INDEX 0x240
@@ -28,15 +29,22 @@
 #define ARC_REG_PCT_CONFIG 0x254
 #define ARC_REG_PCT_CONTROL 0x255
 #define ARC_REG_PCT_INDEX 0x256
+#define ARC_REG_PCT_INT_CNTL 0x25C
+#define ARC_REG_PCT_INT_CNTH 0x25D
+#define ARC_REG_PCT_INT_CTRL 0x25E
+#define ARC_REG_PCT_INT_ACT 0x25F
+
+#define ARC_REG_PCT_CONFIG_USER (1 << 18) /* count in user mode */
+#define ARC_REG_PCT_CONFIG_KERN (1 << 19) /* count in kernel mode */

 #define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */
 #define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */

 struct arc_reg_pct_build {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-    unsigned int m:8, c:8, r:6, s:2, v:8;
+    unsigned int m:8, c:8, r:5, i:1, s:2, v:8;
 #else
-    unsigned int v:8, s:2, r:6, c:8, m:8;
+    unsigned int v:8, s:2, i:1, r:5, c:8, m:8;
 #endif
 };
@@ -95,10 +103,13 @@ static const char * const arc_pmu_ev_hw_map[] = {
     /* counts condition */
     [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-    [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+    [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
     [PERF_COUNT_ARC_BPOK] = "bpok",   /* NP-NT, PT-T, PNT-NT */
+#ifdef CONFIG_ISA_ARCV2
+    [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
+#else
     [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#endif

     [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */
     [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */
...
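The two new ARC_REG_PCT_CONFIG_USER/KERN bits are what back the "exclusion of event counting in user or kernel mode" patch. A sketch of how perf's attribute flags could map onto them (illustrative only; this is not the driver's actual helper):

#include <linux/perf_event.h>
#include <asm/perf_event.h>

/* Sketch: translate perf exclude_* attributes into PCT config bits */
static u32 arc_pct_mode_bits_sketch(const struct perf_event_attr *attr)
{
    /* default: count in both user and kernel mode */
    u32 cfg = ARC_REG_PCT_CONFIG_USER | ARC_REG_PCT_CONFIG_KERN;

    if (attr->exclude_user)
        cfg &= ~ARC_REG_PCT_CONFIG_USER;
    if (attr->exclude_kernel)
        cfg &= ~ARC_REG_PCT_CONFIG_KERN;

    return cfg;
}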
@@ -57,13 +57,8 @@ VECTOR   handle_interrupt   ; (23) End of fixed IRQs
     .section .text, "ax",@progbits

-res_service:        ; processor restart
-    flag 0x1        ; not implemented
-    nop
-    nop
-
-reserved:           ; processor restart
-    rtie            ; jump to processor initializations
+reserved:
+    flag 1          ; Unexpected event, halt

 ;##################### Interrupt Handling ##############################
...
@@ -42,7 +42,7 @@ ENTRY(ret_from_fork)
     ; when the forked child comes here from the __switch_to function
     ; r0 has the last task pointer.
     ; put last task in scheduler queue
-    bl   @schedule_tail
+    jl   @schedule_tail

     ld   r9, [sp, PT_status32]
     brne r9, 0, 1f
@@ -320,7 +320,7 @@ resume_user_mode_begin:
     ; --- (Slow Path #1) task preemption ---
     bbit0  r9, TIF_NEED_RESCHED, .Lchk_pend_signals
     mov    blink, resume_user_mode_begin  ; tail-call to U mode ret chks
-    b      @schedule    ; BTST+Bnz causes relo error in link
+    j      @schedule    ; BTST+Bnz causes relo error in link

 .Lchk_pend_signals:
     IRQ_ENABLE r10
@@ -381,7 +381,7 @@ resume_kernel_mode:
     bbit0  r9, TIF_NEED_RESCHED, .Lrestore_regs

     ; Invoke PREEMPTION
-    bl   preempt_schedule_irq
+    jl   preempt_schedule_irq

     ; preempt_schedule_irq() always returns with IRQ disabled
 #endif
...
(diff for one file is collapsed in this view and not shown)
@@ -65,7 +65,7 @@ asmlinkage void ret_from_fork(void);
  * ------------------
  * |      r25       |   <==== top of Stack (thread.ksp)
  * ~                ~
- * |    --to--      |   (CALLEE Regs of user mode)
+ * |    --to--      |   (CALLEE Regs of kernel mode)
  * |      r13       |
  * ------------------
  * |       fp       |
...
@@ -34,7 +34,7 @@
     " .section .fixup,\"ax\"\n" \
     " .align 4\n" \
     "3: mov %0, 1\n" \
-    " b   2b\n" \
+    " j   2b\n" \
     " .previous\n" \
     " .section __ex_table,\"a\"\n" \
     " .align 4\n" \
@@ -82,7 +82,7 @@
     " .section .fixup,\"ax\"\n" \
     " .align 4\n" \
     "4: mov %0, 1\n" \
-    " b   3b\n" \
+    " j   3b\n" \
     " .previous\n" \
     " .section __ex_table,\"a\"\n" \
     " .align 4\n" \
@@ -113,7 +113,7 @@
     " .section .fixup,\"ax\"\n" \
     " .align 4\n" \
     "6: mov %0, 1\n" \
-    " b   5b\n" \
+    " j   5b\n" \
     " .previous\n" \
     " .section __ex_table,\"a\"\n" \
     " .align 4\n" \
...
@@ -22,15 +22,22 @@
 #include <asm/setup.h>

 static int l2_line_sz;
+int ioc_exists;
+volatile int slc_enable = 1, ioc_enable = 1;

 void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
                                unsigned long sz, const int cacheop);

+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
 char *arc_cache_mumbojumbo(int c, char *buf, int len)
 {
     int n = 0;
     struct cpuinfo_arc_cache *p;

+#define IS_USED_RUN(v)  ((v) ? "" : "(disabled) ")
 #define PR_CACHE(p, cfg, str) \
     if (!(p)->ver) \
         n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
@@ -45,10 +52,18 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
     PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
     PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");

+    if (!is_isa_arcv2())
+        return buf;
+
     p = &cpuinfo_arc700[c].slc;
     if (p->ver)
         n += scnprintf(buf + n, len - n,
-                       "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+                       "SLC\t\t: %uK, %uB Line%s\n",
+                       p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
+
+    if (ioc_exists)
+        n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
+                       IS_USED_RUN(ioc_enable));

     return buf;
 }
@@ -58,18 +73,9 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
  * the cpuinfo structure for later use.
  * No Validation done here, simply read/convert the BCRs
  */
-void read_decode_cache_bcr(void)
+static void read_decode_cache_bcr_arcv2(int cpu)
 {
-    struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
-    unsigned int cpu = smp_processor_id();
-
-    struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-        unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-        unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-    } ibcr, dbcr;
+    struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;

     struct bcr_generic sbcr;

     struct bcr_slc_cfg {
@@ -80,6 +86,39 @@ void read_decode_cache_bcr(void)
 #endif
     } slc_cfg;

+    struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+        unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+        unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+    } cbcr;
+
+    READ_BCR(ARC_REG_SLC_BCR, sbcr);
+    if (sbcr.ver) {
+        READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+        p_slc->ver = sbcr.ver;
+        p_slc->sz_k = 128 << slc_cfg.sz;
+        l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+    }
+
+    READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+    if (cbcr.c && ioc_enable)
+        ioc_exists = 1;
+}
+
+void read_decode_cache_bcr(void)
+{
+    struct cpuinfo_arc_cache *p_ic, *p_dc;
+    unsigned int cpu = smp_processor_id();
+
+    struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+        unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+        unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+    } ibcr, dbcr;
+
     p_ic = &cpuinfo_arc700[cpu].icache;
     READ_BCR(ARC_REG_IC_BCR, ibcr);
@@ -122,17 +161,8 @@ void read_decode_cache_bcr(void)
     p_dc->ver = dbcr.ver;

 slc_chk:
-    if (!is_isa_arcv2())
-        return;
-
-    p_slc = &cpuinfo_arc700[cpu].slc;
-    READ_BCR(ARC_REG_SLC_BCR, sbcr);
-    if (sbcr.ver) {
-        READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
-        p_slc->ver = sbcr.ver;
-        p_slc->sz_k = 128 << slc_cfg.sz;
-        l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
-    }
+    if (is_isa_arcv2())
+        read_decode_cache_bcr_arcv2(cpu);
 }

 /*
@@ -516,11 +546,6 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 #endif
 }

-static inline int need_slc_flush(void)
-{
-    return is_isa_arcv2() && l2_line_sz;
-}
-
 /***********************************************************
  * Exported APIs
  */
@@ -569,31 +594,75 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);

-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
 {
     __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+    __dc_line_op_k(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+    __dc_line_op_k(start, sz, OP_FLUSH);
+}

-    if (need_slc_flush())
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicity flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
+{
+    __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
     slc_op(start, sz, OP_FLUSH_N_INV);
 }
-EXPORT_SYMBOL(dma_cache_wback_inv);

-void dma_cache_inv(unsigned long start, unsigned long sz)
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
 {
     __dc_line_op_k(start, sz, OP_INV);
-    if (need_slc_flush())
     slc_op(start, sz, OP_INV);
 }
-EXPORT_SYMBOL(dma_cache_inv);

-void dma_cache_wback(unsigned long start, unsigned long sz)
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
 {
     __dc_line_op_k(start, sz, OP_FLUSH);
-    if (need_slc_flush())
     slc_op(start, sz, OP_FLUSH);
 }
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+    __dma_cache_wback_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+    __dma_cache_inv(start, sz);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(unsigned long start, unsigned long sz)
+{
+    __dma_cache_wback(start, sz);
+}
 EXPORT_SYMBOL(dma_cache_wback);

 /*
@@ -848,4 +917,41 @@ void arc_cache_init(void)
             panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
         }
     }
+
+    if (is_isa_arcv2() && l2_line_sz && !slc_enable) {
+        /* IM set : flush before invalidate */
+        write_aux_reg(ARC_REG_SLC_CTRL,
+                      read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);
+
+        write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+
+        /* Important to wait for flush to complete */
+        while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+        write_aux_reg(ARC_REG_SLC_CTRL,
+                      read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
+    }
+
+    if (is_isa_arcv2() && ioc_exists) {
+        /* IO coherency base - 0x8z */
+        write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
+        /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
+        write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
+        /* Enable partial writes */
+        write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
+        /* Enable IO coherency */
+        write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+
+        __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
+        __dma_cache_inv = __dma_cache_inv_ioc;
+        __dma_cache_wback = __dma_cache_wback_ioc;
+    } else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+        __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
+        __dma_cache_inv = __dma_cache_inv_slc;
+        __dma_cache_wback = __dma_cache_wback_slc;
+    } else {
+        __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
+        __dma_cache_inv = __dma_cache_inv_l1;
+        __dma_cache_wback = __dma_cache_wback_l1;
+    }
 }
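The slc_enable/ioc_enable flags introduced above are what the "Allow boot time disable" patches key off; the hunks shown here only consume them. One common way such flags get wired to the kernel command line is an early __setup() hook, sketched below with made-up parameter names and assumed to live in the same file as the flags (the series itself may plumb this differently):

#include <linux/init.h>

/* Hypothetical command-line hooks for the run-time disable flags */
static int __init arc_noslc_setup(char *str)
{
    slc_enable = 0;    /* SLC gets flushed and disabled in arc_cache_init() */
    return 1;
}
__setup("noslc", arc_noslc_setup);

static int __init arc_noioc_setup(char *str)
{
    ioc_enable = 0;    /* ignore IOC even if the cluster BCR advertises it */
    return 1;
}
__setup("noioc", arc_noioc_setup);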
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/export.h>
+#include <asm/cache.h>
 #include <asm/cacheflush.h>

 /*
@@ -53,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 {
     void *paddr, *kvaddr;

+    /*
+     * IOC relies on all data (even coherent DMA data) being in cache
+     * Thus allocate normal cached memory
+     *
+     * The gains with IOC are two pronged:
+     *   -For streaming data, elides needs for cache maintenance, saving
+     *    cycles in flush code, and bus bandwidth as all the lines of a
+     *    buffer need to be flushed out to memory
+     *   -For coherent data, Read/Write to buffers terminate early in cache
+     *    (vs. always going to memory - thus are faster)
+     */
+    if (is_isa_arcv2() && ioc_exists)
+        return dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+
     /* This is linear addr (0x8000_0000 based) */
     paddr = alloc_pages_exact(size, gfp);
     if (!paddr)
@@ -85,6 +100,9 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
                        dma_addr_t dma_handle)
 {
+    if (is_isa_arcv2() && ioc_exists)
+        return dma_free_noncoherent(dev, size, kvaddr, dma_handle);
+
     iounmap((void __force __iomem *)kvaddr);

     free_pages_exact((void *)dma_handle, size);
...
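From a driver's point of view nothing changes: coherent allocations still go through dma_alloc_coherent(), and streaming buffers are still bracketed by the exported dma_cache_*() helpers; with IOC those helpers resolve to the no-op *_ioc variants installed in arc_cache_init(). A generic driver-side sketch (function names invented; the helpers are the ones exported above):

/* Sketch of a driver's streaming-DMA bracketing around a device transfer */
static void rx_buf_prepare_sketch(unsigned long buf, size_t len)
{
    /* device is about to write into buf: drop any stale cache lines */
    dma_cache_inv(buf, len);
}

static void tx_buf_flush_sketch(unsigned long buf, size_t len)
{
    /* CPU filled buf: push dirty lines out before the device reads it */
    dma_cache_wback(buf, len);
}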
@@ -46,7 +46,7 @@ static void __init axs10x_enable_gpio_intc_wire(void)
  *  -------------------          -------------------
  * | snps,dw-apb-gpio |         | snps,dw-apb-gpio |
  *  -------------------          -------------------
- *        |                               |
+ *        | #12                           |
  *        |                      [ Debug UART on cpu card ]
  *        |
  *  ------------------------
...