Commit e07af262 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC updates from Vineet Gupta:
 "Finally a big pile of changes for ARC (atomics/mm). These are from our
  internal arc64 tree, preparing mainline for eventual arc64 support.
  I'm spreading them out to avoid a tsunami of patches in one release.

   - MM rework:
       - Implement up to 4 paging levels
       - Enable STRICT_MM_TYPECHECK
       - switch pgtable_t back to 'struct page *'

   - Atomics rework / implement relaxed accessors

   - Retire legacy MMUv1,v2; ARC750 cores

   - A few other build error and typo fixes"

* tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (33 commits)
  ARC: mm: vmalloc sync from kernel to user table to update PMD ...
  ARC: mm: support 4 levels of page tables
  ARC: mm: support 3 levels of page tables
  ARC: mm: switch to asm-generic/pgalloc.h
  ARC: mm: switch pgtable_t back to struct page *
  ARC: mm: hack to allow 2 level build with 4 level code
  ARC: mm: disintegrate pgtable.h into levels and flags
  ARC: mm: disintegrate mmu.h (arcv2 bits out)
  ARC: mm: move MMU specific bits out of entry code ...
  ARC: mm: move MMU specific bits out of ASID allocator
  ARC: mm: non-functional code movement/cleanup
  ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set)
  ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag
  ARC: mm: Enable STRICT_MM_TYPECHECKS
  ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS
  ARC: mm: move mmu/cache externs out to setup.h
  ARC: mm: remove tlb paranoid code
  ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only
  ARC: retire MMUv1 and MMUv2 support
  ARC: retire ARC750 support
  ...
parents 063df71a 56809a28
@@ -116,16 +116,9 @@ choice
	default ARC_CPU_770 if ISA_ARCOMPACT
	default ARC_CPU_HS if ISA_ARCV2
-if ISA_ARCOMPACT
-config ARC_CPU_750D
-	bool "ARC750D"
-	select ARC_CANT_LLSC
-	help
-	  Support for ARC750 core
config ARC_CPU_770
	bool "ARC770"
+	depends on ISA_ARCOMPACT
	select ARC_HAS_SWAPE
	help
	  Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
@@ -135,8 +128,6 @@ config ARC_CPU_770
	  -Caches: New Prog Model, Region Flush
	  -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
-endif #ISA_ARCOMPACT
config ARC_CPU_HS
	bool "ARC-HS"
	depends on ISA_ARCV2
@@ -274,33 +265,17 @@ config ARC_DCCM_BASE
choice
	prompt "MMU Version"
-	default ARC_MMU_V3 if ARC_CPU_770
-	default ARC_MMU_V2 if ARC_CPU_750D
-	default ARC_MMU_V4 if ARC_CPU_HS
-if ISA_ARCOMPACT
-config ARC_MMU_V1
-	bool "MMU v1"
-	help
-	  Orig ARC700 MMU
-config ARC_MMU_V2
-	bool "MMU v2"
-	help
-	  Fixed the deficiency of v1 - possible thrashing in memcpy scenario
-	  when 2 D-TLB and 1 I-TLB entries index into same 2way set.
+	default ARC_MMU_V3 if ISA_ARCOMPACT
+	default ARC_MMU_V4 if ISA_ARCV2
config ARC_MMU_V3
	bool "MMU v3"
-	depends on ARC_CPU_770
+	depends on ISA_ARCOMPACT
	help
	  Introduced with ARC700 4.10: New Features
	  Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
	  Shared Address Spaces (SASID)
-endif
config ARC_MMU_V4
	bool "MMU v4"
	depends on ISA_ARCV2
@@ -319,7 +294,6 @@ config ARC_PAGE_SIZE_8K
config ARC_PAGE_SIZE_16K
	bool "16KB"
-	depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K
	bool "4KB"
@@ -340,6 +314,10 @@ config ARC_HUGEPAGE_16M
endchoice
+config PGTABLE_LEVELS
+	int "Number of Page table levels"
+	default 2
config ARC_COMPACT_IRQ_LEVELS
	depends on ISA_ARCOMPACT
	bool "Setup Timer IRQ as high Priority"
@@ -563,9 +541,6 @@ config ARC_DW2_UNWIND
	  If you don't debug the kernel, you can say N, but we may not be able
	  to solve problems without frame unwind information
-config ARC_DBG_TLB_PARANOIA
-	bool "Paranoia Checks in Low Level TLB Handlers"
config ARC_DBG_JUMP_LABEL
	bool "Paranoid checks in Static Keys (jump labels) code"
	depends on JUMP_LABEL
...
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_LLSC_H
#define _ASM_ARC_ATOMIC_LLSC_H
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
} \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return val; \
}
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
{ \
unsigned int val, orig; \
\
__asm__ __volatile__( \
"1: llock %[orig], [%[ctr]] \n" \
" " #asm_op " %[val], %[orig], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val), \
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return orig; \
}
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
#define _ASM_ARC_ATOMIC_SPLOCK_H
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*/
static inline void arch_atomic_set(atomic_t *v, int i)
{
/*
* Independent of hardware support, all of the atomic_xxx() APIs need
* to follow the same locking rules to make sure that a "hardware"
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
* sequence
*
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
* requires the locking.
*/
unsigned long flags;
atomic_ops_lock(flags);
WRITE_ONCE(v->counter, i);
atomic_ops_unlock(flags);
}
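A minimal illustrative interleaving (not part of this patch, hypothetical names and values) of the rule stated in the comment above, showing why even the single-store arch_atomic_set() must take atomic_ops_lock() in the spinlock/irq-disable emulated regime:

/*
 * ctx A: arch_atomic_add(1, v)            ctx B: arch_atomic_set(v, 0)
 * --------------------------------        --------------------------------
 * atomic_ops_lock(flags);
 * tmp = v->counter;        // reads 5
 *                                          v->counter = 0;   // plain store
 * v->counter = tmp + 1;    // writes 6  -> B's reset is silently lost
 * atomic_ops_unlock(flags);
 *
 * Taking atomic_ops_lock() in arch_atomic_set() as well serializes the
 * store against every emulated read-modify-write sequence.
 */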
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
\
atomic_ops_lock(flags); \
v->counter c_op i; \
atomic_ops_unlock(flags); \
}
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int temp; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
v->counter = temp; \
atomic_ops_unlock(flags); \
\
return temp; \
}
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int orig; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
orig = v->counter; \
v->counter c_op i; \
atomic_ops_unlock(flags); \
\
return orig; \
}
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
* - The address HAS to be 64-bit aligned
*/
#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
#define _ASM_ARC_ATOMIC64_ARCV2_H
typedef struct {
s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(a) { (a) }
static inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
: "=r"(val)
: "r"(&v->counter));
return val;
}
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
* explicit volatile. Otherwise gcc optimizers could elide the store
* which borked atomic64 self-test
* In the inline asm version, memory clobber needed for exact same
* reason, to tell gcc about the store.
*
* This however is not needed for sibling atomic64_add() etc since both
* load/store are explicitly done in inline asm. As long as API is used
* for each access, gcc has no way to optimize away any load/store
*/
__asm__ __volatile__(
" std %0, [%1] \n"
:
: "r"(a), "r"(&v->counter)
: "memory");
}
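A hedged sketch of the alternative the comment above alludes to: a plain C version would need a volatile store (e.g. WRITE_ONCE) to stop the compiler eliding it, but on this 32-bit ISA that still compiles to two word stores, whereas the inline-asm std is a single 64-bit store. The helper name below is hypothetical.

static inline void arch_atomic64_set_c_sketch(atomic64_t *v, s64 a)
{
	/* volatile access: compiler must emit the store, cannot elide it,
	 * but it is NOT a single 64-bit store like the "std" above */
	WRITE_ONCE(v->counter, a);
}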
#define ATOMIC64_OP(op, op1, op2) \
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: [val] "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return val; \
}
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
#define ATOMIC64_FETCH_OP(op, op1, op2) \
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val, orig; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%2] \n" \
" " #op1 " %L1, %L0, %L3 \n" \
" " #op2 " %H1, %H0, %H3 \n" \
" scondd %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(orig), "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return orig; \
}
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_OP_RETURN(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(add, add.f, adc)
ATOMIC64_OPS(sub, sub.f, sbc)
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(and, and, and)
ATOMIC64_OPS(andnot, bic, bic)
ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)
#define arch_atomic64_andnot arch_atomic64_andnot
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
static inline s64
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" brne %L0, %L2, 2f \n"
" brne %H0, %H2, 2f \n"
" scondd %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "ir"(expected), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" scondd %2, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
/**
* arch_atomic64_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic64_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 val;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
" brlt %H0, 0, 2f \n"
" scondd %0, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(val)
: "r"(&v->counter)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return val;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
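A short usage sketch (illustrative names, not from this patch) of the semantics documented above: the caller only cares whether the post-decrement value would still be non-negative.

static atomic64_t tokens = ATOMIC64_INIT(4);

static inline int grab_token(void)
{
	/* old-value-minus-1 is returned even when nothing was stored */
	return arch_atomic64_dec_if_positive(&tokens) >= 0;
}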
/**
* arch_atomic64_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, if it was not @u.
* Returns the old value of @v
*/
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 old, temp;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%2] \n"
" brne %L0, %L4, 2f # continue to add since v != u \n"
" breq.d %H0, %H4, 3f # return since v == u \n"
"2: \n"
" add.f %L1, %L0, %L3 \n"
" adc %H1, %H0, %H3 \n"
" scondd %1, [%2] \n"
" bnz 1b \n"
"3: \n"
: "=&r"(old), "=&r" (temp)
: "r"(&v->counter), "r"(a), "r"(u)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return old;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
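A hedged usage sketch of fetch_add_unless(): the classic "take a reference unless the count already dropped to zero" pattern. Names below are illustrative.

static atomic64_t refcnt = ATOMIC64_INIT(1);

static inline int get_ref_unless_zero(void)
{
	/* old value is returned; 0 means the object was already dead */
	return arch_atomic64_fetch_add_unless(&refcnt, 1, 0) != 0;
}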
#endif
@@ -14,188 +14,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
#ifndef CONFIG_ARC_HAS_LLSC
#include <asm/smp.h>
#endif
#ifdef CONFIG_ARC_HAS_LLSC
/*
* Hardware assisted Atomic-R-M-W
*/
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned int temp; \
\
m += nr >> 5; \
\
nr &= 0x1f; \
\
__asm__ __volatile__( \
"1: llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
: "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
"ir"(nr) \
: "cc"); \
}
/*
* Semantically:
* Test the bit
* if clear
* set it and return 0 (old value)
* else
* return 1 (old value).
*
* Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
* and the old value of bit is returned
*/
#define TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old, temp; \
\
m += nr >> 5; \
\
nr &= 0x1f; \
\
/* \
* Explicit full memory barrier needed before/after as \
* LLOCK/SCOND themselves don't provide any such semantics \
*/ \
smp_mb(); \
\
__asm__ __volatile__( \
"1: llock %0, [%2] \n" \
" " #asm_op " %1, %0, %3 \n" \
" scond %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(old), "=&r"(temp) \
: "r"(m), "ir"(nr) \
: "cc"); \
\
smp_mb(); \
\
return (old & (1 << nr)) != 0; \
}
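A minimal usage sketch of the "set unconditionally, report the old value" semantics described above (illustrative names, not part of this patch): test_and_set_bit() is the usual building block for a bit-based trylock.

static unsigned long bitlock_demo;

static inline int demo_trylock(void)
{
	/* old bit was 0  =>  we are the ones who set it  =>  lock acquired */
	return !test_and_set_bit(0, &bitlock_demo);
}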
#else /* !CONFIG_ARC_HAS_LLSC */
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*
* There's "significant" micro-optimization in writing our own variants of
* bitops (over generic variants)
*
* (1) The generic APIs have "signed" @nr while we have it "unsigned"
* This avoids extra code to be generated for pointer arithmetic, since
* is "not sure" that index is NOT -ve
* (2) Utilize the fact that ARCompact bit fiddling insn (BSET/BCLR/ASL) etc
* only consider bottom 5 bits of @nr, so NO need to mask them off.
* (GCC Quirk: however for constant @nr we still need to do the masking
* at compile time)
*/
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long temp, flags; \
m += nr >> 5; \
\
/* \
* spin lock/unlock provide the needed smp_mb() before/after \
*/ \
bitops_lock(flags); \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
}
#define TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old, flags; \
m += nr >> 5; \
\
bitops_lock(flags); \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
#endif
/***************************************
* Non atomic variants
**************************************/
#define __BIT_OP(op, c_op, asm_op) \
static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
{ \
unsigned long temp; \
m += nr >> 5; \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
}
#define __TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old; \
m += nr >> 5; \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
#define BIT_OPS(op, c_op, asm_op) \
\
/* set_bit(), clear_bit(), change_bit() */ \
BIT_OP(op, c_op, asm_op) \
\
/* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
TEST_N_BIT_OP(op, c_op, asm_op) \
\
/* __set_bit(), __clear_bit(), __change_bit() */ \
__BIT_OP(op, c_op, asm_op) \
\
/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
__TEST_N_BIT_OP(op, c_op, asm_op)
BIT_OPS(set, |, bset)
BIT_OPS(clear, & ~, bclr)
BIT_OPS(change, ^, bxor)
/*
* This routine doesn't need to be atomic.
*/
static inline int
test_bit(unsigned int nr, const volatile unsigned long *addr)
{
unsigned long mask;
addr += nr >> 5;
mask = 1UL << (nr & 0x1f);
return ((mask & *addr) != 0);
}
#ifdef CONFIG_ISA_ARCOMPACT
@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
 * @result: [1-32]
 * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
 */
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
{
	int n;
@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
 * ffs = Find First Set in word (LSB to MSB)
 * @result: [1-32], 0 if all 0's
 */
-static inline __attribute__ ((const)) int ffs(unsigned long x)
+static inline __attribute__ ((const)) int ffs(unsigned int x)
{
	int n;
@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
...
@@ -62,10 +62,6 @@
#define ARCH_SLAB_MINALIGN	8
#endif
-extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
...
@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H
+#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/barrier.h>
@@ -13,146 +14,130 @@
#ifdef CONFIG_ARC_HAS_LLSC
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long prev;
-
-	/*
-	 * Explicit full memory barrier needed before/after as
-	 * LLOCK/SCOND themselves don't provide any such semantics
-	 */
-	smp_mb();
-
-	__asm__ __volatile__(
-	"1:	llock   %0, [%1]	\n"
-	"	brne    %0, %2, 2f	\n"
-	"	scond   %3, [%1]	\n"
-	"	bnz     1b		\n"
-	"2:				\n"
-	: "=&r"(prev)	/* Early clobber, to prevent reg reuse */
-	: "r"(ptr),	/* Not "m": llock only supports reg direct addr mode */
-	  "ir"(expected),
-	  "r"(new)	/* can't be "ir". scond can't take LIMM for "b" */
-	: "cc", "memory"); /* so that gcc knows memory is being written here */
-
-	smp_mb();
-
-	return prev;
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-	unsigned long flags;
-	int prev;
-	volatile unsigned long *p = ptr;
-
-	/*
-	 * spin lock/unlock provide the needed smp_mb() before/after
-	 */
-	atomic_ops_lock(flags);
-	prev = *p;
-	if (prev == expected)
-		*p = new;
-	atomic_ops_unlock(flags);
-	return prev;
-}
-
-#endif
-
-#define arch_cmpxchg(ptr, o, n) ({			\
-	(typeof(*(ptr)))__cmpxchg((ptr),		\
-				  (unsigned long)(o),	\
-				  (unsigned long)(n));	\
-})
-
-/*
- * atomic_cmpxchg is same as cmpxchg
- *   LLSC: only different in data-type, semantics are exactly same
- *  !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
- *         semantics, and this lock also happens to be used by atomic_*()
- */
-#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
-
-/*
- * xchg (reg with memory) based on "Native atomic" EX insn
- */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
-{
-	extern unsigned long __xchg_bad_pointer(void);
-
-	switch (size) {
-	case 4:
-		smp_mb();
-
-		__asm__ __volatile__(
-		"	ex  %0, [%1]	\n"
-		: "+r"(val)
-		: "r"(ptr)
-		: "memory");
-
-		smp_mb();
-
-		return val;
-	}
-	return __xchg_bad_pointer();
-}
-
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-						 sizeof(*(ptr))))
-
-/*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to use the @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is a lot
- *    of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow the same locking rules.
- *
- * Technically the lock is also needed for UP (boils down to irq save/restore)
- * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
- * Other way around, xchg is one instruction anyways, so can't be interrupted
- * as such
- */
-
-#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
-
-#define arch_xchg(ptr, with)		\
-({					\
-	unsigned long flags;		\
-	typeof(*(ptr)) old_val;		\
-					\
-	atomic_ops_lock(flags);		\
-	old_val = _xchg(ptr, with);	\
-	atomic_ops_unlock(flags);	\
-	old_val;			\
-})
-
-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)
-
-#endif
-
-/*
- * "atomic" variant of xchg()
- * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following definition
- * is incorrect. But here's the rationale:
- *  SMP : Even xchg() takes the atomic_ops_lock, so OK.
- *  LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
- *        is natively "SMP safe", no serialization required).
- *  UP  : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
- *        could clobber them. atomic_xchg() itself would be 1 insn, so it
- *        can't be clobbered by others. Thus no serialization required when
- *        atomic_xchg is involved.
- */
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
+#define __cmpxchg(ptr, old, new)				\
+({								\
+	__typeof__(*(ptr)) _prev;				\
+								\
+	__asm__ __volatile__(					\
+	"1:	llock  %0, [%1]	\n"				\
+	"	brne   %0, %2, 2f	\n"			\
+	"	scond  %3, [%1]	\n"				\
+	"	bnz     1b		\n"			\
+	"2:				\n"			\
+	: "=&r"(_prev)	/* Early clobber prevent reg reuse */	\
+	: "r"(ptr),	/* Not "m": llock only supports reg */	\
+	  "ir"(old),						\
+	  "r"(new)	/* Not "ir": scond can't take LIMM */	\
+	: "cc",							\
+	  "memory");	/* gcc knows memory is clobbered */	\
+								\
+	_prev;							\
+})
+
+#define arch_cmpxchg_relaxed(ptr, old, new)			\
+({								\
+	__typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _o_ = (old);				\
+	__typeof__(*(ptr)) _n_ = (new);				\
+	__typeof__(*(ptr)) _prev_;				\
+								\
+	switch(sizeof((_p_))) {					\
+	case 4:							\
+		_prev_ = __cmpxchg(_p_, _o_, _n_);		\
+		break;						\
+	default:						\
+		BUILD_BUG();					\
+	}							\
+	_prev_;							\
+})
+
+#else
+
+#define arch_cmpxchg(ptr, old, new)				\
+({								\
+	volatile __typeof__(ptr) _p_ = (ptr);			\
+	__typeof__(*(ptr)) _o_ = (old);				\
+	__typeof__(*(ptr)) _n_ = (new);				\
+	__typeof__(*(ptr)) _prev_;				\
+	unsigned long __flags;					\
+								\
+	BUILD_BUG_ON(sizeof(_p_) != 4);				\
+								\
+	/*							\
+	 * spin lock/unlock provide the needed smp_mb() before/after	\
+	 */							\
+	atomic_ops_lock(__flags);				\
+	_prev_ = *_p_;						\
+	if (_prev_ == _o_)					\
+		*_p_ = _n_;					\
+	atomic_ops_unlock(__flags);				\
+	_prev_;							\
+})
+
+#endif
+
+/*
+ * xchg
+ */
+#ifdef CONFIG_ARC_HAS_LLSC
+
+#define __xchg(ptr, val)					\
+({								\
+	__asm__ __volatile__(					\
+	"	ex  %0, [%1]	\n"	/* set new value */	\
+	: "+r"(val)						\
+	: "r"(ptr)						\
+	: "memory");						\
+	_val_;		/* get old value */			\
+})
+
+#define arch_xchg_relaxed(ptr, val)				\
+({								\
+	__typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _val_ = (val);			\
+								\
+	switch(sizeof(*(_p_))) {				\
+	case 4:							\
+		_val_ = __xchg(_p_, _val_);			\
+		break;						\
+	default:						\
+		BUILD_BUG();					\
+	}							\
+	_val_;							\
+})
+
+#else  /* !CONFIG_ARC_HAS_LLSC */
+
+/*
+ * The EX instruction is baseline and present in !LLSC too. But in this
+ * regime it still needs to use the @atomic_ops_lock spinlock to allow interop
+ * with cmpxchg() which uses spinlock in !LLSC
+ * (llist.h uses xchg and cmpxchg on the same data)
+ */
+#define arch_xchg(ptr, val)					\
+({								\
+	__typeof__(ptr) _p_ = (ptr);				\
+	__typeof__(*(ptr)) _val_ = (val);			\
+								\
+	unsigned long __flags;					\
+								\
+	atomic_ops_lock(__flags);				\
+								\
+	__asm__ __volatile__(					\
+	"	ex  %0, [%1]	\n"				\
+	: "+r"(_val_)						\
+	: "r"(_p_)						\
+	: "memory");						\
+								\
+	atomic_ops_unlock(__flags);				\
+	_val_;							\
+})
+
+#endif
+
#endif
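A hedged sketch (illustrative types, mirroring what llist.h does) of the interop constraint named in the comment above: one path updates a shared head with cmpxchg() while another drains it with xchg(), so in the !LLSC build both must serialize on the same atomic_ops_lock.

struct demo_node { struct demo_node *next; };
static struct demo_node *demo_head;	/* stands in for llist_head */

static inline void demo_add(struct demo_node *n)
{
	struct demo_node *first;

	do {
		first = READ_ONCE(demo_head);
		n->next = first;
	} while (cmpxchg(&demo_head, first, n) != first);	/* retry on race */
}

static inline struct demo_node *demo_del_all(void)
{
	return xchg(&demo_head, NULL);	/* must observe a consistent head */
}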
@@ -126,19 +126,11 @@
 * to be saved again on kernel mode stack, as part of pt_regs.
 *-------------------------------------------------------------*/
.macro PROLOG_FREEUP_REG	reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-	sr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
	st  \reg, [\mem]
-#endif
.endm
.macro PROLOG_RESTORE_REG	reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-	lr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
	ld  \reg, [\mem]
-#endif
.endm
/*--------------------------------------------------------------
...
@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
				 pmd_t *pmd);
-/* Generic variants assume pgtable_t is struct page *, hence need for these */
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
				unsigned long end);
...
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
*
* MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
* This file contains the TLB access registers and commands
*/
#ifndef _ASM_ARC_MMU_ARCV2_H
#define _ASM_ARC_MMU_ARCV2_H
/*
* TLB Management regs
*/
#define ARC_REG_MMU_BCR 0x06f
#ifdef CONFIG_ARC_MMU_V3
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
/* Bits in MMU PID reg */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Bits in TLB Index reg */
#define TLB_LKUP_ERR 0x80000000
#ifdef CONFIG_ARC_MMU_V3
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/*
* TLB Commands
*/
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#ifdef CONFIG_ARC_MMU_V4
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
/* Masks for actual TLB "PD"s */
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
#ifndef __ASSEMBLY__
struct mm_struct;
extern int pae40_exist_but_not_enab(void);
static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
{
write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
}
static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
{
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
#ifdef CONFIG_ISA_ARCV2
write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
#endif
}
#else
.macro ARC_MMU_REENABLE reg
lr \reg, [ARC_REG_PID]
or \reg, \reg, MMU_ENABLE
sr \reg, [ARC_REG_PID]
.endm
#endif /* !__ASSEMBLY__ */
#endif
@@ -7,98 +7,15 @@
#define _ASM_ARC_MMU_H
#ifndef __ASSEMBLY__
-#include <linux/threads.h>	/* NR_CPUS */
-#endif
#if defined(CONFIG_ARC_MMU_V1)
#define CONFIG_ARC_MMU_VER 1
#elif defined(CONFIG_ARC_MMU_V2)
#define CONFIG_ARC_MMU_VER 2
#elif defined(CONFIG_ARC_MMU_V3)
#define CONFIG_ARC_MMU_VER 3
#elif defined(CONFIG_ARC_MMU_V4)
#define CONFIG_ARC_MMU_VER 4
#endif
/* MMU Management regs */
#define ARC_REG_MMU_BCR 0x06f
#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
#define ARC_USE_SCRATCH_REG
#endif
/* Bits in MMU PID register */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Error code if probe fails */
#define TLB_LKUP_ERR 0x80000000
#if (CONFIG_ARC_MMU_VER < 4)
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/* TLB Commands */
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#if (CONFIG_ARC_MMU_VER >= 2)
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#else
#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
#endif
#if (CONFIG_ARC_MMU_VER >= 4)
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
-#ifndef __ASSEMBLY__
+#include <linux/threads.h>	/* NR_CPUS */

typedef struct {
	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
} mm_context_t;

-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
-#endif
-
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void read_decode_mmu_bcr(void);
-
-static inline int is_pae40_enabled(void)
-{
-	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
-}
-
-extern int pae40_exist_but_not_enab(void);
-
-#endif	/* !__ASSEMBLY__ */
+#endif
+
+#include <asm/mmu-arcv2.h>

#endif
@@ -15,22 +15,23 @@
#ifndef _ASM_ARC_MMU_CONTEXT_H
#define _ASM_ARC_MMU_CONTEXT_H
-#include <asm/arcregs.h>
-#include <asm/tlb.h>
#include <linux/sched/mm.h>
+#include <asm/tlb.h>
#include <asm-generic/mm_hooks.h>
-/* ARC700 ASID Management
- *
- * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
- * with same vaddr (different tasks) to co-exist. This provides for
- * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
- *
- * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cpu.
- * When it reaches max 255, the allocation cycle starts afresh by flushing
- * the entire TLB and wrapping ASID back to zero.
+/* ARC ASID Management
+ *
+ * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on
+ * context-switch.
+ *
+ * ASID is managed per cpu, so task threads across CPUs can have different
+ * ASID. Global ASID management is needed if hardware supports TLB shootdown
+ * and/or shared TLB across cores, which ARC doesn't.
+ *
+ * Each task is assigned unique ASID, with a simple round-robin allocator
+ * tracked in @asid_cpu. When 8-bit value rolls over, a new cycle is started
+ * over from 0, and TLB is flushed
 *
 * A new allocation cycle, post rollover, could potentially reassign an ASID
 * to a different task. Thus the rule is to refresh the ASID in a new cycle.
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
	asid_mm(mm, cpu) = asid_cpu(cpu);
set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
+	mmu_setup_asid(mm, hw_pid(mm, cpu));
	local_irq_restore(flags);
}
@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	 */
	cpumask_set_cpu(cpu, mm_cpumask(next));
-#ifdef ARC_USE_SCRATCH_REG
-	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+	mmu_setup_pgd(next, next->pgd);
	get_new_mmu_context(next);
}
...
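A hedged sketch of the round-robin ASID scheme described in the comment above (not the kernel's actual allocator; all names below are illustrative): the low 8 bits are the hardware PID, the upper bits count the allocation "cycle", and a rollover triggers a full TLB flush before the cycle restarts at 0.

static unsigned long demo_cycle = 0x100;	/* generation, bits 31..8 */
static unsigned long demo_next_asid;		/* 0..255 within the cycle */

static inline unsigned long demo_alloc_asid(void)
{
	if (++demo_next_asid > 0xff) {		/* 8-bit h/w ASID rolled over */
		demo_next_asid = 0;
		demo_cycle += 0x100;		/* start a new generation */
		/* local_flush_tlb_all() would go here: stale ASIDs die */
	}
	return demo_cycle | demo_next_asid;	/* software-tracked value */
}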
@@ -34,57 +34,55 @@ void copy_user_highpage(struct page *to, struct page *from,
			unsigned long u_vaddr, struct vm_area_struct *vma);
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct {
-#ifdef CONFIG_ARC_HAS_PAE40
-	unsigned long long pte;
-#else
-	unsigned long pte;
-#endif
-} pte_t;
-typedef struct {
-	unsigned long pgd;
-} pgd_t;
-typedef struct {
-	unsigned long pgprot;
-} pgprot_t;
-
-#define pte_val(x)	((x).pte)
-#define pgd_val(x)	((x).pgd)
-#define pgprot_val(x)	((x).pgprot)
-
-#define __pte(x)	((pte_t) { (x) })
-#define __pgd(x)	((pgd_t) { (x) })
-#define __pgprot(x)	((pgprot_t) { (x) })
-
-#define pte_pgprot(x)	__pgprot(pte_val(x))
-
-#else /* !STRICT_MM_TYPECHECKS */
-
-#ifdef CONFIG_ARC_HAS_PAE40
-typedef unsigned long long pte_t;
-#else
-typedef unsigned long pte_t;
-#endif
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)	(x)
-#define pgd_val(x)	(x)
-#define pgprot_val(x)	(x)
-#define __pte(x)	(x)
-#define __pgd(x)	(x)
-#define __pgprot(x)	(x)
-#define pte_pgprot(x)	(x)
-
-#endif
-
-typedef pte_t * pgtable_t;
+typedef struct {
+	unsigned long pgd;
+} pgd_t;
+
+#define pgd_val(x)	((x).pgd)
+#define __pgd(x)	((pgd_t) { (x) })
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+typedef struct {
+	unsigned long pud;
+} pud_t;
+
+#define pud_val(x)	((x).pud)
+#define __pud(x)	((pud_t) { (x) })
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+typedef struct {
+	unsigned long pmd;
+} pmd_t;
+
+#define pmd_val(x)	((x).pmd)
+#define __pmd(x)	((pmd_t) { (x) })
+
+#endif
+
+typedef struct {
+#ifdef CONFIG_ARC_HAS_PAE40
+	unsigned long long pte;
+#else
+	unsigned long pte;
+#endif
+} pte_t;
+
+#define pte_val(x)	((x).pte)
+#define __pte(x)	((pte_t) { (x) })
+
+typedef struct {
+	unsigned long pgprot;
+} pgprot_t;
+
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+#define pte_pgprot(x)	__pgprot(pte_val(x))
+
+typedef struct page *pgtable_t;

/*
 * Use virt_to_pfn with caution:
@@ -122,8 +120,8 @@ extern int pfn_valid(unsigned long pfn);
 * virt here means link-address/program-address as embedded in object code.
 * And for ARC, link-addr = physical address
 */
#define __pa(vaddr)		((unsigned long)(vaddr))
#define __va(paddr)		((void *)((unsigned long)(paddr)))
#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
...
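A hedged illustration (hypothetical helper, not from this patch) of what the always-on struct wrappers buy once STRICT_MM_TYPECHECKS is enabled: pte_t, pgd_t and friends become distinct types, so mixing them is a compile error rather than a silent bug, while explicit unwrapping via pte_val()/__pte() still works.

static inline void typecheck_demo(pte_t pte, pgd_t *pgdp)
{
	unsigned long raw = pte_val(pte);	/* explicit unwrap is fine */

	/* *pgdp = pte;   <-- now rejected: incompatible struct types;
	 * with the old plain-unsigned-long fallback this compiled silently */
	(void)raw;
	(void)pgdp;
}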
@@ -31,30 +31,32 @@
#include <linux/mm.h>
#include <linux/log2.h>
+#include <asm-generic/pgalloc.h>

static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
-	pmd_set(pmd, pte);
+	/*
+	 * The cast to long below is OK in 32-bit PAE40 regime with long long pte
+	 * Despite "wider" pte, the pte table needs to be in non-PAE low memory
+	 * as all higher levels can only hold long pointers.
+	 *
+	 * The cast itself is needed given simplistic definition of set_pmd()
+	 */
+	set_pmd(pmd, __pmd((unsigned long)pte));
}

-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
{
-	pmd_set(pmd, (pte_t *) ptep);
+	set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
}

-static inline int __get_order_pgd(void)
-{
-	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
-}
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
-	int num, num2;
-	pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
+	pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);

	if (ret) {
+		int num, num2;
		num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
		memzero(ret, num * sizeof(pgd_t));
@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
	return ret;
}

-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	free_pages((unsigned long)pgd, __get_order_pgd());
-}
-
-/*
- * With software-only page-tables, addr-split for traversal is tweakable and
- * that directly governs how big tables would be at each level.
- * Further, the MMU page size is configurable.
- * Thus we need to programatically assert the size constraint
- * All of this is const math, allowing gcc to do constant folding/propagation.
- */
-
-static inline int __get_order_pte(void)
-{
-	return get_order(PTRS_PER_PTE * sizeof(pte_t));
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-	pte_t *pte;
-
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					 __get_order_pte());
-
-	return pte;
-}
-
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
-{
-	pgtable_t pte_pg;
-	struct page *page;
-
-	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
-	if (!pte_pg)
-		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
-	page = virt_to_page(pte_pg);
-	if (!pgtable_pte_page_ctor(page)) {
-		__free_page(page);
-		return 0;
-	}
-
-	return pte_pg;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
-{
-	pgtable_pte_page_dtor(virt_to_page(ptep));
-	free_pages((unsigned long)ptep, __get_order_pte());
-}
+#if CONFIG_PGTABLE_LEVELS > 3
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
+{
+	set_p4d(p4dp, __p4d((unsigned long)pudp));
+}
+
+#define __pud_free_tlb(tlb, pmd, addr)	pud_free((tlb)->mm, pmd)
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
+{
+	set_pud(pudp, __pud((unsigned long)pmdp));
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+
+#endif

#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
...
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
/*
* page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
* These correspond to the corresponding bits in the TLB
*/
#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
#ifdef CONFIG_ARC_CACHE_PAGES
#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
#else
#define _PAGE_CACHEABLE 0
#endif
#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
#define _PAGE_WRITE (1 << 2) /* User Write (H) */
#define _PAGE_READ (1 << 3) /* User Read (H) */
#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
#define _PAGE_SPECIAL (1 << 6)
#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
#ifdef CONFIG_ARC_MMU_V4
#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
#else
#define _PAGE_HW_SZ 0
#endif
/* Defaults for every user page */
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SPECIAL)
/* More abbreviated helpers */
#define PAGE_U_NONE __pgprot(___DEF)
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
#define PAGE_U_X_W_R __pgprot(___DEF \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_SHARED PAGE_U_W_R
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
/*
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
*
* Certain cases have 1:1 mapping
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
* which directly corresponds to PAGE_U_X_R
*
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
* can be tracked independent of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
*
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
* This is to enable COW mechanism
*/
/* xwr */
#define __P000 PAGE_U_NONE
#define __P001 PAGE_U_R
#define __P010 PAGE_U_R /* Pvt-W => !W */
#define __P011 PAGE_U_R /* Pvt-W => !W */
#define __P100 PAGE_U_X_R /* X => R */
#define __P101 PAGE_U_X_R
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
#define __S000 PAGE_U_NONE
#define __S001 PAGE_U_R
#define __S010 PAGE_U_W_R /* W => R */
#define __S011 PAGE_U_W_R
#define __S100 PAGE_U_X_R /* X => R */
#define __S101 PAGE_U_X_R
#define __S110 PAGE_U_X_W_R /* X => R */
#define __S111 PAGE_U_X_W_R
#ifndef __ASSEMBLY__
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
#define PTE_BIT_FUNC(fn, op) \
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);
/* Encode swap {type,off} tuple into PTE
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
*/
#define __swp_entry(type, off) ((swp_entry_t) \
{ ((type) & 0x1f) | ((off) << 13) })
/* Decode a PTE containing swap "identifier "into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
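A hedged worked example of the encoding above (hypothetical helper): the 5-bit type lives in bits 4..0, the offset starts at bit 13, so bits 12..5 (which include _PAGE_PRESENT at bit 9) stay zero and a swap entry can never look like a present PTE.

static inline void swp_encode_demo(void)
{
	swp_entry_t demo = __swp_entry(3, 0x1000);	/* type 3, offset 0x1000 */

	/* demo.val == 3 | (0x1000 << 13) == 0x02000003 */
	/* __swp_type(demo) == 3, __swp_offset(demo) == 0x1000 */
	/* bit 9 (_PAGE_PRESENT) is 0, so this is never a valid/present PTE */
	(void)demo;
}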
#define kern_addr_valid(addr) (1)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <asm/hugepage.h>
#endif
#endif /* __ASSEMBLY__ */
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
*/
/*
* Helpers for implementing paging levels
*/
#ifndef _ASM_ARC_PGTABLE_LEVELS_H
#define _ASM_ARC_PGTABLE_LEVELS_H
#if CONFIG_PGTABLE_LEVELS == 2
/*
* 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
*
* [31] 32 bit virtual address [0]
* -------------------------------------------------------
* | | <---------- PGDIR_SHIFT ----------> |
* | | | <-- PAGE_SHIFT --> |
* -------------------------------------------------------
* | | |
* | | --> off in page frame
* | ---> index into Page Table
* ----> index into Page Directory
*
* Given software walk, the vaddr split is arbitrarily set to 11:8:13
* However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to
* super page size.
*/
#if defined(CONFIG_ARC_HUGEPAGE_16M)
#define PGDIR_SHIFT 24
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
#define PGDIR_SHIFT 21
#else
/*
* No Super page case
* Default value provides 11:8:13 (8K), 10:10:12 (4K)
* Limits imposed by pgtable_t only PAGE_SIZE long
* (so 4K page can only have 1K entries: or 10 bits)
*/
#ifdef CONFIG_ARC_PAGE_SIZE_4K
#define PGDIR_SHIFT 22
#else
#define PGDIR_SHIFT 21
#endif
#endif
#else /* CONFIG_PGTABLE_LEVELS != 2 */
/*
* A default 3 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <7> : <8> : <13>
* A default 4 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
*/
#define PGDIR_SHIFT 28
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT 25
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT 21
#endif
#endif /* CONFIG_PGTABLE_LEVELS */
#define PGDIR_SIZE BIT(PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SIZE BIT(PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE - 1))
#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SIZE BIT(PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
#endif
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
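A short worked example of the constant math above (assumed default 2-level, 8K page, no superpage configuration), tying the macros back to the 11:8:13 split quoted earlier:

/*
 * PAGE_SHIFT = 13, PGDIR_SHIFT = 21 (PMD folded, so PMD_SHIFT == PGDIR_SHIFT):
 *   PTRS_PER_PTE = 1 << (21 - 13) = 256    (8 vaddr bits per PTE table)
 *   PTRS_PER_PGD = 1 << (32 - 21) = 2048   (11 vaddr bits for the PGD)
 * i.e. the 11:8:13 split; with 4K pages (PAGE_SHIFT = 12, PGDIR_SHIFT = 22)
 * the same math gives 10:10:12.
 */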
#ifndef __ASSEMBLY__
#if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/pgtable-nop4d.h>
#elif CONFIG_PGTABLE_LEVELS > 2
#include <asm-generic/pgtable-nopud.h>
#else
#include <asm-generic/pgtable-nopmd.h>
#endif
/*
* 1st level paging: pgd
*/
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr))
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_ERROR(e) \
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
#if CONFIG_PGTABLE_LEVELS > 3
/* In 4 level paging, p4d_* macros work on pgd */
#define p4d_none(x) (!p4d_val(x))
#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
#define p4d_present(x) (p4d_val(x))
#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
/*
* 2nd level paging: pud
*/
#define pud_ERROR(e) \
pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#endif
#if CONFIG_PGTABLE_LEVELS > 2
/*
* In 3 level paging, pud_* macros work on pgd
* In 4 level paging, pud_* macros work on pud
*/
#define pud_none(x) (!pud_val(x))
#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
#define pud_present(x) (pud_val(x))
#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
#define pud_page(pud) virt_to_page(pud_pgtable(pud))
#define set_pud(pudp, pud) (*(pudp) = pud)
/*
* 3rd level paging: pmd
*/
#define pmd_ERROR(e) \
pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#endif
/*
* Due to the strange way generic pgtable level folding works, the pmd_* macros
* - are valid even for 2 levels (which supposedly only has pgd - pte)
* - behave differently for 2 vs. 3
* In 2 level paging (pgd -> pte), pmd_* macros work on pgd
* In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
*/
#define pmd_none(x) (!pmd_val(x))
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
/*
* 4th level paging: pte
*/
#define pte_ERROR(e) \
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pte_none(x) (!pte_val(x))
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
#define set_pte(ptep, pte) ((*(ptep)) = (pte))
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#ifdef CONFIG_ISA_ARCV2
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#endif
#endif /* !__ASSEMBLY__ */
#endif
@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
#define VMALLOC_START	(PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
-#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+#define VMALLOC_SIZE	((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
...
@@ -2,8 +2,8 @@
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */
-#ifndef __ASMARC_SETUP_H
-#define __ASMARC_SETUP_H
+#ifndef __ASM_ARC_SETUP_H
+#define __ASM_ARC_SETUP_H

#include <linux/types.h>
@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
#define IS_AVAIL3(v, v2, s)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))

+extern void arc_mmu_init(void);
+extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_mmu_bcr(void);
+
+extern void arc_cache_init(void);
+extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_cache_bcr(void);
+
#endif /* __ASMARC_SETUP_H */
...@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void) ...@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
#include <asm/spinlock.h> #include <asm/spinlock.h>
extern arch_spinlock_t smp_atomic_ops_lock; extern arch_spinlock_t smp_atomic_ops_lock;
extern arch_spinlock_t smp_bitops_lock;
#define atomic_ops_lock(flags) do { \ #define atomic_ops_lock(flags) do { \
local_irq_save(flags); \ local_irq_save(flags); \
...@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock; ...@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
local_irq_restore(flags); \ local_irq_restore(flags); \
} while (0) } while (0)
#define bitops_lock(flags) do { \
local_irq_save(flags); \
arch_spin_lock(&smp_bitops_lock); \
} while (0)
#define bitops_unlock(flags) do { \
arch_spin_unlock(&smp_bitops_lock); \
local_irq_restore(flags); \
} while (0)
#else /* !CONFIG_SMP */ #else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags) #define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags) #define atomic_ops_unlock(flags) local_irq_restore(flags)
#define bitops_lock(flags) local_irq_save(flags)
#define bitops_unlock(flags) local_irq_restore(flags)
#endif /* !CONFIG_SMP */ #endif /* !CONFIG_SMP */
#endif /* !CONFIG_ARC_HAS_LLSC */ #endif /* !CONFIG_ARC_HAS_LLSC */
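With the bitops lock gone, only the atomics still use the spinlock/IRQ fallback. A sketch of how atomic_ops_lock()/atomic_ops_unlock() are consumed on !LLSC configurations (modelled on the existing fallback; the function name is illustrative):

static inline void arch_atomic_add_sketch(int i, atomic_t *v)
{
	unsigned long flags;

	/* SMP: take smp_atomic_ops_lock with IRQs off; UP: just IRQs off */
	atomic_ops_lock(flags);
	v->counter += i;
	atomic_ops_unlock(flags);
}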
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#ifndef __ASM_TLB_MMU_V1_H__
#define __ASM_TLB_MMU_V1_H__
#include <asm/mmu.h>
#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
.macro TLB_WRITE_HEURISTICS
#define JH_HACK1
#undef JH_HACK2
#undef JH_HACK3
#ifdef JH_HACK3
; Calculate set index for 2-way MMU
; -avoiding use of GetIndex from MMU
; and its unpleasant LFSR pseudo-random sequence
;
; r1 = TLBPD0 from TLB_RELOAD above
;
; -- jh_ex_way_set not cleared on startup
; didn't want to change setup.c
; hence extra instruction to clean
;
; -- should be in cache since in same line
; as r0/r1 saves above
;
ld r0,[jh_ex_way_sel] ; victim pointer
and r0,r0,1 ; clean
xor.f r0,r0,1 ; flip
st r0,[jh_ex_way_sel] ; store back
asr r0,r1,12 ; get set # <<1, note bit 12=R=0
or.nz r0,r0,1 ; set way bit
and r0,r0,0xff ; clean
sr r0,[ARC_REG_TLBINDEX]
#endif
#ifdef JH_HACK2
; JH hack #2
; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
; Slower in thrash case (where it matters) because more code is executed
; Inefficient due to two-register paradigm of this miss handler
;
/* r1 = data TLBPD0 at this point */
lr r0,[eret] /* instruction address */
xor r0,r0,r1 /* compare set # */
and.f r0,r0,0x000fe000 /* 2-way MMU mask */
bne 88f /* not in same set - no need to probe */
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
and r1,r1,0xff /* Data ASID */
or r0,r0,r1 /* Instruction address + Data ASID */
lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
xor r0,r0,1 /* flip bottom bit of data index */
b.d 89f
sr r0,[ARC_REG_TLBINDEX] /* and put it back */
88:
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
89:
#endif
#ifdef JH_HACK1
;
; Always checks whether instruction will be kicked out by dtlb miss
;
mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
bmsk r1,r3,7 /* Data ASID, bits 7-0 */
or_s r0,r0,r1 /* Instruction address + Data ASID */
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
cmp r0,r1 /* if no match on indices, go around */
xor.eq r1,r1,1 /* flip bottom bit of data index */
sr r1,[ARC_REG_TLBINDEX] /* and put it back */
#endif
.endm
#endif
#endif
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <asm/errno.h> #include <asm/errno.h>
#include <asm/arcregs.h> #include <asm/arcregs.h>
#include <asm/irqflags.h> #include <asm/irqflags.h>
#include <asm/mmu.h>
; A maximum number of supported interrupts in the core interrupt controller. ; A maximum number of supported interrupts in the core interrupt controller.
; This number is not equal to the maximum interrupt number (256) because ; This number is not equal to the maximum interrupt number (256) because
......
...@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck) ...@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
lr r0, [efa] lr r0, [efa]
mov r1, sp mov r1, sp
; hardware auto-disables MMU, re-enable it to allow kernel vaddr ; MC exceptions disable MMU
; access for say stack unwinding of modules for crash dumps ARC_MMU_REENABLE r3
lr r3, [ARC_REG_PID]
or r3, r3, MMU_ENABLE
sr r3, [ARC_REG_PID]
lsr r3, r2, 8 lsr r3, r2, 8
bmsk r3, r3, 7 bmsk r3, r3, 7
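A C-level rendering of the open-coded sequence being replaced, i.e. what ARC_MMU_REENABLE is expected to wrap (the helper name is illustrative; the macro's actual definition is not shown in this hunk):

#include <asm/arcregs.h>
#include <asm/mmu.h>

static inline void mmu_reenable_sketch(void)
{
	/*
	 * Machine-check exceptions auto-disable the MMU; turn it back on so
	 * kernel vaddrs (e.g. module stack unwinding for crash dumps) stay
	 * accessible.
	 */
	write_aux_reg(ARC_REG_PID, read_aux_reg(ARC_REG_PID) | MMU_ENABLE);
}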
......
...@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); ...@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
* Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
* Here local_irq_enable( ) shd not re-enable lower priority interrupts * Here local_irq_enable( ) shd not re-enable lower priority interrupts
* -If called from soft-ISR, it must re-enable all interrupts * -If called from soft-ISR, it must re-enable all interrupts
* soft ISR are low prioity jobs which can be very slow, thus all IRQs * soft ISR are low priority jobs which can be very slow, thus all IRQs
* must be enabled while they run. * must be enabled while they run.
* Now hardware context wise we may still be in L2 ISR (not done rtie) * Now hardware context wise we may still be in L2 ISR (not done rtie)
* still we must re-enable both L1 and L2 IRQs * still we must re-enable both L1 and L2 IRQs
......
...@@ -29,10 +29,8 @@ ...@@ -29,10 +29,8 @@
#ifndef CONFIG_ARC_HAS_LLSC #ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
EXPORT_SYMBOL_GPL(smp_bitops_lock);
#endif #endif
struct plat_smp_ops __weak plat_smp_ops; struct plat_smp_ops __weak plat_smp_ops;
...@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) ...@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
/* /*
* Call the platform specific IPI kick function, but avoid if possible: * Call the platform specific IPI kick function, but avoid if possible:
* Only do so if there's no pending msg from other concurrent sender(s). * Only do so if there's no pending msg from other concurrent sender(s).
* Otherwise, recevier will see this msg as well when it takes the * Otherwise, receiver will see this msg as well when it takes the
* IPI corresponding to that msg. This is true, even if it is already in * IPI corresponding to that msg. This is true, even if it is already in
* IPI handler, because !@old means it has not yet dequeued the msg(s) * IPI handler, because !@old means it has not yet dequeued the msg(s)
* so @new msg can be a free-loader * so @new msg can be a free-loader
......
...@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, ...@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
#else #else
/* On ARC, only Dward based unwinder works. fp based backtracing is /* On ARC, only Dward based unwinder works. fp based backtracing is
* not possible (-fno-omit-frame-pointer) because of the way function * not possible (-fno-omit-frame-pointer) because of the way function
* prelogue is setup (callee regs saved and then fp set and not other * prologue is setup (callee regs saved and then fp set and not other
* way around * way around
*/ */
pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
......
...@@ -205,93 +205,24 @@ void read_decode_cache_bcr(void) ...@@ -205,93 +205,24 @@ void read_decode_cache_bcr(void)
#define OP_INV_IC 0x4 #define OP_INV_IC 0x4
/* /*
* I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) * Cache Flush programming model
* *
* ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
* The orig Cache Management Module "CDU" only required paddr to invalidate a * Programming model requires both paddr and vaddr irrespective of aliasing
* certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. * considerations:
* Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching * - vaddr in {I,D}C_IV?L
* the exact same line. * - paddr in {I,D}C_PTAG
* *
* However for larger Caches (way-size > page-size) - i.e. in Aliasing config, * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
* paddr alone could not be used to correctly index the cache. * Programming model is different for aliasing vs. non-aliasing I$
* - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
* - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
* *
* ------------------ * - If PAE40 is enabled, independent of aliasing considerations, the higher
* MMU v1/v2 (Fixed Page Size 8k) * bits need to be written into PTAG_HI
* ------------------
* The solution was to provide CDU with these additonal vaddr bits. These
* would be bits [x:13], x would depend on cache-geometry, 13 comes from
* standard page size of 8k.
* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
* represent the offset within cache-line. The adv of using this "clumsy"
* interface for additional info was no new reg was needed in CDU programming
* model.
*
* 17:13 represented the max num of bits passable, actual bits needed were
* fewer, based on the num-of-aliases possible.
* -for 2 alias possibility, only bit 13 needed (32K cache)
* -for 4 alias possibility, bits 14:13 needed (64K cache)
*
* ------------------
* MMU v3
* ------------------
* This ver of MMU supports variable page sizes (1k-16k): although Linux will
* only support 8k (default), 16k and 4k.
* However from hardware perspective, smaller page sizes aggravate aliasing
* meaning more vaddr bits needed to disambiguate the cache-line-op ;
* the existing scheme of piggybacking won't work for certain configurations.
* Two new registers IC_PTAG and DC_PTAG inttoduced.
* "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
*/ */
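A simplified C sketch of the per-line loop implied by the programming model described above, for an aliasing D$ config: tag (paddr) goes into the PTAG register, index (vaddr) into the IV?L register, one cache line at a time (the helper itself is illustrative; the real loops also handle PAE40 and the region-flush variants):

static inline void dc_inv_lines_sketch(phys_addr_t paddr, unsigned long vaddr,
				       int num_lines)
{
	while (num_lines-- > 0) {
		write_aux_reg(ARC_REG_DC_PTAG, paddr);	/* tag: physical addr */
		write_aux_reg(ARC_REG_DC_IVDL, vaddr);	/* index: virtual addr */
		paddr += L1_CACHE_BYTES;
		vaddr += L1_CACHE_BYTES;
	}
}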
static inline static inline
void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page)
{
unsigned int aux_cmd;
int num_lines;
if (op == OP_INV_IC) {
aux_cmd = ARC_REG_IC_IVIL;
} else {
/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
}
/* Ensure we properly floor/ceil the non-line aligned/sized requests
* and have @paddr - aligned to cache line and integral @num_lines.
* This however can be avoided for page sized since:
* -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
if (!full_page) {
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
vaddr &= CACHE_LINE_MASK;
}
num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
/* MMUv2 and before: paddr contains stuffed vaddrs bits */
paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
while (num_lines-- > 0) {
write_aux_reg(aux_cmd, paddr);
paddr += L1_CACHE_BYTES;
}
}
/*
* For ARC700 MMUv3 I-cache and D-cache flushes
* - ARC700 programming model requires paddr and vaddr be passed in seperate
* AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
* caches actually alias or not.
* - For HS38, only the aliasing I-cache configuration uses the PTAG reg
* (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
*/
static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page) unsigned long sz, const int op, const int full_page)
{ {
...@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, ...@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
#ifndef USE_RGN_FLSH #ifndef USE_RGN_FLSH
/* /*
* In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
* Here's how cache ops are implemented
*
* - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
* - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
* - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
* respectively, similar to MMU v3 programming model, hence
* __cache_line_loop_v3() is used)
*
* If PAE40 is enabled, independent of aliasing considerations, the higher bits
* needs to be written into PTAG_HI
*/ */
static inline static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
...@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, ...@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
#endif #endif
#if (CONFIG_ARC_MMU_VER < 3) #ifdef CONFIG_ARC_MMU_V3
#define __cache_line_loop __cache_line_loop_v2
#elif (CONFIG_ARC_MMU_VER == 3)
#define __cache_line_loop __cache_line_loop_v3 #define __cache_line_loop __cache_line_loop_v3
#elif (CONFIG_ARC_MMU_VER > 3) #else
#define __cache_line_loop __cache_line_loop_v4 #define __cache_line_loop __cache_line_loop_v4
#endif #endif
...@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) ...@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
clear_page(to); clear_page(to);
clear_bit(PG_dc_clean, &page->flags); clear_bit(PG_dc_clean, &page->flags);
} }
EXPORT_SYMBOL(clear_user_page);
/********************************************************************** /**********************************************************************
* Explicit Cache flush request from user space via syscall * Explicit Cache flush request from user space via syscall
......
...@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) ...@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
pud_t *pud, *pud_k; pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k; pmd_t *pmd, *pmd_k;
pgd = pgd_offset_fast(current->active_mm, address); pgd = pgd_offset(current->active_mm, address);
pgd_k = pgd_offset_k(address); pgd_k = pgd_offset_k(address);
if (!pgd_present(*pgd_k)) if (pgd_none(*pgd_k))
goto bad_area; goto bad_area;
if (!pgd_present(*pgd))
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, address); p4d = p4d_offset(pgd, address);
p4d_k = p4d_offset(pgd_k, address); p4d_k = p4d_offset(pgd_k, address);
if (!p4d_present(*p4d_k)) if (p4d_none(*p4d_k))
goto bad_area; goto bad_area;
if (!p4d_present(*p4d))
set_p4d(p4d, *p4d_k);
pud = pud_offset(p4d, address); pud = pud_offset(p4d, address);
pud_k = pud_offset(p4d_k, address); pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k)) if (pud_none(*pud_k))
goto bad_area; goto bad_area;
if (!pud_present(*pud))
set_pud(pud, *pud_k);
pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address); pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k)) if (pmd_none(*pmd_k))
goto bad_area; goto bad_area;
if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k); set_pmd(pmd, *pmd_k);
/* XXX: create the TLB entry here */ /* XXX: create the TLB entry here */
return 0; return 0;
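The resulting walk, reconstructed from the hunk above as one piece (the return values and the _sketch suffix are illustrative): each level of the kernel table is checked with p?d_none(), and only the terminal pmd entry is copied into the faulting task's table.

#include <linux/mm.h>
#include <linux/sched.h>

static noinline int handle_kernel_vaddr_fault_sketch(unsigned long address)
{
	pgd_t *pgd, *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd = pgd_offset(current->active_mm, address);
	pgd_k = pgd_offset_k(address);
	if (pgd_none(*pgd_k))
		return 1;		/* bad_area */

	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (p4d_none(*p4d_k))
		return 1;

	pud = pud_offset(p4d, address);
	pud_k = pud_offset(p4d_k, address);
	if (pud_none(*pud_k))
		return 1;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (pmd_none(*pmd_k))
		return 1;

	set_pmd(pmd, *pmd_k);		/* sync kernel mapping into user table */
	return 0;
}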
......
...@@ -189,6 +189,11 @@ void __init mem_init(void) ...@@ -189,6 +189,11 @@ void __init mem_init(void)
{ {
memblock_free_all(); memblock_free_all();
highmem_init(); highmem_init();
BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
} }
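These compile-time checks assert that each level's table fits in a single page, since a per-level table is allocated as one page. A worked example under an assumed 2-level, 8 KB-page configuration with 4-byte entries (the values are assumptions, not taken from this hunk):

/*
 * PTRS_PER_PTE = 2048 -> 2048 * sizeof(pte_t) = 2048 * 4 = 8192 = PAGE_SIZE
 * PTRS_PER_PGD =  256 ->  256 * sizeof(pgd_t) =  256 * 4 = 1024 < PAGE_SIZE
 * so both BUILD_BUG_ON()s above compile away; folded PUD/PMD levels reuse
 * the PGD numbers and trivially pass as well.
 */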
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
......
...@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) ...@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
if (arc_uncached_addr_space(paddr)) if (arc_uncached_addr_space(paddr))
return (void __iomem *)(u32)paddr; return (void __iomem *)(u32)paddr;
return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); return ioremap_prot(paddr, size,
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
} }
EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(ioremap);
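Callers are unaffected by the switch to pgprot_noncached(PAGE_KERNEL); a usage sketch (the device address and register offset are purely illustrative):

#include <linux/io.h>
#include <linux/sizes.h>

static void __iomem *map_device_sketch(void)
{
	/* returns an uncached kernel mapping of the MMIO region */
	void __iomem *regs = ioremap(0xf0000000, SZ_4K);

	if (regs)
		writel(0x1, regs + 0x4);	/* poke an illustrative register */
	return regs;
}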
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#include <asm/arcregs.h> #include <asm/arcregs.h>
#include <asm/cache.h> #include <asm/cache.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/tlb-mmu1.h>
#ifdef CONFIG_ISA_ARCOMPACT #ifdef CONFIG_ISA_ARCOMPACT
;----------------------------------------------------------------- ;-----------------------------------------------------------------
...@@ -94,11 +93,6 @@ ex_saved_reg1: ...@@ -94,11 +93,6 @@ ex_saved_reg1:
st_s r1, [r0, 4] st_s r1, [r0, 4]
st_s r2, [r0, 8] st_s r2, [r0, 8]
st_s r3, [r0, 12] st_s r3, [r0, 12]
; VERIFY if the ASID in MMU-PID Reg is same as
; one in Linux data structures
tlb_paranoid_check_asm
.endm .endm
.macro TLBMISS_RESTORE_REGS .macro TLBMISS_RESTORE_REGS
...@@ -148,53 +142,16 @@ ex_saved_reg1: ...@@ -148,53 +142,16 @@ ex_saved_reg1:
#endif #endif
;============================================================================ ;============================================================================
; Troubleshooting Stuff ;TLB Miss handling Code
;============================================================================ ;============================================================================
; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid #ifndef PMD_SHIFT
; When Creating TLB Entries, instead of doing 3 dependent loads from memory, #define PMD_SHIFT PUD_SHIFT
; we use the MMU PID Reg to get current ASID.
; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
; So we try to detect this in TLB Mis shandler
.macro tlb_paranoid_check_asm
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
GET_CURR_TASK_ON_CPU r3
ld r0, [r3, TASK_ACT_MM]
ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
breq r0, 0, 55f ; Error if no ASID allocated
lr r1, [ARC_REG_PID]
and r1, r1, 0xFF
and r2, r0, 0xFF ; MMU PID bits only for comparison
breq r1, r2, 5f
55:
; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
lr r2, [erstatus]
bbit0 r2, STATUS_U_BIT, 5f
; We sure are in troubled waters, Flag the error, but to do so
; need to switch to kernel mode stack to call error routine
GET_TSK_STACK_BASE r3, sp
; Call printk to shoutout aloud
mov r2, 1
j print_asid_mismatch
5: ; ASIDs match so proceed normally
nop
#endif #endif
.endm #ifndef PUD_SHIFT
#define PUD_SHIFT PGDIR_SHIFT
;============================================================================ #endif
;TLB Miss handling Code
;============================================================================
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address. ; This macro does the page-table lookup for the faulting address.
...@@ -203,7 +160,7 @@ ex_saved_reg1: ...@@ -203,7 +160,7 @@ ex_saved_reg1:
lr r2, [efa] lr r2, [efa]
#ifdef ARC_USE_SCRATCH_REG #ifdef CONFIG_ISA_ARCV2
lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
#else #else
GET_CURR_TASK_ON_CPU r1 GET_CURR_TASK_ON_CPU r1
...@@ -216,6 +173,24 @@ ex_saved_reg1: ...@@ -216,6 +173,24 @@ ex_saved_reg1:
tst r3, r3 tst r3, r3
bz do_slow_path_pf ; if no Page Table, do page fault bz do_slow_path_pf ; if no Page Table, do page fault
#if CONFIG_PGTABLE_LEVELS > 3
lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD
and r0, r0, (PTRS_PER_PUD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#if CONFIG_PGTABLE_LEVELS > 2
lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD
and r0, r0, (PTRS_PER_PMD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
add2.nz r1, r1, r0 add2.nz r1, r1, r0
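A C rendering of the assembly walk added above (illustrative only: it ignores attribute bits in the entries and assumes tables are directly addressable). When a level is folded, the PMD_SHIFT/PUD_SHIFT fallbacks defined earlier collapse that step onto the level above, so the same code serves 2-, 3- and 4-level builds:

static unsigned long *walk_fault_addr_sketch(unsigned long *pgd_base,
					     unsigned long efa)
{
	unsigned long *tbl;

	/* PGD entry -> next-level table (or pte table on a 2-level build) */
	tbl = (unsigned long *)pgd_base[(efa >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)];
	if (!tbl)
		return NULL;			/* do_slow_path_pf */

#if CONFIG_PGTABLE_LEVELS > 3
	/* PUD entry -> PMD table */
	tbl = (unsigned long *)tbl[(efa >> PUD_SHIFT) & (PTRS_PER_PUD - 1)];
	if (!tbl)
		return NULL;
#endif

#if CONFIG_PGTABLE_LEVELS > 2
	/* PMD entry -> pte table */
	tbl = (unsigned long *)tbl[(efa >> PMD_SHIFT) & (PTRS_PER_PMD - 1)];
	if (!tbl)
		return NULL;
#endif

	return tbl;				/* base of the pte table */
}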
...@@ -279,7 +254,7 @@ ex_saved_reg1: ...@@ -279,7 +254,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU ; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU .macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER < 4) #ifdef CONFIG_ARC_MMU_V3
/* Get free TLB slot: Set = computed from vaddr, way = random */ /* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND] sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
...@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD) ...@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
CONV_PTE_TO_TLB CONV_PTE_TO_TLB
#if (CONFIG_ARC_MMU_VER == 1)
; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
; But only for old MMU or one with Metal Fix
TLB_WRITE_HEURISTICS
#endif
COMMIT_ENTRY_TO_MMU COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
......