Commit e07af262 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC updates from Vineet Gupta:
 "Finally a big pile of changes for ARC (atomics/mm). These are from our
  internal arc64 tree, preparing mainline for eventual arc64 support.
  I'm spreading them out to avoid tsunami of patches in one release.

   - MM rework:
       - Implement up to 4 paging levels
       - Enable STRICT_MM_TYPECHECK
       - switch pgtable_t back to 'struct page *'

   - Atomics rework / implement relaxed accessors

   - Retire legacy MMUv1,v2; ARC750 cores

   - A few other build errors, typos"

* tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (33 commits)
  ARC: mm: vmalloc sync from kernel to user table to update PMD ...
  ARC: mm: support 4 levels of page tables
  ARC: mm: support 3 levels of page tables
  ARC: mm: switch to asm-generic/pgalloc.h
  ARC: mm: switch pgtable_t back to struct page *
  ARC: mm: hack to allow 2 level build with 4 level code
  ARC: mm: disintegrate pgtable.h into levels and flags
  ARC: mm: disintegrate mmu.h (arcv2 bits out)
  ARC: mm: move MMU specific bits out of entry code ...
  ARC: mm: move MMU specific bits out of ASID allocator
  ARC: mm: non-functional code movement/cleanup
  ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set)
  ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag
  ARC: mm: Enable STRICT_MM_TYPECHECKS
  ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS
  ARC: mm: move mmu/cache externs out to setup.h
  ARC: mm: remove tlb paranoid code
  ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only
  ARC: retire MMUv1 and MMUv2 support
  ARC: retire ARC750 support
  ...
parents 063df71a 56809a28
......@@ -116,16 +116,9 @@ choice
default ARC_CPU_770 if ISA_ARCOMPACT
default ARC_CPU_HS if ISA_ARCV2
if ISA_ARCOMPACT
config ARC_CPU_750D
bool "ARC750D"
select ARC_CANT_LLSC
help
Support for ARC750 core
config ARC_CPU_770
bool "ARC770"
depends on ISA_ARCOMPACT
select ARC_HAS_SWAPE
help
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
......@@ -135,8 +128,6 @@ config ARC_CPU_770
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
endif #ISA_ARCOMPACT
config ARC_CPU_HS
bool "ARC-HS"
depends on ISA_ARCV2
......@@ -274,33 +265,17 @@ config ARC_DCCM_BASE
choice
prompt "MMU Version"
default ARC_MMU_V3 if ARC_CPU_770
default ARC_MMU_V2 if ARC_CPU_750D
default ARC_MMU_V4 if ARC_CPU_HS
if ISA_ARCOMPACT
config ARC_MMU_V1
bool "MMU v1"
help
Orig ARC700 MMU
config ARC_MMU_V2
bool "MMU v2"
help
Fixed the deficiency of v1 - possible thrashing in memcpy scenario
when 2 D-TLB and 1 I-TLB entries index into same 2way set.
default ARC_MMU_V3 if ISA_ARCOMPACT
default ARC_MMU_V4 if ISA_ARCV2
config ARC_MMU_V3
bool "MMU v3"
depends on ARC_CPU_770
depends on ISA_ARCOMPACT
help
Introduced with ARC700 4.10: New Features
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
Shared Address Spaces (SASID)
endif
config ARC_MMU_V4
bool "MMU v4"
depends on ISA_ARCV2
......@@ -319,7 +294,6 @@ config ARC_PAGE_SIZE_8K
config ARC_PAGE_SIZE_16K
bool "16KB"
depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K
bool "4KB"
......@@ -340,6 +314,10 @@ config ARC_HUGEPAGE_16M
endchoice
config PGTABLE_LEVELS
int "Number of Page table levels"
default 2
config ARC_COMPACT_IRQ_LEVELS
depends on ISA_ARCOMPACT
bool "Setup Timer IRQ as high Priority"
......@@ -563,9 +541,6 @@ config ARC_DW2_UNWIND
If you don't debug the kernel, you can say N, but we may not be able
to solve problems without frame unwind information
config ARC_DBG_TLB_PARANOIA
bool "Paranoia Checks in Low Level TLB Handlers"
config ARC_DBG_JUMP_LABEL
bool "Paranoid checks in Static Keys (jump labels) code"
depends on JUMP_LABEL
......
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_LLSC_H
#define _ASM_ARC_ATOMIC_LLSC_H
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
} \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return val; \
}
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
{ \
unsigned int val, orig; \
\
__asm__ __volatile__( \
"1: llock %[orig], [%[ctr]] \n" \
" " #asm_op " %[val], %[orig], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val), \
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return orig; \
}
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
#define _ASM_ARC_ATOMIC_SPLOCK_H
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*/
static inline void arch_atomic_set(atomic_t *v, int i)
{
/*
* Independent of hardware support, all of the atomic_xxx() APIs need
* to follow the same locking rules to make sure that a "hardware"
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
* sequence
*
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
* requires the locking.
*/
unsigned long flags;
atomic_ops_lock(flags);
WRITE_ONCE(v->counter, i);
atomic_ops_unlock(flags);
}
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
\
atomic_ops_lock(flags); \
v->counter c_op i; \
atomic_ops_unlock(flags); \
}
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int temp; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
v->counter = temp; \
atomic_ops_unlock(flags); \
\
return temp; \
}
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int orig; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
orig = v->counter; \
v->counter c_op i; \
atomic_ops_unlock(flags); \
\
return orig; \
}
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
* - The address HAS to be 64-bit aligned
*/
#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
#define _ASM_ARC_ATOMIC64_ARCV2_H
typedef struct {
s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(a) { (a) }
static inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
: "=r"(val)
: "r"(&v->counter));
return val;
}
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
* explicit volatile. Otherwise gcc optimizers could elide the store
* which borked atomic64 self-test
* In the inline asm version, memory clobber needed for exact same
* reason, to tell gcc about the store.
*
* This however is not needed for sibling atomic64_add() etc since both
* load/store are explicitly done in inline asm. As long as API is used
* for each access, gcc has no way to optimize away any load/store
*/
__asm__ __volatile__(
" std %0, [%1] \n"
:
: "r"(a), "r"(&v->counter)
: "memory");
}
#define ATOMIC64_OP(op, op1, op2) \
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: [val] "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return val; \
}
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
#define ATOMIC64_FETCH_OP(op, op1, op2) \
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val, orig; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%2] \n" \
" " #op1 " %L1, %L0, %L3 \n" \
" " #op2 " %H1, %H0, %H3 \n" \
" scondd %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(orig), "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return orig; \
}
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_OP_RETURN(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(add, add.f, adc)
ATOMIC64_OPS(sub, sub.f, sbc)
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(and, and, and)
ATOMIC64_OPS(andnot, bic, bic)
ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)
#define arch_atomic64_andnot arch_atomic64_andnot
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
static inline s64
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" brne %L0, %L2, 2f \n"
" brne %H0, %H2, 2f \n"
" scondd %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "ir"(expected), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" scondd %2, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
/**
* arch_atomic64_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic64_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 val;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
" brlt %H0, 0, 2f \n"
" scondd %0, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(val)
: "r"(&v->counter)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return val;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
/**
* arch_atomic64_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, if it was not @u.
* Returns the old value of @v
*/
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 old, temp;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%2] \n"
" brne %L0, %L4, 2f # continue to add since v != u \n"
" breq.d %H0, %H4, 3f # return since v == u \n"
"2: \n"
" add.f %L1, %L0, %L3 \n"
" adc %H1, %H0, %H3 \n"
" scondd %1, [%2] \n"
" bnz 1b \n"
"3: \n"
: "=&r"(old), "=&r" (temp)
: "r"(&v->counter), "r"(a), "r"(u)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return old;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
......@@ -14,188 +14,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
#ifndef CONFIG_ARC_HAS_LLSC
#include <asm/smp.h>
#endif
#ifdef CONFIG_ARC_HAS_LLSC
/*
* Hardware assisted Atomic-R-M-W
*/
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned int temp; \
\
m += nr >> 5; \
\
nr &= 0x1f; \
\
__asm__ __volatile__( \
"1: llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
: "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
"ir"(nr) \
: "cc"); \
}
/*
* Semantically:
* Test the bit
* if clear
* set it and return 0 (old value)
* else
* return 1 (old value).
*
* Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
* and the old value of bit is returned
*/
#define TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old, temp; \
\
m += nr >> 5; \
\
nr &= 0x1f; \
\
/* \
* Explicit full memory barrier needed before/after as \
* LLOCK/SCOND themselves don't provide any such smenatic \
*/ \
smp_mb(); \
\
__asm__ __volatile__( \
"1: llock %0, [%2] \n" \
" " #asm_op " %1, %0, %3 \n" \
" scond %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(old), "=&r"(temp) \
: "r"(m), "ir"(nr) \
: "cc"); \
\
smp_mb(); \
\
return (old & (1 << nr)) != 0; \
}
#else /* !CONFIG_ARC_HAS_LLSC */
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*
* There's "significant" micro-optimization in writing our own variants of
* bitops (over generic variants)
*
* (1) The generic APIs have "signed" @nr while we have it "unsigned"
* This avoids extra code to be generated for pointer arithmatic, since
* is "not sure" that index is NOT -ve
* (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc
* only consider bottom 5 bits of @nr, so NO need to mask them off.
* (GCC Quirk: however for constant @nr we still need to do the masking
* at compile time)
*/
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long temp, flags; \
m += nr >> 5; \
\
/* \
* spin lock/unlock provide the needed smp_mb() before/after \
*/ \
bitops_lock(flags); \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
}
#define TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old, flags; \
m += nr >> 5; \
\
bitops_lock(flags); \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
#endif
/***************************************
* Non atomic variants
**************************************/
#define __BIT_OP(op, c_op, asm_op) \
static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
{ \
unsigned long temp; \
m += nr >> 5; \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
}
#define __TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old; \
m += nr >> 5; \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
#define BIT_OPS(op, c_op, asm_op) \
\
/* set_bit(), clear_bit(), change_bit() */ \
BIT_OP(op, c_op, asm_op) \
\
/* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
TEST_N_BIT_OP(op, c_op, asm_op) \
\
/* __set_bit(), __clear_bit(), __change_bit() */ \
__BIT_OP(op, c_op, asm_op) \
\
/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
__TEST_N_BIT_OP(op, c_op, asm_op)
BIT_OPS(set, |, bset)
BIT_OPS(clear, & ~, bclr)
BIT_OPS(change, ^, bxor)
/*
* This routine doesn't need to be atomic.
*/
static inline int
test_bit(unsigned int nr, const volatile unsigned long *addr)
{
unsigned long mask;
addr += nr >> 5;
mask = 1UL << (nr & 0x1f);
return ((mask & *addr) != 0);
}
#ifdef CONFIG_ISA_ARCOMPACT
......@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
* @result: [1-32]
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
*/
static inline __attribute__ ((const)) int fls(unsigned long x)
static inline __attribute__ ((const)) int fls(unsigned int x)
{
int n;
......@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
* ffs = Find First Set in word (LSB to MSB)
* @result: [1-32], 0 if all 0's
*/
static inline __attribute__ ((const)) int ffs(unsigned long x)
static inline __attribute__ ((const)) int ffs(unsigned int x)
{
int n;
......@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
......
......@@ -62,10 +62,6 @@
#define ARCH_SLAB_MINALIGN 8
#endif
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
extern int ioc_enable;
extern unsigned long perip_base, perip_end;
......
......@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H
#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/barrier.h>
......@@ -13,146 +14,130 @@
#ifdef CONFIG_ARC_HAS_LLSC
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
unsigned long prev;
/*
* Explicit full memory barrier needed before/after as
* LLOCK/SCOND themselves don't provide any such semantics
/*
* if (*ptr == @old)
* *ptr = @new
*/
smp_mb();
__asm__ __volatile__(
"1: llock %0, [%1] \n"
" brne %0, %2, 2f \n"
" scond %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev) /* Early clobber, to prevent reg reuse */
: "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
"ir"(expected),
"r"(new) /* can't be "ir". scond can't take LIMM for "b" */
: "cc", "memory"); /* so that gcc knows memory is being written here */
smp_mb();
return prev;
}
#else /* !CONFIG_ARC_HAS_LLSC */
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
unsigned long flags;
int prev;
volatile unsigned long *p = ptr;
#define __cmpxchg(ptr, old, new) \
({ \
__typeof__(*(ptr)) _prev; \
\
__asm__ __volatile__( \
"1: llock %0, [%1] \n" \
" brne %0, %2, 2f \n" \
" scond %3, [%1] \n" \
" bnz 1b \n" \
"2: \n" \
: "=&r"(_prev) /* Early clobber prevent reg reuse */ \
: "r"(ptr), /* Not "m": llock only supports reg */ \
"ir"(old), \
"r"(new) /* Not "ir": scond can't take LIMM */ \
: "cc", \
"memory"); /* gcc knows memory is clobbered */ \
\
_prev; \
})
/*
* spin lock/unlock provide the needed smp_mb() before/after
*/
atomic_ops_lock(flags);
prev = *p;
if (prev == expected)
*p = new;
atomic_ops_unlock(flags);
return prev;
}
#define arch_cmpxchg_relaxed(ptr, old, new) \
({ \
__typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _o_ = (old); \
__typeof__(*(ptr)) _n_ = (new); \
__typeof__(*(ptr)) _prev_; \
\
switch(sizeof((_p_))) { \
case 4: \
_prev_ = __cmpxchg(_p_, _o_, _n_); \
break; \
default: \
BUILD_BUG(); \
} \
_prev_; \
})
#endif
#else
#define arch_cmpxchg(ptr, o, n) ({ \
(typeof(*(ptr)))__cmpxchg((ptr), \
(unsigned long)(o), \
(unsigned long)(n)); \
#define arch_cmpxchg(ptr, old, new) \
({ \
volatile __typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _o_ = (old); \
__typeof__(*(ptr)) _n_ = (new); \
__typeof__(*(ptr)) _prev_; \
unsigned long __flags; \
\
BUILD_BUG_ON(sizeof(_p_) != 4); \
\
/* \
* spin lock/unlock provide the needed smp_mb() before/after \
*/ \
atomic_ops_lock(__flags); \
_prev_ = *_p_; \
if (_prev_ == _o_) \
*_p_ = _n_; \
atomic_ops_unlock(__flags); \
_prev_; \
})
/*
* atomic_cmpxchg is same as cmpxchg
* LLSC: only different in data-type, semantics are exactly same
* !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
* semantics, and this lock also happens to be used by atomic_*()
*/
#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
#endif
/*
* xchg (reg with memory) based on "Native atomic" EX insn
* xchg
*/
static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
int size)
{
extern unsigned long __xchg_bad_pointer(void);
switch (size) {
case 4:
smp_mb();
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r"(val)
: "r"(ptr)
: "memory");
#ifdef CONFIG_ARC_HAS_LLSC
smp_mb();
#define __xchg(ptr, val) \
({ \
__asm__ __volatile__( \
" ex %0, [%1] \n" /* set new value */ \
: "+r"(val) \
: "r"(ptr) \
: "memory"); \
_val_; /* get old value */ \
})
return val;
}
return __xchg_bad_pointer();
}
#define arch_xchg_relaxed(ptr, val) \
({ \
__typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _val_ = (val); \
\
switch(sizeof(*(_p_))) { \
case 4: \
_val_ = __xchg(_p_, _val_); \
break; \
default: \
BUILD_BUG(); \
} \
_val_; \
})
#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
sizeof(*(ptr))))
#else /* !CONFIG_ARC_HAS_LLSC */
/*
* xchg() maps directly to ARC EX instruction which guarantees atomicity.
* However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
* due to a subtle reason:
* - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
* of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
* Hence xchg() needs to follow same locking rules.
*
* Technically the lock is also needed for UP (boils down to irq save/restore)
* but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
* be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
* Other way around, xchg is one instruction anyways, so can't be interrupted
* as such
* EX instructions is baseline and present in !LLSC too. But in this
* regime it still needs use @atomic_ops_lock spinlock to allow interop
* with cmpxchg() which uses spinlock in !LLSC
* (llist.h use xchg and cmpxchg on sama data)
*/
#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
#define arch_xchg(ptr, with) \
#define arch_xchg(ptr, val) \
({ \
unsigned long flags; \
typeof(*(ptr)) old_val; \
__typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _val_ = (val); \
\
unsigned long __flags; \
\
atomic_ops_lock(flags); \
old_val = _xchg(ptr, with); \
atomic_ops_unlock(flags); \
old_val; \
atomic_ops_lock(__flags); \
\
__asm__ __volatile__( \
" ex %0, [%1] \n" \
: "+r"(_val_) \
: "r"(_p_) \
: "memory"); \
\
atomic_ops_unlock(__flags); \
_val_; \
})
#else
#define arch_xchg(ptr, with) _xchg(ptr, with)
#endif
/*
* "atomic" variant of xchg()
* REQ: It needs to follow the same serialization rules as other atomic_xxx()
* Since xchg() doesn't always do that, it would seem that following definition
* is incorrect. But here's the rationale:
* SMP : Even xchg() takes the atomic_ops_lock, so OK.
* LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
* is natively "SMP safe", no serialization required).
* UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
* could clobber them. atomic_xchg() itself would be 1 insn, so it
* can't be clobbered by others. Thus no serialization required when
* atomic_xchg is involved.
*/
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
#endif
......@@ -126,19 +126,11 @@
* to be saved again on kernel mode stack, as part of pt_regs.
*-------------------------------------------------------------*/
.macro PROLOG_FREEUP_REG reg, mem
#ifndef ARC_USE_SCRATCH_REG
sr \reg, [ARC_REG_SCRATCH_DATA0]
#else
st \reg, [\mem]
#endif
.endm
.macro PROLOG_RESTORE_REG reg, mem
#ifndef ARC_USE_SCRATCH_REG
lr \reg, [ARC_REG_SCRATCH_DATA0]
#else
ld \reg, [\mem]
#endif
.endm
/*--------------------------------------------------------------
......
......@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
/* Generic variants assume pgtable_t is struct page *, hence need for these */
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
*
* MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
* This file contains the TLB access registers and commands
*/
#ifndef _ASM_ARC_MMU_ARCV2_H
#define _ASM_ARC_MMU_ARCV2_H
/*
* TLB Management regs
*/
#define ARC_REG_MMU_BCR 0x06f
#ifdef CONFIG_ARC_MMU_V3
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
/* Bits in MMU PID reg */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Bits in TLB Index reg */
#define TLB_LKUP_ERR 0x80000000
#ifdef CONFIG_ARC_MMU_V3
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/*
* TLB Commands
*/
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#ifdef CONFIG_ARC_MMU_V4
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
/* Masks for actual TLB "PD"s */
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
#ifndef __ASSEMBLY__
struct mm_struct;
extern int pae40_exist_but_not_enab(void);
static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
{
write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
}
static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
{
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
#ifdef CONFIG_ISA_ARCV2
write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
#endif
}
#else
.macro ARC_MMU_REENABLE reg
lr \reg, [ARC_REG_PID]
or \reg, \reg, MMU_ENABLE
sr \reg, [ARC_REG_PID]
.endm
#endif /* !__ASSEMBLY__ */
#endif
......@@ -7,98 +7,15 @@
#define _ASM_ARC_MMU_H
#ifndef __ASSEMBLY__
#include <linux/threads.h> /* NR_CPUS */
#endif
#if defined(CONFIG_ARC_MMU_V1)
#define CONFIG_ARC_MMU_VER 1
#elif defined(CONFIG_ARC_MMU_V2)
#define CONFIG_ARC_MMU_VER 2
#elif defined(CONFIG_ARC_MMU_V3)
#define CONFIG_ARC_MMU_VER 3
#elif defined(CONFIG_ARC_MMU_V4)
#define CONFIG_ARC_MMU_VER 4
#endif
/* MMU Management regs */
#define ARC_REG_MMU_BCR 0x06f
#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
#define ARC_USE_SCRATCH_REG
#endif
/* Bits in MMU PID register */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Error code if probe fails */
#define TLB_LKUP_ERR 0x80000000
#if (CONFIG_ARC_MMU_VER < 4)
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/* TLB Commands */
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#if (CONFIG_ARC_MMU_VER >= 2)
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#else
#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
#endif
#if (CONFIG_ARC_MMU_VER >= 4)
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
#ifndef __ASSEMBLY__
#include <linux/threads.h> /* NR_CPUS */
typedef struct {
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
#else
#define tlb_paranoid_check(a, b)
#endif
void arc_mmu_init(void);
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
void read_decode_mmu_bcr(void);
static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
extern int pae40_exist_but_not_enab(void);
#endif /* !__ASSEMBLY__ */
#include <asm/mmu-arcv2.h>
#endif
......@@ -15,22 +15,23 @@
#ifndef _ASM_ARC_MMU_CONTEXT_H
#define _ASM_ARC_MMU_CONTEXT_H
#include <asm/arcregs.h>
#include <asm/tlb.h>
#include <linux/sched/mm.h>
#include <asm/tlb.h>
#include <asm-generic/mm_hooks.h>
/* ARC700 ASID Management
/* ARC ASID Management
*
* MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on
* context-switch.
*
* ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
* with same vaddr (different tasks) to co-exit. This provides for
* "Fast Context Switch" i.e. no TLB flush on ctxt-switch
* ASID is managed per cpu, so task threads across CPUs can have different
* ASID. Global ASID management is needed if hardware supports TLB shootdown
* and/or shared TLB across cores, which ARC doesn't.
*
* Linux assigns each task a unique ASID. A simple round-robin allocation
* of H/w ASID is done using software tracker @asid_cpu.
* When it reaches max 255, the allocation cycle starts afresh by flushing
* the entire TLB and wrapping ASID back to zero.
* Each task is assigned unique ASID, with a simple round-robin allocator
* tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started
* over from 0, and TLB is flushed
*
* A new allocation cycle, post rollover, could potentially reassign an ASID
* to a different task. Thus the rule is to refresh the ASID in a new cycle.
......@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
asid_mm(mm, cpu) = asid_cpu(cpu);
set_hw:
write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
mmu_setup_asid(mm, hw_pid(mm, cpu));
local_irq_restore(flags);
}
......@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
#ifdef ARC_USE_SCRATCH_REG
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
#endif
mmu_setup_pgd(next, next->pgd);
get_new_mmu_context(next);
}
......
......@@ -34,12 +34,35 @@ void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma);
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
#undef STRICT_MM_TYPECHECKS
typedef struct {
unsigned long pgd;
} pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) })
#if CONFIG_PGTABLE_LEVELS > 3
typedef struct {
unsigned long pud;
} pud_t;
#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) })
#endif
#if CONFIG_PGTABLE_LEVELS > 2
typedef struct {
unsigned long pmd;
} pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) })
#endif
#ifdef STRICT_MM_TYPECHECKS
/*
* These are used to make use of C type-checking..
*/
typedef struct {
#ifdef CONFIG_ARC_HAS_PAE40
unsigned long long pte;
......@@ -47,44 +70,19 @@ typedef struct {
unsigned long pte;
#endif
} pte_t;
typedef struct {
unsigned long pgd;
} pgd_t;
#define pte_val(x) ((x).pte)
#define __pte(x) ((pte_t) { (x) })
typedef struct {
unsigned long pgprot;
} pgprot_t;
#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) })
#define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
#define pte_pgprot(x) __pgprot(pte_val(x))
#else /* !STRICT_MM_TYPECHECKS */
#ifdef CONFIG_ARC_HAS_PAE40
typedef unsigned long long pte_t;
#else
typedef unsigned long pte_t;
#endif
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
#define pte_pgprot(x) (x)
#endif
typedef pte_t * pgtable_t;
typedef struct page *pgtable_t;
/*
* Use virt_to_pfn with caution:
......
......@@ -31,30 +31,32 @@
#include <linux/mm.h>
#include <linux/log2.h>
#include <asm-generic/pgalloc.h>
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
pmd_set(pmd, pte);
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
{
pmd_set(pmd, (pte_t *) ptep);
/*
* The cast to long below is OK in 32-bit PAE40 regime with long long pte
* Despite "wider" pte, the pte table needs to be in non-PAE low memory
* as all higher levels can only hold long pointers.
*
* The cast itself is needed given simplistic definition of set_pmd()
*/
set_pmd(pmd, __pmd((unsigned long)pte));
}
static inline int __get_order_pgd(void)
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
{
return get_order(PTRS_PER_PGD * sizeof(pgd_t));
set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
int num, num2;
pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
if (ret) {
int num, num2;
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
memzero(ret, num * sizeof(pgd_t));
......@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return ret;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
free_pages((unsigned long)pgd, __get_order_pgd());
}
#if CONFIG_PGTABLE_LEVELS > 3
/*
* With software-only page-tables, addr-split for traversal is tweakable and
* that directly governs how big tables would be at each level.
* Further, the MMU page size is configurable.
* Thus we need to programatically assert the size constraint
* All of this is const math, allowing gcc to do constant folding/propagation.
*/
static inline int __get_order_pte(void)
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
return get_order(PTRS_PER_PTE * sizeof(pte_t));
set_p4d(p4dp, __p4d((unsigned long)pudp));
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd)
pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
__get_order_pte());
#endif
return pte;
}
#if CONFIG_PGTABLE_LEVELS > 2
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
pgtable_t pte_pg;
struct page *page;
pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
if (!pte_pg)
return 0;
memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
page = virt_to_page(pte_pg);
if (!pgtable_pte_page_ctor(page)) {
__free_page(page);
return 0;
}
return pte_pg;
set_pud(pudp, __pud((unsigned long)pmdp));
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
}
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
{
pgtable_pte_page_dtor(virt_to_page(ptep));
free_pages((unsigned long)ptep, __get_order_pte());
}
#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
/*
* page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
* There correspond to the corresponding bits in the TLB
*/
#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
#ifdef CONFIG_ARC_CACHE_PAGES
#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
#else
#define _PAGE_CACHEABLE 0
#endif
#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
#define _PAGE_WRITE (1 << 2) /* User Write (H) */
#define _PAGE_READ (1 << 3) /* User Read (H) */
#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
#define _PAGE_SPECIAL (1 << 6)
#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
#ifdef CONFIG_ARC_MMU_V4
#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
#else
#define _PAGE_HW_SZ 0
#endif
/* Defaults for every user page */
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SPECIAL)
/* More Abbrevaited helpers */
#define PAGE_U_NONE __pgprot(___DEF)
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
#define PAGE_U_X_W_R __pgprot(___DEF \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_SHARED PAGE_U_W_R
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
/*
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
*
* Certain cases have 1:1 mapping
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
* which directly corresponds to PAGE_U_X_R
*
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
* can be tracked independet of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
*
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
* This is to enable COW mechanism
*/
/* xwr */
#define __P000 PAGE_U_NONE
#define __P001 PAGE_U_R
#define __P010 PAGE_U_R /* Pvt-W => !W */
#define __P011 PAGE_U_R /* Pvt-W => !W */
#define __P100 PAGE_U_X_R /* X => R */
#define __P101 PAGE_U_X_R
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
#define __S000 PAGE_U_NONE
#define __S001 PAGE_U_R
#define __S010 PAGE_U_W_R /* W => R */
#define __S011 PAGE_U_W_R
#define __S100 PAGE_U_X_R /* X => R */
#define __S101 PAGE_U_X_R
#define __S110 PAGE_U_X_W_R /* X => R */
#define __S111 PAGE_U_X_W_R
#ifndef __ASSEMBLY__
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
#define PTE_BIT_FUNC(fn, op) \
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);
/* Encode swap {type,off} tuple into PTE
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
*/
#define __swp_entry(type, off) ((swp_entry_t) \
{ ((type) & 0x1f) | ((off) << 13) })
/* Decode a PTE containing swap "identifier "into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define kern_addr_valid(addr) (1)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <asm/hugepage.h>
#endif
#endif /* __ASSEMBLY__ */
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
*/
/*
* Helpers for implemenintg paging levels
*/
#ifndef _ASM_ARC_PGTABLE_LEVELS_H
#define _ASM_ARC_PGTABLE_LEVELS_H
#if CONFIG_PGTABLE_LEVELS == 2
/*
* 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
*
* [31] 32 bit virtual address [0]
* -------------------------------------------------------
* | | <---------- PGDIR_SHIFT ----------> |
* | | | <-- PAGE_SHIFT --> |
* -------------------------------------------------------
* | | |
* | | --> off in page frame
* | ---> index into Page Table
* ----> index into Page Directory
*
* Given software walk, the vaddr split is arbitrary set to 11:8:13
* However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to
* super page size.
*/
#if defined(CONFIG_ARC_HUGEPAGE_16M)
#define PGDIR_SHIFT 24
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
#define PGDIR_SHIFT 21
#else
/*
* No Super page case
* Default value provides 11:8:13 (8K), 10:10:12 (4K)
* Limits imposed by pgtable_t only PAGE_SIZE long
* (so 4K page can only have 1K entries: or 10 bits)
*/
#ifdef CONFIG_ARC_PAGE_SIZE_4K
#define PGDIR_SHIFT 22
#else
#define PGDIR_SHIFT 21
#endif
#endif
#else /* CONFIG_PGTABLE_LEVELS != 2 */
/*
* A default 3 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <7> : <8> : <13>
* A default 4 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
*/
#define PGDIR_SHIFT 28
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT 25
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT 21
#endif
#endif /* CONFIG_PGTABLE_LEVELS */
#define PGDIR_SIZE BIT(PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SIZE BIT(PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE - 1))
#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SIZE BIT(PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
#endif
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
#ifndef __ASSEMBLY__
#if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/pgtable-nop4d.h>
#elif CONFIG_PGTABLE_LEVELS > 2
#include <asm-generic/pgtable-nopud.h>
#else
#include <asm-generic/pgtable-nopmd.h>
#endif
/*
* 1st level paging: pgd
*/
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr))
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_ERROR(e) \
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
#if CONFIG_PGTABLE_LEVELS > 3
/* In 4 level paging, p4d_* macros work on pgd */
#define p4d_none(x) (!p4d_val(x))
#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
#define p4d_present(x) (p4d_val(x))
#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
/*
* 2nd level paging: pud
*/
#define pud_ERROR(e) \
pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#endif
#if CONFIG_PGTABLE_LEVELS > 2
/*
* In 3 level paging, pud_* macros work on pgd
* In 4 level paging, pud_* macros work on pud
*/
#define pud_none(x) (!pud_val(x))
#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
#define pud_present(x) (pud_val(x))
#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
#define pud_page(pud) virt_to_page(pud_pgtable(pud))
#define set_pud(pudp, pud) (*(pudp) = pud)
/*
* 3rd level paging: pmd
*/
#define pmd_ERROR(e) \
pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#endif
/*
* Due to the strange way generic pgtable level folding works, the pmd_* macros
* - are valid even for 2 levels (which supposedly only has pgd - pte)
* - behave differently for 2 vs. 3
* In 2 level paging (pgd -> pte), pmd_* macros work on pgd
* In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
*/
#define pmd_none(x) (!pmd_val(x))
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
/*
* 4th level paging: pte
*/
#define pte_ERROR(e) \
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pte_none(x) (!pte_val(x))
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
#define set_pte(ptep, pte) ((*(ptep)) = (pte))
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#ifdef CONFIG_ISA_ARCV2
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#endif
#endif /* !__ASSEMBLY__ */
#endif
This diff is collapsed.
......@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
......
......@@ -2,8 +2,8 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#ifndef __ASMARC_SETUP_H
#define __ASMARC_SETUP_H
#ifndef __ASM_ARC_SETUP_H
#define __ASM_ARC_SETUP_H
#include <linux/types.h>
......@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
extern void arc_mmu_init(void);
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_mmu_bcr(void);
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
#endif /* __ASMARC_SETUP_H */
......@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
#include <asm/spinlock.h>
extern arch_spinlock_t smp_atomic_ops_lock;
extern arch_spinlock_t smp_bitops_lock;
#define atomic_ops_lock(flags) do { \
local_irq_save(flags); \
......@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
local_irq_restore(flags); \
} while (0)
#define bitops_lock(flags) do { \
local_irq_save(flags); \
arch_spin_lock(&smp_bitops_lock); \
} while (0)
#define bitops_unlock(flags) do { \
arch_spin_unlock(&smp_bitops_lock); \
local_irq_restore(flags); \
} while (0)
#else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags)
#define bitops_lock(flags) local_irq_save(flags)
#define bitops_unlock(flags) local_irq_restore(flags)
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_ARC_HAS_LLSC */
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#ifndef __ASM_TLB_MMU_V1_H__
#define __ASM_TLB_MMU_V1_H__
#include <asm/mmu.h>
#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
.macro TLB_WRITE_HEURISTICS
#define JH_HACK1
#undef JH_HACK2
#undef JH_HACK3
#ifdef JH_HACK3
; Calculate set index for 2-way MMU
; -avoiding use of GetIndex from MMU
; and its unpleasant LFSR pseudo-random sequence
;
; r1 = TLBPD0 from TLB_RELOAD above
;
; -- jh_ex_way_set not cleared on startup
; didn't want to change setup.c
; hence extra instruction to clean
;
; -- should be in cache since in same line
; as r0/r1 saves above
;
ld r0,[jh_ex_way_sel] ; victim pointer
and r0,r0,1 ; clean
xor.f r0,r0,1 ; flip
st r0,[jh_ex_way_sel] ; store back
asr r0,r1,12 ; get set # <<1, note bit 12=R=0
or.nz r0,r0,1 ; set way bit
and r0,r0,0xff ; clean
sr r0,[ARC_REG_TLBINDEX]
#endif
#ifdef JH_HACK2
; JH hack #2
; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
; Slower in thrash case (where it matters) because more code is executed
; Inefficient due to two-register paradigm of this miss handler
;
/* r1 = data TLBPD0 at this point */
lr r0,[eret] /* instruction address */
xor r0,r0,r1 /* compare set # */
and.f r0,r0,0x000fe000 /* 2-way MMU mask */
bne 88f /* not in same set - no need to probe */
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
and r1,r1,0xff /* Data ASID */
or r0,r0,r1 /* Instruction address + Data ASID */
lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
xor r0,r0,1 /* flip bottom bit of data index */
b.d 89f
sr r0,[ARC_REG_TLBINDEX] /* and put it back */
88:
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
89:
#endif
#ifdef JH_HACK1
;
; Always checks whether instruction will be kicked out by dtlb miss
;
mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
bmsk r1,r3,7 /* Data ASID, bits 7-0 */
or_s r0,r0,r1 /* Instruction address + Data ASID */
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
cmp r0,r1 /* if no match on indices, go around */
xor.eq r1,r1,1 /* flip bottom bit of data index */
sr r1,[ARC_REG_TLBINDEX] /* and put it back */
#endif
.endm
#endif
#endif
......@@ -10,6 +10,7 @@
#include <asm/errno.h>
#include <asm/arcregs.h>
#include <asm/irqflags.h>
#include <asm/mmu.h>
; A maximum number of supported interrupts in the core interrupt controller.
; This number is not equal to the maximum interrupt number (256) because
......
......@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
lr r0, [efa]
mov r1, sp
; hardware auto-disables MMU, re-enable it to allow kernel vaddr
; access for say stack unwinding of modules for crash dumps
lr r3, [ARC_REG_PID]
or r3, r3, MMU_ENABLE
sr r3, [ARC_REG_PID]
; MC excpetions disable MMU
ARC_MMU_REENABLE r3
lsr r3, r2, 8
bmsk r3, r3, 7
......
......@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
* Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
* Here local_irq_enable( ) shd not re-enable lower priority interrupts
* -If called from soft-ISR, it must re-enable all interrupts
* soft ISR are low prioity jobs which can be very slow, thus all IRQs
* soft ISR are low priority jobs which can be very slow, thus all IRQs
* must be enabled while they run.
* Now hardware context wise we may still be in L2 ISR (not done rtie)
* still we must re-enable both L1 and L2 IRQs
......
......@@ -29,10 +29,8 @@
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
EXPORT_SYMBOL_GPL(smp_bitops_lock);
#endif
struct plat_smp_ops __weak plat_smp_ops;
......@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
/*
* Call the platform specific IPI kick function, but avoid if possible:
* Only do so if there's no pending msg from other concurrent sender(s).
* Otherwise, recevier will see this msg as well when it takes the
* Otherwise, receiver will see this msg as well when it takes the
* IPI corresponding to that msg. This is true, even if it is already in
* IPI handler, because !@old means it has not yet dequeued the msg(s)
* so @new msg can be a free-loader
......
......@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
#else
/* On ARC, only Dward based unwinder works. fp based backtracing is
* not possible (-fno-omit-frame-pointer) because of the way function
* prelogue is setup (callee regs saved and then fp set and not other
* prologue is setup (callee regs saved and then fp set and not other
* way around
*/
pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
......
......@@ -205,92 +205,23 @@ void read_decode_cache_bcr(void)
#define OP_INV_IC 0x4
/*
* I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
* Cache Flush programming model
*
* ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
* The orig Cache Management Module "CDU" only required paddr to invalidate a
* certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
* Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
* the exact same line.
* ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
* Programming model requires both paddr and vaddr irrespecive of aliasing
* considerations:
* - vaddr in {I,D}C_IV?L
* - paddr in {I,D}C_PTAG
*
* However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
* paddr alone could not be used to correctly index the cache.
* In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
* Programming model is different for aliasing vs. non-aliasing I$
* - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
* - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
*
* ------------------
* MMU v1/v2 (Fixed Page Size 8k)
* ------------------
* The solution was to provide CDU with these additonal vaddr bits. These
* would be bits [x:13], x would depend on cache-geometry, 13 comes from
* standard page size of 8k.
* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
* represent the offset within cache-line. The adv of using this "clumsy"
* interface for additional info was no new reg was needed in CDU programming
* model.
*
* 17:13 represented the max num of bits passable, actual bits needed were
* fewer, based on the num-of-aliases possible.
* -for 2 alias possibility, only bit 13 needed (32K cache)
* -for 4 alias possibility, bits 14:13 needed (64K cache)
*
* ------------------
* MMU v3
* ------------------
* This ver of MMU supports variable page sizes (1k-16k): although Linux will
* only support 8k (default), 16k and 4k.
* However from hardware perspective, smaller page sizes aggravate aliasing
* meaning more vaddr bits needed to disambiguate the cache-line-op ;
* the existing scheme of piggybacking won't work for certain configurations.
* Two new registers IC_PTAG and DC_PTAG inttoduced.
* "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
*/
static inline
void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page)
{
unsigned int aux_cmd;
int num_lines;
if (op == OP_INV_IC) {
aux_cmd = ARC_REG_IC_IVIL;
} else {
/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
}
/* Ensure we properly floor/ceil the non-line aligned/sized requests
* and have @paddr - aligned to cache line and integral @num_lines.
* This however can be avoided for page sized since:
* -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
* - If PAE40 is enabled, independent of aliasing considerations, the higher
* bits needs to be written into PTAG_HI
*/
if (!full_page) {
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
vaddr &= CACHE_LINE_MASK;
}
num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
/* MMUv2 and before: paddr contains stuffed vaddrs bits */
paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
while (num_lines-- > 0) {
write_aux_reg(aux_cmd, paddr);
paddr += L1_CACHE_BYTES;
}
}
/*
* For ARC700 MMUv3 I-cache and D-cache flushes
* - ARC700 programming model requires paddr and vaddr be passed in seperate
* AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
* caches actually alias or not.
* - For HS38, only the aliasing I-cache configuration uses the PTAG reg
* (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
*/
static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page)
......@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
#ifndef USE_RGN_FLSH
/*
* In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
* Here's how cache ops are implemented
*
* - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
* - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
* - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
* respectively, similar to MMU v3 programming model, hence
* __cache_line_loop_v3() is used)
*
* If PAE40 is enabled, independent of aliasing considerations, the higher bits
* needs to be written into PTAG_HI
*/
static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
......@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
#endif
#if (CONFIG_ARC_MMU_VER < 3)
#define __cache_line_loop __cache_line_loop_v2
#elif (CONFIG_ARC_MMU_VER == 3)
#ifdef CONFIG_ARC_MMU_V3
#define __cache_line_loop __cache_line_loop_v3
#elif (CONFIG_ARC_MMU_VER > 3)
#else
#define __cache_line_loop __cache_line_loop_v4
#endif
......@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
clear_page(to);
clear_bit(PG_dc_clean, &page->flags);
}
EXPORT_SYMBOL(clear_user_page);
/**********************************************************************
* Explicit Cache flush request from user space via syscall
......
......@@ -33,27 +33,33 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pgd = pgd_offset_fast(current->active_mm, address);
pgd = pgd_offset(current->active_mm, address);
pgd_k = pgd_offset_k(address);
if (!pgd_present(*pgd_k))
if (pgd_none (*pgd_k))
goto bad_area;
if (!pgd_present(*pgd))
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, address);
p4d_k = p4d_offset(pgd_k, address);
if (!p4d_present(*p4d_k))
if (p4d_none(*p4d_k))
goto bad_area;
if (!p4d_present(*p4d))
set_p4d(p4d, *p4d_k);
pud = pud_offset(p4d, address);
pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
if (pud_none(*pud_k))
goto bad_area;
if (!pud_present(*pud))
set_pud(pud, *pud_k);
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
if (pmd_none(*pmd_k))
goto bad_area;
if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k);
/* XXX: create the TLB entry here */
......
......@@ -189,6 +189,11 @@ void __init mem_init(void)
{
memblock_free_all();
highmem_init();
BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
}
#ifdef CONFIG_HIGHMEM
......
......@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
if (arc_uncached_addr_space(paddr))
return (void __iomem *)(u32)paddr;
return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
return ioremap_prot(paddr, size,
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}
EXPORT_SYMBOL(ioremap);
......
This diff is collapsed.
......@@ -39,7 +39,6 @@
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>
#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
......@@ -94,11 +93,6 @@ ex_saved_reg1:
st_s r1, [r0, 4]
st_s r2, [r0, 8]
st_s r3, [r0, 12]
; VERIFY if the ASID in MMU-PID Reg is same as
; one in Linux data structures
tlb_paranoid_check_asm
.endm
.macro TLBMISS_RESTORE_REGS
......@@ -148,53 +142,16 @@ ex_saved_reg1:
#endif
;============================================================================
; Troubleshooting Stuff
;TLB Miss handling Code
;============================================================================
; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
; we use the MMU PID Reg to get current ASID.
; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
; So we try to detect this in TLB Mis shandler
.macro tlb_paranoid_check_asm
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
GET_CURR_TASK_ON_CPU r3
ld r0, [r3, TASK_ACT_MM]
ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
breq r0, 0, 55f ; Error if no ASID allocated
lr r1, [ARC_REG_PID]
and r1, r1, 0xFF
and r2, r0, 0xFF ; MMU PID bits only for comparison
breq r1, r2, 5f
55:
; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
lr r2, [erstatus]
bbit0 r2, STATUS_U_BIT, 5f
; We sure are in troubled waters, Flag the error, but to do so
; need to switch to kernel mode stack to call error routine
GET_TSK_STACK_BASE r3, sp
; Call printk to shoutout aloud
mov r2, 1
j print_asid_mismatch
5: ; ASIDs match so proceed normally
nop
#ifndef PMD_SHIFT
#define PMD_SHIFT PUD_SHIFT
#endif
.endm
;============================================================================
;TLB Miss handling Code
;============================================================================
#ifndef PUD_SHIFT
#define PUD_SHIFT PGDIR_SHIFT
#endif
;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address.
......@@ -203,7 +160,7 @@ ex_saved_reg1:
lr r2, [efa]
#ifdef ARC_USE_SCRATCH_REG
#ifdef CONFIG_ISA_ARCV2
lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
#else
GET_CURR_TASK_ON_CPU r1
......@@ -216,6 +173,24 @@ ex_saved_reg1:
tst r3, r3
bz do_slow_path_pf ; if no Page Table, do page fault
#if CONFIG_PGTABLE_LEVELS > 3
lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD
and r0, r0, (PTRS_PER_PUD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#if CONFIG_PGTABLE_LEVELS > 2
lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD
and r0, r0, (PTRS_PER_PMD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
add2.nz r1, r1, r0
......@@ -279,7 +254,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER < 4)
#ifdef CONFIG_ARC_MMU_V3
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
......@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
CONV_PTE_TO_TLB
#if (CONFIG_ARC_MMU_VER == 1)
; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
; But only for old MMU or one with Metal Fix
TLB_WRITE_HEURISTICS
#endif
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment