RISC-V: User-Visible Changes

This merge contains the user-visible, ABI-breaking changes that we want to make sure we have in Linux before our first release. Highlights include: * VDSO entries for clock_get/gettimeofday/getcpu have been added. These are simple syscalls now, but we want to let glibc use them from the start so we can make them faster later. * A VDSO entry for instruction cache flushing has been added so userspace can flush the instruction cache. * The VDSO symbol versions for __vdso_cmpxchg{32,64} have been removed, as those VDSO entries don't actually exist. Conflicts: arch/riscv/include/asm/tlbflush.h

RISC-V: User-Visible Changes
This merge contains the user-visible, ABI-breaking changes that we want to make sure we have in Linux before our first release. Highlights include: * VDSO entries for clock_get/gettimeofday/getcpu have been added. These are simple syscalls now, but we want to let glibc use them from the start so we can make them faster later. * A VDSO entry for instruction cache flushing has been added so userspace can flush the instruction cache. * The VDSO symbol versions for __vdso_cmpxchg{32,64} have been removed, as those VDSO entries don't actually exist. Conflicts: arch/riscv/include/asm/tlbflush.h
07f8ba74 · Palmer Dabbelt · f8182f61 · 0e710ac6 · 07f8ba74 · 07f8ba74
Commit 07f8ba74 authored Dec 01, 2017 by Palmer Dabbelt
19 changed files
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -18,22 +18,44 @@

 #undef flush_icache_range
 #undef flush_icache_user_range
+#undef flush_dcache_page

 static inline void local_flush_icache_all(void)
 {
 	asm volatile ("fence.i" ::: "memory");
 }

+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
 #ifndef CONFIG_SMP

-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()

 #else /* CONFIG_SMP */

-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);

 #endif /* CONFIG_SMP */

+/*
+ * Bits in sys_riscv_flush_icache()'s flags argument.
+ */
+#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
+#define SYS_RISCV_FLUSH_ICACHE_ALL   (SYS_RISCV_FLUSH_ICACHE_LOCAL)
+
 #endif /* _ASM_RISCV_CACHEFLUSH_H */
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,10 @@

 typedef struct {
 	void *vdso;
+#ifdef CONFIG_SMP
+	/* A local icache flush is needed before user execution can resume. */
+	cpumask_t icache_stale_mask;
+#endif
 } mm_context_t;

 #endif /* __ASSEMBLY__ */

--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
 /*
 * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
@@ -19,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>

 static inline void enter_lazy_tlb(struct mm_struct *mm,
 	struct task_struct *task)
@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd)
 	csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
 }

+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU.  RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches.  To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart.  This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+	unsigned int cpu = smp_processor_id();
+	cpumask_t *mask = &mm->context.icache_stale_mask;
+
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
+		/*
+		 * Ensure the remote hart's writes are visible to this hart.
+		 * This pairs with a barrier in flush_icache_mm.
+		 */
+		smp_mb();
+		local_flush_icache_all();
+	}
+#endif
+}
+
 static inline void switch_mm(struct mm_struct *prev,
 	struct mm_struct *next, struct task_struct *task)
 {
 	if (likely(prev != next)) {
+		/*
+		 * Mark the current MM context as inactive, and the next as
+		 * active.  This is at least used by the icache flushing
+		 * routines in order to determine who should
+		 */
+		unsigned int cpu = smp_processor_id();
+
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+
 		set_pgdir(next->pgd);
 		local_flush_tlb_all();
+
+		flush_icache_deferred(next);
 	}
 }


--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
 #define pte_offset_map(dir, addr)	pte_offset_kernel((dir), (addr))
 #define pte_unmap(pte)			((void)(pte))

-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified.  Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
-	*ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep, pte_t pteval)
-{
-	set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
-	unsigned long addr, pte_t *ptep)
-{
-	set_pte_at(mm, addr, ptep, __pte(0));
-}
-
 static inline int pte_present(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
 	return (pte_val(pte) == 0);
 }

-/* static inline int pte_read(pte_t pte) */
-
 static inline int pte_write(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_WRITE;
 }

+static inline int pte_exec(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_EXEC;
+}
+
 static inline int pte_huge(pte_t pte)
 {
 	return pte_present(pte)
 		&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
 }

-/* static inline int pte_exec(pte_t pte) */
-
 static inline int pte_dirty(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 	return pte_val(pte_a) == pte_val(pte_b);
 }

+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified.  Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	*ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+	if (pte_present(pteval) && pte_exec(pteval))
+		flush_icache_pte(pteval);
+
+	set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+	unsigned long addr, pte_t *ptep)
+{
+	set_pte_at(mm, addr, ptep, __pte(0));
+}
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 					unsigned long address, pte_t *ptep,

--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -17,6 +17,8 @@

 #ifdef CONFIG_MMU

+#include <linux/mm_types.h>
+
 /*
 * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
 * cache as well, so a 'fence.i' is not necessary.

--- a/arch/riscv/include/asm/vdso-syscalls.h
+++ b/arch/riscv/include/asm/vdso-syscalls.h
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
+#define _ASM_RISCV_VDSO_SYSCALLS_H
+
+#ifdef CONFIG_SMP
+
+/* These syscalls are only used by the vDSO and are not in the uapi. */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
+
+#endif /* _ASM_RISCV_VDSO_H */
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -38,4 +38,8 @@ struct vdso_data {
 	(void __user *)((unsigned long)(base) + __vdso_##name);			\
 })

+#ifdef CONFIG_SMP
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif
+
 #endif /* _ASM_RISCV_VDSO_H */
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu)
 {
 	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }
+
+/*
+ * Performs an icache flush for the given MM context.  RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+	unsigned int cpu;
+	cpumask_t others, *mask;
+
+	preempt_disable();
+
+	/* Mark every hart's icache as needing a flush for this MM. */
+	mask = &mm->context.icache_stale_mask;
+	cpumask_setall(mask);
+	/* Flush this hart's I$ now, and mark it as flushed. */
+	cpu = smp_processor_id();
+	cpumask_clear_cpu(cpu, mask);
+	local_flush_icache_all();
+
+	/*
+	 * Flush the I$ of other harts concurrently executing, and mark them as
+	 * flushed.
+	 */
+	cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+	local |= cpumask_empty(&others);
+	if (mm != current->active_mm || !local)
+		sbi_remote_fence_i(others.bits);
+	else {
+		/*
+		 * It's assumed that at least one strongly ordered operation is
+		 * performed on this hart between setting a hart's cpumask bit
+		 * and scheduling this MM context on that hart.  Sending an SBI
+		 * remote message will do this, but in the case where no
+		 * messages are sent we still need to order this hart's writes
+		 * with flush_icache_deferred().
+		 */
+		smp_mb();
+	}
+
+	preempt_enable();
+}
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -14,8 +14,8 @@
 */

 #include <linux/syscalls.h>
-#include <asm/cmpxchg.h>
 #include <asm/unistd.h>
+#include <asm/cacheflush.h>

 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
 			   unsigned long prot, unsigned long flags,
@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
 	return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
 }
 #endif /* !CONFIG_64BIT */
+
+#ifdef CONFIG_SMP
+/*
+ * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart.  There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * sys_riscv_flush_icache() is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller.  We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
+	uintptr_t, flags)
+{
+	struct mm_struct *mm = current->mm;
+	bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
+
+	/* Check the reserved flags. */
+	if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+		return -EINVAL;
+
+	flush_icache_mm(mm, local);
+
+	return 0;
+}
+#endif
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -15,6 +15,7 @@
 #include <linux/linkage.h>
 #include <linux/syscalls.h>
 #include <asm-generic/syscalls.h>
+#include <asm/vdso.h>

 #undef __SYSCALL
 #define __SYSCALL(nr, call)	[nr] = (call),
@@ -22,4 +23,5 @@
 void *sys_call_table[__NR_syscalls] = {
 	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
+#include <asm/vdso-syscalls.h>
 };
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
 # Copied from arch/tile/kernel/vdso/Makefile

 # Symbols present in the vdso
-vdso-syms = rt_sigreturn
+vdso-syms  = rt_sigreturn
+vdso-syms += gettimeofday
+vdso-syms += clock_gettime
+vdso-syms += clock_getres
+vdso-syms += getcpu
+vdso-syms += flush_icache

 # Files to link into the vdso
 obj-vdso = $(patsubst %, %.o, $(vdso-syms))

--- a/arch/riscv/kernel/vdso/clock_getres.S
+++ b/arch/riscv/kernel/vdso/clock_getres.S
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+	.text
+/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */
+ENTRY(__vdso_clock_getres)
+	.cfi_startproc
+	/* For now, just do the syscall. */
+	li a7, __NR_clock_getres
+	ecall
+	ret
+	.cfi_endproc
+ENDPROC(__vdso_clock_getres)
--- a/arch/riscv/kernel/vdso/clock_gettime.S
+++ b/arch/riscv/kernel/vdso/clock_gettime.S
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+	.text
+/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */
+ENTRY(__vdso_clock_gettime)
+	.cfi_startproc
+	/* For now, just do the syscall. */
+	li a7, __NR_clock_gettime
+	ecall
+	ret
+	.cfi_endproc
+ENDPROC(__vdso_clock_gettime)
--- a/arch/riscv/kernel/vdso/flush_icache.S
+++ b/arch/riscv/kernel/vdso/flush_icache.S
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+#include <asm/vdso-syscalls.h>
+
+	.text
+/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
+ENTRY(__vdso_flush_icache)
+	.cfi_startproc
+#ifdef CONFIG_SMP
+	li a7, __NR_riscv_flush_icache
+	ecall
+#else
+	fence.i
+	li a0, 0
+#endif
+	ret
+	.cfi_endproc
+ENDPROC(__vdso_flush_icache)
--- a/arch/riscv/kernel/vdso/getcpu.S
+++ b/arch/riscv/kernel/vdso/getcpu.S
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+	.text
+/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
+ENTRY(__vdso_getcpu)
+	.cfi_startproc
+	/* For now, just do the syscall. */
+	li a7, __NR_getcpu
+	ecall
+	ret
+	.cfi_endproc
+ENDPROC(__vdso_getcpu)
--- a/arch/riscv/kernel/vdso/gettimeofday.S
+++ b/arch/riscv/kernel/vdso/gettimeofday.S
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+	.text
+/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */
+ENTRY(__vdso_gettimeofday)
+	.cfi_startproc
+	/* For now, just do the syscall. */
+	li a7, __NR_gettimeofday
+	ecall
+	ret
+	.cfi_endproc
+ENDPROC(__vdso_gettimeofday)
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -70,8 +70,11 @@ VERSION
 	LINUX_4.15 {
 	global:
 		__vdso_rt_sigreturn;
-		__vdso_cmpxchg32;
-		__vdso_cmpxchg64;
+		__vdso_gettimeofday;
+		__vdso_clock_gettime;
+		__vdso_clock_getres;
+		__vdso_getcpu;
+		__vdso_flush_icache;
 	local: *;
 	};
 }
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,3 +2,4 @@ obj-y += init.o
 obj-y += fault.o
 obj-y += extable.o
 obj-y += ioremap.o
+obj-y += cacheflush.o
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+
+void flush_icache_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		flush_icache_all();
+}