Commit f56e65df authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'work.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull initial set_fs() removal from Al Viro:
 "Christoph's set_fs base series + fixups"

* 'work.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  fs: Allow a NULL pos pointer to __kernel_read
  fs: Allow a NULL pos pointer to __kernel_write
  powerpc: remove address space overrides using set_fs()
  powerpc: use non-set_fs based maccess routines
  x86: remove address space overrides using set_fs()
  x86: make TASK_SIZE_MAX usable from assembly code
  x86: move PAGE_OFFSET, TASK_SIZE & friends to page_{32,64}_types.h
  lkdtm: remove set_fs-based tests
  test_bitmap: remove user bitmap tests
  uaccess: add infrastructure for kernel builds with set_fs()
  fs: don't allow splice read/write without explicit ops
  fs: don't allow kernel reads and writes without iter ops
  sysctl: Convert to iter interfaces
  proc: add a read_iter method to proc proc_ops
  proc: cleanup the compat vs no compat file ops
  proc: remove a level of indentation in proc_get_inode
parents 24717cfb 7b84b665
......@@ -24,6 +24,9 @@ config KEXEC_ELF
config HAVE_IMA_KEXEC
bool
config SET_FS
bool
config HOTPLUG_SMT
bool
......
......@@ -39,6 +39,7 @@ config ALPHA
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
select MMU_GATHER_NO_RANGE
select SET_FS
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
......
......@@ -48,6 +48,7 @@ config ARC
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select SET_FS
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
......
......@@ -120,6 +120,7 @@ config ARM
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC
select RTC_LIB
select SET_FS
select SYS_SUPPORTS_APM_EMULATION
# Above selects are sorted alphabetically; please add new ones
# according to that. Thanks.
......
......@@ -194,6 +194,7 @@ config ARM64
select PCI_SYSCALL if PCI
select POWER_RESET
select POWER_SUPPLY
select SET_FS
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
......
......@@ -22,6 +22,7 @@ config C6X
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config MMU
def_bool n
......
......@@ -78,6 +78,7 @@ config CSKY
select PCI_DOMAINS_GENERIC if PCI
select PCI_SYSCALL if PCI
select PCI_MSI if PCI
select SET_FS
config LOCKDEP_SUPPORT
def_bool y
......
......@@ -25,6 +25,7 @@ config H8300
select HAVE_ARCH_KGDB
select HAVE_ARCH_HASH
select CPU_NO_EFFICIENT_FFS
select SET_FS
select UACCESS_MEMCPY
config CPU_BIG_ENDIAN
......
......@@ -31,6 +31,7 @@ config HEXAGON
select GENERIC_CLOCKEVENTS_BROADCAST
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
select SET_FS
help
Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications.
......
......@@ -56,6 +56,7 @@ config IA64
select NEED_SG_DMA_LENGTH
select NUMA if !FLATMEM
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select SET_FS
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
......
......@@ -31,6 +31,7 @@ config M68K
select NO_DMA if !MMU && !COLDFIRE
select OLD_SIGACTION
select OLD_SIGSUSPEND3
select SET_FS
select UACCESS_MEMCPY if !MMU
select VIRT_TO_BUS
......
......@@ -47,6 +47,7 @@ config MICROBLAZE
select CPU_NO_EFFICIENT_FFS
select MMU_GATHER_NO_RANGE if MMU
select SPARSE_IRQ
select SET_FS
# Endianness selection
choice
......
......@@ -88,6 +88,7 @@ config MIPS
select PERF_USE_VMALLOC
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select RTC_LIB
select SET_FS
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
......
......@@ -48,6 +48,7 @@ config NDS32
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select SET_FS
help
Andes(nds32) Linux support.
......
......@@ -27,6 +27,7 @@ config NIOS2
select USB_ARCH_HAS_HCD if USB_SUPPORT
select CPU_NO_EFFICIENT_FFS
select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config GENERIC_CSUM
def_bool y
......
......@@ -39,6 +39,7 @@ config OPENRISC
select ARCH_WANT_FRAME_POINTERS
select GENERIC_IRQ_MULTI_HANDLER
select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config CPU_BIG_ENDIAN
def_bool y
......
......@@ -63,6 +63,7 @@ config PARISC
select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
select HAVE_KPROBES_ON_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select SET_FS
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
......
......@@ -83,10 +83,6 @@ struct task_struct;
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
void release_thread(struct task_struct *);
typedef struct {
unsigned long seg;
} mm_segment_t;
#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
......@@ -148,7 +144,6 @@ struct thread_struct {
unsigned long ksp_vsid;
#endif
struct pt_regs *regs; /* Pointer to saved register state */
mm_segment_t addr_limit; /* for get_fs() validation */
#ifdef CONFIG_BOOKE
/* BookE base exception scratch space; align on cacheline */
unsigned long normsave[8] ____cacheline_aligned;
......@@ -296,7 +291,6 @@ struct thread_struct {
#define INIT_THREAD { \
.ksp = INIT_SP, \
.ksp_limit = INIT_SP_LIMIT, \
.addr_limit = KERNEL_DS, \
.pgdir = swapper_pg_dir, \
.fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \
......@@ -304,7 +298,6 @@ struct thread_struct {
#else
#define INIT_THREAD { \
.ksp = INIT_SP, \
.addr_limit = KERNEL_DS, \
.fpexc_mode = 0, \
}
#endif
......
......@@ -90,7 +90,6 @@ void arch_setup_new_exec(void);
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */
#define TIF_SYSCALL_EMU 4 /* syscall emulation active */
#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
#define TIF_PATCH_PENDING 6 /* pending live patching update */
......@@ -130,7 +129,6 @@ void arch_setup_new_exec(void);
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_FSCHECK (1<<TIF_FSCHECK)
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
......@@ -138,8 +136,7 @@ void arch_setup_new_exec(void);
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
_TIF_FSCHECK)
_TIF_RESTORE_TM | _TIF_PATCH_PENDING)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
/* Bits in local_flags */
......
......@@ -8,62 +8,21 @@
#include <asm/extable.h>
#include <asm/kup.h>
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
* get_fs() == KERNEL_DS, checking is bypassed.
*
* For historical reasons, these macros are grossly misnamed.
*
* The fs/ds values are now the highest legal address in the "segment".
* This simplifies the checking in the routines below.
*/
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(~0UL)
#ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
#define USER_DS MAKE_MM_SEG(TASK_SIZE_USER64 - 1)
#define TASK_SIZE_MAX TASK_SIZE_USER64
#else
#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1)
#define TASK_SIZE_MAX TASK_SIZE
#endif
#define get_fs() (current->thread.addr_limit)
static inline void set_fs(mm_segment_t fs)
static inline bool __access_ok(unsigned long addr, unsigned long size)
{
current->thread.addr_limit = fs;
/* On user-mode return check addr_limit (fs) is correct */
set_thread_flag(TIF_FSCHECK);
return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr;
}
#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (get_fs().seg)
#ifdef __powerpc64__
/*
* This check is sufficient because there is a large enough
* gap between user addresses and the kernel addresses
*/
#define __access_ok(addr, size, segment) \
(((addr) <= (segment).seg) && ((size) <= (segment).seg))
#else
static inline int __access_ok(unsigned long addr, unsigned long size,
mm_segment_t seg)
{
if (addr > seg.seg)
return 0;
return (size == 0 || size - 1 <= seg.seg - addr);
}
#endif
#define access_ok(addr, size) \
(__chk_user_ptr(addr), \
__access_ok((__force unsigned long)(addr), (size), get_fs()))
__access_ok((unsigned long)(addr), (size)))
/*
* These are the main single-value transfer routines. They automatically
......@@ -604,4 +563,20 @@ do { \
__put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\
} while (0)
#define HAVE_GET_KERNEL_NOFAULT
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
int __kr_err; \
\
__get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\
sizeof(type), __kr_err); \
if (unlikely(__kr_err)) \
goto err_label; \
} while (0)
#define __put_kernel_nofault(dst, src, type, err_label) \
__put_user_size_goto(*((type *)(src)), \
(__force type __user *)(dst), sizeof(type), err_label)
#endif /* _ARCH_POWERPC_UACCESS_H */
......@@ -312,9 +312,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
user_exit();
/* Check valid addr_limit, TIF check is done there */
addr_limit_user_check();
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
......
......@@ -108,11 +108,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs,
{
if (!user_mode(regs))
return 1;
if (__access_ok(ea, nb, USER_DS))
if (__access_ok(ea, nb))
return 1;
if (__access_ok(ea, 1, USER_DS))
if (__access_ok(ea, 1))
/* Access overlaps the end of the user region */
regs->dar = USER_DS.seg;
regs->dar = TASK_SIZE_MAX - 1;
else
regs->dar = ea;
return 0;
......
......@@ -88,6 +88,7 @@ config RISCV
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select SET_FS
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
......
......@@ -191,6 +191,7 @@ config S390
select PCI_DOMAINS if PCI
select PCI_MSI if PCI
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select SET_FS
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
......
......@@ -71,6 +71,7 @@ config SUPERH
select PERF_EVENTS
select PERF_USE_VMALLOC
select RTC_LIB
select SET_FS
select SPARSE_IRQ
help
The SuperH is a RISC processor targeted for use in embedded systems
......
......@@ -51,6 +51,7 @@ config SPARC
select LOCKDEP_SMALL if LOCKDEP
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
select SET_FS
config SPARC32
def_bool !64BIT
......
......@@ -19,6 +19,7 @@ config UML
select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS
select HAVE_GCC_PLUGINS
select SET_FS
select TTY # Needed for line.c
config MMU
......
......@@ -239,7 +239,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(regs)->ss = __USER32_DS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 =
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
set_fs(USER_DS);
return 0;
}
......
......@@ -41,6 +41,17 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
/*
* User space process size: 3GB (default).
*/
#define IA32_PAGE_OFFSET __PAGE_OFFSET
#define TASK_SIZE __PAGE_OFFSET
#define TASK_SIZE_LOW TASK_SIZE
#define TASK_SIZE_MAX TASK_SIZE
#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
/*
* Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
*/
......
......@@ -59,6 +59,44 @@
#define __VIRTUAL_MASK_SHIFT 47
#endif
/*
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX
/*
* Maximum kernel image size is limited to 1 GiB, due to the fixmap living
* in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
......
......@@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size;
struct perf_event;
typedef struct {
unsigned long seg;
} mm_segment_t;
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
......@@ -538,8 +534,6 @@ struct thread_struct {
*/
unsigned long iopl_emul;
mm_segment_t addr_limit;
unsigned int sig_on_uaccess_err:1;
/* Floating point and extended processor state */
......@@ -783,67 +777,15 @@ static inline void spin_lock_prefetch(const void *x)
})
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
*/
#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_LOW TASK_SIZE
#define TASK_SIZE_MAX TASK_SIZE
#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
.addr_limit = KERNEL_DS, \
}
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
/*
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.addr_limit = KERNEL_DS, \
}
#define INIT_THREAD { }
extern unsigned long KSTK_ESP(struct task_struct *task);
......
......@@ -102,7 +102,6 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
#define TIF_X32 30 /* 32-bit native x86-64 binary */
#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
......@@ -131,7 +130,6 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
......
......@@ -12,30 +12,6 @@
#include <asm/smap.h>
#include <asm/extable.h>
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
* get_fs() == KERNEL_DS, checking is bypassed.
*
* For historical reasons, these macros are grossly misnamed.
*/
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(-1UL)
#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
#define get_fs() (current->thread.addr_limit)
static inline void set_fs(mm_segment_t fs)
{
current->thread.addr_limit = fs;
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
}
#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (current->thread.addr_limit.seg)
/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
......@@ -93,7 +69,7 @@ static inline bool pagefault_disabled(void);
#define access_ok(addr, size) \
({ \
WARN_ON_IN_IRQ(); \
likely(!__range_not_ok(addr, size, user_addr_max())); \
likely(!__range_not_ok(addr, size, TASK_SIZE_MAX)); \
})
extern int __get_user_1(void);
......
......@@ -37,9 +37,6 @@ static void __used common(void)
OFFSET(TASK_stack_canary, task_struct, stack_canary);
#endif
BLANK();
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
......
......@@ -37,10 +37,19 @@
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
#ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
#endif
.text
SYM_FUNC_START(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
......@@ -53,15 +62,13 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
add $1,%_ASM_AX
jc bad_get_user
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
2: movzwl -1(%_ASM_AX),%edx
2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
......@@ -69,15 +76,13 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
add $3,%_ASM_AX
jc bad_get_user
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
3: movl -3(%_ASM_AX),%edx
3: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
......@@ -86,29 +91,25 @@ EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64
add $7,%_ASM_AX
jc bad_get_user
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq -7(%_ASM_AX),%rdx
4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
ret
#else
add $7,%_ASM_AX
jc bad_get_user_8
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl -7(%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx
4: movl (%_ASM_AX),%edx
5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax
ASM_CLAC
ret
......
......@@ -33,12 +33,19 @@
* as they get called from within inline assembly.
*/
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
#ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
#endif
.text
SYM_FUNC_START(__put_user_1)
ENTER
cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
ASM_STAC
......@@ -51,9 +58,7 @@ EXPORT_SYMBOL(__put_user_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
......@@ -67,9 +72,7 @@ EXPORT_SYMBOL(__put_user_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
......@@ -83,9 +86,7 @@ EXPORT_SYMBOL(__put_user_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
......
......@@ -41,6 +41,7 @@ config XTENSA
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC
select SET_FS
select VIRT_TO_BUS
help
Xtensa processors are 32-bit RISC machines designed by Tensilica
......
......@@ -312,16 +312,6 @@ void lkdtm_CORRUPT_LIST_DEL(void)
pr_err("list_del() corruption not detected!\n");
}
/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */
void lkdtm_CORRUPT_USER_DS(void)
{
pr_info("setting bad task size limit\n");
set_fs(KERNEL_DS);
/* Make sure we do not keep running with a KERNEL_DS! */
force_sig(SIGKILL);
}
/* Test that VMAP_STACK is actually allocating with a leading guard page */
void lkdtm_STACK_GUARD_PAGE_LEADING(void)
{
......
......@@ -112,7 +112,6 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(CORRUPT_STACK_STRONG),
CRASHTYPE(CORRUPT_LIST_ADD),
CRASHTYPE(CORRUPT_LIST_DEL),
CRASHTYPE(CORRUPT_USER_DS),
CRASHTYPE(STACK_GUARD_PAGE_LEADING),
CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
CRASHTYPE(UNSET_SMEP),
......@@ -172,7 +171,6 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
CRASHTYPE(USERCOPY_STACK_BEYOND),
CRASHTYPE(USERCOPY_KERNEL),
CRASHTYPE(USERCOPY_KERNEL_DS),
CRASHTYPE(STACKLEAK_ERASING),
CRASHTYPE(CFI_FORWARD_PROTO),
#ifdef CONFIG_X86_32
......
......@@ -27,7 +27,6 @@ void lkdtm_OVERFLOW_UNSIGNED(void);
void lkdtm_ARRAY_BOUNDS(void);
void lkdtm_CORRUPT_LIST_ADD(void);
void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
void lkdtm_STACK_GUARD_PAGE_LEADING(void);
void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
void lkdtm_UNSET_SMEP(void);
......@@ -96,7 +95,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void);
void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
void lkdtm_USERCOPY_STACK_BEYOND(void);
void lkdtm_USERCOPY_KERNEL(void);
void lkdtm_USERCOPY_KERNEL_DS(void);
/* lkdtm_stackleak.c */
void lkdtm_STACKLEAK_ERASING(void);
......
......@@ -325,21 +325,6 @@ void lkdtm_USERCOPY_KERNEL(void)
vm_munmap(user_addr, PAGE_SIZE);
}
void lkdtm_USERCOPY_KERNEL_DS(void)
{
char __user *user_ptr =
(char __user *)(0xFUL << (sizeof(unsigned long) * 8 - 4));
mm_segment_t old_fs = get_fs();
char buf[10] = {0};
pr_info("attempting copy_to_user() to noncanonical address: %px\n",
user_ptr);
set_fs(KERNEL_DS);
if (copy_to_user(user_ptr, buf, sizeof(buf)) == 0)
pr_err("copy_to_user() to noncanonical address succeeded!?\n");
set_fs(old_fs);
}
void __init lkdtm_usercopy_init(void)
{
/* Prepare cache that lacks SLAB_USERCOPY flag. */
......
......@@ -297,6 +297,21 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
return rv;
}
static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp));
ssize_t ret;
if (pde_is_permanent(pde))
return pde->proc_ops->proc_read_iter(iocb, iter);
if (!use_pde(pde))
return -EIO;
ret = pde->proc_ops->proc_read_iter(iocb, iter);
unuse_pde(pde);
return ret;
}
static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
typeof_member(struct proc_ops, proc_read) read;
......@@ -572,9 +587,18 @@ static const struct file_operations proc_reg_file_ops = {
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = proc_reg_compat_ioctl,
#endif
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
......@@ -582,12 +606,26 @@ static const struct file_operations proc_reg_file_ops = {
};
#ifdef CONFIG_COMPAT
static const struct file_operations proc_reg_file_ops_no_compat = {
static const struct file_operations proc_reg_file_ops_compat = {
.llseek = proc_reg_llseek,
.read = proc_reg_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.compat_ioctl = proc_reg_compat_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.compat_ioctl = proc_reg_compat_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
......@@ -619,42 +657,51 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de;
if (!inode) {
pde_put(de);
return NULL;
}
if (is_empty_pde(de)) {
make_empty_dir_inode(inode);
return inode;
}
if (de->mode) {
inode->i_mode = de->mode;
inode->i_uid = de->uid;
inode->i_gid = de->gid;
}
if (de->size)
inode->i_size = de->size;
if (de->nlink)
set_nlink(inode, de->nlink);
inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de;
if (is_empty_pde(de)) {
make_empty_dir_inode(inode);
return inode;
}
if (S_ISREG(inode->i_mode)) {
inode->i_op = de->proc_iops;
if (de->mode) {
inode->i_mode = de->mode;
inode->i_uid = de->uid;
inode->i_gid = de->gid;
}
if (de->size)
inode->i_size = de->size;
if (de->nlink)
set_nlink(inode, de->nlink);
if (S_ISREG(inode->i_mode)) {
inode->i_op = de->proc_iops;
if (de->proc_ops->proc_read_iter)
inode->i_fop = &proc_iter_file_ops;
else
inode->i_fop = &proc_reg_file_ops;
#ifdef CONFIG_COMPAT
if (!de->proc_ops->proc_compat_ioctl) {
inode->i_fop = &proc_reg_file_ops_no_compat;
}
if (de->proc_ops->proc_compat_ioctl) {
if (de->proc_ops->proc_read_iter)
inode->i_fop = &proc_iter_file_ops_compat;
else
inode->i_fop = &proc_reg_file_ops_compat;
}
#endif
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = de->proc_iops;
inode->i_fop = de->proc_dir_ops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = de->proc_iops;
inode->i_fop = NULL;
} else
BUG();
} else
pde_put(de);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = de->proc_iops;
inode->i_fop = de->proc_dir_ops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = de->proc_iops;
inode->i_fop = NULL;
} else {
BUG();
}
return inode;
}
......@@ -12,6 +12,7 @@
#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/mount.h>
......@@ -540,13 +541,14 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
return err;
}
static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
size_t count, loff_t *ppos, int write)
static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
int write)
{
struct inode *inode = file_inode(filp);
struct inode *inode = file_inode(iocb->ki_filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
void *kbuf;
size_t count = iov_iter_count(iter);
char *kbuf;
ssize_t error;
if (IS_ERR(head))
......@@ -569,32 +571,30 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE)
goto out;
kbuf = kzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
goto out;
if (write) {
kbuf = memdup_user_nul(ubuf, count);
if (IS_ERR(kbuf)) {
error = PTR_ERR(kbuf);
goto out;
}
} else {
kbuf = kzalloc(count, GFP_KERNEL);
if (!kbuf)
goto out;
error = -EFAULT;
if (!copy_from_iter_full(kbuf, count, iter))
goto out_free_buf;
kbuf[count] = '\0';
}
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
ppos);
&iocb->ki_pos);
if (error)
goto out_free_buf;
/* careful: calling conventions are nasty here */
error = table->proc_handler(table, write, kbuf, &count, ppos);
error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos);
if (error)
goto out_free_buf;
if (!write) {
error = -EFAULT;
if (copy_to_user(ubuf, kbuf, count))
if (copy_to_iter(kbuf, count, iter) < count)
goto out_free_buf;
}
......@@ -607,16 +607,14 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
return error;
}
static ssize_t proc_sys_read(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter)
{
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
return proc_sys_call_handler(iocb, iter, 0);
}
static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter)
{
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
return proc_sys_call_handler(iocb, iter, 1);
}
static int proc_sys_open(struct inode *inode, struct file *filp)
......@@ -853,8 +851,10 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
static const struct file_operations proc_sys_file_operations = {
.open = proc_sys_open,
.poll = proc_sys_poll,
.read = proc_sys_read,
.write = proc_sys_write,
.read_iter = proc_sys_read,
.write_iter = proc_sys_write,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = default_llseek,
};
......
......@@ -419,27 +419,42 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
return ret;
}
static int warn_unsupported(struct file *file, const char *op)
{
pr_warn_ratelimited(
"kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
op, file, current->pid, current->comm);
return -EINVAL;
}
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
mm_segment_t old_fs = get_fs();
struct kvec iov = {
.iov_base = buf,
.iov_len = min_t(size_t, count, MAX_RW_COUNT),
};
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
return -EINVAL;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
/*
* Also fail if ->read_iter and ->read are both wired up as that
* implies very convoluted semantics.
*/
if (unlikely(!file->f_op->read_iter || file->f_op->read))
return warn_unsupported(file, "read");
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
set_fs(KERNEL_DS);
if (file->f_op->read)
ret = file->f_op->read(file, (void __user *)buf, count, pos);
else if (file->f_op->read_iter)
ret = new_sync_read(file, (void __user *)buf, count, pos);
else
ret = -EINVAL;
set_fs(old_fs);
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
ret = file->f_op->read_iter(&kiocb, &iter);
if (ret > 0) {
if (pos)
*pos = kiocb.ki_pos;
fsnotify_access(file);
add_rchar(current, ret);
}
......@@ -510,28 +525,32 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
/* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
mm_segment_t old_fs;
const char __user *p;
struct kvec iov = {
.iov_base = (void *)buf,
.iov_len = min_t(size_t, count, MAX_RW_COUNT),
};
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
/*
* Also fail if ->write_iter and ->write are both wired up as that
* implies very convoluted semantics.
*/
if (unlikely(!file->f_op->write_iter || file->f_op->write))
return warn_unsupported(file, "write");
old_fs = get_fs();
set_fs(KERNEL_DS);
p = (__force const char __user *)buf;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
if (file->f_op->write)
ret = file->f_op->write(file, p, count, pos);
else if (file->f_op->write_iter)
ret = new_sync_write(file, p, count, pos);
else
ret = -EINVAL;
set_fs(old_fs);
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
ret = file->f_op->write_iter(&kiocb, &iter);
if (ret > 0) {
if (pos)
*pos = kiocb.ki_pos;
fsnotify_modify(file);
add_wchar(current, ret);
}
......@@ -889,7 +908,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
}
EXPORT_SYMBOL(vfs_iter_write);
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
......
......@@ -341,89 +341,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);
static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
unsigned long vlen, loff_t offset)
{
mm_segment_t old_fs;
loff_t pos = offset;
ssize_t res;
old_fs = get_fs();
set_fs(KERNEL_DS);
/* The cast to a user pointer is valid due to the set_fs() */
res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
set_fs(old_fs);
return res;
}
static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
struct iov_iter to;
struct page **pages;
unsigned int nr_pages;
unsigned int mask;
size_t offset, base, copied = 0;
ssize_t res;
int i;
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return -EAGAIN;
/*
* Try to keep page boundaries matching to source pagecache ones -
* it probably won't be much help, but...
*/
offset = *ppos & ~PAGE_MASK;
iov_iter_pipe(&to, READ, pipe, len + offset);
res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
if (res <= 0)
return -ENOMEM;
nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
vec = __vec;
if (nr_pages > PIPE_DEF_BUFFERS) {
vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
if (unlikely(!vec)) {
res = -ENOMEM;
goto out;
}
}
mask = pipe->ring_size - 1;
pipe->bufs[to.head & mask].offset = offset;
pipe->bufs[to.head & mask].len -= offset;
for (i = 0; i < nr_pages; i++) {
size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
vec[i].iov_base = page_address(pages[i]) + offset;
vec[i].iov_len = this_len;
len -= this_len;
offset = 0;
}
res = kernel_readv(in, vec, nr_pages, *ppos);
if (res > 0) {
copied = res;
*ppos += res;
}
if (vec != __vec)
kfree(vec);
out:
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);
kvfree(pages);
iov_iter_advance(&to, copied); /* truncates and discards */
return res;
}
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
......@@ -807,33 +724,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(iter_file_splice_write);
static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
int ret;
void *data;
loff_t tmp = sd->pos;
data = kmap(buf->page);
ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
kunmap(buf->page);
return ret;
}
static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
ssize_t ret;
ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
if (ret > 0)
*ppos += ret;
return ret;
}
/**
* generic_splice_sendpage - splice data from a pipe to a socket
* @pipe: pipe to splice from
......@@ -855,15 +745,23 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(generic_splice_sendpage);
static int warn_unsupported(struct file *file, const char *op)
{
pr_debug_ratelimited(
"splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
op, file, current->pid, current->comm);
return -EINVAL;
}
/*
* Attempt to initiate a splice from pipe to file.
*/
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
if (out->f_op->splice_write)
return out->f_op->splice_write(pipe, out, ppos, len, flags);
return default_file_splice_write(pipe, out, ppos, len, flags);
if (unlikely(!out->f_op->splice_write))
return warn_unsupported(out, "write");
return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
/*
......@@ -885,9 +783,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
if (in->f_op->splice_read)
return in->f_op->splice_read(in, ppos, pipe, len, flags);
return default_file_splice_read(in, ppos, pipe, len, flags);
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
/**
......
......@@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
void **buf, size_t *pcount, loff_t *ppos,
char **buf, size_t *pcount, loff_t *ppos,
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
......
......@@ -1894,8 +1894,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
......
......@@ -30,6 +30,7 @@ struct proc_ops {
unsigned int proc_flags;
int (*proc_open)(struct inode *, struct file *);
ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *);
ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *);
ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *);
loff_t (*proc_lseek)(struct file *, loff_t, int);
int (*proc_release)(struct inode *, struct file *);
......
......@@ -10,6 +10,7 @@
#include <asm/uaccess.h>
#ifdef CONFIG_SET_FS
/*
* Force the uaccess routines to be wired up for actual userspace access,
* overriding any possible set_fs(KERNEL_DS) still lingering around. Undone
......@@ -27,6 +28,23 @@ static inline void force_uaccess_end(mm_segment_t oldfs)
{
set_fs(oldfs);
}
#else /* CONFIG_SET_FS */
typedef struct {
/* empty dummy */
} mm_segment_t;
#define uaccess_kernel() (false)
#define user_addr_max() (TASK_SIZE_MAX)
static inline mm_segment_t force_uaccess_begin(void)
{
return (mm_segment_t) { };
}
static inline void force_uaccess_end(mm_segment_t oldfs)
{
}
#endif /* CONFIG_SET_FS */
/*
* Architectures should provide two primitives (raw_copy_{to,from}_user())
......
......@@ -1226,7 +1226,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
*/
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
void **buf, size_t *pcount, loff_t *ppos,
char **buf, size_t *pcount, loff_t *ppos,
enum bpf_attach_type type)
{
struct bpf_sysctl_kern ctx = {
......
......@@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
};
static void __init __test_bitmap_parselist(int is_user)
static void __init test_bitmap_parselist(void)
{
int i;
int err;
ktime_t time;
DECLARE_BITMAP(bmap, 2048);
char *mode = is_user ? "_user" : "";
for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) {
#define ptest parselist_tests[i]
if (is_user) {
mm_segment_t orig_fs = get_fs();
size_t len = strlen(ptest.in);
set_fs(KERNEL_DS);
time = ktime_get();
err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
bmap, ptest.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
} else {
time = ktime_get();
err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
time = ktime_get() - time;
}
time = ktime_get();
err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
time = ktime_get() - time;
if (err != ptest.errno) {
pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n",
mode, i, ptest.in, err, ptest.errno);
pr_err("parselist: %d: input is %s, errno is %d, expected %d\n",
i, ptest.in, err, ptest.errno);
continue;
}
if (!err && ptest.expected
&& !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) {
pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
mode, i, ptest.in, bmap[0],
pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
i, ptest.in, bmap[0],
*ptest.expected);
continue;
}
if (ptest.flags & PARSE_TIME)
pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
mode, i, ptest.in, time);
pr_err("parselist: %d: input is '%s' OK, Time: %llu\n",
i, ptest.in, time);
#undef ptest
}
......@@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = {
#undef step
};
static void __init __test_bitmap_parse(int is_user)
static void __init test_bitmap_parse(void)
{
int i;
int err;
ktime_t time;
DECLARE_BITMAP(bmap, 2048);
char *mode = is_user ? "_user" : "";
for (i = 0; i < ARRAY_SIZE(parse_tests); i++) {
struct test_bitmap_parselist test = parse_tests[i];
size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in);
if (is_user) {
size_t len = strlen(test.in);
mm_segment_t orig_fs = get_fs();
set_fs(KERNEL_DS);
time = ktime_get();
err = bitmap_parse_user((__force const char __user *)test.in, len,
bmap, test.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
} else {
size_t len = test.flags & NO_LEN ?
UINT_MAX : strlen(test.in);
time = ktime_get();
err = bitmap_parse(test.in, len, bmap, test.nbits);
time = ktime_get() - time;
}
time = ktime_get();
err = bitmap_parse(test.in, len, bmap, test.nbits);
time = ktime_get() - time;
if (err != test.errno) {
pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n",
mode, i, test.in, err, test.errno);
pr_err("parse: %d: input is %s, errno is %d, expected %d\n",
i, test.in, err, test.errno);
continue;
}
if (!err && test.expected
&& !__bitmap_equal(bmap, test.expected, test.nbits)) {
pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
mode, i, test.in, bmap[0],
pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
i, test.in, bmap[0],
*test.expected);
continue;
}
if (test.flags & PARSE_TIME)
pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n",
mode, i, test.in, time);
pr_err("parse: %d: input is '%s' OK, Time: %llu\n",
i, test.in, time);
}
}
static void __init test_bitmap_parselist(void)
{
__test_bitmap_parselist(0);
}
static void __init test_bitmap_parselist_user(void)
{
__test_bitmap_parselist(1);
}
static void __init test_bitmap_parse(void)
{
__test_bitmap_parse(0);
}
static void __init test_bitmap_parse_user(void)
{
__test_bitmap_parse(1);
}
#define EXP1_IN_BITS (sizeof(exp1) * 8)
static void __init test_bitmap_arr32(void)
......@@ -675,9 +628,7 @@ static void __init selftest(void)
test_replace();
test_bitmap_arr32();
test_bitmap_parse();
test_bitmap_parse_user();
test_bitmap_parselist();
test_bitmap_parselist_user();
test_mem_optimisations();
test_for_each_set_clump8();
test_bitmap_cut();
......
......@@ -9,7 +9,6 @@ EXCEPTION
#CORRUPT_STACK_STRONG Crashes entire system on success
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
CORRUPT_USER_DS Invalid address limit on user-mode return
STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
UNSET_SMEP CR4 bits went missing
......@@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO
USERCOPY_STACK_FRAME_FROM
USERCOPY_STACK_BEYOND
USERCOPY_KERNEL
USERCOPY_KERNEL_DS
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment