Commit fd33c436 authored by James Morris

Merge tag 'seccomp-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into next

parents 2ccf4661 c2e1f2e3
...@@ -7953,6 +7953,16 @@ S: Maintained ...@@ -7953,6 +7953,16 @@ S: Maintained
F: drivers/mmc/host/sdhci.* F: drivers/mmc/host/sdhci.*
F: drivers/mmc/host/sdhci-pltfm.[ch] F: drivers/mmc/host/sdhci-pltfm.[ch]
SECURE COMPUTING
M: Kees Cook <keescook@chromium.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
S: Supported
F: kernel/seccomp.c
F: include/uapi/linux/seccomp.h
F: include/linux/seccomp.h
K: \bsecure_computing
K: \bTIF_SECCOMP\b
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
M: Anton Vorontsov <anton@enomsg.org> M: Anton Vorontsov <anton@enomsg.org>
L: linuxppc-dev@lists.ozlabs.org L: linuxppc-dev@lists.ozlabs.org
......
...@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER ...@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
- secure_computing is called from a ptrace_event()-safe context - secure_computing is called from a ptrace_event()-safe context
- secure_computing return value is checked and a return value of -1 - secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately. results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP_FILTER config SECCOMP_FILTER
def_bool y def_bool y
......
...@@ -409,6 +409,7 @@ ...@@ -409,6 +409,7 @@
#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) #define __NR_sched_setattr (__NR_SYSCALL_BASE+380)
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) #define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
#define __NR_renameat2 (__NR_SYSCALL_BASE+382) #define __NR_renameat2 (__NR_SYSCALL_BASE+382)
#define __NR_seccomp (__NR_SYSCALL_BASE+383)
/* /*
* This may need to be greater than __NR_last_syscall+1 in order to * This may need to be greater than __NR_last_syscall+1 in order to
......
...@@ -392,6 +392,7 @@ ...@@ -392,6 +392,7 @@
/* 380 */ CALL(sys_sched_setattr) /* 380 */ CALL(sys_sched_setattr)
CALL(sys_sched_getattr) CALL(sys_sched_getattr)
CALL(sys_renameat2) CALL(sys_renameat2)
CALL(sys_seccomp)
#ifndef syscalls_counted #ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted #define syscalls_counted
......
...@@ -372,16 +372,17 @@ ...@@ -372,16 +372,17 @@
#define __NR_sched_setattr (__NR_Linux + 349) #define __NR_sched_setattr (__NR_Linux + 349)
#define __NR_sched_getattr (__NR_Linux + 350) #define __NR_sched_getattr (__NR_Linux + 350)
#define __NR_renameat2 (__NR_Linux + 351) #define __NR_renameat2 (__NR_Linux + 351)
#define __NR_seccomp (__NR_Linux + 352)
/* /*
* Offset of the last Linux o32 flavoured syscall * Offset of the last Linux o32 flavoured syscall
*/ */
#define __NR_Linux_syscalls 351 #define __NR_Linux_syscalls 352
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
#define __NR_O32_Linux 4000 #define __NR_O32_Linux 4000
#define __NR_O32_Linux_syscalls 351 #define __NR_O32_Linux_syscalls 352
#if _MIPS_SIM == _MIPS_SIM_ABI64 #if _MIPS_SIM == _MIPS_SIM_ABI64
...@@ -701,16 +702,17 @@ ...@@ -701,16 +702,17 @@
#define __NR_sched_setattr (__NR_Linux + 309) #define __NR_sched_setattr (__NR_Linux + 309)
#define __NR_sched_getattr (__NR_Linux + 310) #define __NR_sched_getattr (__NR_Linux + 310)
#define __NR_renameat2 (__NR_Linux + 311) #define __NR_renameat2 (__NR_Linux + 311)
#define __NR_seccomp (__NR_Linux + 312)
/* /*
* Offset of the last Linux 64-bit flavoured syscall * Offset of the last Linux 64-bit flavoured syscall
*/ */
#define __NR_Linux_syscalls 311 #define __NR_Linux_syscalls 312
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
#define __NR_64_Linux 5000 #define __NR_64_Linux 5000
#define __NR_64_Linux_syscalls 311 #define __NR_64_Linux_syscalls 312
#if _MIPS_SIM == _MIPS_SIM_NABI32 #if _MIPS_SIM == _MIPS_SIM_NABI32
...@@ -1034,15 +1036,16 @@ ...@@ -1034,15 +1036,16 @@
#define __NR_sched_setattr (__NR_Linux + 313) #define __NR_sched_setattr (__NR_Linux + 313)
#define __NR_sched_getattr (__NR_Linux + 314) #define __NR_sched_getattr (__NR_Linux + 314)
#define __NR_renameat2 (__NR_Linux + 315) #define __NR_renameat2 (__NR_Linux + 315)
#define __NR_seccomp (__NR_Linux + 316)
/* /*
* Offset of the last N32 flavoured syscall * Offset of the last N32 flavoured syscall
*/ */
#define __NR_Linux_syscalls 315 #define __NR_Linux_syscalls 316
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
#define __NR_N32_Linux 6000 #define __NR_N32_Linux 6000
#define __NR_N32_Linux_syscalls 315 #define __NR_N32_Linux_syscalls 316
#endif /* _UAPI_ASM_UNISTD_H */ #endif /* _UAPI_ASM_UNISTD_H */
...@@ -578,3 +578,4 @@ EXPORT(sys_call_table) ...@@ -578,3 +578,4 @@ EXPORT(sys_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 4350 */ PTR sys_sched_getattr /* 4350 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
...@@ -431,4 +431,5 @@ EXPORT(sys_call_table) ...@@ -431,4 +431,5 @@ EXPORT(sys_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 5310 */ PTR sys_sched_getattr /* 5310 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
.size sys_call_table,.-sys_call_table .size sys_call_table,.-sys_call_table
...@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table) ...@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr PTR sys_sched_getattr
PTR sys_renameat2 /* 6315 */ PTR sys_renameat2 /* 6315 */
PTR sys_seccomp
.size sysn32_call_table,.-sysn32_call_table .size sysn32_call_table,.-sysn32_call_table
...@@ -557,4 +557,5 @@ EXPORT(sys32_call_table) ...@@ -557,4 +557,5 @@ EXPORT(sys32_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 4350 */ PTR sys_sched_getattr /* 4350 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
.size sys32_call_table,.-sys32_call_table .size sys32_call_table,.-sys32_call_table
...@@ -360,3 +360,4 @@ ...@@ -360,3 +360,4 @@
351 i386 sched_setattr sys_sched_setattr 351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr 352 i386 sched_getattr sys_sched_getattr
353 i386 renameat2 sys_renameat2 353 i386 renameat2 sys_renameat2
354 i386 seccomp sys_seccomp
...@@ -323,6 +323,7 @@ ...@@ -323,6 +323,7 @@
314 common sched_setattr sys_sched_setattr 314 common sched_setattr sys_sched_setattr
315 common sched_getattr sys_sched_getattr 315 common sched_getattr sys_sched_getattr
316 common renameat2 sys_renameat2 316 common renameat2 sys_renameat2
317 common seccomp sys_seccomp
# #
# x32-specific system call numbers start at 512 to avoid cache impact # x32-specific system call numbers start at 512 to avoid cache impact
......
...@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds); ...@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
/* /*
* determine how safe it is to execute the proposed program * determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against * - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH * PTRACE_ATTACH or seccomp thread-sync
*/ */
static void check_unsafe_exec(struct linux_binprm *bprm) static void check_unsafe_exec(struct linux_binprm *bprm)
{ {
...@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) ...@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to * This isn't strictly necessary, but it makes it harder for LSMs to
* mess up. * mess up.
*/ */
if (current->no_new_privs) if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
t = p; t = p;
...@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm) ...@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid(); bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
!current->no_new_privs && !task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */ /* Set-uid? */
......
...@@ -1307,13 +1307,12 @@ struct task_struct { ...@@ -1307,13 +1307,12 @@ struct task_struct {
* execve */ * execve */
unsigned in_iowait:1; unsigned in_iowait:1;
/* task may not gain privileges */
unsigned no_new_privs:1;
/* Revert to default priority/policy when forking */ /* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1; unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1; unsigned sched_contributes_to_load:1;
unsigned long atomic_flags; /* Flags needing atomic access. */
pid_t pid; pid_t pid;
pid_t tgid; pid_t tgid;
...@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags) ...@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
} }
/*
 * Per-process atomic flags.
 *
 * PFA_* values are bit numbers within task_struct::atomic_flags: they are
 * handed directly to test_bit()/set_bit() below, so they must not be
 * written (or combined) as bit masks.
 */
#define PFA_NO_NEW_PRIVS 1	/* May not gain new privileges. */

/*
 * task_no_new_privs - report whether @p has the no_new_privs flag set
 * @p: task whose atomic_flags word is inspected
 *
 * Returns true when the PFA_NO_NEW_PRIVS bit is set in @p->atomic_flags.
 */
static inline bool task_no_new_privs(struct task_struct *p)
{
	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}

/*
 * task_set_no_new_privs - set the no_new_privs flag on @p
 * @p: task whose atomic_flags word is updated
 *
 * Sets the PFA_NO_NEW_PRIVS bit with set_bit(); no helper for clearing
 * the bit is provided here.
 */
static inline void task_set_no_new_privs(struct task_struct *p)
{
	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
/* /*
* task->jobctl flags * task->jobctl flags
*/ */
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <uapi/linux/seccomp.h> #include <uapi/linux/seccomp.h>
/*
 * Union of every flag bit accepted for SECCOMP_SET_MODE_FILTER; the
 * filter-mode setup path rejects a flags value with any bit outside it.
 */
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
#ifdef CONFIG_SECCOMP #ifdef CONFIG_SECCOMP
#include <linux/thread_info.h> #include <linux/thread_info.h>
...@@ -14,11 +16,11 @@ struct seccomp_filter; ...@@ -14,11 +16,11 @@ struct seccomp_filter;
* *
* @mode: indicates one of the valid values above for controlled * @mode: indicates one of the valid values above for controlled
* system calls available to a process. * system calls available to a process.
* @filter: The metadata and ruleset for determining what system calls * @filter: must always point to a valid seccomp-filter or NULL as it is
* are allowed for a task. * accessed without locking during system call entry.
* *
* @filter must only be accessed from the context of current as there * @filter must only be accessed from the context of current as there
* is no locking. * is no read locking.
*/ */
struct seccomp { struct seccomp {
int mode; int mode;
......
...@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, ...@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2); unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs);
#endif #endif
...@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) ...@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
__SYSCALL(__NR_sched_getattr, sys_sched_getattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
#define __NR_renameat2 276 #define __NR_renameat2 276
__SYSCALL(__NR_renameat2, sys_renameat2) __SYSCALL(__NR_renameat2, sys_renameat2)
#define __NR_seccomp 277
__SYSCALL(__NR_seccomp, sys_seccomp)
#undef __NR_syscalls #undef __NR_syscalls
#define __NR_syscalls 277 #define __NR_syscalls 278
/* /*
* All syscalls below here should go away really, * All syscalls below here should go away really,
......
...@@ -10,6 +10,13 @@ ...@@ -10,6 +10,13 @@
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
/* /*
* All BPF programs must return a 32-bit value. * All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data. * The bottom 16-bits are for optional return data.
......
...@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ...@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto free_ti; goto free_ti;
tsk->stack = ti; tsk->stack = ti;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
* the sighand lock in case orig has changed between now and
* then. Until then, filter must be NULL to avoid messing up
* the usage counts on the error path calling free_task.
*/
tsk->seccomp.filter = NULL;
#endif
setup_thread_stack(tsk, orig); setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk); clear_user_return_notifier(tsk);
...@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ...@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
return 0; return 0;
} }
/*
 * copy_seccomp - hand the parent's current seccomp state to a new task
 * @p: the task being created; it is not running yet
 *
 * Takes a reference on current's filter, copies current->seccomp into @p
 * and brings @p's no_new_privs bit and TIF_SECCOMP flag in line with it.
 * Compiles to an empty function when CONFIG_SECCOMP is not set.
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->lock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 *
	 * Use assert_spin_locked() rather than
	 * BUG_ON(!spin_is_locked()): spin_is_locked() reports "unlocked"
	 * on uniprocessor builds without spinlock debugging, which would
	 * turn the sanity check into a BUG on every fork.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before we took the sighand lock, the seccomp
	 * thread flag has to be enabled manually here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{ {
current->clear_child_tid = tidptr; current->clear_child_tid = tidptr;
...@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto fork_out; goto fork_out;
ftrace_graph_init_task(p); ftrace_graph_init_task(p);
get_seccomp_filter(p);
rt_mutex_init_task(p); rt_mutex_init_task(p);
...@@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock(&current->sighand->siglock); spin_lock(&current->sighand->siglock);
/*
* Copy seccomp details explicitly here, in case they were changed
* before holding sighand lock.
*/
copy_seccomp(p);
/* /*
* Process group and session signals need to be delivered to just the * Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the * parent before the fork or both the parent and the child after the
......
...@@ -18,15 +18,17 @@ ...@@ -18,15 +18,17 @@
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/seccomp.h> #include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
/* #define SECCOMP_DEBUG 1 */ /* #define SECCOMP_DEBUG 1 */
#ifdef CONFIG_SECCOMP_FILTER #ifdef CONFIG_SECCOMP_FILTER
#include <asm/syscall.h> #include <asm/syscall.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/slab.h>
#include <linux/tracehook.h> #include <linux/tracehook.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -172,21 +174,24 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) ...@@ -172,21 +174,24 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*/ */
static u32 seccomp_run_filters(int syscall) static u32 seccomp_run_filters(int syscall)
{ {
struct seccomp_filter *f; struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
struct seccomp_data sd; struct seccomp_data sd;
u32 ret = SECCOMP_RET_ALLOW; u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */ /* Ensure unexpected behavior doesn't result in failing open. */
if (WARN_ON(current->seccomp.filter == NULL)) if (unlikely(WARN_ON(f == NULL)))
return SECCOMP_RET_KILL; return SECCOMP_RET_KILL;
/* Make sure cross-thread synced filter points somewhere sane. */
smp_read_barrier_depends();
populate_seccomp_data(&sd); populate_seccomp_data(&sd);
/* /*
* All filters in the list are evaluated and the lowest BPF return * All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA). * value always takes priority (ignoring the DATA).
*/ */
for (f = current->seccomp.filter; f; f = f->prev) { for (; f; f = f->prev) {
u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
...@@ -194,29 +199,159 @@ static u32 seccomp_run_filters(int syscall) ...@@ -194,29 +199,159 @@ static u32 seccomp_run_filters(int syscall)
} }
return ret; return ret;
} }
#endif /* CONFIG_SECCOMP_FILTER */
/*
 * seccomp_may_assign_mode - check whether current may enter @seccomp_mode
 * @seccomp_mode: the mode about to be assigned
 *
 * Caller must hold current->sighand->siglock.
 *
 * Returns false when current already has a non-zero seccomp mode that
 * differs from @seccomp_mode, true otherwise.
 */
static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
	/*
	 * assert_spin_locked() instead of BUG_ON(!spin_is_locked()):
	 * spin_is_locked() is always false on uniprocessor builds without
	 * spinlock debugging, so the old check would BUG unconditionally.
	 */
	assert_spin_locked(&current->sighand->siglock);

	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
		return false;

	return true;
}
/*
 * seccomp_assign_mode - store a seccomp mode on @task and flag the task
 * @task: task to update (current, or a sibling thread during thread sync)
 * @seccomp_mode: mode value written to @task->seccomp.mode
 *
 * Caller must hold @task->sighand->siglock.
 */
static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode)
{
	/*
	 * assert_spin_locked() instead of BUG_ON(!spin_is_locked()):
	 * spin_is_locked() is always false on uniprocessor builds without
	 * spinlock debugging, so the old check would BUG unconditionally.
	 */
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure TIF_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	set_tsk_thread_flag(task, TIF_SECCOMP);
}
#ifdef CONFIG_SECCOMP_FILTER
/*
 * Report whether @parent lies on the ->prev chain that starts at @child
 * (the chain includes @child itself).  Returns 1 if it does, 0 if not.
 */
static int is_ancestor(struct seccomp_filter *parent,
		       struct seccomp_filter *child)
{
	struct seccomp_filter *node = child;

	/* A NULL parent is the root of every chain, so it always matches. */
	if (!parent)
		return 1;

	while (node) {
		if (node == parent)
			return 1;
		node = node->prev;
	}

	return 0;
}
/** /**
* seccomp_attach_filter: Attaches a seccomp filter to current. * seccomp_can_sync_threads: checks if all threads can be synchronized
*
* Expects sighand and cred_guard_mutex locks to be held.
*
* Returns 0 on success, -ve on error, or the pid of a thread which was
* either not in the correct seccomp mode or it did not have an ancestral
* seccomp filter.
*/
static inline pid_t seccomp_can_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	/*
	 * assert_spin_locked() instead of BUG_ON(!spin_is_locked()):
	 * spin_is_locked() is always false on uniprocessor builds without
	 * spinlock debugging, so the old check would BUG unconditionally.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Validate all threads being eligible for synchronization. */
	caller = current;
	for_each_thread(caller, thread) {
		pid_t failed;

		/* Skip current, since it is initiating the sync. */
		if (thread == caller)
			continue;

		/*
		 * A thread is eligible when it has no seccomp mode yet, or
		 * when it is in filter mode and its filter is an ancestor
		 * of (or the same as) the caller's filter.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
		     is_ancestor(thread->seccomp.filter,
				 caller->seccomp.filter)))
			continue;

		/* Return the first thread that cannot be synchronized. */
		failed = task_pid_vnr(thread);
		/* If the pid cannot be resolved, then return -ESRCH */
		if (unlikely(WARN_ON(failed == 0)))
			failed = -ESRCH;
		return failed;
	}

	return 0;
}
/**
 * seccomp_sync_threads: sets all threads to use current's filter
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 *
 * For every other thread in the group this swaps the thread's filter
 * reference for one on current's filter and, if the thread had no seccomp
 * mode yet, moves it into SECCOMP_MODE_FILTER (propagating no_new_privs
 * from current first).
 */
static inline void seccomp_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	/*
	 * assert_spin_locked() instead of BUG_ON(!spin_is_locked()):
	 * spin_is_locked() is always false on uniprocessor builds without
	 * spinlock debugging, so the old check would BUG unconditionally.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Synchronize all threads. */
	caller = current;
	for_each_thread(caller, thread) {
		/* Skip current, since it needs no changes. */
		if (thread == caller)
			continue;

		/* Get a task reference for the new leaf node. */
		get_seccomp_filter(caller);
		/*
		 * Drop the task reference to the shared ancestor since
		 * current's path will hold a reference.  (This also
		 * allows a put before the assignment.)
		 */
		put_seccomp_filter(thread);
		smp_store_release(&thread->seccomp.filter,
				  caller->seccomp.filter);
		/*
		 * Opt the other thread into seccomp if needed.
		 * As threads are considered to be trust-realm
		 * equivalent (see ptrace_may_access), it is safe to
		 * allow one thread to transition the other.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
			/*
			 * Don't let an unprivileged task work around
			 * the no_new_privs restriction by creating
			 * a thread that sets it up, enters seccomp,
			 * then dies.
			 */
			if (task_no_new_privs(caller))
				task_set_no_new_privs(thread);

			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
		}
	}
}
/**
* seccomp_prepare_filter: Prepares a seccomp filter for use.
* @fprog: BPF program to install * @fprog: BPF program to install
* *
* Returns 0 on success or an errno on failure. * Returns filter on success or an ERR_PTR on failure.
*/ */
static long seccomp_attach_filter(struct sock_fprog *fprog) static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{ {
struct seccomp_filter *filter; struct seccomp_filter *filter;
unsigned long fp_size = fprog->len * sizeof(struct sock_filter); unsigned long fp_size;
unsigned long total_insns = fprog->len;
struct sock_filter *fp; struct sock_filter *fp;
int new_len; int new_len;
long ret; long ret;
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
return -EINVAL; return ERR_PTR(-EINVAL);
BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
for (filter = current->seccomp.filter; filter; filter = filter->prev) fp_size = fprog->len * sizeof(struct sock_filter);
total_insns += filter->prog->len + 4; /* include a 4 instr penalty */
if (total_insns > MAX_INSNS_PER_PATH)
return -ENOMEM;
/* /*
* Installing a seccomp filter requires that the task has * Installing a seccomp filter requires that the task has
...@@ -224,14 +359,14 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) ...@@ -224,14 +359,14 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
* This avoids scenarios where unprivileged tasks can affect the * This avoids scenarios where unprivileged tasks can affect the
* behavior of privileged children. * behavior of privileged children.
*/ */
if (!current->no_new_privs && if (!task_no_new_privs(current) &&
security_capable_noaudit(current_cred(), current_user_ns(), security_capable_noaudit(current_cred(), current_user_ns(),
CAP_SYS_ADMIN) != 0) CAP_SYS_ADMIN) != 0)
return -EACCES; return ERR_PTR(-EACCES);
fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
if (!fp) if (!fp)
return -ENOMEM; return ERR_PTR(-ENOMEM);
/* Copy the instructions from fprog. */ /* Copy the instructions from fprog. */
ret = -EFAULT; ret = -EFAULT;
...@@ -275,13 +410,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) ...@@ -275,13 +410,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
sk_filter_select_runtime(filter->prog); sk_filter_select_runtime(filter->prog);
/* return filter;
* If there is an existing filter, make it the prev and don't drop its
* task reference.
*/
filter->prev = current->seccomp.filter;
current->seccomp.filter = filter;
return 0;
free_filter_prog: free_filter_prog:
kfree(filter->prog); kfree(filter->prog);
...@@ -289,19 +418,20 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) ...@@ -289,19 +418,20 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
kfree(filter); kfree(filter);
free_prog: free_prog:
kfree(fp); kfree(fp);
return ret; return ERR_PTR(ret);
} }
/** /**
* seccomp_attach_user_filter - attaches a user-supplied sock_fprog * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
* @user_filter: pointer to the user data containing a sock_fprog. * @user_filter: pointer to the user data containing a sock_fprog.
* *
* Returns 0 on success and non-zero otherwise. * Returns 0 on success and non-zero otherwise.
*/ */
static long seccomp_attach_user_filter(char __user *user_filter) static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{ {
struct sock_fprog fprog; struct sock_fprog fprog;
long ret = -EFAULT; struct seccomp_filter *filter = ERR_PTR(-EFAULT);
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
if (is_compat_task()) { if (is_compat_task()) {
...@@ -314,9 +444,56 @@ static long seccomp_attach_user_filter(char __user *user_filter) ...@@ -314,9 +444,56 @@ static long seccomp_attach_user_filter(char __user *user_filter)
#endif #endif
if (copy_from_user(&fprog, user_filter, sizeof(fprog))) if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
goto out; goto out;
ret = seccomp_attach_filter(&fprog); filter = seccomp_prepare_filter(&fprog);
out: out:
return ret; return filter;
}
/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ENOMEM when the combined filter chain would
 * exceed MAX_INSNS_PER_PATH, or the non-zero result of
 * seccomp_can_sync_threads() when SECCOMP_FILTER_FLAG_TSYNC was requested
 * and the threads cannot be synchronized.  On any non-zero return the
 * filter has not been attached.
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{
	unsigned long total_insns;
	struct seccomp_filter *walker;

	/*
	 * assert_spin_locked() instead of BUG_ON(!spin_is_locked()):
	 * spin_is_locked() is always false on uniprocessor builds without
	 * spinlock debugging, so the old check would BUG unconditionally.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Validate resulting filter length. */
	total_insns = filter->prog->len;
	for (walker = current->seccomp.filter; walker; walker = walker->prev)
		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

	/* If thread sync has been requested, check that it is possible. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
		int ret;

		ret = seccomp_can_sync_threads();
		if (ret)
			return ret;
	}

	/*
	 * If there is an existing filter, make it the prev and don't drop its
	 * task reference.
	 */
	filter->prev = current->seccomp.filter;
	current->seccomp.filter = filter;

	/* Now that the new filter is in place, synchronize to all threads. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		seccomp_sync_threads();

	return 0;
}
/* get_seccomp_filter - increments the reference count of the filter on @tsk */ /* get_seccomp_filter - increments the reference count of the filter on @tsk */
...@@ -329,6 +506,14 @@ void get_seccomp_filter(struct task_struct *tsk) ...@@ -329,6 +506,14 @@ void get_seccomp_filter(struct task_struct *tsk)
atomic_inc(&orig->usage); atomic_inc(&orig->usage);
} }
/*
 * Release a single filter: its program first, then the filter structure
 * itself.  A NULL @filter is accepted and ignored.  The ->prev link is not
 * followed here.
 */
static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (!filter)
		return;

	sk_filter_free(filter->prog);
	kfree(filter);
}
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
void put_seccomp_filter(struct task_struct *tsk) void put_seccomp_filter(struct task_struct *tsk)
{ {
...@@ -337,8 +522,7 @@ void put_seccomp_filter(struct task_struct *tsk) ...@@ -337,8 +522,7 @@ void put_seccomp_filter(struct task_struct *tsk)
while (orig && atomic_dec_and_test(&orig->usage)) { while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig; struct seccomp_filter *freeme = orig;
orig = orig->prev; orig = orig->prev;
sk_filter_free(freeme->prog); seccomp_filter_free(freeme);
kfree(freeme);
} }
} }
...@@ -382,12 +566,17 @@ static int mode1_syscalls_32[] = { ...@@ -382,12 +566,17 @@ static int mode1_syscalls_32[] = {
int __secure_computing(int this_syscall) int __secure_computing(int this_syscall)
{ {
int mode = current->seccomp.mode;
int exit_sig = 0; int exit_sig = 0;
int *syscall; int *syscall;
u32 ret; u32 ret;
switch (mode) { /*
* Make sure that any changes to mode from another thread have
* been seen after TIF_SECCOMP was seen.
*/
rmb();
switch (current->seccomp.mode) {
case SECCOMP_MODE_STRICT: case SECCOMP_MODE_STRICT:
syscall = mode1_syscalls; syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
...@@ -473,47 +662,152 @@ long prctl_get_seccomp(void) ...@@ -473,47 +662,152 @@ long prctl_get_seccomp(void)
} }
/** /**
* prctl_set_seccomp: configures current->seccomp.mode * seccomp_set_mode_strict: internal function for setting strict seccomp
* @seccomp_mode: requested mode to use
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
* *
* This function may be called repeatedly with a @seccomp_mode of * Once current->seccomp.mode is non-zero, it may not be changed.
* SECCOMP_MODE_FILTER to install additional filters. Every filter *
* successfully installed will be evaluated (in reverse order) for each system * Returns 0 on success or -EINVAL on failure.
* call the task makes. */
static long seccomp_set_mode_strict(void)
{
	const unsigned long strict = SECCOMP_MODE_STRICT;
	long err;

	/* The mode check and the mode update happen under one siglock hold. */
	spin_lock_irq(&current->sighand->siglock);

	if (seccomp_may_assign_mode(strict)) {
#ifdef TIF_NOTSC
		disable_TSC();
#endif
		seccomp_assign_mode(current, strict);
		err = 0;
	} else {
		/* current is already in a different, non-zero seccomp mode. */
		err = -EINVAL;
	}

	spin_unlock_irq(&current->sighand->siglock);

	return err;
}
#ifdef CONFIG_SECCOMP_FILTER
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
* @filter: struct sock_fprog containing filter
*
* This function may be called repeatedly to install additional filters.
* Every filter successfully installed will be evaluated (in reverse order)
* for each system call the task makes.
* *
* Once current->seccomp.mode is non-zero, it may not be changed. * Once current->seccomp.mode is non-zero, it may not be changed.
* *
* Returns 0 on success or -EINVAL on failure. * Returns 0 on success or -EINVAL on failure.
*/ */
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) static long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{ {
const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
struct seccomp_filter *prepared = NULL;
long ret = -EINVAL; long ret = -EINVAL;
if (current->seccomp.mode && /* Validate flags. */
current->seccomp.mode != seccomp_mode) if (flags & ~SECCOMP_FILTER_FLAG_MASK)
return -EINVAL;
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
return PTR_ERR(prepared);
/*
* Make sure we cannot change seccomp or nnp state via TSYNC
* while another thread is in the middle of calling exec.
*/
if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
mutex_lock_killable(&current->signal->cred_guard_mutex))
goto out_free;
spin_lock_irq(&current->sighand->siglock);
if (!seccomp_may_assign_mode(seccomp_mode))
goto out;
ret = seccomp_attach_filter(flags, prepared);
if (ret)
goto out; goto out;
/* Do not free the successfully attached filter. */
prepared = NULL;
seccomp_assign_mode(current, seccomp_mode);
out:
spin_unlock_irq(&current->sighand->siglock);
if (flags & SECCOMP_FILTER_FLAG_TSYNC)
mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
seccomp_filter_free(prepared);
return ret;
}
#else
/*
 * Fallback used when CONFIG_SECCOMP_FILTER is not enabled: filter mode
 * cannot be installed, so every request is rejected with -EINVAL and
 * both arguments are ignored.
 */
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
#endif
/*
 * Shared dispatcher reached from both the prctl() path and the seccomp()
 * syscall.
 *
 * @op:    SECCOMP_SET_MODE_STRICT or SECCOMP_SET_MODE_FILTER
 * @flags: operation flags; must be zero for strict mode
 * @uargs: user pointer argument; must be NULL for strict mode
 *
 * Returns the result of the selected mode setter, or -EINVAL when @op is
 * unknown or strict mode is requested with non-zero @flags / non-NULL @uargs.
 */
static long do_seccomp(unsigned int op, unsigned int flags,
		       const char __user *uargs)
{
	if (op == SECCOMP_SET_MODE_FILTER)
		return seccomp_set_mode_filter(flags, uargs);

	if (op != SECCOMP_SET_MODE_STRICT)
		return -EINVAL;

	/* Strict mode accepts neither flags nor an argument pointer. */
	if (flags || uargs)
		return -EINVAL;

	return seccomp_set_mode_strict();
}
/*
 * seccomp() system call: passes its three arguments unchanged to
 * do_seccomp() and returns that function's result.
 */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		const char __user *, uargs)
{
	long ret;

	ret = do_seccomp(op, flags, uargs);
	return ret;
}
/**
* prctl_set_seccomp: configures current->seccomp.mode
* @seccomp_mode: requested mode to use
* @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
*
* Returns 0 on success or -EINVAL on failure.
*/
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
{
unsigned int op;
char __user *uargs;
switch (seccomp_mode) { switch (seccomp_mode) {
case SECCOMP_MODE_STRICT: case SECCOMP_MODE_STRICT:
ret = 0; op = SECCOMP_SET_MODE_STRICT;
#ifdef TIF_NOTSC /*
disable_TSC(); * Setting strict mode through prctl always ignored filter,
#endif * so make sure it is always NULL here to pass the internal
* check in do_seccomp().
*/
uargs = NULL;
break; break;
#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER: case SECCOMP_MODE_FILTER:
ret = seccomp_attach_user_filter(filter); op = SECCOMP_SET_MODE_FILTER;
if (ret) uargs = filter;
goto out;
break; break;
#endif
default: default:
goto out; return -EINVAL;
} }
current->seccomp.mode = seccomp_mode; /* prctl interface doesn't have flags, so they are always zero. */
set_thread_flag(TIF_SECCOMP); return do_seccomp(op, 0, uargs);
out:
return ret;
} }
...@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, ...@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 != 1 || arg3 || arg4 || arg5) if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
current->no_new_privs = 1; task_set_no_new_privs(current);
break; break;
case PR_GET_NO_NEW_PRIVS: case PR_GET_NO_NEW_PRIVS:
if (arg2 || arg3 || arg4 || arg5) if (arg2 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
return current->no_new_privs ? 1 : 0; return task_no_new_privs(current) ? 1 : 0;
case PR_GET_THP_DISABLE: case PR_GET_THP_DISABLE:
if (arg2 || arg3 || arg4 || arg5) if (arg2 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
......
...@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at); ...@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */ /* compare kernel pointers */
cond_syscall(sys_kcmp); cond_syscall(sys_kcmp);
/* operate on Secure Computing state */
cond_syscall(sys_seccomp);
...@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) ...@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
* There is no exception for unconfined as change_hat is not * There is no exception for unconfined as change_hat is not
* available. * available.
*/ */
if (current->no_new_privs) if (task_no_new_privs(current))
return -EPERM; return -EPERM;
/* released below */ /* released below */
...@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, ...@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
* no_new_privs is set because this always results in a reduction * no_new_privs is set because this always results in a reduction
* of permissions. * of permissions.
*/ */
if (current->no_new_privs && !unconfined(profile)) { if (task_no_new_privs(current) && !unconfined(profile)) {
put_cred(cred); put_cred(cred);
return -EPERM; return -EPERM;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment