Commit c0a3a64e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'seccomp-v4.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:
 "Major additions:

   - sysctl and seccomp operation to discover available actions
     (tyhicks)

   - new per-filter configurable logging infrastructure and sysctl
     (tyhicks)

   - SECCOMP_RET_LOG to log allowed syscalls (tyhicks)

   - SECCOMP_RET_KILL_PROCESS as the new strictest possible action

   - self-tests for new behaviors"

[ This is the seccomp part of the security pull request during the merge
  window that was nixed due to unrelated problems   - Linus ]

* tag 'seccomp-v4.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  samples: Unrename SECCOMP_RET_KILL
  selftests/seccomp: Test thread vs process killing
  seccomp: Implement SECCOMP_RET_KILL_PROCESS action
  seccomp: Introduce SECCOMP_RET_KILL_PROCESS
  seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD
  seccomp: Action to log before allowing
  seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW
  seccomp: Selftest for detection of filter flag support
  seccomp: Sysctl to configure actions that are allowed to be logged
  seccomp: Operation for checking if an action is available
  seccomp: Sysctl to display available actions
  seccomp: Provide matching filter for introspection
  selftests/seccomp: Refactor RET_ERRNO tests
  selftests/seccomp: Add simple seccomp overhead benchmark
  selftests/seccomp: Add tests for basic ptrace actions
parents 69c902f5 6849243b
......@@ -337,7 +337,7 @@ Examples for low-level BPF:
jeq #14, good /* __NR_rt_sigprocmask */
jeq #13, good /* __NR_rt_sigaction */
jeq #35, good /* __NR_nanosleep */
bad: ret #0 /* SECCOMP_RET_KILL */
bad: ret #0 /* SECCOMP_RET_KILL_THREAD */
good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */
The above example code can be placed into a file (here called "foo"), and
......
......@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
- reboot-cmd [ SPARC only ]
- rtsig-max
- rtsig-nr
- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst
- sem
- sem_next_id [ sysv ipc ]
- sg-big-buff [ generic SCSI device (sg) ]
......
......@@ -87,11 +87,16 @@ Return values
A seccomp filter may return any of the following values. If multiple
filters exist, the return value for the evaluation of a given system
call will always use the highest precedent value. (For example,
``SECCOMP_RET_KILL`` will always take precedence.)
``SECCOMP_RET_KILL_PROCESS`` will always take precedence.)
In precedence order, they are:
``SECCOMP_RET_KILL``:
``SECCOMP_RET_KILL_PROCESS``:
Results in the entire process exiting immediately without executing
the system call. The exit status of the task (``status & 0x7f``)
will be ``SIGSYS``, not ``SIGKILL``.
``SECCOMP_RET_KILL_THREAD``:
Results in the task exiting immediately without executing the
system call. The exit status of the task (``status & 0x7f``) will
be ``SIGSYS``, not ``SIGKILL``.
......@@ -141,6 +146,15 @@ In precedence order, they are:
allow use of ptrace, even of other sandboxed processes, without
extreme care; ptracers can use this mechanism to escape.)
``SECCOMP_RET_LOG``:
Results in the system call being executed after it is logged. This
should be used by application developers to learn which syscalls their
application needs without having to iterate through multiple test and
development cycles to build the list.
This action will only be logged if "log" is present in the
actions_logged sysctl string.
``SECCOMP_RET_ALLOW``:
Results in the system call being executed.
......@@ -169,7 +183,41 @@ The ``samples/seccomp/`` directory contains both an x86-specific example
and a more generic example of a higher level macro interface for BPF
program generation.
Sysctls
=======
Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/``
directory. Here's a description of each file in that directory:
``actions_avail``:
A read-only ordered list of seccomp return values (refer to the
``SECCOMP_RET_*`` macros above) in string form. The ordering, from
left-to-right, is the least permissive return value to the most
permissive return value.
The list represents the set of seccomp return values supported
by the kernel. A userspace program may use this list to
determine if the actions found in the ``seccomp.h``, when the
program was built, differs from the set of actions actually
supported in the current running kernel.
``actions_logged``:
A read-write ordered list of seccomp return values (refer to the
``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes
to the file do not need to be in ordered form but reads from the file
will be ordered in the same way as the actions_avail sysctl.
It is important to note that the value of ``actions_logged`` does not
prevent certain actions from being logged when the audit subsystem is
configured to audit a task. If the action is not found in
``actions_logged`` list, the final decision on whether to audit the
action for that task is ultimately left up to the audit subsystem to
decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
The ``allow`` string is not accepted in the ``actions_logged`` sysctl
as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
to write ``allow`` to the sysctl will result in an EINVAL being
returned.
Adding architecture support
===========================
......
......@@ -314,11 +314,7 @@ void audit_core_dumps(long signr);
static inline void audit_seccomp(unsigned long syscall, long signr, int code)
{
if (!audit_enabled)
return;
/* Force a record to be reported if a signal was delivered. */
if (signr || unlikely(!audit_dummy_context()))
if (audit_enabled && unlikely(!audit_dummy_context()))
__audit_seccomp(syscall, signr, code);
}
......
......@@ -3,7 +3,8 @@
#include <uapi/linux/seccomp.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_LOG)
#ifdef CONFIG_SECCOMP
......
......@@ -11,27 +11,34 @@
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
#define SECCOMP_GET_ACTION_AVAIL 2
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
#define SECCOMP_FILTER_FLAG_LOG 2
/*
* All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data.
* The upper 16-bits are ordered from least permissive values to most.
* The upper 16-bits are ordered from least permissive values to most,
* as a signed value (so 0x8000000 is negative).
*
* The ordering ensures that a min_t() over composed return values always
* selects the least permissive choice.
*/
#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
/* Masks for the return value sections. */
#define SECCOMP_RET_ACTION_FULL 0xffff0000U
#define SECCOMP_RET_ACTION 0x7fff0000U
#define SECCOMP_RET_DATA 0x0000ffffU
......
This diff is collapsed.
TEST_GEN_PROGS := seccomp_bpf
CFLAGS += -Wl,-no-as-needed -Wall
LDFLAGS += -lpthread
all:
include ../lib.mk
$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
.PHONY: all clean
BINARIES := seccomp_bpf seccomp_benchmark
CFLAGS += -Wl,-no-as-needed -Wall
seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
$(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
TEST_PROGS += $(BINARIES)
EXTRA_CLEAN := $(BINARIES)
all: $(BINARIES)
/*
* Strictly speaking, this is not a test. But it can report during test
* runs so relative performace can be measured.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
pid_t pid, ret;
unsigned long long i;
struct timespec start, finish;
pid = getpid();
assert(clock_gettime(clk_id, &start) == 0);
for (i = 0; i < samples; i++) {
ret = syscall(__NR_getpid);
assert(pid == ret);
}
assert(clock_gettime(clk_id, &finish) == 0);
i = finish.tv_sec - start.tv_sec;
i *= 1000000000;
i += finish.tv_nsec - start.tv_nsec;
printf("%lu.%09lu - %lu.%09lu = %llu\n",
finish.tv_sec, finish.tv_nsec,
start.tv_sec, start.tv_nsec,
i);
return i;
}
unsigned long long calibrate(void)
{
unsigned long long i;
printf("Calibrating reasonable sample size...\n");
for (i = 5; ; i++) {
unsigned long long samples = 1 << i;
/* Find something that takes more than 5 seconds to run. */
if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
return samples;
}
}
int main(int argc, char *argv[])
{
struct sock_filter filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = (unsigned short)ARRAY_SIZE(filter),
.filter = filter,
};
long ret;
unsigned long long samples;
unsigned long long native, filtered;
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
else
samples = calibrate();
printf("Benchmarking %llu samples...\n", samples);
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid RET_ALLOW: %llu ns\n", filtered);
printf("Estimated seccomp overhead per syscall: %llu ns\n",
filtered - native);
if (filtered == native)
printf("Trying running again with more samples.\n");
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment