Commit 15bbeec0 authored by Linus Torvalds

Merge tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core entry/ptrace update from Thomas Gleixner:
 "Provide a ptrace set/get interface for syscall user dispatch. The main
  purpose is to enable checkpoint/restore (CRIU) to handle processes
  which utilize syscall user dispatch correctly"

* tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  selftest, ptrace: Add selftest for syscall user dispatch config api
  ptrace: Provide set/get interface for syscall user dispatch
  syscall_user_dispatch: Untag selector address before access_ok()
  syscall_user_dispatch: Split up set_syscall_user_dispatch()
parents 29e95a4b 8c8fa605
...@@ -73,6 +73,10 @@ thread-wide, without the need to invoke the kernel directly. selector ...@@ -73,6 +73,10 @@ thread-wide, without the need to invoke the kernel directly. selector
can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK. can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
Any other value should terminate the program with a SIGSYS. Any other value should terminate the program with a SIGSYS.
Additionally, a task's syscall user dispatch configuration can be peeked
and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
requests. This is useful for checkpoint/restart software.
Security Notes Security Notes
-------------- --------------
......
...@@ -22,6 +22,12 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, ...@@ -22,6 +22,12 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
#define clear_syscall_work_syscall_user_dispatch(tsk) \ #define clear_syscall_work_syscall_user_dispatch(tsk) \
clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH) clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
/*
 * syscall_user_dispatch_get_config - report a task's syscall user dispatch
 * configuration to user space.
 * @task: task whose configuration is read
 * @size: size of the user buffer; must equal sizeof(struct ptrace_sud_config)
 * @data: user buffer that receives a struct ptrace_sud_config
 *
 * Returns 0 on success, -EINVAL if @size does not match, or -EFAULT if the
 * copy to @data fails.
 */
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
void __user *data);
/*
 * syscall_user_dispatch_set_config - apply a user-supplied syscall user
 * dispatch configuration to a task.
 * @task: task whose configuration is written
 * @size: size of the user buffer; must equal sizeof(struct ptrace_sud_config)
 * @data: user buffer holding a struct ptrace_sud_config
 *
 * Returns -EINVAL if @size does not match, -EFAULT if the copy from @data
 * fails, otherwise the result of applying mode/offset/len/selector to @task.
 */
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
void __user *data);
#else #else
struct syscall_user_dispatch {}; struct syscall_user_dispatch {};
...@@ -35,6 +41,18 @@ static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct * ...@@ -35,6 +41,18 @@ static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *
{ {
} }
/*
 * Stub used when CONFIG_GENERIC_ENTRY is not set: struct syscall_user_dispatch
 * is empty in that configuration, so there is nothing to report and the
 * request is rejected.
 */
static inline int syscall_user_dispatch_get_config(struct task_struct *task,
						   unsigned long size,
						   void __user *data)
{
	return -EINVAL;
}
/*
 * Stub used when CONFIG_GENERIC_ENTRY is not set: struct syscall_user_dispatch
 * is empty in that configuration, so there is nothing to configure and the
 * request is rejected.
 */
static inline int syscall_user_dispatch_set_config(struct task_struct *task,
						   unsigned long size,
						   void __user *data)
{
	return -EINVAL;
}
#endif /* CONFIG_GENERIC_ENTRY */ #endif /* CONFIG_GENERIC_ENTRY */
#endif /* _SYSCALL_USER_DISPATCH_H */ #endif /* _SYSCALL_USER_DISPATCH_H */
...@@ -112,6 +112,36 @@ struct ptrace_rseq_configuration { ...@@ -112,6 +112,36 @@ struct ptrace_rseq_configuration {
__u32 pad; __u32 pad;
}; };
#define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG 0x4210
#define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG 0x4211
/*
 * struct ptrace_sud_config - Per-task configuration for Syscall User Dispatch
 * @mode: One of PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF
 * @selector: Tracee's user virtual address of the SUD selector
 * @offset: SUD exclusion area (virtual address)
 * @len: Length of SUD exclusion area
 *
 * Used to get/set the syscall user dispatch configuration for a tracee.
 * Selector is optional (may be NULL), and if invalid will produce
 * a SIGSEGV in the tracee upon first access.
 *
 * If mode is PR_SYS_DISPATCH_ON, syscall dispatch will be enabled. If
 * PR_SYS_DISPATCH_OFF, syscall dispatch will be disabled and all other
 * parameters must be 0. The value in *selector (if not NULL) also determines
 * whether syscall dispatch will occur.
 *
 * The Syscall User Dispatch Exclusion area described by offset/len is the
 * virtual address space from which syscalls will not produce a user
 * dispatch.
 *
 * All members are fixed-width 64-bit values, so the layout does not depend
 * on the word size of the tracer.
 */
struct ptrace_sud_config {
__u64 mode;
__u64 selector;
__u64 offset;
__u64 len;
};
/* /*
* These values are stored in task->ptrace_message * These values are stored in task->ptrace_message
* by ptrace_stop to describe the current syscall-stop. * by ptrace_stop to describe the current syscall-stop.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
*/ */
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/prctl.h> #include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/syscall_user_dispatch.h> #include <linux/syscall_user_dispatch.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/signal.h> #include <linux/signal.h>
...@@ -68,8 +69,9 @@ bool syscall_user_dispatch(struct pt_regs *regs) ...@@ -68,8 +69,9 @@ bool syscall_user_dispatch(struct pt_regs *regs)
return true; return true;
} }
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned long mode,
unsigned long len, char __user *selector) unsigned long offset, unsigned long len,
char __user *selector)
{ {
switch (mode) { switch (mode) {
case PR_SYS_DISPATCH_OFF: case PR_SYS_DISPATCH_OFF:
...@@ -86,7 +88,16 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, ...@@ -86,7 +88,16 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
if (offset && offset + len <= offset) if (offset && offset + len <= offset)
return -EINVAL; return -EINVAL;
if (selector && !access_ok(selector, sizeof(*selector))) /*
* access_ok() will clear memory tags for tagged addresses
* if current has memory tagging enabled.
* To enable a tracer to set a tracee's selector, the
* selector address must be untagged for access_ok();
* otherwise an untagged tracer will always fail to set a
* tagged tracee's selector.
*/
if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
return -EFAULT; return -EFAULT;
break; break;
...@@ -94,15 +105,60 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, ...@@ -94,15 +105,60 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
return -EINVAL; return -EINVAL;
} }
current->syscall_dispatch.selector = selector; task->syscall_dispatch.selector = selector;
current->syscall_dispatch.offset = offset; task->syscall_dispatch.offset = offset;
current->syscall_dispatch.len = len; task->syscall_dispatch.len = len;
current->syscall_dispatch.on_dispatch = false; task->syscall_dispatch.on_dispatch = false;
if (mode == PR_SYS_DISPATCH_ON) if (mode == PR_SYS_DISPATCH_ON)
set_syscall_work(SYSCALL_USER_DISPATCH); set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
else
clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);
return 0;
}
/*
 * Apply a syscall user dispatch configuration to the calling task.
 *
 * Thin wrapper that forwards to task_set_syscall_user_dispatch() with
 * 'current' as the target and returns its result unchanged.
 */
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
			      unsigned long len, char __user *selector)
{
	struct task_struct *self = current;

	return task_set_syscall_user_dispatch(self, mode, offset, len, selector);
}
/*
 * syscall_user_dispatch_get_config - copy a task's syscall user dispatch
 * configuration to user space.
 * @task: task whose configuration is read
 * @size: size of the user buffer; must equal sizeof(struct ptrace_sud_config)
 * @data: user buffer that receives the configuration
 *
 * This is a pure getter: it must not modify @task's (or current's) dispatch
 * state.
 *
 * Returns 0 on success, -EINVAL on a size mismatch, -EFAULT if the copy to
 * user space fails.
 */
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
				     void __user *data)
{
	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
	struct ptrace_sud_config cfg;

	if (size != sizeof(cfg))
		return -EINVAL;

	/* The mode is not stored in sd; it is derived from the syscall work flag. */
	if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
		cfg.mode = PR_SYS_DISPATCH_ON;
	else
		cfg.mode = PR_SYS_DISPATCH_OFF;

	/*
	 * Every member of cfg is assigned and the struct consists of four
	 * __u64 fields, so no uninitialised stack bytes are copied out.
	 */
	cfg.offset = sd->offset;
	cfg.len = sd->len;
	cfg.selector = (__u64)(uintptr_t)sd->selector;

	if (copy_to_user(data, &cfg, sizeof(cfg)))
		return -EFAULT;

	return 0;
}
/*
 * syscall_user_dispatch_set_config - apply a user-supplied syscall user
 * dispatch configuration to a task.
 * @task: task whose configuration is written
 * @size: size of the user buffer; must equal sizeof(struct ptrace_sud_config)
 * @data: user buffer holding the new configuration
 *
 * Returns -EINVAL on a size mismatch, -EFAULT if the copy from user space
 * fails, otherwise whatever task_set_syscall_user_dispatch() returns.
 */
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
				     void __user *data)
{
	struct ptrace_sud_config config;
	char __user *selector;

	if (size != sizeof(config))
		return -EINVAL;

	if (copy_from_user(&config, data, sizeof(config)))
		return -EFAULT;

	/* The selector travels as a __u64; convert it back to a user pointer. */
	selector = (char __user *)(uintptr_t)config.selector;

	return task_set_syscall_user_dispatch(task, config.mode, config.offset,
					      config.len, selector);
}
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/minmax.h> #include <linux/minmax.h>
#include <linux/syscall_user_dispatch.h>
#include <asm/syscall.h> /* for syscall_get_* */ #include <asm/syscall.h> /* for syscall_get_* */
...@@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request, ...@@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request,
break; break;
#endif #endif
case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
ret = syscall_user_dispatch_set_config(child, addr, datavp);
break;
case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
ret = syscall_user_dispatch_get_config(child, addr, datavp);
break;
default: default:
break; break;
} }
......
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
get_syscall_info get_syscall_info
get_set_sud
peeksiginfo peeksiginfo
vmaccess vmaccess
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
include ../lib.mk include ../lib.mk
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include "../kselftest_harness.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include "linux/ptrace.h"
/*
 * Issue the ptrace system call directly through syscall(2) and hand back
 * its result narrowed to int.
 */
static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
{
	long rc = syscall(SYS_ptrace, request, pid, addr, data);

	return rc;
}
/*
 * Check that a tracer can read and write a stopped tracee's syscall user
 * dispatch configuration:
 *  1. a fresh child reports PR_SYS_DISPATCH_OFF with all other fields zero,
 *  2. a configuration written with PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG
 *     is read back unchanged with PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG.
 *
 * For both requests the 'addr' argument carries the size of the config
 * struct and 'data' points at the struct itself.
 */
TEST(get_set_sud)
{
	struct ptrace_sud_config config;
	pid_t child;
	int ret = 0;
	int status;

	child = fork();
	ASSERT_GE(child, 0);
	if (child == 0) {
		ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
			TH_LOG("PTRACE_TRACEME: %m");
		}
		kill(getpid(), SIGSTOP);
		_exit(1);
	}

	/*
	 * Make sure the child really stopped before poking at it; otherwise
	 * the ptrace requests below fail for reasons unrelated to the API
	 * under test.
	 */
	ASSERT_EQ(child, waitpid(child, &status, 0));
	ASSERT_TRUE(WIFSTOPPED(status));

	/* Poison the buffer so that stale values cannot satisfy the checks. */
	memset(&config, 0xff, sizeof(config));
	config.mode = PR_SYS_DISPATCH_ON;

	ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
			 (void *)sizeof(config), &config);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(config.mode, PR_SYS_DISPATCH_OFF);
	ASSERT_EQ(config.selector, 0);
	ASSERT_EQ(config.offset, 0);
	ASSERT_EQ(config.len, 0);

	config.mode = PR_SYS_DISPATCH_ON;
	config.selector = 0;
	config.offset = 0x400000;
	config.len = 0x1000;

	ret = sys_ptrace(PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG, child,
			 (void *)sizeof(config), &config);
	ASSERT_EQ(ret, 0);

	/* Overwrite the buffer again so the read-back is a real round trip. */
	memset(&config, 1, sizeof(config));
	ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
			 (void *)sizeof(config), &config);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(config.mode, PR_SYS_DISPATCH_ON);
	ASSERT_EQ(config.selector, 0);
	ASSERT_EQ(config.offset, 0x400000);
	ASSERT_EQ(config.len, 0x1000);

	kill(child, SIGKILL);
	/* Reap the child so the test does not leave a zombie behind. */
	ASSERT_EQ(child, waitpid(child, &status, 0));
}
TEST_HARNESS_MAIN
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment