Commit ee3e3ac0 authored by Mathieu Desnoyers, committed by Peter Zijlstra

rseq: Introduce extensible rseq ABI

Introduce the extensible rseq ABI, where the feature size supported by
the kernel and the required alignment are communicated to user-space
through ELF auxiliary vectors.

This allows user-space to call rseq registration with a rseq_len of
either 32 bytes for the original struct rseq size (which includes
padding), or larger.

If rseq_len is larger than 32 bytes, then it must be large enough to
contain the feature size communicated to user-space through ELF
auxiliary vectors.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221122203932.231377-4-mathieu.desnoyers@efficios.com
parent 317c8194
...@@ -1302,6 +1302,7 @@ struct task_struct { ...@@ -1302,6 +1302,7 @@ struct task_struct {
#ifdef CONFIG_RSEQ #ifdef CONFIG_RSEQ
struct rseq __user *rseq; struct rseq __user *rseq;
u32 rseq_len;
u32 rseq_sig; u32 rseq_sig;
/* /*
* RmW on rseq_event_mask must be performed atomically * RmW on rseq_event_mask must be performed atomically
...@@ -2352,10 +2353,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ...@@ -2352,10 +2353,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{ {
if (clone_flags & CLONE_VM) { if (clone_flags & CLONE_VM) {
t->rseq = NULL; t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0; t->rseq_sig = 0;
t->rseq_event_mask = 0; t->rseq_event_mask = 0;
} else { } else {
t->rseq = current->rseq; t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig; t->rseq_sig = current->rseq_sig;
t->rseq_event_mask = current->rseq_event_mask; t->rseq_event_mask = current->rseq_event_mask;
} }
...@@ -2364,6 +2367,7 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ...@@ -2364,6 +2367,7 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
static inline void rseq_execve(struct task_struct *t) static inline void rseq_execve(struct task_struct *t)
{ {
t->rseq = NULL; t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0; t->rseq_sig = 0;
t->rseq_event_mask = 0; t->rseq_event_mask = 0;
} }
......
...@@ -813,7 +813,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, ...@@ -813,7 +813,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
{ {
struct ptrace_rseq_configuration conf = { struct ptrace_rseq_configuration conf = {
.rseq_abi_pointer = (u64)(uintptr_t)task->rseq, .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
.rseq_abi_size = sizeof(*task->rseq), .rseq_abi_size = task->rseq_len,
.signature = task->rseq_sig, .signature = task->rseq_sig,
.flags = 0, .flags = 0,
}; };
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/rseq.h> #include <trace/events/rseq.h>
/* The original rseq structure size (including padding) is 32 bytes. */
#define ORIG_RSEQ_SIZE 32
#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \ #define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
...@@ -87,10 +90,15 @@ static int rseq_update_cpu_id(struct task_struct *t) ...@@ -87,10 +90,15 @@ static int rseq_update_cpu_id(struct task_struct *t)
u32 cpu_id = raw_smp_processor_id(); u32 cpu_id = raw_smp_processor_id();
struct rseq __user *rseq = t->rseq; struct rseq __user *rseq = t->rseq;
if (!user_write_access_begin(rseq, sizeof(*rseq))) if (!user_write_access_begin(rseq, t->rseq_len))
goto efault; goto efault;
unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally updated only if
* t->rseq_len != ORIG_RSEQ_SIZE.
*/
user_write_access_end(); user_write_access_end();
trace_rseq_update(t); trace_rseq_update(t);
return 0; return 0;
...@@ -117,6 +125,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) ...@@ -117,6 +125,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
*/ */
if (put_user(cpu_id, &t->rseq->cpu_id)) if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT; return -EFAULT;
/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally reset only if
* t->rseq_len != ORIG_RSEQ_SIZE.
*/
return 0; return 0;
} }
...@@ -344,7 +357,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, ...@@ -344,7 +357,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
/* Unregister rseq for current thread. */ /* Unregister rseq for current thread. */
if (current->rseq != rseq || !current->rseq) if (current->rseq != rseq || !current->rseq)
return -EINVAL; return -EINVAL;
if (rseq_len != sizeof(*rseq)) if (rseq_len != current->rseq_len)
return -EINVAL; return -EINVAL;
if (current->rseq_sig != sig) if (current->rseq_sig != sig)
return -EPERM; return -EPERM;
...@@ -353,6 +366,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, ...@@ -353,6 +366,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
return ret; return ret;
current->rseq = NULL; current->rseq = NULL;
current->rseq_sig = 0; current->rseq_sig = 0;
current->rseq_len = 0;
return 0; return 0;
} }
...@@ -365,7 +379,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, ...@@ -365,7 +379,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* the provided address differs from the prior * the provided address differs from the prior
* one. * one.
*/ */
if (current->rseq != rseq || rseq_len != sizeof(*rseq)) if (current->rseq != rseq || rseq_len != current->rseq_len)
return -EINVAL; return -EINVAL;
if (current->rseq_sig != sig) if (current->rseq_sig != sig)
return -EPERM; return -EPERM;
...@@ -374,15 +388,24 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, ...@@ -374,15 +388,24 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
} }
/* /*
* If there was no rseq previously registered, * If there was no rseq previously registered, ensure the provided rseq
* ensure the provided rseq is properly aligned and valid. * is properly aligned, as communicated to user-space through the ELF
* auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
* size, the required alignment is the original struct rseq alignment.
*
* In order to be valid, rseq_len is either the original rseq size, or
* large enough to contain all supported fields, as communicated to
* user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
*/ */
if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || if (rseq_len < ORIG_RSEQ_SIZE ||
rseq_len != sizeof(*rseq)) (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
(rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
rseq_len < offsetof(struct rseq, end))))
return -EINVAL; return -EINVAL;
if (!access_ok(rseq, rseq_len)) if (!access_ok(rseq, rseq_len))
return -EFAULT; return -EFAULT;
current->rseq = rseq; current->rseq = rseq;
current->rseq_len = rseq_len;
current->rseq_sig = sig; current->rseq_sig = sig;
/* /*
* If rseq was previously inactive, and has just been * If rseq was previously inactive, and has just been
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment