Commit 3033f14a authored by Josh Triplett's avatar Josh Triplett Committed by Linus Torvalds
Browse files

clone: support passing tls argument via C rather than pt_regs magic

clone has some of the quirkiest syscall handling in the kernel, with a
pile of special cases, historical curiosities, and architecture-specific
calling conventions.  In particular, clone with CLONE_SETTLS accepts a
parameter "tls" that the C entry point completely ignores and some
assembly entry points overwrite; instead, the low-level arch-specific
code pulls the tls parameter out of the arch-specific register captured
as part of pt_regs on entry to the kernel.  That's a massive hack, and
it makes the arch-specific code only work when called via the specific
existing syscall entry points; because of this hack, any new clone-like
system call would have to accept an identical tls argument in exactly
the same arch-specific position, rather than providing a unified system
call entry point across architectures.

The first patch allows architectures to handle the tls argument via
normal C parameter passing, if they opt in by selecting
HAVE_COP...
parent 8c7fbe57
......@@ -499,6 +499,13 @@ config ARCH_HAS_ELF_RANDOMIZE
- arch_mmap_rnd()
- arch_randomize_brk()
config HAVE_COPY_THREAD_TLS
bool
help
Architecture provides copy_thread_tls to accept tls argument via
normal C parameter passing, rather than extracting the syscall
argument from pt_regs.
#
# ABI hall of shame
#
......
......@@ -202,7 +202,7 @@ COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls);
COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
......
......@@ -220,7 +220,7 @@ static unsigned long lookup_addr(char *arg)
else if (!strcmp(arg, "sys_open"))
addr = (unsigned long)do_sys_open;
else if (!strcmp(arg, "do_fork"))
addr = (unsigned long)do_fork;
addr = (unsigned long)_do_fork;
else if (!strcmp(arg, "hw_break_val"))
addr = (unsigned long)&hw_break_val;
addr = (unsigned long) dereference_function_descriptor((void *)addr);
......
......@@ -2556,8 +2556,22 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
struct task_struct *, unsigned long);
#else
extern int copy_thread(unsigned long, unsigned long, unsigned long,
struct task_struct *);
/* Architectures that haven't opted into copy_thread_tls get the tls argument
* via pt_regs, so ignore the tls argument passed via C. */
static inline int copy_thread_tls(
unsigned long clone_flags, unsigned long sp, unsigned long arg,
struct task_struct *p, unsigned long tls)
{
return copy_thread(clone_flags, sp, arg, p);
}
#endif
extern void flush_thread(void);
extern void exit_thread(void);
......@@ -2576,6 +2590,7 @@ extern int do_execveat(int, struct filename *,
const char __user * const __user *,
const char __user * const __user *,
int);
extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
......
......@@ -827,15 +827,15 @@ asmlinkage long sys_syncfs(int fd);
asmlinkage long sys_fork(void);
asmlinkage long sys_vfork(void);
#ifdef CONFIG_CLONE_BACKWARDS
asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long,
int __user *);
#else
#ifdef CONFIG_CLONE_BACKWARDS3
asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
int __user *, int);
int __user *, unsigned long);
#else
asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
int __user *, int);
int __user *, unsigned long);
#endif
#endif
......
......@@ -1238,7 +1238,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace)
int trace,
unsigned long tls)
{
int retval;
struct task_struct *p;
......@@ -1447,7 +1448,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
retval = copy_thread(clone_flags, stack_start, stack_size, p);
retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
if (retval)
goto bad_fork_cleanup_io;
......@@ -1659,7 +1660,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
init_idle(task, cpu);
......@@ -1674,11 +1675,12 @@ struct task_struct *fork_idle(int cpu)
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
long do_fork(unsigned long clone_flags,
long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
int __user *child_tidptr,
unsigned long tls)
{
struct task_struct *p;
int trace = 0;
......@@ -1703,7 +1705,7 @@ long do_fork(unsigned long clone_flags,
}
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace);
child_tidptr, NULL, trace, tls);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
......@@ -1744,20 +1746,34 @@ long do_fork(unsigned long clone_flags,
return nr;
}
#ifndef CONFIG_HAVE_COPY_THREAD_TLS
/* For compatibility with architectures that call do_fork directly rather than
* using the syscall entry points below. */
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
return _do_fork(clone_flags, stack_start, stack_size,
parent_tidptr, child_tidptr, 0);
}
#endif
/*
* Create a kernel thread.
*/
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL);
return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL, 0);
}
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
return do_fork(SIGCHLD, 0, 0, NULL, NULL);
return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
#else
/* can not support in nommu mode */
return -EINVAL;
......@@ -1768,8 +1784,8 @@ SYSCALL_DEFINE0(fork)
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
0, NULL, NULL);
return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
0, NULL, NULL, 0);
}
#endif
......@@ -1777,27 +1793,27 @@ SYSCALL_DEFINE0(vfork)
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int, tls_val,
unsigned long, tls,
int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
int, stack_size,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
unsigned long, tls)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
unsigned long, tls)
#endif
{
return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
}
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment