Commit 0ae71c77 authored by Rodrigo Campos's avatar Rodrigo Campos Committed by Kees Cook

seccomp: Support atomic "addfd + send reply"

Alban Crequy reported a race condition userspace faces when we want to
add some fds and make the syscall return them[1] using seccomp notify.

The problem is that currently two different ioctl() calls are needed by
the process handling the syscalls (agent) for another userspace process
(target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and
SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible
for the agent to do the first ioctl to add a file descriptor but the
target is interrupted (EINTR) before the agent does the second ioctl()
call.

This patch adds a flag to the ADDFD ioctl() so it adds the fd and
returns that value atomically to the target program, as suggested by
Kees Cook[2]. This is done by simply allowing
seccomp_do_user_notification() to add the fd and return it in this case.
Therefore, in this case the target wakes up from the wait in
seccomp_do_user_notification() either to interrupt the syscall or to add
the fd and return it.

This "allocate an fd and return" functionality is useful for syscalls
that return a file descriptor only, like connect(2). Other syscalls that
return a file descriptor but not as return value (or return more than
one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not
work with this flag.

This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and
SECCOMP_IOCTL_NOTIF_SEND into an atomic opteration. The notification's
return value, nor error can be set by the user. Upon successful invocation
of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND
flag, the notifying process's errno will be 0, and the return value will
be the file descriptor number that was installed.

[1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/
[2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/Signed-off-by: default avatarRodrigo Campos <rodrigo@kinvolk.io>
Signed-off-by: default avatarSargun Dhillon <sargun@sargun.me>
Acked-by: default avatarTycho Andersen <tycho@tycho.pizza>
Acked-by: default avatarChristian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: default avatarKees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me
parent ddc47391
...@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be ...@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
be the same ``id`` as in ``struct seccomp_notif``. be the same ``id`` as in ``struct seccomp_notif``.
Userspace can also add file descriptors to the notifying process via
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
``struct seccomp_notif_addfd`` should be the same ``id`` as in
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
like O_EXEC on the file descriptor in the notifying process. If the supervisor
wants to inject the file descriptor with a specific number, the
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
the specific number to use. If that file descriptor is already open in the
notifying process it will be replaced. The supervisor can also add an FD, and
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
value will be the injected file descriptor number.
It is worth noting that ``struct seccomp_data`` contains the values of register It is worth noting that ``struct seccomp_data`` contains the values of register
arguments to the syscall, but does not contain pointers to memory. The task's arguments to the syscall, but does not contain pointers to memory. The task's
memory is accessible to suitably privileged traces via ``ptrace()`` or memory is accessible to suitably privileged traces via ``ptrace()`` or
......
...@@ -115,6 +115,7 @@ struct seccomp_notif_resp { ...@@ -115,6 +115,7 @@ struct seccomp_notif_resp {
/* valid flags for seccomp_notif_addfd */ /* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
/** /**
* struct seccomp_notif_addfd * struct seccomp_notif_addfd
......
...@@ -107,6 +107,7 @@ struct seccomp_knotif { ...@@ -107,6 +107,7 @@ struct seccomp_knotif {
* installing process should allocate the fd as normal. * installing process should allocate the fd as normal.
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed. * is allowed.
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
* @ret: The return value of the installing process. It is set to the fd num * @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0). * upon success (>= 0).
* @completion: Indicates that the installing process has completed fd * @completion: Indicates that the installing process has completed fd
...@@ -118,6 +119,7 @@ struct seccomp_kaddfd { ...@@ -118,6 +119,7 @@ struct seccomp_kaddfd {
struct file *file; struct file *file;
int fd; int fd;
unsigned int flags; unsigned int flags;
__u32 ioctl_flags;
union { union {
bool setfd; bool setfd;
...@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) ...@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++; return filter->notif->next_id++;
} }
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{ {
int fd;
/* /*
* Remove the notification, and reset the list pointers, indicating * Remove the notification, and reset the list pointers, indicating
* that it has been handled. * that it has been handled.
*/ */
list_del_init(&addfd->list); list_del_init(&addfd->list);
if (!addfd->setfd) if (!addfd->setfd)
addfd->ret = receive_fd(addfd->file, addfd->flags); fd = receive_fd(addfd->file, addfd->flags);
else else
addfd->ret = receive_fd_replace(addfd->fd, addfd->file, fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
addfd->flags); addfd->ret = fd;
if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
/* If we fail reset and return an error to the notifier */
if (fd < 0) {
n->state = SECCOMP_NOTIFY_SENT;
} else {
/* Return the FD we just added */
n->flags = 0;
n->error = 0;
n->val = fd;
}
}
/*
* Mark the notification as completed. From this point, addfd mem
* might be invalidated and we can't safely read it anymore.
*/
complete(&addfd->completion); complete(&addfd->completion);
} }
...@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall, ...@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
struct seccomp_kaddfd, list); struct seccomp_kaddfd, list);
/* Check if we were woken up by a addfd message */ /* Check if we were woken up by a addfd message */
if (addfd) if (addfd)
seccomp_handle_addfd(addfd); seccomp_handle_addfd(addfd, &n);
} while (n.state != SECCOMP_NOTIFY_REPLIED); } while (n.state != SECCOMP_NOTIFY_REPLIED);
...@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, ...@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (addfd.newfd_flags & ~O_CLOEXEC) if (addfd.newfd_flags & ~O_CLOEXEC)
return -EINVAL; return -EINVAL;
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD) if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
return -EINVAL; return -EINVAL;
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD)) if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
...@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, ...@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (!kaddfd.file) if (!kaddfd.file)
return -EBADF; return -EBADF;
kaddfd.ioctl_flags = addfd.flags;
kaddfd.flags = addfd.newfd_flags; kaddfd.flags = addfd.newfd_flags;
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD; kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
kaddfd.fd = addfd.newfd; kaddfd.fd = addfd.newfd;
...@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, ...@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
goto out_unlock; goto out_unlock;
} }
if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
/*
* Disallow queuing an atomic addfd + send reply while there are
* some addfd requests still to process.
*
* There is no clear reason to support it and allows us to keep
* the loop on the other side straight-forward.
*/
if (!list_empty(&knotif->addfd)) {
ret = -EBUSY;
goto out_unlock;
}
/* Allow exactly only one reply */
knotif->state = SECCOMP_NOTIFY_REPLIED;
}
list_add(&kaddfd.list, &knotif->addfd); list_add(&kaddfd.list, &knotif->addfd);
complete(&knotif->ready); complete(&knotif->ready);
mutex_unlock(&filter->notify_lock); mutex_unlock(&filter->notify_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment