Commit f9ead18c authored by Jens Axboe

io_uring: split network related opcodes into its own file

While at it, convert the handlers to just use io_eopnotsupp_prep()
if CONFIG_NET isn't set.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e0da14de
...@@ -5,5 +5,5 @@ ...@@ -5,5 +5,5 @@
obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
sync.o advise.o filetable.o \ sync.o advise.o filetable.o \
openclose.o uring_cmd.o epoll.o \ openclose.o uring_cmd.o epoll.o \
statx.o statx.o net.o
obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_IO_WQ) += io-wq.o
...@@ -102,6 +102,7 @@ ...@@ -102,6 +102,7 @@
#include "uring_cmd.h" #include "uring_cmd.h"
#include "epoll.h" #include "epoll.h"
#include "statx.h" #include "statx.h"
#include "net.h"
#define IORING_MAX_ENTRIES 32768 #define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
...@@ -131,8 +132,6 @@ ...@@ -131,8 +132,6 @@
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
IO_REQ_CLEAN_FLAGS) IO_REQ_CLEAN_FLAGS)
#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
#define IO_TCTX_REFS_CACHE_NR (1U << 10) #define IO_TCTX_REFS_CACHE_NR (1U << 10)
struct io_mapped_ubuf { struct io_mapped_ubuf {
...@@ -295,25 +294,6 @@ struct io_timeout_data { ...@@ -295,25 +294,6 @@ struct io_timeout_data {
u32 flags; u32 flags;
}; };
/* Per-request state for an accept request; filled in by io_accept_prep(). */
struct io_accept {
struct file *file;
/* user pointer read from sqe->addr, handed to do_accept() */
struct sockaddr __user *addr;
/* user pointer read from sqe->addr2, handed to do_accept() */
int __user *addr_len;
/* sqe->accept_flags; only SOCK_CLOEXEC and SOCK_NONBLOCK are allowed */
int flags;
/* sqe->file_index; non-zero means install into a fixed file slot */
u32 file_slot;
/* RLIMIT_NOFILE sampled at prep time, used when reserving a normal fd */
unsigned long nofile;
};
/* Per-request state for a socket request; filled in by io_socket_prep(). */
struct io_socket {
struct file *file;
/* read from sqe->fd */
int domain;
/* read from sqe->off; may carry SOCK_* flag bits beyond SOCK_TYPE_MASK */
int type;
/* read from sqe->len */
int protocol;
/* the bits of type outside SOCK_TYPE_MASK (CLOEXEC/NONBLOCK only) */
int flags;
/* sqe->file_index; non-zero means install into a fixed file slot */
u32 file_slot;
/* RLIMIT_NOFILE sampled at prep time, used when reserving a normal fd */
unsigned long nofile;
};
struct io_cancel { struct io_cancel {
struct file *file; struct file *file;
u64 addr; u64 addr;
...@@ -350,25 +330,6 @@ struct io_rw { ...@@ -350,25 +330,6 @@ struct io_rw {
rwf_t flags; rwf_t flags;
}; };
/* Per-request state for a connect request; filled in by io_connect_prep(). */
struct io_connect {
struct file *file;
/* user pointer to the destination address, read from sqe->addr */
struct sockaddr __user *addr;
/* length of that address, read from sqe->addr2 */
int addr_len;
};
/* Per-request state shared by the send, sendmsg, recv and recvmsg handlers. */
struct io_sr_msg {
struct file *file;
/* all three views alias the user pointer read from sqe->addr */
union {
/* used by the compat recvmsg header copy */
struct compat_msghdr __user *umsg_compat;
/* used by sendmsg/recvmsg */
struct user_msghdr __user *umsg;
/* used by send/recv; advanced after a partial transfer */
void __user *buf;
};
/* sqe->msg_flags with MSG_NOSIGNAL (and MSG_CMSG_COMPAT if compat) added */
int msg_flags;
/* sqe->len; send/recv shrink it after a partial transfer */
size_t len;
/* bytes already transferred by earlier attempts of this request */
size_t done_io;
/* sqe->ioprio; only IORING_RECVSEND_POLL_FIRST is accepted */
unsigned int flags;
};
struct io_rsrc_update { struct io_rsrc_update {
struct file *file; struct file *file;
u64 arg; u64 arg;
...@@ -385,30 +346,12 @@ struct io_provide_buf { ...@@ -385,30 +346,12 @@ struct io_provide_buf {
__u16 bid; __u16 bid;
}; };
/* Per-request state for a shutdown request; filled in by io_shutdown_prep(). */
struct io_shutdown {
struct file *file;
/* read from sqe->len and passed to __sys_shutdown_sock() */
int how;
};
struct io_msg { struct io_msg {
struct file *file; struct file *file;
u64 user_data; u64 user_data;
u32 len; u32 len;
}; };
/* Async data for connect: kernel copy of the user-supplied address. */
struct io_async_connect {
/* filled by move_addr_to_kernel() and passed to __sys_connect_file() */
struct sockaddr_storage address;
};
/*
 * Message header state for sendmsg/recvmsg. Lives on the issuer's stack for
 * the first attempt and is copied into req->async_data by
 * io_setup_async_msg() when the request must be retried.
 */
struct io_async_msghdr {
/* inline iovec storage, used when no separate array was allocated */
struct iovec fast_iov[UIO_FASTIOV];
/* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
/* user address pointer obtained while copying the recvmsg header */
struct sockaddr __user *uaddr;
/* kernel message header handed to the socket layer */
struct msghdr msg;
/* backing storage that msg.msg_name is pointed at */
struct sockaddr_storage addr;
};
struct io_rw_state { struct io_rw_state {
struct iov_iter iter; struct iov_iter iter;
struct iov_iter_state iter_state; struct iov_iter_state iter_state;
...@@ -517,9 +460,6 @@ static void io_req_task_queue(struct io_kiocb *req); ...@@ -517,9 +460,6 @@ static void io_req_task_queue(struct io_kiocb *req);
static void __io_submit_flush_completions(struct io_ring_ctx *ctx); static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
static int io_req_prep_async(struct io_kiocb *req); static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx); static void io_eventfd_signal(struct io_ring_ctx *ctx);
static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
...@@ -808,8 +748,7 @@ static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req) ...@@ -808,8 +748,7 @@ static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
return __io_put_kbuf(req, &req->ctx->io_buffers_comp); return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
} }
static inline unsigned int io_put_kbuf(struct io_kiocb *req, inline unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
unsigned issue_flags)
{ {
unsigned int cflags; unsigned int cflags;
...@@ -1291,12 +1230,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) ...@@ -1291,12 +1230,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
spin_unlock_irq(&ctx->timeout_lock); spin_unlock_irq(&ctx->timeout_lock);
} }
/*
 * Publish the locally cached CQ tail to the ring's cq.tail with a release
 * store, so earlier CQE writes are ordered before the tail update.
 */
static inline void io_commit_cqring(struct io_ring_ctx *ctx)
{
/* order cqe stores with ring update */
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}
static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{ {
if (ctx->off_timeout_used || ctx->drain_active) { if (ctx->off_timeout_used || ctx->drain_active) {
...@@ -1418,7 +1351,7 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx) ...@@ -1418,7 +1351,7 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
* 1:1 relationship between how many times this function is called (and * 1:1 relationship between how many times this function is called (and
* hence the eventfd count) and number of CQEs posted to the CQ ring. * hence the eventfd count) and number of CQEs posted to the CQ ring.
*/ */
static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx) void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{ {
if (unlikely(ctx->off_timeout_used || ctx->drain_active || if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
ctx->has_evfd)) ctx->has_evfd))
...@@ -1639,8 +1572,8 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, ...@@ -1639,8 +1572,8 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
} }
} }
static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
s32 res, u32 cflags) u32 cflags)
{ {
struct io_uring_cqe *cqe; struct io_uring_cqe *cqe;
...@@ -2980,7 +2913,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, ...@@ -2980,7 +2913,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
return u64_to_user_ptr(buf->addr); return u64_to_user_ptr(buf->addr);
} }
static void __user *io_buffer_select(struct io_kiocb *req, size_t *len, void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
unsigned int issue_flags) unsigned int issue_flags)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
...@@ -3073,13 +3006,6 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, ...@@ -3073,13 +3006,6 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
return __io_iov_buffer_select(req, iov, issue_flags); return __io_iov_buffer_select(req, iov, issue_flags);
} }
/*
 * Tell whether a buffer still has to be picked for @req: buffer selection
 * was requested (REQ_F_BUFFER_SELECT) and neither REQ_F_BUFFER_SELECTED nor
 * REQ_F_BUFFER_RING is set on the request.
 */
static inline bool io_do_buffer_select(struct io_kiocb *req)
{
	return (req->flags & REQ_F_BUFFER_SELECT) &&
	       !(req->flags & (REQ_F_BUFFER_SELECTED | REQ_F_BUFFER_RING));
}
static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req, static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
struct io_rw_state *s, struct io_rw_state *s,
unsigned int issue_flags) unsigned int issue_flags)
...@@ -4025,755 +3951,6 @@ static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb, ...@@ -4025,755 +3951,6 @@ static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb,
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
#if defined(CONFIG_NET)
/*
 * Prepare a shutdown request from its SQE.
 *
 * Returns -EINVAL if any SQE field this opcode does not use (off, addr,
 * rw_flags, buf_index, splice_fd_in) is non-zero, otherwise records
 * sqe->len as the shutdown mode and returns 0.
 */
static int io_shutdown_prep(struct io_kiocb *req,
			    const struct io_uring_sqe *sqe)
{
	struct io_shutdown *sd = io_kiocb_to_cmd(req);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	sd->how = READ_ONCE(sqe->len);

	return 0;
}
/*
 * Issue a shutdown request.
 *
 * Returns -EAGAIN when issued with IO_URING_F_NONBLOCK and -ENOTSOCK when
 * the request's file is not a socket. Otherwise the result of
 * __sys_shutdown_sock() is stored on the request and IOU_OK is returned.
 */
static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *sd = io_kiocb_to_cmd(req);
	struct socket *sk;
	int rc;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sk = sock_from_file(req->file);
	if (unlikely(!sk))
		return -ENOTSOCK;

	rc = __sys_shutdown_sock(sk, sd->how);
	io_req_set_res(req, rc, 0);

	return IOU_OK;
}
/*
 * A short transfer is only retried when the caller asked for MSG_WAITALL
 * and the socket type is SOCK_STREAM or SOCK_SEQPACKET.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	return (flags & MSG_WAITALL) &&
	       (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET);
}
/*
 * Preserve the message header @kmsg in the request's async data so the
 * request can be retried later.
 *
 * Always returns a negative errno: -EAGAIN when async data already exists or
 * the copy was made, -ENOMEM when io_alloc_async_data() fails (in which case
 * kmsg->free_iov is freed here).
 */
static int io_setup_async_msg(struct io_kiocb *req,
struct io_async_msghdr *kmsg)
{
struct io_async_msghdr *async_msg = req->async_data;
/* header was already saved by an earlier attempt */
if (async_msg)
return -EAGAIN;
if (io_alloc_async_data(req)) {
kfree(kmsg->free_iov);
return -ENOMEM;
}
async_msg = req->async_data;
/* the saved copy may own an allocated iovec that needs freeing later */
req->flags |= REQ_F_NEED_CLEANUP;
memcpy(async_msg, kmsg, sizeof(*kmsg));
/* the copied pointer still refers to @kmsg's storage; retarget it */
async_msg->msg.msg_name = &async_msg->addr;
/* if were using fast_iov, set it to the new one */
if (!async_msg->free_iov)
async_msg->msg.msg_iter.iov = async_msg->fast_iov;
return -EAGAIN;
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
iomsg->msg.msg_name = &iomsg->addr;
iomsg->free_iov = iomsg->fast_iov;
return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
&iomsg->free_iov);
}
/*
 * Copy the sendmsg header straight into the request's async data. On
 * success the request is marked REQ_F_NEED_CLEANUP and 0 is returned;
 * otherwise the error from io_sendmsg_copy_hdr() is returned.
 */
static int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int err = io_sendmsg_copy_hdr(req, req->async_data);

	if (err)
		return err;

	req->flags |= REQ_F_NEED_CLEANUP;
	return 0;
}
/* Cleanup hook for sendmsg/recvmsg: free the iovec held in the async data. */
static void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *saved = req->async_data;

	kfree(saved->free_iov);
}
/*
 * Prepare a send/sendmsg request from its SQE.
 *
 * Returns -EINVAL if sqe->file_index or sqe->addr2 is set, or if the ioprio
 * field carries anything other than IORING_RECVSEND_POLL_FIRST. Otherwise
 * fills in the io_sr_msg state and returns 0.
 */
static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *snd = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	snd->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	snd->len = READ_ONCE(sqe->len);

	/* per-request send/recv flags travel in the ioprio field */
	snd->flags = READ_ONCE(sqe->ioprio);
	if (snd->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;

	/* MSG_NOSIGNAL is always added to the user's message flags */
	snd->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (snd->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		snd->msg_flags |= MSG_CMSG_COMPAT;
#endif
	snd->done_io = 0;

	return 0;
}
/*
 * Issue handler for a sendmsg request.
 *
 * Uses the header saved in req->async_data when there is one, otherwise
 * copies it from userspace into an on-stack io_async_msghdr. When the send
 * must be retried (poll-first requested and not yet polled, -EAGAIN in
 * nonblocking mode, or a short MSG_WAITALL send that io_net_retry() allows)
 * the header is saved via io_setup_async_msg() and that function's return
 * value is returned. Otherwise the result, including bytes sent by earlier
 * attempts (sr->done_io), is stored on the request and IOU_OK is returned.
 * Returns -ENOTSOCK if the file is not a socket.
 */
static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned flags;
int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
if (req_has_async_data(req)) {
kmsg = req->async_data;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
return ret;
kmsg = &iomsg;
}
/* poll-first: do not attempt the send until the request has been polled */
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg);
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
/* with MSG_WAITALL anything less than the full iterator is a short send */
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
/* partial progress: account for it and retry the remainder */
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg);
}
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
/* report the total; an error after partial progress reports the progress */
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
io_req_set_res(req, ret, 0);
return IOU_OK;
}
/*
 * Issue handler for a send request (single user buffer, no message header).
 *
 * Returns -EAGAIN when the request should be retried: poll-first requested
 * and not yet polled, -EAGAIN from the socket in nonblocking mode, or a short
 * MSG_WAITALL send that io_net_retry() allows (after advancing sr->buf and
 * sr->len past the bytes already sent). Returns -ENOTSOCK for a non-socket
 * file and passes through an import_single_range() error. Otherwise stores
 * the result, including bytes sent by earlier attempts, and returns IOU_OK.
 */
static int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct msghdr msg;
struct iovec iov;
struct socket *sock;
unsigned flags;
int min_ret = 0;
int ret;
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
if (unlikely(ret))
return ret;
/* plain send: no destination address and no control data */
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
/* with MSG_WAITALL anything less than the full buffer is a short send */
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
msg.msg_flags = flags;
ret = sock_sendmsg(sock, &msg);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
/* partial progress: skip what was sent and retry the remainder */
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return -EAGAIN;
}
req_set_fail(req);
}
/* report the total; an error after partial progress reports the progress */
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
io_req_set_res(req, ret, 0);
return IOU_OK;
}
/*
 * Copy the native (non-compat) recvmsg header from userspace into @iomsg.
 *
 * With REQ_F_BUFFER_SELECT set, at most one user iovec is allowed (-EINVAL
 * otherwise); only its length is kept, in sr->len, and free_iov is cleared.
 * Without it, the user iovec array is imported into msg.msg_iter, using
 * fast_iov as the initial storage.
 *
 * Returns 0 on success or a negative errno.
 */
static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct iovec __user *uiov;
size_t iov_len;
int ret;
ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
&iomsg->uaddr, &uiov, &iov_len);
if (ret)
return ret;
if (req->flags & REQ_F_BUFFER_SELECT) {
if (iov_len > 1)
return -EINVAL;
if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
return -EFAULT;
sr->len = iomsg->fast_iov[0].iov_len;
iomsg->free_iov = NULL;
} else {
iomsg->free_iov = iomsg->fast_iov;
ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
&iomsg->free_iov, &iomsg->msg.msg_iter,
false);
/* a positive return is not an error; normalize it to 0 */
if (ret > 0)
ret = 0;
}
return ret;
}
#ifdef CONFIG_COMPAT
/*
 * Compat counterpart of __io_recvmsg_copy_hdr(): copy a compat recvmsg
 * header from userspace into @iomsg.
 *
 * With REQ_F_BUFFER_SELECT set, at most one user iovec is allowed; only its
 * length is read (a negative length gives -EINVAL) and stored in sr->len,
 * and free_iov is cleared. Without it, the compat iovec array is imported
 * into msg.msg_iter, using fast_iov as the initial storage.
 *
 * Returns 0 on success or a negative errno.
 */
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct compat_iovec __user *uiov;
compat_uptr_t ptr;
compat_size_t len;
int ret;
ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
&ptr, &len);
if (ret)
return ret;
uiov = compat_ptr(ptr);
if (req->flags & REQ_F_BUFFER_SELECT) {
compat_ssize_t clen;
if (len > 1)
return -EINVAL;
if (!access_ok(uiov, sizeof(*uiov)))
return -EFAULT;
if (__get_user(clen, &uiov->iov_len))
return -EFAULT;
if (clen < 0)
return -EINVAL;
sr->len = clen;
iomsg->free_iov = NULL;
} else {
iomsg->free_iov = iomsg->fast_iov;
/* final argument selects compat iovec layout */
ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
UIO_FASTIOV, &iomsg->free_iov,
&iomsg->msg.msg_iter, true);
if (ret < 0)
return ret;
}
return 0;
}
#endif
/*
 * Copy the user's recvmsg header for @req into @iomsg, dispatching to the
 * compat variant when the ring is in compat mode. msg_name is pointed at
 * @iomsg's own address storage first. Returns the helper's result.
 */
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
iomsg->msg.msg_name = &iomsg->addr;
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif
return __io_recvmsg_copy_hdr(req, iomsg);
}
/*
 * Copy the recvmsg header straight into the request's async data. On
 * success the request is marked REQ_F_NEED_CLEANUP and 0 is returned;
 * otherwise the error from io_recvmsg_copy_hdr() is returned.
 */
static int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int err = io_recvmsg_copy_hdr(req, req->async_data);

	if (err)
		return err;

	req->flags |= REQ_F_NEED_CLEANUP;
	return 0;
}
/*
 * Prepare a recv/recvmsg request from its SQE.
 *
 * Returns -EINVAL if sqe->file_index or sqe->addr2 is set, or if the ioprio
 * field carries anything other than IORING_RECVSEND_POLL_FIRST. Otherwise
 * fills in the io_sr_msg state and returns 0.
 */
static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *rcv = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	rcv->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	rcv->len = READ_ONCE(sqe->len);

	/* per-request send/recv flags travel in the ioprio field */
	rcv->flags = READ_ONCE(sqe->ioprio);
	if (rcv->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;

	/* MSG_NOSIGNAL is always added to the user's message flags */
	rcv->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (rcv->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (rcv->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		rcv->msg_flags |= MSG_CMSG_COMPAT;
#endif
	rcv->done_io = 0;

	return 0;
}
/*
 * Issue handler for a recvmsg request.
 *
 * Uses the header saved in req->async_data when there is one, otherwise
 * copies it from userspace into an on-stack io_async_msghdr. If a provided
 * buffer still has to be selected, it is picked here and becomes the single
 * receive iovec (-ENOBUFS if none is available). When the receive must be
 * retried (poll-first requested and not yet polled, -EAGAIN in nonblocking
 * mode, or a short MSG_WAITALL receive that io_net_retry() allows) the header
 * is saved via io_setup_async_msg() and that function's return value is
 * returned. Otherwise the result and CQE flags are stored on the request and
 * IOU_OK is returned. Returns -ENOTSOCK if the file is not a socket.
 */
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
if (req_has_async_data(req)) {
kmsg = req->async_data;
} else {
ret = io_recvmsg_copy_hdr(req, &iomsg);
if (ret)
return ret;
kmsg = &iomsg;
}
/* poll-first: do not attempt the receive until the request has been polled */
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg);
if (io_do_buffer_select(req)) {
void __user *buf;
buf = io_buffer_select(req, &sr->len, issue_flags);
if (!buf)
return -ENOBUFS;
/* receive into the selected buffer through a one-entry iovec */
kmsg->fast_iov[0].iov_base = buf;
kmsg->fast_iov[0].iov_len = sr->len;
iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
sr->len);
}
flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
/* with MSG_WAITALL anything less than the full iterator is a short read */
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
kmsg->msg.msg_get_inq = 1;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
/* partial progress: account for it and retry the remainder */
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg);
}
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
/* truncated data or control data under MSG_WAITALL marks the request failed */
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
/* report the total; an error after partial progress reports the progress */
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
cflags = io_put_kbuf(req, issue_flags);
/* a non-zero msg_inq is reported to userspace as SOCK_NONEMPTY */
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
io_req_set_res(req, ret, cflags);
return IOU_OK;
}
/*
 * Issue handler for a recv request (single user buffer, no message header).
 *
 * If a provided buffer still has to be selected it is picked here and
 * becomes sr->buf (-ENOBUFS if none is available).
 *
 * Returns -EAGAIN when the request should be retried: poll-first requested
 * and not yet polled, -EAGAIN from the socket in nonblocking mode, or a short
 * MSG_WAITALL receive that io_net_retry() allows (after advancing sr->buf and
 * sr->len past the bytes already received). Returns -ENOTSOCK for a
 * non-socket file. Otherwise stores the result (including bytes received by
 * earlier attempts) and the CQE flags on the request and returns IOU_OK; this
 * includes the case where importing the user buffer fails, which completes
 * the request with that error.
 */
static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	/* poll-first: do not attempt the receive until the request has been polled */
	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &sr->len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	/*
	 * msg_inq is examined on every completion path below, including the
	 * import failure path where nothing else in msg has been set up, and
	 * this function never assigns it otherwise. Give it a defined value
	 * so IORING_CQE_F_SOCK_NONEMPTY is never derived from stack garbage.
	 */
	msg.msg_inq = 0;

	ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret)) {
		req_set_fail(req);
		goto out_complete;
	}

	/* plain recv: no source address and no control data */
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	/* with MSG_WAITALL anything less than the full buffer is a short read */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock)
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		/* partial progress: skip what was received and retry the rest */
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) &&
		   (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* truncation under MSG_WAITALL marks the request failed */
		req_set_fail(req);
	}

out_complete:
	/* report the total; an error after partial progress reports the progress */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
	io_req_set_res(req, ret, cflags);
	return IOU_OK;
}
/*
 * Prepare an accept request from its SQE.
 *
 * Returns -EINVAL when: sqe->len or sqe->buf_index is set; the ioprio field
 * carries anything other than IORING_ACCEPT_MULTISHOT; a fixed file slot is
 * combined with SOCK_CLOEXEC; multishot is combined with a fixed slot other
 * than IORING_FILE_INDEX_ALLOC; or accept_flags has bits beyond
 * SOCK_CLOEXEC | SOCK_NONBLOCK. Otherwise fills in the io_accept state,
 * marks multishot requests with REQ_F_APOLL_MULTISHOT, and returns 0.
 */
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = io_kiocb_to_cmd(req);
unsigned flags;
if (sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
/* accept-specific flags travel in the ioprio field */
flags = READ_ONCE(sqe->ioprio);
if (flags & ~IORING_ACCEPT_MULTISHOT)
return -EINVAL;
accept->file_slot = READ_ONCE(sqe->file_index);
if (accept->file_slot) {
if (accept->flags & SOCK_CLOEXEC)
return -EINVAL;
if (flags & IORING_ACCEPT_MULTISHOT &&
accept->file_slot != IORING_FILE_INDEX_ALLOC)
return -EINVAL;
}
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
/* where the two constants differ, translate SOCK_NONBLOCK to O_NONBLOCK */
if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
if (flags & IORING_ACCEPT_MULTISHOT)
req->flags |= REQ_F_APOLL_MULTISHOT;
return 0;
}
/*
 * Issue handler for an accept request.
 *
 * Accepts a connection and installs the new file either as a normal fd or
 * into a fixed file slot (when accept->file_slot is non-zero).
 *
 * Single-shot requests store the result on the request and return IOU_OK.
 * Multishot requests (REQ_F_APOLL_MULTISHOT) post one CQE with
 * IORING_CQE_F_MORE per accepted connection and loop to accept again; if the
 * CQE cannot be filled, -ECANCELED is returned. A negative result in
 * multishot mode is returned directly.
 *
 * On -EAGAIN in nonblocking mode, returns -EAGAIN, or
 * IOU_ISSUE_SKIP_COMPLETE when the request is multishot and already polled.
 */
static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_accept *accept = io_kiocb_to_cmd(req);
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
bool fixed = !!accept->file_slot;
struct file *file;
int ret, fd;
retry:
/* a normal fd is reserved up front; fd is unused in the fixed-slot case */
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
if (unlikely(fd < 0))
return fd;
}
file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
accept->flags);
if (IS_ERR(file)) {
/* give the reserved fd back before handling the error */
if (!fixed)
put_unused_fd(fd);
ret = PTR_ERR(file);
if (ret == -EAGAIN && force_nonblock) {
/*
* if it's multishot and polled, we don't need to
* return EAGAIN to arm the poll infra since it
* has already been done
*/
if ((req->flags & IO_APOLL_MULTI_POLLED) ==
IO_APOLL_MULTI_POLLED)
ret = IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
} else if (!fixed) {
fd_install(fd, file);
ret = fd;
} else {
ret = io_fixed_fd_install(req, issue_flags, file,
accept->file_slot);
}
/* single-shot: complete the request with the result */
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
io_req_set_res(req, ret, 0);
return IOU_OK;
}
if (ret >= 0) {
bool filled;
/* multishot: post a CQE for this connection under the completion lock */
spin_lock(&ctx->completion_lock);
filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
IORING_CQE_F_MORE);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (filled) {
io_cqring_ev_posted(ctx);
goto retry;
}
ret = -ECANCELED;
}
return ret;
}
/*
 * Prepare a socket request from its SQE.
 *
 * Returns -EINVAL if sqe->addr, sqe->rw_flags or sqe->buf_index is set, if
 * the type carries flag bits other than SOCK_CLOEXEC | SOCK_NONBLOCK, or if
 * SOCK_CLOEXEC is combined with a fixed file slot. Otherwise fills in the
 * io_socket state and returns 0.
 */
static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sk_req = io_kiocb_to_cmd(req);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	/* domain, type and protocol are carried in fd, off and len */
	sk_req->domain = READ_ONCE(sqe->fd);
	sk_req->type = READ_ONCE(sqe->off);
	sk_req->protocol = READ_ONCE(sqe->len);
	sk_req->file_slot = READ_ONCE(sqe->file_index);
	sk_req->nofile = rlimit(RLIMIT_NOFILE);

	/* whatever lies outside the type mask is treated as flags */
	sk_req->flags = sk_req->type & ~SOCK_TYPE_MASK;

	if (sk_req->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (sk_req->file_slot && (sk_req->flags & SOCK_CLOEXEC))
		return -EINVAL;

	return 0;
}
/*
 * Issue handler for a socket request.
 *
 * Creates a socket file and installs it either as a normal fd or into a
 * fixed file slot (when sock->file_slot is non-zero). Returns -EAGAIN if
 * socket creation reports -EAGAIN in nonblocking mode, or the negative errno
 * from reserving an fd. Otherwise stores the result (new fd, fixed-install
 * result, or error) on the request and returns IOU_OK.
 */
static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_socket *sock = io_kiocb_to_cmd(req);
bool fixed = !!sock->file_slot;
struct file *file;
int ret, fd;
/* a normal fd is reserved up front; fd is unused in the fixed-slot case */
if (!fixed) {
fd = __get_unused_fd_flags(sock->flags, sock->nofile);
if (unlikely(fd < 0))
return fd;
}
file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
if (IS_ERR(file)) {
/* give the reserved fd back before handling the error */
if (!fixed)
put_unused_fd(fd);
ret = PTR_ERR(file);
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
} else if (!fixed) {
fd_install(fd, file);
ret = fd;
} else {
ret = io_fixed_fd_install(req, issue_flags, file,
sock->file_slot);
}
io_req_set_res(req, ret, 0);
return IOU_OK;
}
/*
 * Copy the user-supplied connect address into the request's async data.
 * Returns whatever move_addr_to_kernel() returns.
 */
static int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_connect *conn = io_kiocb_to_cmd(req);
	struct io_async_connect *saved = req->async_data;

	return move_addr_to_kernel(conn->addr, conn->addr_len,
				   &saved->address);
}
/*
 * Prepare a connect request from its SQE.
 *
 * Returns -EINVAL if sqe->len, sqe->buf_index, sqe->rw_flags or
 * sqe->splice_fd_in is set. Otherwise records the user address pointer
 * (sqe->addr) and its length (sqe->addr2) and returns 0.
 */
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *c = io_kiocb_to_cmd(req);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	c->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	c->addr_len = READ_ONCE(sqe->addr2);

	return 0;
}
/*
 * Issue handler for a connect request.
 *
 * Uses the address saved in req->async_data when there is one, otherwise
 * copies it from userspace into an on-stack io_async_connect. If the connect
 * reports -EAGAIN or -EINPROGRESS in nonblocking mode, the address is saved
 * into async data (if not already there) and -EAGAIN is returned so the
 * request can be retried. In every other case, including failure to copy the
 * address or to allocate async data, the result is stored on the request and
 * IOU_OK is returned.
 */
static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_connect *connect = io_kiocb_to_cmd(req);
struct io_async_connect __io, *io;
unsigned file_flags;
int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
if (req_has_async_data(req)) {
io = req->async_data;
} else {
ret = move_addr_to_kernel(connect->addr,
connect->addr_len,
&__io.address);
if (ret)
goto out;
io = &__io;
}
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_connect_file(req->file, &io->address,
connect->addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
/* address already lives in async data; nothing more to save */
if (req_has_async_data(req))
return -EAGAIN;
if (io_alloc_async_data(req)) {
ret = -ENOMEM;
goto out;
}
/* keep the kernel copy of the address for the retry */
memcpy(req->async_data, &__io, sizeof(__io));
return -EAGAIN;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
out:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
return IOU_OK;
}
#else /* !CONFIG_NET */
/*
 * Stubs used when networking support is not built in: every network opcode
 * handler is replaced by a function that just returns -EOPNOTSUPP.
 *
 *   IO_NETOP_FN(op)         defines io_<op>()
 *   IO_NETOP_PREP(op)       defines io_<op>() and io_<op>_prep()
 *   IO_NETOP_PREP_ASYNC(op) defines the two above plus io_<op>_prep_async()
 *
 * No macro body ends in a line continuation, so a definition can never
 * absorb the line that follows it. The invocations carry no trailing ';'
 * because each one expands to complete function definitions.
 */
#define IO_NETOP_FN(op) \
static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \
{ \
	return -EOPNOTSUPP; \
}

#define IO_NETOP_PREP(op) \
IO_NETOP_FN(op) \
static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
{ \
	return -EOPNOTSUPP; \
}

#define IO_NETOP_PREP_ASYNC(op) \
IO_NETOP_PREP(op) \
static int io_##op##_prep_async(struct io_kiocb *req) \
{ \
	return -EOPNOTSUPP; \
}

IO_NETOP_PREP_ASYNC(sendmsg)
IO_NETOP_PREP_ASYNC(recvmsg)
IO_NETOP_PREP_ASYNC(connect)
IO_NETOP_PREP(accept)
IO_NETOP_PREP(socket)
IO_NETOP_PREP(shutdown)
IO_NETOP_FN(send)
IO_NETOP_FN(recv)
#endif /* CONFIG_NET */
struct io_poll_table { struct io_poll_table {
struct poll_table_struct pt; struct poll_table_struct pt;
struct io_kiocb *req; struct io_kiocb *req;
...@@ -7874,7 +7051,7 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, ...@@ -7874,7 +7051,7 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
return 0; return 0;
} }
static int io_install_fixed_file(struct io_kiocb *req, struct file *file, int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index) unsigned int issue_flags, u32 slot_index)
__must_hold(&req->ctx->uring_lock) __must_hold(&req->ctx->uring_lock)
{ {
...@@ -10986,12 +10163,14 @@ static const struct io_op_def io_op_defs[] = { ...@@ -10986,12 +10163,14 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1, .unbound_nonreg_file = 1,
.pollout = 1, .pollout = 1,
.ioprio = 1, .ioprio = 1,
#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_msghdr), .async_size = sizeof(struct io_async_msghdr),
.prep = io_sendmsg_prep, .prep = io_sendmsg_prep,
.issue = io_sendmsg, .issue = io_sendmsg,
.prep_async = io_sendmsg_prep_async, .prep_async = io_sendmsg_prep_async,
#if defined(CONFIG_NET)
.cleanup = io_sendmsg_recvmsg_cleanup, .cleanup = io_sendmsg_recvmsg_cleanup,
#else
.prep = io_eopnotsupp_prep,
#endif #endif
}, },
[IORING_OP_RECVMSG] = { [IORING_OP_RECVMSG] = {
...@@ -11000,12 +10179,14 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11000,12 +10179,14 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1, .pollin = 1,
.buffer_select = 1, .buffer_select = 1,
.ioprio = 1, .ioprio = 1,
#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_msghdr), .async_size = sizeof(struct io_async_msghdr),
.prep = io_recvmsg_prep, .prep = io_recvmsg_prep,
.issue = io_recvmsg, .issue = io_recvmsg,
.prep_async = io_recvmsg_prep_async, .prep_async = io_recvmsg_prep_async,
#if defined(CONFIG_NET)
.cleanup = io_sendmsg_recvmsg_cleanup, .cleanup = io_sendmsg_recvmsg_cleanup,
#else
.prep = io_eopnotsupp_prep,
#endif #endif
}, },
[IORING_OP_TIMEOUT] = { [IORING_OP_TIMEOUT] = {
...@@ -11026,8 +10207,12 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11026,8 +10207,12 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1, .pollin = 1,
.poll_exclusive = 1, .poll_exclusive = 1,
.ioprio = 1, /* used for flags */ .ioprio = 1, /* used for flags */
#if defined(CONFIG_NET)
.prep = io_accept_prep, .prep = io_accept_prep,
.issue = io_accept, .issue = io_accept,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_ASYNC_CANCEL] = { [IORING_OP_ASYNC_CANCEL] = {
.audit_skip = 1, .audit_skip = 1,
...@@ -11044,10 +10229,14 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11044,10 +10229,14 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1, .needs_file = 1,
.unbound_nonreg_file = 1, .unbound_nonreg_file = 1,
.pollout = 1, .pollout = 1,
#if defined(CONFIG_NET)
.async_size = sizeof(struct io_async_connect), .async_size = sizeof(struct io_async_connect),
.prep = io_connect_prep, .prep = io_connect_prep,
.issue = io_connect, .issue = io_connect,
.prep_async = io_connect_prep_async, .prep_async = io_connect_prep_async,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_FALLOCATE] = { [IORING_OP_FALLOCATE] = {
.needs_file = 1, .needs_file = 1,
...@@ -11117,8 +10306,12 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11117,8 +10306,12 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1, .pollout = 1,
.audit_skip = 1, .audit_skip = 1,
.ioprio = 1, .ioprio = 1,
#if defined(CONFIG_NET)
.prep = io_sendmsg_prep, .prep = io_sendmsg_prep,
.issue = io_send, .issue = io_send,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_RECV] = { [IORING_OP_RECV] = {
.needs_file = 1, .needs_file = 1,
...@@ -11127,8 +10320,12 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11127,8 +10320,12 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1, .buffer_select = 1,
.audit_skip = 1, .audit_skip = 1,
.ioprio = 1, .ioprio = 1,
#if defined(CONFIG_NET)
.prep = io_recvmsg_prep, .prep = io_recvmsg_prep,
.issue = io_recv, .issue = io_recv,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_OPENAT2] = { [IORING_OP_OPENAT2] = {
.prep = io_openat2_prep, .prep = io_openat2_prep,
...@@ -11175,8 +10372,12 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11175,8 +10372,12 @@ static const struct io_op_def io_op_defs[] = {
}, },
[IORING_OP_SHUTDOWN] = { [IORING_OP_SHUTDOWN] = {
.needs_file = 1, .needs_file = 1,
#if defined(CONFIG_NET)
.prep = io_shutdown_prep, .prep = io_shutdown_prep,
.issue = io_shutdown, .issue = io_shutdown,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_RENAMEAT] = { [IORING_OP_RENAMEAT] = {
.prep = io_renameat_prep, .prep = io_renameat_prep,
...@@ -11233,8 +10434,12 @@ static const struct io_op_def io_op_defs[] = { ...@@ -11233,8 +10434,12 @@ static const struct io_op_def io_op_defs[] = {
}, },
[IORING_OP_SOCKET] = { [IORING_OP_SOCKET] = {
.audit_skip = 1, .audit_skip = 1,
#if defined(CONFIG_NET)
.prep = io_socket_prep, .prep = io_socket_prep,
.issue = io_socket, .issue = io_socket,
#else
.prep = io_eopnotsupp_prep,
#endif
}, },
[IORING_OP_URING_CMD] = { [IORING_OP_URING_CMD] = {
.needs_file = 1, .needs_file = 1,
......
...@@ -58,13 +58,35 @@ static inline void io_ring_submit_lock(struct io_ring_ctx *ctx, ...@@ -58,13 +58,35 @@ static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
lockdep_assert_held(&ctx->uring_lock); lockdep_assert_held(&ctx->uring_lock);
} }
/*
 * Publish the locally cached CQ tail to the ring's cq.tail with a release
 * store, so earlier CQE writes are ordered before the tail update.
 */
static inline void io_commit_cqring(struct io_ring_ctx *ctx)
{
/* order cqe stores with ring update */
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags); void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
u32 cflags);
void io_cqring_ev_posted(struct io_ring_ctx *ctx);
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
unsigned int issue_flags);
unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
/*
 * Tell whether a buffer still has to be picked for @req: buffer selection
 * was requested (REQ_F_BUFFER_SELECT) and neither REQ_F_BUFFER_SELECTED nor
 * REQ_F_BUFFER_RING is set on the request.
 */
static inline bool io_do_buffer_select(struct io_kiocb *req)
{
	return (req->flags & REQ_F_BUFFER_SELECT) &&
	       !(req->flags & (REQ_F_BUFFER_SELECTED | REQ_F_BUFFER_RING));
}
struct file *io_file_get_normal(struct io_kiocb *req, int fd); struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd, struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags); unsigned issue_flags);
int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot); struct file *file, unsigned int file_slot);
int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
int io_rsrc_node_switch_start(struct io_ring_ctx *ctx); int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
#include "io_uring_types.h"
#include "io_uring.h"
#include "net.h"
#if defined(CONFIG_NET)
struct io_shutdown {
struct file *file;
int how;
};
/*
 * Per-request command state for an accept request, filled in by
 * io_accept_prep() and consumed by io_accept().
 */
struct io_accept {
struct file *file;
/* user pointer for the peer address, from sqe->addr */
struct sockaddr __user *addr;
/* user pointer for the address length, from sqe->addr2 */
int __user *addr_len;
/* accept flags from sqe->accept_flags; only SOCK_CLOEXEC/SOCK_NONBLOCK
 * are accepted, with SOCK_NONBLOCK translated to O_NONBLOCK if they differ */
int flags;
/* from sqe->file_index; non-zero selects the fixed-file install path */
u32 file_slot;
/* RLIMIT_NOFILE sampled at prep time, used when allocating the fd */
unsigned long nofile;
};
/*
 * Per-request command state for a socket-creation request, filled in by
 * io_socket_prep() and consumed by io_socket().
 */
struct io_socket {
struct file *file;
/* socket domain, read from sqe->fd */
int domain;
/* socket type including any SOCK_* flag bits, read from sqe->off */
int type;
/* protocol, read from sqe->len */
int protocol;
/* type & ~SOCK_TYPE_MASK; only SOCK_CLOEXEC/SOCK_NONBLOCK are accepted */
int flags;
/* from sqe->file_index; non-zero selects the fixed-file install path */
u32 file_slot;
/* RLIMIT_NOFILE sampled at prep time, used when allocating the fd */
unsigned long nofile;
};
/*
 * Per-request command state for a connect request, filled in by
 * io_connect_prep().
 */
struct io_connect {
struct file *file;
/* user pointer to the destination address, from sqe->addr */
struct sockaddr __user *addr;
/* length of that address, from sqe->addr2 */
int addr_len;
};
/*
 * Per-request command state shared by the send/sendmsg/recv/recvmsg
 * handlers in this file.
 */
struct io_sr_msg {
struct file *file;
/* all three views alias the user pointer read from sqe->addr */
union {
/* message header as seen by compat tasks (recvmsg compat path) */
struct compat_msghdr __user *umsg_compat;
/* native user message header (sendmsg/recvmsg) */
struct user_msghdr __user *umsg;
/* plain data buffer (send/recv); advanced on partial transfers */
void __user *buf;
};
/* sqe->msg_flags with MSG_NOSIGNAL (and MSG_CMSG_COMPAT for compat) OR'ed in */
int msg_flags;
/* remaining length; initialised from sqe->len, reduced on partial send/recv */
size_t len;
/* bytes already transferred by earlier partial attempts */
size_t done_io;
/* io_uring-level flags from sqe->ioprio; only IORING_RECVSEND_POLL_FIRST is allowed */
unsigned int flags;
};
/*
 * Flag pair tested by io_accept(): the request is multishot
 * (REQ_F_APOLL_MULTISHOT) and REQ_F_POLLED is already set on it.
 * Both bits must be set for the check to match.
 */
#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
/*
 * Validate the SQE of a shutdown request and stash its argument.
 *
 * Returns -EINVAL if any SQE field this opcode does not use is non-zero,
 * otherwise stores sqe->len as the shutdown "how" value and returns 0.
 */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *sd = io_kiocb_to_cmd(req);

	/* fields not consumed by this opcode must be left zeroed */
	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	sd->how = READ_ONCE(sqe->len);

	return 0;
}
/*
 * Execute a shutdown request.
 *
 * Returns -EAGAIN when issued with IO_URING_F_NONBLOCK, -ENOTSOCK when
 * req->file is not a socket; otherwise records the result of
 * __sys_shutdown_sock() on the request and returns IOU_OK.
 */
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *sd = io_kiocb_to_cmd(req);
	struct socket *sock;
	int res;

	/* never attempted from a nonblocking issue */
	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	res = __sys_shutdown_sock(sock, sd->how);
	io_req_set_res(req, res, 0);

	return IOU_OK;
}
/*
 * Decide whether a short transfer should be retried rather than completed.
 *
 * Only true when MSG_WAITALL is set in @flags and the socket type is
 * SOCK_STREAM or SOCK_SEQPACKET.
 */
static bool io_net_retry(struct socket *sock, int flags)
{
	switch (sock->type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		return !!(flags & MSG_WAITALL);
	default:
		return false;
	}
}
static int io_setup_async_msg(struct io_kiocb *req,
struct io_async_msghdr *kmsg)
{
struct io_async_msghdr *async_msg = req->async_data;
if (async_msg)
return -EAGAIN;
if (io_alloc_async_data(req)) {
kfree(kmsg->free_iov);
return -ENOMEM;
}
async_msg = req->async_data;
req->flags |= REQ_F_NEED_CLEANUP;
memcpy(async_msg, kmsg, sizeof(*kmsg));
async_msg->msg.msg_name = &async_msg->addr;
/* if were using fast_iov, set it to the new one */
if (!async_msg->free_iov)
async_msg->msg.msg_iter.iov = async_msg->fast_iov;
return -EAGAIN;
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
iomsg->msg.msg_name = &iomsg->addr;
iomsg->free_iov = iomsg->fast_iov;
return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
&iomsg->free_iov);
}
/*
 * Copy the sendmsg header into req->async_data ahead of async execution.
 *
 * On success REQ_F_NEED_CLEANUP is set on the request and 0 is returned;
 * on failure the error from the header copy is returned.
 */
int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int err = io_sendmsg_copy_hdr(req, req->async_data);

	if (err)
		return err;

	req->flags |= REQ_F_NEED_CLEANUP;
	return 0;
}
/*
 * Cleanup handler for sendmsg/recvmsg requests: frees the iovec array
 * referenced by the async message header's free_iov.
 */
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *hdr = req->async_data;

	kfree(hdr->free_iov);
}
/*
 * Validate and unpack the SQE of a send/sendmsg request.
 *
 * Returns -EINVAL if sqe->file_index or sqe->addr2 is set, or if sqe->ioprio
 * carries anything besides IORING_RECVSEND_POLL_FIRST; 0 otherwise.
 * MSG_NOSIGNAL is always added to the message flags, MSG_DONTWAIT marks the
 * request REQ_F_NOWAIT, and compat rings get MSG_CMSG_COMPAT.
 */
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	int msg_flags;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;

	/* no partial progress yet */
	sr->done_io = 0;

	msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->msg_flags = msg_flags;

	return 0;
}
/*
 * Issue a sendmsg request on the socket behind req->file.
 *
 * Uses the header in req->async_data when present, otherwise imports it
 * into an on-stack io_async_msghdr. Whenever the request has to be retried
 * the state is handed to io_setup_async_msg(), whose return value
 * (-EAGAIN or -ENOMEM) is propagated.
 *
 * Returns -ENOTSOCK if the file is not a socket, a header-copy error, the
 * io_setup_async_msg() result on the retry paths, or IOU_OK once the result
 * has been stored on the request with io_req_set_res().
 */
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned flags;
int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
if (req_has_async_data(req)) {
kmsg = req->async_data;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
return ret;
kmsg = &iomsg;
}
/* POLL_FIRST requested and REQ_F_POLLED not set yet: defer without trying */
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg);
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
/* with MSG_WAITALL anything short of the full iterator counts as short */
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
/* partial send that io_net_retry() allows retrying: remember progress */
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg);
}
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
/* iovec freed above, so the cleanup handler must not run again */
req->flags &= ~REQ_F_NEED_CLEANUP;
/* report bytes from earlier partial attempts; they override a late error */
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
io_req_set_res(req, ret, 0);
return IOU_OK;
}
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct msghdr msg;
struct iovec iov;
struct socket *sock;
unsigned flags;
int min_ret = 0;
int ret;
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
if (unlikely(ret))
return ret;
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
msg.msg_flags = flags;
ret = sock_sendmsg(sock, &msg);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return -EAGAIN;
}
req_set_fail(req);
}
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
io_req_set_res(req, ret, 0);
return IOU_OK;
}
/*
 * Import the native user message header of a recvmsg request into @iomsg.
 *
 * With REQ_F_BUFFER_SELECT at most one iovec is allowed (-EINVAL otherwise);
 * only its length is kept (in sr->len) and no iovec array is imported.
 * Without it the user iovec array is imported via __import_iovec(), using
 * the inline fast_iov array as the initial storage.
 *
 * Returns 0 on success or a negative error.
 */
static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct iovec __user *uiov;
	size_t iov_len;
	int ret;

	ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
				      &iomsg->uaddr, &uiov, &iov_len);
	if (ret)
		return ret;

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		/* a positive return is not an error here; collapse it to 0 */
		return ret > 0 ? 0 : ret;
	}

	/* buffer-select: a single iovec only, and only its length matters */
	if (iov_len > 1)
		return -EINVAL;
	if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
		return -EFAULT;
	sr->len = iomsg->fast_iov[0].iov_len;
	iomsg->free_iov = NULL;
	return 0;
}
#ifdef CONFIG_COMPAT
/*
 * Compat counterpart of __io_recvmsg_copy_hdr(): import a compat user
 * message header of a recvmsg request into @iomsg.
 *
 * With REQ_F_BUFFER_SELECT at most one compat iovec is allowed and only its
 * (non-negative) length is read into sr->len. Otherwise the compat iovec
 * array is imported via __import_iovec() in compat mode.
 *
 * Returns 0 on success or a negative error.
 */
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct compat_iovec __user *uiov;
	compat_ssize_t clen;
	compat_uptr_t ptr;
	compat_size_t len;
	int ret;

	ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
				  &ptr, &len);
	if (ret)
		return ret;

	uiov = compat_ptr(ptr);

	if (!(req->flags & REQ_F_BUFFER_SELECT)) {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		return ret < 0 ? ret : 0;
	}

	/* buffer-select: a single iovec only, and only its length matters */
	if (len > 1)
		return -EINVAL;
	if (!access_ok(uiov, sizeof(*uiov)))
		return -EFAULT;
	if (__get_user(clen, &uiov->iov_len))
		return -EFAULT;
	if (clen < 0)
		return -EINVAL;

	sr->len = clen;
	iomsg->free_iov = NULL;
	return 0;
}
#endif
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
iomsg->msg.msg_name = &iomsg->addr;
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif
return __io_recvmsg_copy_hdr(req, iomsg);
}
/*
 * Copy the recvmsg header into req->async_data ahead of async execution.
 *
 * On success REQ_F_NEED_CLEANUP is set on the request and 0 is returned;
 * on failure the error from the header copy is returned.
 */
int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int err = io_recvmsg_copy_hdr(req, req->async_data);

	if (err)
		return err;

	req->flags |= REQ_F_NEED_CLEANUP;
	return 0;
}
/*
 * Validate and unpack the SQE of a recv/recvmsg request.
 *
 * Returns -EINVAL if sqe->file_index or sqe->addr2 is set, or if sqe->ioprio
 * carries anything besides IORING_RECVSEND_POLL_FIRST; 0 otherwise.
 * MSG_NOSIGNAL is always added to the message flags, MSG_DONTWAIT marks the
 * request REQ_F_NOWAIT, MSG_ERRQUEUE marks it REQ_F_CLEAR_POLLIN, and compat
 * rings get MSG_CMSG_COMPAT.
 */
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	int msg_flags;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;

	/* no partial progress yet */
	sr->done_io = 0;

	msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->msg_flags = msg_flags;

	return 0;
}
/*
 * Issue a recvmsg request on the socket behind req->file.
 *
 * Uses the header in req->async_data when present, otherwise imports it
 * into an on-stack io_async_msghdr. Whenever the request has to be retried
 * the state is handed to io_setup_async_msg(), whose return value
 * (-EAGAIN or -ENOMEM) is propagated.
 *
 * Returns -ENOTSOCK if the file is not a socket, a header-copy error,
 * -ENOBUFS if buffer selection yields no buffer, the io_setup_async_msg()
 * result on the retry paths, or IOU_OK once the result and CQE flags have
 * been stored on the request with io_req_set_res().
 */
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
if (req_has_async_data(req)) {
kmsg = req->async_data;
} else {
ret = io_recvmsg_copy_hdr(req, &iomsg);
if (ret)
return ret;
kmsg = &iomsg;
}
/* POLL_FIRST requested and REQ_F_POLLED not set yet: defer without trying */
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return io_setup_async_msg(req, kmsg);
/* buffer-select: pick the buffer now and build a one-segment iterator on it */
if (io_do_buffer_select(req)) {
void __user *buf;
buf = io_buffer_select(req, &sr->len, issue_flags);
if (!buf)
return -ENOBUFS;
kmsg->fast_iov[0].iov_base = buf;
kmsg->fast_iov[0].iov_len = sr->len;
iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
sr->len);
}
flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
/* with MSG_WAITALL anything short of the full iterator counts as short */
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
/* ask for msg_inq to be filled in; it is checked after the receive */
/* NOTE(review): msg_inq itself is not initialised in this function - confirm
 * the header-copy helpers or the protocol always set it before it is read. */
kmsg->msg.msg_get_inq = 1;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
/* partial receive that io_net_retry() allows retrying: remember progress */
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg);
}
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
/* MSG_WAITALL was requested but data or control data was truncated */
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
/* iovec freed above, so the cleanup handler must not run again */
req->flags &= ~REQ_F_NEED_CLEANUP;
/* report bytes from earlier partial attempts; they override a late error */
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
cflags = io_put_kbuf(req, issue_flags);
/* non-zero msg_inq is reported to userspace via the CQE flags */
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
io_req_set_res(req, ret, cflags);
return IOU_OK;
}
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req);
struct msghdr msg;
struct socket *sock;
struct iovec iov;
unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
if (io_do_buffer_select(req)) {
void __user *buf;
buf = io_buffer_select(req, &sr->len, issue_flags);
if (!buf)
return -ENOBUFS;
sr->buf = buf;
}
ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
if (unlikely(ret))
goto out_free;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
msg.msg_get_inq = 1;
msg.msg_flags = 0;
msg.msg_controllen = 0;
msg.msg_iocb = NULL;
flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
ret = sock_recvmsg(sock, &msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return -EAGAIN;
}
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
req_set_fail(req);
}
if (ret >= 0)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
cflags = io_put_kbuf(req, issue_flags);
if (msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
io_req_set_res(req, ret, cflags);
return IOU_OK;
}
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = io_kiocb_to_cmd(req);
unsigned flags;
if (sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
flags = READ_ONCE(sqe->ioprio);
if (flags & ~IORING_ACCEPT_MULTISHOT)
return -EINVAL;
accept->file_slot = READ_ONCE(sqe->file_index);
if (accept->file_slot) {
if (accept->flags & SOCK_CLOEXEC)
return -EINVAL;
if (flags & IORING_ACCEPT_MULTISHOT &&
accept->file_slot != IORING_FILE_INDEX_ALLOC)
return -EINVAL;
}
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
if (flags & IORING_ACCEPT_MULTISHOT)
req->flags |= REQ_F_APOLL_MULTISHOT;
return 0;
}
/*
 * Issue an accept request on req->file.
 *
 * Single-shot: the accepted fd (or fixed-slot install result, or a negative
 * error) is stored with io_req_set_res() and IOU_OK is returned.
 *
 * Multishot (REQ_F_APOLL_MULTISHOT): every successful accept is posted as
 * an auxiliary CQE flagged IORING_CQE_F_MORE and the function loops to
 * accept again. The loop ends by returning a value directly (not via
 * io_req_set_res()): the error from accept/install, -ECANCELED if the CQE
 * could not be posted, or IOU_ISSUE_SKIP_COMPLETE on a nonblocking -EAGAIN
 * when the request is already polled.
 *
 * Also returns the fd allocation error directly, and -EAGAIN on a
 * nonblocking would-block for requests that are not multishot+polled.
 */
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_accept *accept = io_kiocb_to_cmd(req);
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
/* a non-zero file_slot means: install into the fixed file table, no fd */
bool fixed = !!accept->file_slot;
struct file *file;
int ret, fd;
retry:
/* reserve the fd up front so that accept cannot succeed without one */
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
if (unlikely(fd < 0))
return fd;
}
file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
accept->flags);
if (IS_ERR(file)) {
/* accept failed: give the reserved fd back */
if (!fixed)
put_unused_fd(fd);
ret = PTR_ERR(file);
if (ret == -EAGAIN && force_nonblock) {
/*
 * if it's multishot and polled, we don't need to
 * return EAGAIN to arm the poll infra since it
 * has already been done
 */
if ((req->flags & IO_APOLL_MULTI_POLLED) ==
IO_APOLL_MULTI_POLLED)
ret = IOU_ISSUE_SKIP_COMPLETE;
return ret;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
} else if (!fixed) {
fd_install(fd, file);
ret = fd;
} else {
ret = io_fixed_fd_install(req, issue_flags, file,
accept->file_slot);
}
/* single-shot requests complete here with whatever ret holds */
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
io_req_set_res(req, ret, 0);
return IOU_OK;
}
/* multishot: post this result as an extra CQE and go accept again */
if (ret >= 0) {
bool filled;
spin_lock(&ctx->completion_lock);
filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
IORING_CQE_F_MORE);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (filled) {
io_cqring_ev_posted(ctx);
goto retry;
}
/* the CQE could not be posted: stop the multishot with -ECANCELED */
ret = -ECANCELED;
}
return ret;
}
/*
 * Validate and unpack the SQE of a socket-creation request.
 *
 * domain, type and protocol come from sqe->fd, sqe->off and sqe->len.
 * Returns -EINVAL if sqe->addr, sqe->rw_flags or sqe->buf_index is set, if
 * a fixed file slot is combined with SOCK_CLOEXEC, or if the type carries
 * flag bits other than SOCK_CLOEXEC/SOCK_NONBLOCK; 0 otherwise.
 */
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sreq = io_kiocb_to_cmd(req);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sreq->domain = READ_ONCE(sqe->fd);
	sreq->type = READ_ONCE(sqe->off);
	sreq->protocol = READ_ONCE(sqe->len);
	sreq->file_slot = READ_ONCE(sqe->file_index);
	sreq->nofile = rlimit(RLIMIT_NOFILE);

	/* whatever is not the base socket type is a SOCK_* flag */
	sreq->flags = sreq->type & ~SOCK_TYPE_MASK;

	if ((sreq->flags & SOCK_CLOEXEC) && sreq->file_slot)
		return -EINVAL;
	if (sreq->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;

	return 0;
}
/*
 * Issue a socket-creation request.
 *
 * Returns the fd allocation error directly, -EAGAIN when socket creation
 * would block during a nonblocking issue, or IOU_OK once the new fd (or the
 * fixed-slot install result, or a negative error) has been stored on the
 * request.
 */
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sreq = io_kiocb_to_cmd(req);
	const bool fixed = sreq->file_slot != 0;
	struct file *file;
	int ret, fd;

	/* reserve the fd up front unless a fixed file slot is used */
	if (!fixed) {
		fd = __get_unused_fd_flags(sreq->flags, sreq->nofile);
		if (unlikely(fd < 0))
			return fd;
	}

	file = __sys_socket_file(sreq->domain, sreq->type, sreq->protocol);
	if (!IS_ERR(file)) {
		if (fixed) {
			ret = io_fixed_fd_install(req, issue_flags, file,
						  sreq->file_slot);
		} else {
			fd_install(fd, file);
			ret = fd;
		}
		goto done;
	}

	/* creation failed: give the reserved fd back */
	if (!fixed)
		put_unused_fd(fd);

	ret = PTR_ERR(file);
	if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		return -EAGAIN;
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
	req_set_fail(req);

done:
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
/*
 * Copy the user-supplied destination address of a connect request into
 * the request's async data. Returns move_addr_to_kernel()'s result.
 */
int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_connect *conn = io_kiocb_to_cmd(req);
	struct io_async_connect *async = req->async_data;

	return move_addr_to_kernel(conn->addr, conn->addr_len,
				   &async->address);
}
/*
 * Validate and unpack the SQE of a connect request.
 *
 * Returns -EINVAL if sqe->len, sqe->buf_index, sqe->rw_flags or
 * sqe->splice_fd_in is set; otherwise records the user address pointer
 * (sqe->addr) and its length (sqe->addr2) and returns 0.
 */
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req);

	/* fields not consumed by this opcode must be left zeroed */
	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);

	return 0;
}
/*
 * Issue a connect request on req->file.
 *
 * The destination address comes from req->async_data when present,
 * otherwise it is copied from userspace into an on-stack
 * io_async_connect.
 *
 * Returns -EAGAIN when a nonblocking attempt reports -EAGAIN or
 * -EINPROGRESS (after saving the on-stack address into newly allocated
 * async data if none existed yet), or IOU_OK once the result has been
 * stored on the request. Address-copy and allocation failures are reported
 * through the request result with the request marked failed.
 */
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_connect *connect = io_kiocb_to_cmd(req);
struct io_async_connect __io, *io;
unsigned file_flags;
int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
if (req_has_async_data(req)) {
io = req->async_data;
} else {
ret = move_addr_to_kernel(connect->addr,
connect->addr_len,
&__io.address);
if (ret)
goto out;
io = &__io;
}
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_connect_file(req->file, &io->address,
connect->addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
/* address already lives in async data: just ask for a retry */
if (req_has_async_data(req))
return -EAGAIN;
if (io_alloc_async_data(req)) {
ret = -ENOMEM;
goto out;
}
/* keep the kernel copy of the address for the retry; __io is on the stack */
memcpy(req->async_data, &__io, sizeof(__io));
return -EAGAIN;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
out:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
return IOU_OK;
}
#endif
// SPDX-License-Identifier: GPL-2.0
#include <linux/net.h>
#include <linux/uio.h>
#if defined(CONFIG_NET)
/*
 * Message state for sendmsg/recvmsg requests. Used both on the stack for
 * the first attempt and, copied into req->async_data, for retries.
 */
struct io_async_msghdr {
/* inline iovec storage used when the user iovec array fits */
struct iovec fast_iov[UIO_FASTIOV];
/* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
/* user address pointer captured by the recvmsg header copy and passed
 * to __sys_recvmsg_sock() */
struct sockaddr __user *uaddr;
/* kernel message header handed to the socket layer */
struct msghdr msg;
/* backing storage that msg.msg_name is pointed at */
struct sockaddr_storage addr;
};
/*
 * Async state for connect requests: the kernel copy of the destination
 * address, filled by move_addr_to_kernel().
 */
struct io_async_connect {
struct sockaddr_storage address;
};
/*
 * Network opcode handlers implemented in net.c. The *_prep() functions
 * validate and unpack an SQE, the *_prep_async() functions fill
 * req->async_data, and the remaining int-returning functions issue the
 * operation.
 */

/* shutdown */
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags);
/* send / sendmsg; io_sendmsg_recvmsg_cleanup() frees the async iovec */
int io_sendmsg_prep_async(struct io_kiocb *req);
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req);
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags);
int io_send(struct io_kiocb *req, unsigned int issue_flags);
/* recv / recvmsg */
int io_recvmsg_prep_async(struct io_kiocb *req);
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags);
int io_recv(struct io_kiocb *req, unsigned int issue_flags);
/* accept */
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_accept(struct io_kiocb *req, unsigned int issue_flags);
/* socket */
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_socket(struct io_kiocb *req, unsigned int issue_flags);
/* connect */
int io_connect_prep_async(struct io_kiocb *req);
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment