Commit e1a8fde7 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block

Pull io_uring 'more data in socket' support from Jens Axboe:
 "To be able to fully utilize the 'poll first' support in the core
  io_uring branch, it's advantageous knowing if the socket was empty
  after a receive. This adds support for that"

* tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block:
  io_uring: return hint on whether more data is available after receive
  tcp: pass back data left in socket after receive
parents 368da430 f548a12e
...@@ -6115,6 +6115,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6115,6 +6115,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
struct io_async_msghdr iomsg, *kmsg; struct io_async_msghdr iomsg, *kmsg;
struct io_sr_msg *sr = &req->sr_msg; struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock; struct socket *sock;
unsigned int cflags;
unsigned flags; unsigned flags;
int ret, min_ret = 0; int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
...@@ -6154,6 +6155,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6154,6 +6155,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (flags & MSG_WAITALL) if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter); min_ret = iov_iter_count(&kmsg->msg.msg_iter);
kmsg->msg.msg_get_inq = 1;
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) { if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) if (ret == -EAGAIN && force_nonblock)
...@@ -6178,7 +6180,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6178,7 +6180,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io; ret += sr->done_io;
else if (sr->done_io) else if (sr->done_io)
ret = sr->done_io; ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags)); cflags = io_put_kbuf(req, issue_flags);
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
__io_req_complete(req, issue_flags, ret, cflags);
return 0; return 0;
} }
...@@ -6188,6 +6193,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6188,6 +6193,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
struct msghdr msg; struct msghdr msg;
struct socket *sock; struct socket *sock;
struct iovec iov; struct iovec iov;
unsigned int cflags;
unsigned flags; unsigned flags;
int ret, min_ret = 0; int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
...@@ -6214,11 +6220,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6214,11 +6220,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
goto out_free; goto out_free;
msg.msg_name = NULL; msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL; msg.msg_control = NULL;
msg.msg_get_inq = 1;
msg.msg_flags = 0;
msg.msg_controllen = 0; msg.msg_controllen = 0;
msg.msg_namelen = 0;
msg.msg_iocb = NULL; msg.msg_iocb = NULL;
msg.msg_flags = 0;
flags = sr->msg_flags; flags = sr->msg_flags;
if (force_nonblock) if (force_nonblock)
...@@ -6249,7 +6256,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) ...@@ -6249,7 +6256,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io; ret += sr->done_io;
else if (sr->done_io) else if (sr->done_io)
ret = sr->done_io; ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags)); cflags = io_put_kbuf(req, issue_flags);
if (msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
__io_req_complete(req, issue_flags, ret, cflags);
return 0; return 0;
} }
......
...@@ -50,6 +50,9 @@ struct linger { ...@@ -50,6 +50,9 @@ struct linger {
struct msghdr { struct msghdr {
void *msg_name; /* ptr to socket address structure */ void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */ int msg_namelen; /* size of socket address structure */
int msg_inq; /* output, data left in socket */
struct iov_iter msg_iter; /* data */ struct iov_iter msg_iter; /* data */
/* /*
...@@ -62,8 +65,9 @@ struct msghdr { ...@@ -62,8 +65,9 @@ struct msghdr {
void __user *msg_control_user; void __user *msg_control_user;
}; };
bool msg_control_is_user : 1; bool msg_control_is_user : 1;
__kernel_size_t msg_controllen; /* ancillary data buffer length */ bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */ unsigned int msg_flags; /* flags on received message */
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */
}; };
......
...@@ -258,9 +258,11 @@ struct io_uring_cqe { ...@@ -258,9 +258,11 @@ struct io_uring_cqe {
* *
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
*/ */
#define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_MORE (1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
enum { enum {
IORING_CQE_BUFFER_SHIFT = 16, IORING_CQE_BUFFER_SHIFT = 16,
......
...@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN) if (sk->sk_state == TCP_LISTEN)
goto out; goto out;
if (tp->recvmsg_inq) if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ; *cmsg_flags = TCP_CMSG_INQ;
msg->msg_get_inq = 1;
}
timeo = sock_rcvtimeo(sk, nonblock); timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */ /* Urgent data needs to be handled specially. */
...@@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len) int flags, int *addr_len)
{ {
int cmsg_flags = 0, ret, inq; int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss; struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE)) if (unlikely(flags & MSG_ERRQUEUE))
...@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, ...@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
release_sock(sk); release_sock(sk);
sk_defer_free_flush(sk); sk_defer_free_flush(sk);
if (cmsg_flags && ret >= 0) { if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS) if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss); tcp_recv_timestamp(msg, sk, &tss);
if (cmsg_flags & TCP_CMSG_INQ) { if (msg->msg_get_inq) {
inq = tcp_inq_hint(sk); msg->msg_inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); if (cmsg_flags & TCP_CMSG_INQ)
put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
sizeof(msg->msg_inq), &msg->msg_inq);
} }
} }
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment