Commit 004d564f authored by Jens Axboe

tools/io_uring: sync with liburing

Various fixes and changes have been applied to liburing since we
copied some select bits to the kernel testing/examples part, sync
up with liburing to get those changes.

Most notable is the change that splits CQE reading into separate peek
and seen steps, instead of being just a single function. Also fixes
an unsigned wrap issue in io_uring_submit(), a leak of 'fd' in setup
if we fail, and various other little issues.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 486f0692
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <inttypes.h> #include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
...@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset) ...@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset)
struct io_uring_sqe *sqe; struct io_uring_sqe *sqe;
struct io_data *data; struct io_data *data;
data = malloc(size + sizeof(*data));
if (!data)
return 1;
sqe = io_uring_get_sqe(ring); sqe = io_uring_get_sqe(ring);
if (!sqe) if (!sqe) {
free(data);
return 1; return 1;
}
data = malloc(size + sizeof(*data));
data->read = 1; data->read = 1;
data->offset = data->first_offset = offset; data->offset = data->first_offset = offset;
...@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize) ...@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize)
struct io_data *data; struct io_data *data;
if (!got_comp) { if (!got_comp) {
ret = io_uring_wait_completion(ring, &cqe); ret = io_uring_wait_cqe(ring, &cqe);
got_comp = 1; got_comp = 1;
} else } else
ret = io_uring_get_completion(ring, &cqe); ret = io_uring_peek_cqe(ring, &cqe);
if (ret < 0) { if (ret < 0) {
fprintf(stderr, "io_uring_get_completion: %s\n", fprintf(stderr, "io_uring_peek_cqe: %s\n",
strerror(-ret)); strerror(-ret));
return 1; return 1;
} }
if (!cqe) if (!cqe)
break; break;
data = (struct io_data *) (uintptr_t) cqe->user_data; data = io_uring_cqe_get_data(cqe);
if (cqe->res < 0) { if (cqe->res < 0) {
if (cqe->res == -EAGAIN) { if (cqe->res == -EAGAIN) {
queue_prepped(ring, data); queue_prepped(ring, data);
io_uring_cqe_seen(ring, cqe);
continue; continue;
} }
fprintf(stderr, "cqe failed: %s\n", fprintf(stderr, "cqe failed: %s\n",
...@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize) ...@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
data->iov.iov_len -= cqe->res; data->iov.iov_len -= cqe->res;
data->offset += cqe->res; data->offset += cqe->res;
queue_prepped(ring, data); queue_prepped(ring, data);
io_uring_cqe_seen(ring, cqe);
continue; continue;
} }
...@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize) ...@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
free(data); free(data);
writes--; writes--;
} }
io_uring_cqe_seen(ring, cqe);
} }
} }
......
#ifndef LIB_URING_H #ifndef LIB_URING_H
#define LIB_URING_H #define LIB_URING_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/uio.h> #include <sys/uio.h>
#include <signal.h> #include <signal.h>
#include <string.h> #include <string.h>
#include "../../include/uapi/linux/io_uring.h" #include "../../include/uapi/linux/io_uring.h"
#include <inttypes.h>
#include "barrier.h"
/* /*
* Library interface to io_uring * Library interface to io_uring
...@@ -46,7 +52,7 @@ struct io_uring { ...@@ -46,7 +52,7 @@ struct io_uring {
* System calls * System calls
*/ */
extern int io_uring_setup(unsigned entries, struct io_uring_params *p); extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
extern int io_uring_enter(unsigned fd, unsigned to_submit, extern int io_uring_enter(int fd, unsigned to_submit,
unsigned min_complete, unsigned flags, sigset_t *sig); unsigned min_complete, unsigned flags, sigset_t *sig);
extern int io_uring_register(int fd, unsigned int opcode, void *arg, extern int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args); unsigned int nr_args);
...@@ -59,13 +65,32 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, ...@@ -59,13 +65,32 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
struct io_uring *ring); struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring); extern void io_uring_queue_exit(struct io_uring *ring);
extern int io_uring_get_completion(struct io_uring *ring, extern int io_uring_peek_cqe(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr); struct io_uring_cqe **cqe_ptr);
extern int io_uring_wait_completion(struct io_uring *ring, extern int io_uring_wait_cqe(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr); struct io_uring_cqe **cqe_ptr);
extern int io_uring_submit(struct io_uring *ring); extern int io_uring_submit(struct io_uring *ring);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
/*
 * Mark a completion as consumed. Must be called after
 * io_uring_{peek,wait}_cqe() once the application has finished
 * processing the cqe. A NULL cqe is ignored.
 */
static inline void io_uring_cqe_seen(struct io_uring *ring,
				     struct io_uring_cqe *cqe)
{
	struct io_uring_cq *cq;

	/* Nothing was handed out, so there is nothing to retire. */
	if (!cqe)
		return;

	cq = &ring->cq;
	/* Advance the completion queue head by one entry. */
	++*cq->khead;
	/*
	 * The barrier follows the head update so the kernel sees the
	 * new head; the kernel has the matching read barrier.
	 */
	write_barrier();
}
/* /*
* Command prep helpers * Command prep helpers
*/ */
...@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) ...@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
sqe->user_data = (unsigned long) data; sqe->user_data = (unsigned long) data;
} }
/*
 * Return the cqe's user_data field converted back to a pointer.
 * The value goes through uintptr_t so the integer-to-pointer
 * conversion is done at pointer width.
 */
static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
{
	uintptr_t cookie = (uintptr_t) cqe->user_data;

	return (void *) cookie;
}
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
void *addr, unsigned len, off_t offset) const void *addr, unsigned len,
off_t offset)
{ {
memset(sqe, 0, sizeof(*sqe)); memset(sqe, 0, sizeof(*sqe));
sqe->opcode = op; sqe->opcode = op;
...@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, ...@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
} }
static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
struct iovec *iovecs, unsigned nr_vecs, const struct iovec *iovecs,
off_t offset) unsigned nr_vecs, off_t offset)
{ {
io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
} }
...@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, ...@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
} }
static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
struct iovec *iovecs, unsigned nr_vecs, const struct iovec *iovecs,
off_t offset) unsigned nr_vecs, off_t offset)
{ {
io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
} }
static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
void *buf, unsigned nbytes, const void *buf, unsigned nbytes,
off_t offset) off_t offset)
{ {
io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
...@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, ...@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
} }
static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
int datasync) unsigned fsync_flags)
{ {
memset(sqe, 0, sizeof(*sqe)); memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_FSYNC; sqe->opcode = IORING_OP_FSYNC;
sqe->fd = fd; sqe->fd = fd;
if (datasync) sqe->fsync_flags = fsync_flags;
sqe->fsync_flags = IORING_FSYNC_DATASYNC; }
/*
 * Prepare @sqe as a no-op request: every byte of the entry is cleared
 * first, then only the opcode is set to IORING_OP_NOP.
 */
static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
{
	/* Start from an all-zero entry so no stale fields remain. */
	memset(sqe, 0, sizeof *sqe);
	sqe->opcode = IORING_OP_NOP;
}
#ifdef __cplusplus
} }
#endif
#endif #endif
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
#include "liburing.h" #include "liburing.h"
#include "barrier.h" #include "barrier.h"
static int __io_uring_get_completion(struct io_uring *ring, static int __io_uring_get_cqe(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr, int wait) struct io_uring_cqe **cqe_ptr, int wait)
{ {
struct io_uring_cq *cq = &ring->cq; struct io_uring_cq *cq = &ring->cq;
const unsigned mask = *cq->kring_mask; const unsigned mask = *cq->kring_mask;
...@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring, ...@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring,
return -errno; return -errno;
} while (1); } while (1);
if (*cqe_ptr) {
*cq->khead = head + 1;
/*
* Ensure that the kernel sees our new head, the kernel has
* the matching read barrier.
*/
write_barrier();
}
return 0; return 0;
} }
/* /*
* Return an IO completion, if one is readily available * Return an IO completion, if one is readily available. Returns 0 with
* cqe_ptr filled in on success, -errno on failure.
*/ */
int io_uring_get_completion(struct io_uring *ring, int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
struct io_uring_cqe **cqe_ptr)
{ {
return __io_uring_get_completion(ring, cqe_ptr, 0); return __io_uring_get_cqe(ring, cqe_ptr, 0);
} }
/* /*
* Return an IO completion, waiting for it if necessary * Return an IO completion, waiting for it if necessary. Returns 0 with
* cqe_ptr filled in on success, -errno on failure.
*/ */
int io_uring_wait_completion(struct io_uring *ring, int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
struct io_uring_cqe **cqe_ptr)
{ {
return __io_uring_get_completion(ring, cqe_ptr, 1); return __io_uring_get_cqe(ring, cqe_ptr, 1);
} }
/* /*
...@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring) ...@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring)
{ {
struct io_uring_sq *sq = &ring->sq; struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask; const unsigned mask = *sq->kring_mask;
unsigned ktail, ktail_next, submitted; unsigned ktail, ktail_next, submitted, to_submit;
int ret; int ret;
/* /*
...@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring) ...@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring)
*/ */
submitted = 0; submitted = 0;
ktail = ktail_next = *sq->ktail; ktail = ktail_next = *sq->ktail;
while (sq->sqe_head < sq->sqe_tail) { to_submit = sq->sqe_tail - sq->sqe_head;
while (to_submit--) {
ktail_next++; ktail_next++;
read_barrier(); read_barrier();
...@@ -136,7 +128,7 @@ int io_uring_submit(struct io_uring *ring) ...@@ -136,7 +128,7 @@ int io_uring_submit(struct io_uring *ring)
if (ret < 0) if (ret < 0)
return -errno; return -errno;
return 0; return ret;
} }
/* /*
......
...@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p, ...@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p,
sq->kdropped = ptr + p->sq_off.dropped; sq->kdropped = ptr + p->sq_off.dropped;
sq->array = ptr + p->sq_off.array; sq->array = ptr + p->sq_off.array;
size = p->sq_entries * sizeof(struct io_uring_sqe), size = p->sq_entries * sizeof(struct io_uring_sqe);
sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQES); IORING_OFF_SQES);
...@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring ...@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{ {
struct io_uring_params p; struct io_uring_params p;
int fd; int fd, ret;
memset(&p, 0, sizeof(p)); memset(&p, 0, sizeof(p));
p.flags = flags; p.flags = flags;
...@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) ...@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
if (fd < 0) if (fd < 0)
return fd; return fd;
return io_uring_queue_mmap(fd, &p, ring); ret = io_uring_queue_mmap(fd, &p, ring);
if (ret)
close(fd);
return ret;
} }
void io_uring_queue_exit(struct io_uring *ring) void io_uring_queue_exit(struct io_uring *ring)
......
...@@ -7,34 +7,46 @@ ...@@ -7,34 +7,46 @@
#include <signal.h> #include <signal.h>
#include "liburing.h" #include "liburing.h"
#if defined(__x86_64) || defined(__i386__) #ifdef __alpha__
#ifndef __NR_sys_io_uring_setup /*
#define __NR_sys_io_uring_setup 425 * alpha is the only exception, all other architectures
#endif * have common numbers for new system calls.
#ifndef __NR_sys_io_uring_enter */
#define __NR_sys_io_uring_enter 426 # ifndef __NR_io_uring_setup
#endif # define __NR_io_uring_setup 535
#ifndef __NR_sys_io_uring_register # endif
#define __NR_sys_io_uring_register 427 # ifndef __NR_io_uring_enter
#endif # define __NR_io_uring_enter 536
#else # endif
#error "Arch not supported yet" # ifndef __NR_io_uring_register
# define __NR_io_uring_register 537
# endif
#else /* !__alpha__ */
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 425
# endif
# ifndef __NR_io_uring_enter
# define __NR_io_uring_enter 426
# endif
# ifndef __NR_io_uring_register
# define __NR_io_uring_register 427
# endif
#endif #endif
int io_uring_register(int fd, unsigned int opcode, void *arg, int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args) unsigned int nr_args)
{ {
return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args); return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
} }
int io_uring_setup(unsigned entries, struct io_uring_params *p) int io_uring_setup(unsigned int entries, struct io_uring_params *p)
{ {
return syscall(__NR_sys_io_uring_setup, entries, p); return syscall(__NR_io_uring_setup, entries, p);
} }
int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete, int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
unsigned flags, sigset_t *sig) unsigned int flags, sigset_t *sig)
{ {
return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete, return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
flags, sig, _NSIG / 8); flags, sig, _NSIG / 8);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment