Commit 2d091d62, authored by Pavel Begunkov, committed by Jens Axboe

io_uring: don't vmalloc rsrc tags

We don't really need vmalloc for keeping tags, it's not a hot path and
is there out of convenience, so replace it with two level tables to not
litter kernel virtual memory mappings.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/241a3422747113a8909e7e1030eb585d4a349e0d.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 9123c8ff
...@@ -100,6 +100,10 @@ ...@@ -100,6 +100,10 @@
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST) IORING_REGISTER_LAST + IORING_OP_LAST)
/*
 * A tag index is split in two: the low IO_RSRC_TAG_TABLE_SHIFT bits select a
 * slot inside a second-level table, the remaining high bits select the table.
 */
#define IO_RSRC_TAG_TABLE_SHIFT 9
/* Number of distinct slot offsets addressable by the low bits (1 << 9). */
#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
/* Mask that extracts the in-table slot offset from a tag index. */
#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
#define IORING_MAX_REG_BUFFERS (1U << 14) #define IORING_MAX_REG_BUFFERS (1U << 14)
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
...@@ -243,7 +247,8 @@ typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); ...@@ -243,7 +247,8 @@ typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
struct io_rsrc_data { struct io_rsrc_data {
struct io_ring_ctx *ctx; struct io_ring_ctx *ctx;
u64 *tags; u64 **tags;
unsigned int nr;
rsrc_put_fn *do_put; rsrc_put_fn *do_put;
atomic_t refs; atomic_t refs;
struct completion done; struct completion done;
...@@ -7177,9 +7182,20 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct ...@@ -7177,9 +7182,20 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
return ret; return ret;
} }
/*
 * Return a pointer to the tag slot for resource index @idx.
 *
 * The high bits of @idx (above IO_RSRC_TAG_TABLE_SHIFT) choose the
 * second-level table; the low bits choose the entry within it.
 * No bounds checking is done here.
 */
static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
{
	u64 *table = data->tags[idx >> IO_RSRC_TAG_TABLE_SHIFT];

	return table + (idx & IO_RSRC_TAG_TABLE_MASK);
}
static void io_rsrc_data_free(struct io_rsrc_data *data) static void io_rsrc_data_free(struct io_rsrc_data *data)
{ {
kvfree(data->tags); size_t size = data->nr * sizeof(data->tags[0][0]);
if (data->tags)
io_free_page_table((void **)data->tags, size);
kfree(data); kfree(data);
} }
...@@ -7188,33 +7204,37 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, ...@@ -7188,33 +7204,37 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
struct io_rsrc_data **pdata) struct io_rsrc_data **pdata)
{ {
struct io_rsrc_data *data; struct io_rsrc_data *data;
int ret = -ENOMEM;
unsigned i; unsigned i;
data = kzalloc(sizeof(*data), GFP_KERNEL); data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) if (!data)
return -ENOMEM; return -ENOMEM;
data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
if (!data->tags) { if (!data->tags) {
kfree(data); kfree(data);
return -ENOMEM; return -ENOMEM;
} }
data->nr = nr;
data->ctx = ctx;
data->do_put = do_put;
if (utags) { if (utags) {
ret = -EFAULT;
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
if (copy_from_user(&data->tags[i], &utags[i], if (copy_from_user(io_get_tag_slot(data, i), &utags[i],
sizeof(data->tags[i]))) { sizeof(data->tags[i])))
io_rsrc_data_free(data); goto fail;
return -EFAULT;
}
} }
} }
atomic_set(&data->refs, 1); atomic_set(&data->refs, 1);
data->ctx = ctx;
data->do_put = do_put;
init_completion(&data->done); init_completion(&data->done);
*pdata = data; *pdata = data;
return 0; return 0;
fail:
io_rsrc_data_free(data);
return ret;
} }
static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
...@@ -7683,7 +7703,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, ...@@ -7683,7 +7703,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
/* allow sparse sets */ /* allow sparse sets */
if (fd == -1) { if (fd == -1) {
ret = -EINVAL; ret = -EINVAL;
if (unlikely(ctx->file_data->tags[i])) if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
goto out_fput; goto out_fput;
continue; continue;
} }
...@@ -7781,7 +7801,7 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, ...@@ -7781,7 +7801,7 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
if (!prsrc) if (!prsrc)
return -ENOMEM; return -ENOMEM;
prsrc->tag = data->tags[idx]; prsrc->tag = *io_get_tag_slot(data, idx);
prsrc->rsrc = rsrc; prsrc->rsrc = rsrc;
list_add(&prsrc->list, &node->rsrc_list); list_add(&prsrc->list, &node->rsrc_list);
return 0; return 0;
...@@ -7851,7 +7871,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, ...@@ -7851,7 +7871,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF; err = -EBADF;
break; break;
} }
data->tags[up->offset + done] = tag; *io_get_tag_slot(data, up->offset + done) = tag;
io_fixed_file_set(file_slot, file); io_fixed_file_set(file_slot, file);
err = io_sqe_file_register(ctx, file, i); err = io_sqe_file_register(ctx, file, i);
if (err) { if (err) {
...@@ -8437,7 +8457,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, ...@@ -8437,7 +8457,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
ret = io_buffer_validate(&iov); ret = io_buffer_validate(&iov);
if (ret) if (ret)
break; break;
if (!iov.iov_base && data->tags[i]) { if (!iov.iov_base && *io_get_tag_slot(data, i)) {
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
...@@ -8510,7 +8530,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, ...@@ -8510,7 +8530,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
} }
ctx->user_bufs[i] = imu; ctx->user_bufs[i] = imu;
ctx->buf_data->tags[offset] = tag; *io_get_tag_slot(ctx->buf_data, offset) = tag;
} }
if (needs_switch) if (needs_switch)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment