Commit 1e6fa521 authored by Jens Axboe's avatar Jens Axboe

io_uring: COW io_identity on mismatch

If the io_identity doesn't completely match the task, then create a
copy of it and use that. The existing copy remains valid until the last
user of it has gone away.

This also changes the personality lookup to be indexed by io_identity,
instead of creds directly.
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 98447d65
...@@ -1040,6 +1040,27 @@ static inline void req_set_fail_links(struct io_kiocb *req) ...@@ -1040,6 +1040,27 @@ static inline void req_set_fail_links(struct io_kiocb *req)
req->flags |= REQ_F_FAIL_LINK; req->flags |= REQ_F_FAIL_LINK;
} }
/*
* None of these are dereferenced, they are simply used to check if any of
* them have changed. If we're under current and check they are still the
* same, we're fine to grab references to them for actual out-of-line use.
*/
static void io_init_identity(struct io_identity *id)
{
id->files = current->files;
id->mm = current->mm;
#ifdef CONFIG_BLK_CGROUP
rcu_read_lock();
id->blkcg_css = blkcg_css();
rcu_read_unlock();
#endif
id->creds = current_cred();
id->nsproxy = current->nsproxy;
id->fs = current->fs;
id->fsize = rlimit(RLIMIT_FSIZE);
refcount_set(&id->count, 1);
}
/* /*
* Note: must call io_req_init_async() for the first time you * Note: must call io_req_init_async() for the first time you
* touch any members of io_wq_work. * touch any members of io_wq_work.
...@@ -1051,6 +1072,7 @@ static inline void io_req_init_async(struct io_kiocb *req) ...@@ -1051,6 +1072,7 @@ static inline void io_req_init_async(struct io_kiocb *req)
memset(&req->work, 0, sizeof(req->work)); memset(&req->work, 0, sizeof(req->work));
req->flags |= REQ_F_WORK_INITIALIZED; req->flags |= REQ_F_WORK_INITIALIZED;
io_init_identity(&req->identity);
req->work.identity = &req->identity; req->work.identity = &req->identity;
} }
...@@ -1157,6 +1179,14 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) ...@@ -1157,6 +1179,14 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
} }
} }
static void io_put_identity(struct io_kiocb *req)
{
if (req->work.identity == &req->identity)
return;
if (refcount_dec_and_test(&req->work.identity->count))
kfree(req->work.identity);
}
static void io_req_clean_work(struct io_kiocb *req) static void io_req_clean_work(struct io_kiocb *req)
{ {
if (!(req->flags & REQ_F_WORK_INITIALIZED)) if (!(req->flags & REQ_F_WORK_INITIALIZED))
...@@ -1189,28 +1219,67 @@ static void io_req_clean_work(struct io_kiocb *req) ...@@ -1189,28 +1219,67 @@ static void io_req_clean_work(struct io_kiocb *req)
free_fs_struct(fs); free_fs_struct(fs);
req->work.flags &= ~IO_WQ_WORK_FS; req->work.flags &= ~IO_WQ_WORK_FS;
} }
io_put_identity(req);
} }
static void io_prep_async_work(struct io_kiocb *req) /*
* Create a private copy of io_identity, since some fields don't match
* the current context.
*/
static bool io_identity_cow(struct io_kiocb *req)
{
const struct cred *creds = NULL;
struct io_identity *id;
if (req->work.flags & IO_WQ_WORK_CREDS)
creds = req->work.identity->creds;
id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
if (unlikely(!id)) {
req->work.flags |= IO_WQ_WORK_CANCEL;
return false;
}
/*
* We can safely just re-init the creds we copied Either the field
* matches the current one, or we haven't grabbed it yet. The only
* exception is ->creds, through registered personalities, so handle
* that one separately.
*/
io_init_identity(id);
if (creds)
req->work.identity->creds = creds;
/* add one for this request */
refcount_inc(&id->count);
/* drop old identity, assign new one. one ref for req, one for tctx */
if (req->work.identity != &req->identity &&
refcount_sub_and_test(2, &req->work.identity->count))
kfree(req->work.identity);
req->work.identity = id;
return true;
}
static bool io_grab_identity(struct io_kiocb *req)
{ {
const struct io_op_def *def = &io_op_defs[req->opcode]; const struct io_op_def *def = &io_op_defs[req->opcode];
struct io_identity *id = &req->identity;
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
io_req_init_async(req); if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE))
return false;
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
if (!(req->work.flags & IO_WQ_WORK_FILES) && if (!(req->work.flags & IO_WQ_WORK_FILES) &&
(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_FILES) && (def->work_flags & IO_WQ_WORK_FILES) &&
!(req->flags & REQ_F_NO_FILE_TABLE)) { !(req->flags & REQ_F_NO_FILE_TABLE)) {
req->work.identity->files = get_files_struct(current); if (id->files != current->files ||
get_nsproxy(current->nsproxy); id->nsproxy != current->nsproxy)
req->work.identity->nsproxy = current->nsproxy; return false;
atomic_inc(&id->files->count);
get_nsproxy(id->nsproxy);
req->flags |= REQ_F_INFLIGHT; req->flags |= REQ_F_INFLIGHT;
spin_lock_irq(&ctx->inflight_lock); spin_lock_irq(&ctx->inflight_lock);
...@@ -1218,46 +1287,79 @@ static void io_prep_async_work(struct io_kiocb *req) ...@@ -1218,46 +1287,79 @@ static void io_prep_async_work(struct io_kiocb *req)
spin_unlock_irq(&ctx->inflight_lock); spin_unlock_irq(&ctx->inflight_lock);
req->work.flags |= IO_WQ_WORK_FILES; req->work.flags |= IO_WQ_WORK_FILES;
} }
if (!(req->work.flags & IO_WQ_WORK_MM) &&
(def->work_flags & IO_WQ_WORK_MM)) {
mmgrab(current->mm);
req->work.identity->mm = current->mm;
req->work.flags |= IO_WQ_WORK_MM;
}
#ifdef CONFIG_BLK_CGROUP #ifdef CONFIG_BLK_CGROUP
if (!(req->work.flags & IO_WQ_WORK_BLKCG) && if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
(def->work_flags & IO_WQ_WORK_BLKCG)) { (def->work_flags & IO_WQ_WORK_BLKCG)) {
rcu_read_lock(); rcu_read_lock();
req->work.identity->blkcg_css = blkcg_css(); if (id->blkcg_css != blkcg_css()) {
rcu_read_unlock();
return false;
}
/* /*
* This should be rare, either the cgroup is dying or the task * This should be rare, either the cgroup is dying or the task
* is moving cgroups. Just punt to root for the handful of ios. * is moving cgroups. Just punt to root for the handful of ios.
*/ */
if (css_tryget_online(req->work.identity->blkcg_css)) if (css_tryget_online(id->blkcg_css))
req->work.flags |= IO_WQ_WORK_BLKCG; req->work.flags |= IO_WQ_WORK_BLKCG;
rcu_read_unlock(); rcu_read_unlock();
} }
#endif #endif
if (!(req->work.flags & IO_WQ_WORK_CREDS)) { if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
req->work.identity->creds = get_current_cred(); if (id->creds != current_cred())
return false;
get_cred(id->creds);
req->work.flags |= IO_WQ_WORK_CREDS; req->work.flags |= IO_WQ_WORK_CREDS;
} }
if (!(req->work.flags & IO_WQ_WORK_FS) && if (!(req->work.flags & IO_WQ_WORK_FS) &&
(def->work_flags & IO_WQ_WORK_FS)) { (def->work_flags & IO_WQ_WORK_FS)) {
spin_lock(&current->fs->lock); if (current->fs != id->fs)
if (!current->fs->in_exec) { return false;
req->work.identity->fs = current->fs; spin_lock(&id->fs->lock);
req->work.identity->fs->users++; if (!id->fs->in_exec) {
id->fs->users++;
req->work.flags |= IO_WQ_WORK_FS; req->work.flags |= IO_WQ_WORK_FS;
} else { } else {
req->work.flags |= IO_WQ_WORK_CANCEL; req->work.flags |= IO_WQ_WORK_CANCEL;
} }
spin_unlock(&current->fs->lock); spin_unlock(&current->fs->lock);
} }
if (def->needs_fsize)
req->work.identity->fsize = rlimit(RLIMIT_FSIZE); return true;
else }
req->work.identity->fsize = RLIM_INFINITY;
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
struct io_identity *id = &req->identity;
struct io_ring_ctx *ctx = req->ctx;
io_req_init_async(req);
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
} else {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
/* ->mm can never change on us */
if (!(req->work.flags & IO_WQ_WORK_MM) &&
(def->work_flags & IO_WQ_WORK_MM)) {
mmgrab(id->mm);
req->work.flags |= IO_WQ_WORK_MM;
}
/* if we fail grabbing identity, we must COW, regrab, and retry */
if (io_grab_identity(req))
return;
if (!io_identity_cow(req))
return;
/* can't fail at this point */
if (!io_grab_identity(req))
WARN_ON(1);
} }
static void io_prep_async_link(struct io_kiocb *req) static void io_prep_async_link(struct io_kiocb *req)
...@@ -1696,12 +1798,10 @@ static void io_dismantle_req(struct io_kiocb *req) ...@@ -1696,12 +1798,10 @@ static void io_dismantle_req(struct io_kiocb *req)
static void __io_free_req(struct io_kiocb *req) static void __io_free_req(struct io_kiocb *req)
{ {
struct io_uring_task *tctx; struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx; struct io_ring_ctx *ctx = req->ctx;
io_dismantle_req(req); io_dismantle_req(req);
tctx = req->task->io_uring;
ctx = req->ctx;
atomic_long_inc(&tctx->req_complete); atomic_long_inc(&tctx->req_complete);
if (tctx->in_idle) if (tctx->in_idle)
...@@ -6374,11 +6474,16 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, ...@@ -6374,11 +6474,16 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality); id = READ_ONCE(sqe->personality);
if (id) { if (id) {
struct io_identity *iod;
io_req_init_async(req); io_req_init_async(req);
req->work.identity->creds = idr_find(&ctx->personality_idr, id); iod = idr_find(&ctx->personality_idr, id);
if (unlikely(!req->work.identity->creds)) if (unlikely(!iod))
return -EINVAL; return -EINVAL;
get_cred(req->work.identity->creds); refcount_inc(&iod->count);
io_put_identity(req);
get_cred(iod->creds);
req->work.identity = iod;
req->work.flags |= IO_WQ_WORK_CREDS; req->work.flags |= IO_WQ_WORK_CREDS;
} }
...@@ -8171,11 +8276,14 @@ static int io_uring_fasync(int fd, struct file *file, int on) ...@@ -8171,11 +8276,14 @@ static int io_uring_fasync(int fd, struct file *file, int on)
static int io_remove_personalities(int id, void *p, void *data) static int io_remove_personalities(int id, void *p, void *data)
{ {
struct io_ring_ctx *ctx = data; struct io_ring_ctx *ctx = data;
const struct cred *cred; struct io_identity *iod;
cred = idr_remove(&ctx->personality_idr, id); iod = idr_remove(&ctx->personality_idr, id);
if (cred) if (iod) {
put_cred(cred); put_cred(iod->creds);
if (refcount_dec_and_test(&iod->count))
kfree(iod);
}
return 0; return 0;
} }
...@@ -9245,23 +9353,33 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) ...@@ -9245,23 +9353,33 @@ static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
static int io_register_personality(struct io_ring_ctx *ctx) static int io_register_personality(struct io_ring_ctx *ctx)
{ {
const struct cred *creds = get_current_cred(); struct io_identity *id;
int id; int ret;
id = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1, id = kmalloc(sizeof(*id), GFP_KERNEL);
USHRT_MAX, GFP_KERNEL); if (unlikely(!id))
if (id < 0) return -ENOMEM;
put_cred(creds);
return id; io_init_identity(id);
id->creds = get_current_cred();
ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL);
if (ret < 0) {
put_cred(id->creds);
kfree(id);
}
return ret;
} }
static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{ {
const struct cred *old_creds; struct io_identity *iod;
old_creds = idr_remove(&ctx->personality_idr, id); iod = idr_remove(&ctx->personality_idr, id);
if (old_creds) { if (iod) {
put_cred(old_creds); put_cred(iod->creds);
if (refcount_dec_and_test(&iod->count))
kfree(iod);
return 0; return 0;
} }
......
...@@ -15,6 +15,7 @@ struct io_identity { ...@@ -15,6 +15,7 @@ struct io_identity {
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
struct fs_struct *fs; struct fs_struct *fs;
unsigned long fsize; unsigned long fsize;
refcount_t count;
}; };
struct io_uring_task { struct io_uring_task {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment