Commit 6faddda6 authored by Chuck Lever's avatar Chuck Lever Committed by Christian Brauner

libfs: Add directory operations for stable offsets

Create a vector of directory operations in fs/libfs.c that handles
directory seeks and readdir via stable offsets instead of the
current cursor-based mechanism.

For the moment these are unused.
Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
Message-Id: <168814732984.530310.11190772066786107220.stgit@manet.1015granger.net>
Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 509f0069
...@@ -85,13 +85,14 @@ prototypes:: ...@@ -85,13 +85,14 @@ prototypes::
struct dentry *dentry, struct fileattr *fa); struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int); struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
locking rules: locking rules:
all may block all may block
============== ============================================= ============== ==================================================
ops i_rwsem(inode) ops i_rwsem(inode)
============== ============================================= ============== ==================================================
lookup: shared lookup: shared
create: exclusive create: exclusive
link: exclusive (both) link: exclusive (both)
...@@ -115,7 +116,8 @@ atomic_open: shared (exclusive if O_CREAT is set in open flags) ...@@ -115,7 +116,8 @@ atomic_open: shared (exclusive if O_CREAT is set in open flags)
tmpfile: no tmpfile: no
fileattr_get: no or exclusive fileattr_get: no or exclusive
fileattr_set: exclusive fileattr_set: exclusive
============== ============================================= get_offset_ctx no
============== ==================================================
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
......
...@@ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined: ...@@ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined:
int (*fileattr_set)(struct mnt_idmap *idmap, int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa); struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
}; };
Again, all methods are called without any locks being held, unless Again, all methods are called without any locks being held, unless
...@@ -675,7 +676,10 @@ otherwise noted. ...@@ -675,7 +676,10 @@ otherwise noted.
called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
change miscellaneous file flags and attributes. Callers hold change miscellaneous file flags and attributes. Callers hold
i_rwsem exclusive. If unset, then fall back to f_op->ioctl(). i_rwsem exclusive. If unset, then fall back to f_op->ioctl().
``get_offset_ctx``
called to get the offset context for a directory inode. A
filesystem must define this operation to use
simple_offset_dir_operations.
The Address Space Object The Address Space Object
======================== ========================
......
...@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = { ...@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
}; };
EXPORT_SYMBOL(simple_dir_inode_operations); EXPORT_SYMBOL(simple_dir_inode_operations);
static void offset_set(struct dentry *dentry, u32 offset)
{
dentry->d_fsdata = (void *)((uintptr_t)(offset));
}
static u32 dentry2offset(struct dentry *dentry)
{
return (u32)((uintptr_t)(dentry->d_fsdata));
}
/**
* simple_offset_init - initialize an offset_ctx
* @octx: directory offset map to be initialized
*
*/
void simple_offset_init(struct offset_ctx *octx)
{
xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
/* 0 is '.', 1 is '..', so always start with offset 2 */
octx->next_offset = 2;
}
/**
* simple_offset_add - Add an entry to a directory's offset map
* @octx: directory offset ctx to be updated
* @dentry: new dentry being added
*
* Returns zero on success. @so_ctx and the dentry offset are updated.
* Otherwise, a negative errno value is returned.
*/
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
{
static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
u32 offset;
int ret;
if (dentry2offset(dentry) != 0)
return -EBUSY;
ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
&octx->next_offset, GFP_KERNEL);
if (ret < 0)
return ret;
offset_set(dentry, offset);
return 0;
}
/**
* simple_offset_remove - Remove an entry to a directory's offset map
* @octx: directory offset ctx to be updated
* @dentry: dentry being removed
*
*/
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
{
u32 offset;
offset = dentry2offset(dentry);
if (offset == 0)
return;
xa_erase(&octx->xa, offset);
offset_set(dentry, 0);
}
/**
* simple_offset_rename_exchange - exchange rename with directory offsets
* @old_dir: parent of dentry being moved
* @old_dentry: dentry being moved
* @new_dir: destination parent
* @new_dentry: destination dentry
*
* Returns zero on success. Otherwise a negative errno is returned and the
* rename is rolled back.
*/
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry)
{
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
u32 old_index = dentry2offset(old_dentry);
u32 new_index = dentry2offset(new_dentry);
int ret;
simple_offset_remove(old_ctx, old_dentry);
simple_offset_remove(new_ctx, new_dentry);
ret = simple_offset_add(new_ctx, old_dentry);
if (ret)
goto out_restore;
ret = simple_offset_add(old_ctx, new_dentry);
if (ret) {
simple_offset_remove(new_ctx, old_dentry);
goto out_restore;
}
ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
if (ret) {
simple_offset_remove(new_ctx, old_dentry);
simple_offset_remove(old_ctx, new_dentry);
goto out_restore;
}
return 0;
out_restore:
offset_set(old_dentry, old_index);
xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
offset_set(new_dentry, new_index);
xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
return ret;
}
/**
* simple_offset_destroy - Release offset map
* @octx: directory offset ctx that is about to be destroyed
*
* During fs teardown (eg. umount), a directory's offset map might still
* contain entries. xa_destroy() cleans out anything that remains.
*/
void simple_offset_destroy(struct offset_ctx *octx)
{
xa_destroy(&octx->xa);
}
/**
* offset_dir_llseek - Advance the read position of a directory descriptor
* @file: an open directory whose position is to be updated
* @offset: a byte offset
* @whence: enumerator describing the starting position for this update
*
* SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
*
* Returns the updated read position if successful; otherwise a
* negative errno is returned and the read position remains unchanged.
*/
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
fallthrough;
case SEEK_SET:
if (offset >= 0)
break;
fallthrough;
default:
return -EINVAL;
}
return vfs_setpos(file, offset, U32_MAX);
}
static struct dentry *offset_find_next(struct xa_state *xas)
{
struct dentry *child, *found = NULL;
rcu_read_lock();
child = xas_next_entry(xas, U32_MAX);
if (!child)
goto out;
spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(child))
found = dget_dlock(child);
spin_unlock(&child->d_lock);
out:
rcu_read_unlock();
return found;
}
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
u32 offset = dentry2offset(dentry);
struct inode *inode = d_inode(dentry);
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}
static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
{
struct inode *inode = d_inode(dir);
struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
XA_STATE(xas, &so_ctx->xa, ctx->pos);
struct dentry *dentry;
while (true) {
spin_lock(&dir->d_lock);
dentry = offset_find_next(&xas);
spin_unlock(&dir->d_lock);
if (!dentry)
break;
if (!offset_dir_emit(ctx, dentry)) {
dput(dentry);
break;
}
dput(dentry);
ctx->pos = xas.xa_index + 1;
}
}
/**
* offset_readdir - Emit entries starting at offset @ctx->pos
* @file: an open directory to iterate over
* @ctx: directory iteration context
*
* Caller must hold @file's i_rwsem to prevent insertion or removal of
* entries during this call.
*
* On entry, @ctx->pos contains an offset that represents the first entry
* to be read from the directory.
*
* The operation continues until there are no more entries to read, or
* until the ctx->actor indicates there is no more space in the caller's
* output buffer.
*
* On return, @ctx->pos contains an offset that will read the next entry
* in this directory when shmem_readdir() is called again with @ctx.
*
* Return values:
* %0 - Complete
*/
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dir = file->f_path.dentry;
lockdep_assert_held(&d_inode(dir)->i_rwsem);
if (!dir_emit_dots(file, ctx))
return 0;
offset_iterate_dir(dir, ctx);
return 0;
}
const struct file_operations simple_offset_dir_operations = {
.llseek = offset_dir_llseek,
.iterate_shared = offset_readdir,
.read = generic_read_dir,
.fsync = noop_fsync,
};
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{ {
struct dentry *child = NULL; struct dentry *child = NULL;
......
...@@ -1770,6 +1770,7 @@ struct dir_context { ...@@ -1770,6 +1770,7 @@ struct dir_context {
struct iov_iter; struct iov_iter;
struct io_uring_cmd; struct io_uring_cmd;
struct offset_ctx;
struct file_operations { struct file_operations {
struct module *owner; struct module *owner;
...@@ -1857,6 +1858,7 @@ struct inode_operations { ...@@ -1857,6 +1858,7 @@ struct inode_operations {
int (*fileattr_set)(struct mnt_idmap *idmap, int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa); struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned; } ____cacheline_aligned;
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
...@@ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, ...@@ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count); const void __user *from, size_t count);
struct offset_ctx {
struct xarray xa;
u32 next_offset;
};
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
struct dentry *old_dentry,
struct inode *new_dir,
struct dentry *new_dentry);
void simple_offset_destroy(struct offset_ctx *octx);
extern const struct file_operations simple_offset_dir_operations;
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment