Commit 0108963f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'v6.5-rc5.vfs.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix a wrong check for O_TMPFILE during RESOLVE_CACHED lookup

 - Clean up directory iterators and clarify file_needs_f_pos_lock()

* tag 'v6.5-rc5.vfs.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: rely on ->iterate_shared to determine f_pos locking
  vfs: get rid of old '->iterate' directory operation
  proc: fix missing conversion to 'iterate_shared'
  open: make RESOLVE_CACHED correctly test for O_TMPFILE
parents f0ab9f34 7d84d1b9
...@@ -551,9 +551,8 @@ mutex or just to use i_size_read() instead. ...@@ -551,9 +551,8 @@ mutex or just to use i_size_read() instead.
Note: this does not protect the file->f_pos against concurrent modifications Note: this does not protect the file->f_pos against concurrent modifications
since this is something the userspace has to take care about. since this is something the userspace has to take care about.
->iterate() is called with i_rwsem exclusive. ->iterate_shared() is called with i_rwsem held for reading, and with the
file f_pos_lock held exclusively
->iterate_shared() is called with i_rwsem at least shared.
->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags. ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
Most instances call fasync_helper(), which does that maintenance, so it's Most instances call fasync_helper(), which does that maintenance, so it's
......
...@@ -537,7 +537,7 @@ vfs_readdir() is gone; switch to iterate_dir() instead ...@@ -537,7 +537,7 @@ vfs_readdir() is gone; switch to iterate_dir() instead
**mandatory** **mandatory**
->readdir() is gone now; switch to ->iterate() ->readdir() is gone now; switch to ->iterate_shared()
**mandatory** **mandatory**
...@@ -693,24 +693,19 @@ parallel now. ...@@ -693,24 +693,19 @@ parallel now.
--- ---
**recommended** **mandatory**
->iterate_shared() is added; it's a parallel variant of ->iterate(). ->iterate_shared() is added.
Exclusion on struct file level is still provided (as well as that Exclusion on struct file level is still provided (as well as that
between it and lseek on the same struct file), but if your directory between it and lseek on the same struct file), but if your directory
has been opened several times, you can get these called in parallel. has been opened several times, you can get these called in parallel.
Exclusion between that method and all directory-modifying ones is Exclusion between that method and all directory-modifying ones is
still provided, of course. still provided, of course.
Often enough ->iterate() can serve as ->iterate_shared() without any If you have any per-inode or per-dentry in-core data structures modified
changes - it is a read-only operation, after all. If you have any by ->iterate_shared(), you might need something to serialize the access
per-inode or per-dentry in-core data structures modified by ->iterate(), to them. If you do dcache pre-seeding, you'll need to switch to
you might need something to serialize the access to them. If you d_alloc_parallel() for that; look for in-tree examples.
do dcache pre-seeding, you'll need to switch to d_alloc_parallel() for
that; look for in-tree examples.
Old method is only used if the new one is absent; eventually it will
be removed. Switch while you still can; the old one won't stay.
--- ---
...@@ -930,9 +925,9 @@ should be done by looking at FMODE_LSEEK in file->f_mode. ...@@ -930,9 +925,9 @@ should be done by looking at FMODE_LSEEK in file->f_mode.
filldir_t (readdir callbacks) calling conventions have changed. Instead of filldir_t (readdir callbacks) calling conventions have changed. Instead of
returning 0 or -E... it returns bool now. false means "no more" (as -E... used returning 0 or -E... it returns bool now. false means "no more" (as -E... used
to) and true - "keep going" (as 0 in old calling conventions). Rationale: to) and true - "keep going" (as 0 in old calling conventions). Rationale:
callers never looked at specific -E... values anyway. ->iterate() and callers never looked at specific -E... values anyway. -> iterate_shared()
->iterate_shared() instance require no changes at all, all filldir_t ones in instances require no changes at all, all filldir_t ones in the tree
the tree converted. converted.
--- ---
......
...@@ -2019,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) ...@@ -2019,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
} }
} }
WRAP_DIR_ITER(ceph_readdir) // FIXME!
const struct file_operations ceph_dir_fops = { const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir, .read = ceph_read_dir,
.iterate = ceph_readdir, .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek, .llseek = ceph_dir_llseek,
.open = ceph_open, .open = ceph_open,
.release = ceph_release, .release = ceph_release,
...@@ -2033,7 +2034,7 @@ const struct file_operations ceph_dir_fops = { ...@@ -2033,7 +2034,7 @@ const struct file_operations ceph_dir_fops = {
}; };
const struct file_operations ceph_snapdir_fops = { const struct file_operations ceph_snapdir_fops = {
.iterate = ceph_readdir, .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek, .llseek = ceph_dir_llseek,
.open = ceph_open, .open = ceph_open,
.release = ceph_release, .release = ceph_release,
......
...@@ -429,21 +429,14 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) ...@@ -429,21 +429,14 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
cfi = coda_ftoc(coda_file); cfi = coda_ftoc(coda_file);
host_file = cfi->cfi_container; host_file = cfi->cfi_container;
if (host_file->f_op->iterate || host_file->f_op->iterate_shared) { if (host_file->f_op->iterate_shared) {
struct inode *host_inode = file_inode(host_file); struct inode *host_inode = file_inode(host_file);
ret = -ENOENT; ret = -ENOENT;
if (!IS_DEADDIR(host_inode)) { if (!IS_DEADDIR(host_inode)) {
if (host_file->f_op->iterate_shared) {
inode_lock_shared(host_inode); inode_lock_shared(host_inode);
ret = host_file->f_op->iterate_shared(host_file, ctx); ret = host_file->f_op->iterate_shared(host_file, ctx);
file_accessed(host_file); file_accessed(host_file);
inode_unlock_shared(host_inode); inode_unlock_shared(host_inode);
} else {
inode_lock(host_inode);
ret = host_file->f_op->iterate(host_file, ctx);
file_accessed(host_file);
inode_unlock(host_inode);
}
} }
return ret; return ret;
} }
...@@ -585,10 +578,11 @@ const struct inode_operations coda_dir_inode_operations = { ...@@ -585,10 +578,11 @@ const struct inode_operations coda_dir_inode_operations = {
.setattr = coda_setattr, .setattr = coda_setattr,
}; };
WRAP_DIR_ITER(coda_readdir) // FIXME!
const struct file_operations coda_dir_operations = { const struct file_operations coda_dir_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate = coda_readdir, .iterate_shared = shared_coda_readdir,
.open = coda_open, .open = coda_open,
.release = coda_release, .release = coda_release,
.fsync = coda_fsync, .fsync = coda_fsync,
......
...@@ -306,10 +306,11 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx) ...@@ -306,10 +306,11 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
return err; return err;
} }
WRAP_DIR_ITER(exfat_iterate) // FIXME!
const struct file_operations exfat_dir_operations = { const struct file_operations exfat_dir_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate = exfat_iterate, .iterate_shared = shared_exfat_iterate,
.unlocked_ioctl = exfat_ioctl, .unlocked_ioctl = exfat_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = exfat_compat_ioctl, .compat_ioctl = exfat_compat_ioctl,
......
...@@ -315,7 +315,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) ...@@ -315,7 +315,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
goto out; goto out;
error = -EINVAL; error = -EINVAL;
if (!file->f_op->iterate && !file->f_op->iterate_shared) if (!file->f_op->iterate_shared)
goto out_close; goto out_close;
buffer.sequence = 0; buffer.sequence = 0;
......
...@@ -1049,7 +1049,7 @@ unsigned long __fdget_raw(unsigned int fd) ...@@ -1049,7 +1049,7 @@ unsigned long __fdget_raw(unsigned int fd)
static inline bool file_needs_f_pos_lock(struct file *file) static inline bool file_needs_f_pos_lock(struct file *file)
{ {
return (file->f_mode & FMODE_ATOMIC_POS) && return (file->f_mode & FMODE_ATOMIC_POS) &&
(file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode)); (file_count(file) > 1 || file->f_op->iterate_shared);
} }
unsigned long __fdget_pos(unsigned int fd) unsigned long __fdget_pos(unsigned int fd)
......
...@@ -1535,9 +1535,10 @@ const struct inode_operations jfs_dir_inode_operations = { ...@@ -1535,9 +1535,10 @@ const struct inode_operations jfs_dir_inode_operations = {
#endif #endif
}; };
WRAP_DIR_ITER(jfs_readdir) // FIXME!
const struct file_operations jfs_dir_operations = { const struct file_operations jfs_dir_operations = {
.read = generic_read_dir, .read = generic_read_dir,
.iterate = jfs_readdir, .iterate_shared = shared_jfs_readdir,
.fsync = jfs_fsync, .fsync = jfs_fsync,
.unlocked_ioctl = jfs_ioctl, .unlocked_ioctl = jfs_ioctl,
.compat_ioctl = compat_ptr_ioctl, .compat_ioctl = compat_ptr_ioctl,
......
...@@ -1525,10 +1525,11 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, ...@@ -1525,10 +1525,11 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
#endif /* NTFS_RW */ #endif /* NTFS_RW */
WRAP_DIR_ITER(ntfs_readdir) // FIXME!
const struct file_operations ntfs_dir_ops = { const struct file_operations ntfs_dir_ops = {
.llseek = generic_file_llseek, /* Seek inside directory. */ .llseek = generic_file_llseek, /* Seek inside directory. */
.read = generic_read_dir, /* Return -EISDIR. */ .read = generic_read_dir, /* Return -EISDIR. */
.iterate = ntfs_readdir, /* Read directory contents. */ .iterate_shared = shared_ntfs_readdir, /* Read directory contents. */
#ifdef NTFS_RW #ifdef NTFS_RW
.fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
#endif /* NTFS_RW */ #endif /* NTFS_RW */
......
...@@ -2793,10 +2793,11 @@ const struct file_operations ocfs2_fops = { ...@@ -2793,10 +2793,11 @@ const struct file_operations ocfs2_fops = {
.remap_file_range = ocfs2_remap_file_range, .remap_file_range = ocfs2_remap_file_range,
}; };
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = { const struct file_operations ocfs2_dops = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate = ocfs2_readdir, .iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file, .fsync = ocfs2_sync_file,
.release = ocfs2_dir_release, .release = ocfs2_dir_release,
.open = ocfs2_dir_open, .open = ocfs2_dir_open,
...@@ -2842,7 +2843,7 @@ const struct file_operations ocfs2_fops_no_plocks = { ...@@ -2842,7 +2843,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
const struct file_operations ocfs2_dops_no_plocks = { const struct file_operations ocfs2_dops_no_plocks = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = generic_read_dir, .read = generic_read_dir,
.iterate = ocfs2_readdir, .iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file, .fsync = ocfs2_sync_file,
.release = ocfs2_dir_release, .release = ocfs2_dir_release,
.open = ocfs2_dir_open, .open = ocfs2_dir_open,
......
...@@ -1322,7 +1322,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) ...@@ -1322,7 +1322,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
lookup_flags |= LOOKUP_IN_ROOT; lookup_flags |= LOOKUP_IN_ROOT;
if (how->resolve & RESOLVE_CACHED) { if (how->resolve & RESOLVE_CACHED) {
/* Don't bother even trying for create/truncate/tmpfile open */ /* Don't bother even trying for create/truncate/tmpfile open */
if (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
return -EAGAIN; return -EAGAIN;
lookup_flags |= LOOKUP_CACHED; lookup_flags |= LOOKUP_CACHED;
} }
......
...@@ -954,10 +954,11 @@ static int ovl_dir_open(struct inode *inode, struct file *file) ...@@ -954,10 +954,11 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return 0; return 0;
} }
WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = { const struct file_operations ovl_dir_operations = {
.read = generic_read_dir, .read = generic_read_dir,
.open = ovl_dir_open, .open = ovl_dir_open,
.iterate = ovl_iterate, .iterate_shared = shared_ovl_iterate,
.llseek = ovl_dir_llseek, .llseek = ovl_dir_llseek,
.fsync = ovl_dir_fsync, .fsync = ovl_dir_fsync,
.release = ovl_dir_release, .release = ovl_dir_release,
......
...@@ -2817,7 +2817,7 @@ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ ...@@ -2817,7 +2817,7 @@ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
\ \
static const struct file_operations proc_##LSM##_attr_dir_ops = { \ static const struct file_operations proc_##LSM##_attr_dir_ops = { \
.read = generic_read_dir, \ .read = generic_read_dir, \
.iterate = proc_##LSM##_attr_dir_iterate, \ .iterate_shared = proc_##LSM##_attr_dir_iterate, \
.llseek = default_llseek, \ .llseek = default_llseek, \
}; \ }; \
\ \
......
...@@ -24,6 +24,53 @@ ...@@ -24,6 +24,53 @@
#include <asm/unaligned.h> #include <asm/unaligned.h>
/*
* Some filesystems were never converted to '->iterate_shared()'
* and their directory iterators want the inode lock held for
* writing. This wrapper allows for converting from the shared
* semantics to the exclusive inode use.
*/
int wrap_directory_iterator(struct file *file,
struct dir_context *ctx,
int (*iter)(struct file *, struct dir_context *))
{
struct inode *inode = file_inode(file);
int ret;
/*
* We'd love to have an 'inode_upgrade_trylock()' operation,
* see the comment in mmap_upgrade_trylock() in mm/memory.c.
*
* But considering this is for "filesystems that never got
* converted", it really doesn't matter.
*
* Also note that since we have to return with the lock held
* for reading, we can't use the "killable()" locking here,
* since we do need to get the lock even if we're dying.
*
* We could do the write part killably and then get the read
* lock unconditionally if it mattered, but see above on why
* this does the very simplistic conversion.
*/
up_read(&inode->i_rwsem);
down_write(&inode->i_rwsem);
/*
* Since we dropped the inode lock, we should do the
* DEADDIR test again. See 'iterate_dir()' below.
*
* Note that we don't need to re-do the f_pos games,
* since the file must be locked wrt f_pos anyway.
*/
ret = -ENOENT;
if (!IS_DEADDIR(inode))
ret = iter(file, ctx);
downgrade_write(&inode->i_rwsem);
return ret;
}
EXPORT_SYMBOL(wrap_directory_iterator);
/* /*
* Note the "unsafe_put_user() semantics: we goto a * Note the "unsafe_put_user() semantics: we goto a
* label for errors. * label for errors.
...@@ -40,39 +87,28 @@ ...@@ -40,39 +87,28 @@
int iterate_dir(struct file *file, struct dir_context *ctx) int iterate_dir(struct file *file, struct dir_context *ctx)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
bool shared = false;
int res = -ENOTDIR; int res = -ENOTDIR;
if (file->f_op->iterate_shared)
shared = true; if (!file->f_op->iterate_shared)
else if (!file->f_op->iterate)
goto out; goto out;
res = security_file_permission(file, MAY_READ); res = security_file_permission(file, MAY_READ);
if (res) if (res)
goto out; goto out;
if (shared)
res = down_read_killable(&inode->i_rwsem); res = down_read_killable(&inode->i_rwsem);
else
res = down_write_killable(&inode->i_rwsem);
if (res) if (res)
goto out; goto out;
res = -ENOENT; res = -ENOENT;
if (!IS_DEADDIR(inode)) { if (!IS_DEADDIR(inode)) {
ctx->pos = file->f_pos; ctx->pos = file->f_pos;
if (shared)
res = file->f_op->iterate_shared(file, ctx); res = file->f_op->iterate_shared(file, ctx);
else
res = file->f_op->iterate(file, ctx);
file->f_pos = ctx->pos; file->f_pos = ctx->pos;
fsnotify_access(file); fsnotify_access(file);
file_accessed(file); file_accessed(file);
} }
if (shared)
inode_unlock_shared(inode); inode_unlock_shared(inode);
else
inode_unlock(inode);
out: out:
return res; return res;
} }
......
...@@ -179,9 +179,10 @@ static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) ...@@ -179,9 +179,10 @@ static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
return 0; return 0;
} }
WRAP_DIR_ITER(vboxsf_dir_iterate) // FIXME!
const struct file_operations vboxsf_dir_fops = { const struct file_operations vboxsf_dir_fops = {
.open = vboxsf_dir_open, .open = vboxsf_dir_open,
.iterate = vboxsf_dir_iterate, .iterate_shared = shared_vboxsf_dir_iterate,
.release = vboxsf_dir_release, .release = vboxsf_dir_release,
.read = generic_read_dir, .read = generic_read_dir,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
......
...@@ -1780,7 +1780,6 @@ struct file_operations { ...@@ -1780,7 +1780,6 @@ struct file_operations {
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
unsigned int flags); unsigned int flags);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *); __poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
...@@ -1817,6 +1816,13 @@ struct file_operations { ...@@ -1817,6 +1816,13 @@ struct file_operations {
unsigned int poll_flags); unsigned int poll_flags);
} __randomize_layout; } __randomize_layout;
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
int (*) (struct file *, struct dir_context *));
#define WRAP_DIR_ITER(x) \
static int shared_##x(struct file *file , struct dir_context *ctx) \
{ return wrap_directory_iterator(file, ctx, x); }
struct inode_operations { struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment