Commit 3e9bff3b authored by Linus Torvalds

Merge tag 'vfs-6.11-rc6.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "VFS:

   - Ensure that backing files use file->f_op->splice_write() for
     splice

  netfs:

   - Revert the removal of PG_private_2 from netfs_release_folio() as
     cephfs still relies on this

   - When AS_RELEASE_ALWAYS is set on a mapping the folio needs to
     always be invalidated during truncation

   - Fix losing untruncated data in a folio by making
     netfs_release_folio() return false if the folio is dirty

   - Fix trimming of streaming-write folios in netfs_inval_folio()

   - Reset iterator before retrying a short read

   - Fix interaction of streaming writes with zero-point tracker

  afs:

   - During truncation afs currently calls truncate_setsize() which sets
     i_size, expands the pagecache and truncates it. The first two
     operations aren't needed because they will have already been done.
     So call truncate_pagecache() instead and skip the redundant parts

  overlayfs:

   - Fix checking of the number of allowed lower layers so 500 layers
     can actually be used instead of just 499

   - Add missing '\n' to pr_err() output

   - Pass string to ovl_parse_layer() and thus allow it to be used for
     Opt_lowerdir as well

  pidfd:

   - Revert blocking the creation of pidfds for kthreads as apparently
     userspace relies on this. Specifically, it breaks systemd during
     shutdown

  romfs:

   - Fix romfs_read_folio() to use the correct offset with
     folio_zero_tail()"

* tag 'vfs-6.11-rc6.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  netfs: Fix interaction of streaming writes with zero-point tracker
  netfs: Fix missing iterator reset on retry of short read
  netfs: Fix trimming of streaming-write folios in netfs_inval_folio()
  netfs: Fix netfs_release_folio() to say no if folio dirty
  afs: Fix post-setattr file edit to do truncation correctly
  mm: Fix missing folio invalidation calls during truncation
  ovl: ovl_parse_param_lowerdir: Add missed '\n' for pr_err
  ovl: fix wrong lowerdir number check for parameter Opt_lowerdir
  ovl: pass string to ovl_parse_layer()
  backing-file: convert to using fops->splice_write
  Revert "pidfd: prevent creation of pidfds for kthreads"
  romfs: fix romfs_read_folio()
  netfs, ceph: Partially revert "netfs: Replace PG_fscache by setting folio->private and marking dirty"
parents 5be63fc1 e00e99ba
...@@ -695,13 +695,18 @@ static void afs_setattr_edit_file(struct afs_operation *op) ...@@ -695,13 +695,18 @@ static void afs_setattr_edit_file(struct afs_operation *op)
{ {
struct afs_vnode_param *vp = &op->file[0]; struct afs_vnode_param *vp = &op->file[0];
struct afs_vnode *vnode = vp->vnode; struct afs_vnode *vnode = vp->vnode;
struct inode *inode = &vnode->netfs.inode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) { if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size; loff_t size = op->setattr.attr->ia_size;
loff_t i_size = op->setattr.old_i_size; loff_t old = op->setattr.old_i_size;
/* Note: inode->i_size was updated by afs_apply_status() inside
* the I/O and callback locks.
*/
if (size != i_size) { if (size != old) {
truncate_setsize(&vnode->netfs.inode, size); truncate_pagecache(inode, size);
netfs_resize_file(&vnode->netfs, size, true); netfs_resize_file(&vnode->netfs, size, true);
fscache_resize_cookie(afs_vnode_cache(vnode), size); fscache_resize_cookie(afs_vnode_cache(vnode), size);
} }
......
...@@ -303,13 +303,16 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, ...@@ -303,13 +303,16 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
return -EIO; return -EIO;
if (!out->f_op->splice_write)
return -EINVAL;
ret = file_remove_privs(ctx->user_file); ret = file_remove_privs(ctx->user_file);
if (ret) if (ret)
return ret; return ret;
old_cred = override_creds(ctx->cred); old_cred = override_creds(ctx->cred);
file_start_write(out); file_start_write(out);
ret = iter_file_splice_write(pipe, out, ppos, len, flags); ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
file_end_write(out); file_end_write(out);
revert_creds(old_cred); revert_creds(old_cred);
......
...@@ -695,6 +695,7 @@ void ceph_evict_inode(struct inode *inode) ...@@ -695,6 +695,7 @@ void ceph_evict_inode(struct inode *inode)
percpu_counter_dec(&mdsc->metric.total_inodes); percpu_counter_dec(&mdsc->metric.total_inodes);
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data); truncate_inode_pages_final(&inode->i_data);
if (inode->i_state & I_PINNING_NETFS_WB) if (inode->i_state & I_PINNING_NETFS_WB)
ceph_fscache_unuse_cookie(inode, true); ceph_fscache_unuse_cookie(inode, true);
......
...@@ -313,6 +313,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) ...@@ -313,6 +313,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
netfs_reset_subreq_iter(rreq, subreq); netfs_reset_subreq_iter(rreq, subreq);
netfs_read_from_server(rreq, subreq); netfs_read_from_server(rreq, subreq);
} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
netfs_reset_subreq_iter(rreq, subreq);
netfs_rreq_short_read(rreq, subreq); netfs_rreq_short_read(rreq, subreq);
} }
} }
......
...@@ -97,10 +97,22 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback); ...@@ -97,10 +97,22 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{ {
struct netfs_folio *finfo; struct netfs_folio *finfo;
struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
size_t flen = folio_size(folio); size_t flen = folio_size(folio);
_enter("{%lx},%zx,%zx", folio->index, offset, length); _enter("{%lx},%zx,%zx", folio->index, offset, length);
if (offset == 0 && length == flen) {
unsigned long long i_size = i_size_read(&ctx->inode);
unsigned long long fpos = folio_pos(folio), end;
end = umin(fpos + flen, i_size);
if (fpos < i_size && end > ctx->zero_point)
ctx->zero_point = end;
}
folio_wait_private_2(folio); /* [DEPRECATED] */
if (!folio_test_private(folio)) if (!folio_test_private(folio))
return; return;
...@@ -113,18 +125,34 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) ...@@ -113,18 +125,34 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
/* We have a partially uptodate page from a streaming write. */ /* We have a partially uptodate page from a streaming write. */
unsigned int fstart = finfo->dirty_offset; unsigned int fstart = finfo->dirty_offset;
unsigned int fend = fstart + finfo->dirty_len; unsigned int fend = fstart + finfo->dirty_len;
unsigned int end = offset + length; unsigned int iend = offset + length;
if (offset >= fend) if (offset >= fend)
return; return;
if (end <= fstart) if (iend <= fstart)
return; return;
if (offset <= fstart && end >= fend)
/* The invalidation region overlaps the data. If the region
* covers the start of the data, we either move along the start
* or just erase the data entirely.
*/
if (offset <= fstart) {
if (iend >= fend)
goto erase_completely; goto erase_completely;
if (offset <= fstart && end > fstart) /* Move the start of the data. */
goto reduce_len; finfo->dirty_len = fend - iend;
if (offset > fstart && end >= fend) finfo->dirty_offset = offset;
goto move_start; return;
}
/* Reduce the length of the data if the invalidation region
* covers the tail part.
*/
if (iend >= fend) {
finfo->dirty_len = offset - fstart;
return;
}
/* A partial write was split. The caller has already zeroed /* A partial write was split. The caller has already zeroed
* it, so just absorb the hole. * it, so just absorb the hole.
*/ */
...@@ -137,12 +165,6 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) ...@@ -137,12 +165,6 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
folio_clear_uptodate(folio); folio_clear_uptodate(folio);
kfree(finfo); kfree(finfo);
return; return;
reduce_len:
finfo->dirty_len = offset + length - finfo->dirty_offset;
return;
move_start:
finfo->dirty_len -= offset - finfo->dirty_offset;
finfo->dirty_offset = offset;
} }
EXPORT_SYMBOL(netfs_invalidate_folio); EXPORT_SYMBOL(netfs_invalidate_folio);
...@@ -159,12 +181,20 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) ...@@ -159,12 +181,20 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
unsigned long long end; unsigned long long end;
end = folio_pos(folio) + folio_size(folio); if (folio_test_dirty(folio))
return false;
end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode));
if (end > ctx->zero_point) if (end > ctx->zero_point)
ctx->zero_point = end; ctx->zero_point = end;
if (folio_test_private(folio)) if (folio_test_private(folio))
return false; return false;
if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
folio_wait_private_2(folio);
}
fscache_note_page_release(netfs_i_cookie(ctx)); fscache_note_page_release(netfs_i_cookie(ctx));
return true; return true;
} }
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
int netfs_folio_written_back(struct folio *folio) int netfs_folio_written_back(struct folio *folio)
{ {
enum netfs_folio_trace why = netfs_folio_trace_clear; enum netfs_folio_trace why = netfs_folio_trace_clear;
struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
struct netfs_folio *finfo; struct netfs_folio *finfo;
struct netfs_group *group = NULL; struct netfs_group *group = NULL;
int gcount = 0; int gcount = 0;
...@@ -41,6 +42,12 @@ int netfs_folio_written_back(struct folio *folio) ...@@ -41,6 +42,12 @@ int netfs_folio_written_back(struct folio *folio)
/* Streaming writes cannot be redirtied whilst under writeback, /* Streaming writes cannot be redirtied whilst under writeback,
* so discard the streaming record. * so discard the streaming record.
*/ */
unsigned long long fend;
fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
if (fend > ictx->zero_point)
ictx->zero_point = fend;
folio_detach_private(folio); folio_detach_private(folio);
group = finfo->netfs_group; group = finfo->netfs_group;
gcount++; gcount++;
......
...@@ -353,6 +353,8 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, ...@@ -353,6 +353,8 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
case Opt_datadir_add: case Opt_datadir_add:
ctx->nr_data++; ctx->nr_data++;
fallthrough; fallthrough;
case Opt_lowerdir:
fallthrough;
case Opt_lowerdir_add: case Opt_lowerdir_add:
WARN_ON(ctx->nr >= ctx->capacity); WARN_ON(ctx->nr >= ctx->capacity);
l = &ctx->lower[ctx->nr++]; l = &ctx->lower[ctx->nr++];
...@@ -365,10 +367,9 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer, ...@@ -365,10 +367,9 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
} }
} }
static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum ovl_opt layer)
enum ovl_opt layer)
{ {
char *name = kstrdup(param->string, GFP_KERNEL); char *name = kstrdup(layer_name, GFP_KERNEL);
bool upper = (layer == Opt_upperdir || layer == Opt_workdir); bool upper = (layer == Opt_upperdir || layer == Opt_workdir);
struct path path; struct path path;
int err; int err;
...@@ -376,7 +377,7 @@ static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param, ...@@ -376,7 +377,7 @@ static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
if (!name) if (!name)
return -ENOMEM; return -ENOMEM;
if (upper) if (upper || layer == Opt_lowerdir)
err = ovl_mount_dir(name, &path); err = ovl_mount_dir(name, &path);
else else
err = ovl_mount_dir_noesc(name, &path); err = ovl_mount_dir_noesc(name, &path);
...@@ -432,7 +433,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ...@@ -432,7 +433,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
{ {
int err; int err;
struct ovl_fs_context *ctx = fc->fs_private; struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
char *dup = NULL, *iter; char *dup = NULL, *iter;
ssize_t nr_lower, nr; ssize_t nr_lower, nr;
bool data_layer = false; bool data_layer = false;
...@@ -449,7 +449,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ...@@ -449,7 +449,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
return 0; return 0;
if (*name == ':') { if (*name == ':') {
pr_err("cannot append lower layer"); pr_err("cannot append lower layer\n");
return -EINVAL; return -EINVAL;
} }
...@@ -472,35 +472,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ...@@ -472,35 +472,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
goto out_err; goto out_err;
} }
if (nr_lower > ctx->capacity) {
err = -ENOMEM;
l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower),
GFP_KERNEL_ACCOUNT);
if (!l)
goto out_err;
ctx->lower = l;
ctx->capacity = nr_lower;
}
iter = dup; iter = dup;
l = ctx->lower; for (nr = 0; nr < nr_lower; nr++) {
for (nr = 0; nr < nr_lower; nr++, l++) { err = ovl_parse_layer(fc, iter, Opt_lowerdir);
ctx->nr++;
memset(l, 0, sizeof(*l));
err = ovl_mount_dir(iter, &l->path);
if (err) if (err)
goto out_put; goto out_err;
err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false);
if (err)
goto out_put;
err = -ENOMEM;
l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT);
if (!l->name)
goto out_put;
if (data_layer) if (data_layer)
ctx->nr_data++; ctx->nr_data++;
...@@ -517,8 +493,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ...@@ -517,8 +493,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
* there are no data layers. * there are no data layers.
*/ */
if (ctx->nr_data > 0) { if (ctx->nr_data > 0) {
pr_err("regular lower layers cannot follow data lower layers"); pr_err("regular lower layers cannot follow data lower layers\n");
goto out_put; goto out_err;
} }
data_layer = false; data_layer = false;
...@@ -532,9 +508,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) ...@@ -532,9 +508,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
kfree(dup); kfree(dup);
return 0; return 0;
out_put:
ovl_reset_lowerdirs(ctx);
out_err: out_err:
kfree(dup); kfree(dup);
...@@ -582,7 +555,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) ...@@ -582,7 +555,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_datadir_add: case Opt_datadir_add:
case Opt_upperdir: case Opt_upperdir:
case Opt_workdir: case Opt_workdir:
err = ovl_parse_layer(fc, param, opt); err = ovl_parse_layer(fc, param->string, opt);
break; break;
case Opt_default_permissions: case Opt_default_permissions:
config->default_permissions = true; config->default_permissions = true;
......
...@@ -126,7 +126,7 @@ static int romfs_read_folio(struct file *file, struct folio *folio) ...@@ -126,7 +126,7 @@ static int romfs_read_folio(struct file *file, struct folio *folio)
} }
} }
buf = folio_zero_tail(folio, fillsize, buf); buf = folio_zero_tail(folio, fillsize, buf + fillsize);
kunmap_local(buf); kunmap_local(buf);
folio_end_read(folio, ret == 0); folio_end_read(folio, ret == 0);
return ret; return ret;
......
...@@ -2053,24 +2053,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re ...@@ -2053,24 +2053,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
*/ */
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{ {
if (!pid) bool thread = flags & PIDFD_THREAD;
return -EINVAL;
scoped_guard(rcu) {
struct task_struct *tsk;
if (flags & PIDFD_THREAD) if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
tsk = pid_task(pid, PIDTYPE_PID);
else
tsk = pid_task(pid, PIDTYPE_TGID);
if (!tsk)
return -EINVAL; return -EINVAL;
/* Don't create pidfds for kernel threads for now. */
if (tsk->flags & PF_KTHREAD)
return -EINVAL;
}
return __pidfd_prepare(pid, flags, ret); return __pidfd_prepare(pid, flags, ret);
} }
...@@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process( ...@@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process(
if (clone_flags & CLONE_PIDFD) { if (clone_flags & CLONE_PIDFD) {
int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
/* Don't create pidfds for kernel threads for now. */
if (args->kthread) {
retval = -EINVAL;
goto bad_fork_free_pid;
}
/* Note that no task has been attached to @pid yet. */ /* Note that no task has been attached to @pid yet. */
retval = __pidfd_prepare(pid, flags, &pidfile); retval = __pidfd_prepare(pid, flags, &pidfile);
if (retval < 0) if (retval < 0)
......
...@@ -157,7 +157,7 @@ static void truncate_cleanup_folio(struct folio *folio) ...@@ -157,7 +157,7 @@ static void truncate_cleanup_folio(struct folio *folio)
if (folio_mapped(folio)) if (folio_mapped(folio))
unmap_mapping_folio(folio); unmap_mapping_folio(folio);
if (folio_has_private(folio)) if (folio_needs_release(folio))
folio_invalidate(folio, 0, folio_size(folio)); folio_invalidate(folio, 0, folio_size(folio));
/* /*
...@@ -219,7 +219,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) ...@@ -219,7 +219,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!mapping_inaccessible(folio->mapping)) if (!mapping_inaccessible(folio->mapping))
folio_zero_range(folio, offset, length); folio_zero_range(folio, offset, length);
if (folio_has_private(folio)) if (folio_needs_release(folio))
folio_invalidate(folio, offset, length); folio_invalidate(folio, offset, length);
if (!folio_test_large(folio)) if (!folio_test_large(folio))
return true; return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment