Commit c86243b0 authored by Vivek Goyal, committed by Miklos Szeredi

ovl: provide a mount option "volatile"

Container folks are complaining that dnf/yum issues too many sync while
installing packages and this slows down the image build. Build requirement
is such that they don't care if a node goes down while build was still
going on. In that case, they will simply throw away unfinished layer and
start new build. So they don't care about syncing intermediate state to the
disk and hence don't want to pay the price associated with sync.

So they are asking for mount options where they can disable sync on overlay
mount point.

They primarily seem to have two use cases.

- For building images, they will mount overlay with "volatile" and then sync
  upper layer after unmounting overlay and reuse upper as lower for next
  layer.

- For running containers, they don't seem to care about syncing upper layer
  because if node goes down, they will simply throw away upper layer and
  create a fresh one.

So this patch provides a mount option "volatile" which disables all forms
of sync. Now it is caller's responsibility to throw away upper if system
crashes or shuts down and start fresh.

With "volatile", I am seeing roughly 20% speed up in my VM where I am just
installing emacs in an image. Installation time drops from 31 seconds to 25
seconds when the "volatile" option is used. This is for the case of building
on top of an image where all packages are already cached. That way I take the
network operations latency out of the measurement.

Giuseppe is also looking to cut down on number of iops done on the disk. He
is complaining that often in cloud their VMs are throttled if they cross
the limit. This option can help them where they reduce number of iops (by
cutting down on frequent sync and writebacks).
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 235ce9ed
...@@ -564,6 +564,25 @@ Note: the mount options index=off,nfs_export=on are conflicting for a ...@@ -564,6 +564,25 @@ Note: the mount options index=off,nfs_export=on are conflicting for a
read-write mount and will result in an error. read-write mount and will result in an error.
Volatile mount
--------------
This is enabled with the "volatile" mount option. Volatile mounts are not
guaranteed to survive a crash. It is strongly recommended that volatile
mounts are only used if data written to the overlay can be recreated
without significant effort.
The advantage of mounting with the "volatile" option is that all forms of
sync calls to the upper filesystem are omitted.
When overlay is mounted with the "volatile" option, the directory
"$workdir/work/incompat/volatile" is created. During the next mount, overlay
checks for this directory and refuses to mount if it is present. This is a
strong indicator that the user should throw away the upper and work
directories and create fresh ones. In very limited cases where the user knows
that the system has not crashed and the contents of upperdir are intact, the
"volatile" directory can be removed.
Testsuite Testsuite
--------- ---------
......
...@@ -128,7 +128,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) ...@@ -128,7 +128,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
return error; return error;
} }
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
struct path *new, loff_t len)
{ {
struct file *old_file; struct file *old_file;
struct file *new_file; struct file *new_file;
...@@ -218,7 +219,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) ...@@ -218,7 +219,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
len -= bytes; len -= bytes;
} }
out: out:
if (!error) if (!error && ovl_should_sync(ofs))
error = vfs_fsync(new_file, 0); error = vfs_fsync(new_file, 0);
fput(new_file); fput(new_file);
out_fput: out_fput:
...@@ -484,6 +485,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ...@@ -484,6 +485,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{ {
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
int err; int err;
/* /*
...@@ -499,7 +501,8 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) ...@@ -499,7 +501,8 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
upperpath.dentry = temp; upperpath.dentry = temp;
ovl_path_lowerdata(c->dentry, &datapath); ovl_path_lowerdata(c->dentry, &datapath);
err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); err = ovl_copy_up_data(ofs, &datapath, &upperpath,
c->stat.size);
if (err) if (err)
return err; return err;
} }
...@@ -784,6 +787,7 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode, ...@@ -784,6 +787,7 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
/* Copy up data of an inode which was copied up metadata only in the past. */ /* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{ {
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct path upperpath, datapath; struct path upperpath, datapath;
int err; int err;
char *capability = NULL; char *capability = NULL;
...@@ -804,7 +808,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) ...@@ -804,7 +808,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
goto out; goto out;
} }
err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size); err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
if (err) if (err)
goto out_free; goto out_free;
......
...@@ -331,6 +331,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -331,6 +331,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct fd real; struct fd real;
const struct cred *old_cred; const struct cred *old_cred;
ssize_t ret; ssize_t ret;
int ifl = iocb->ki_flags;
if (!iov_iter_count(iter)) if (!iov_iter_count(iter))
return 0; return 0;
...@@ -346,11 +347,14 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -346,11 +347,14 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret) if (ret)
goto out_unlock; goto out_unlock;
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
old_cred = ovl_override_creds(file_inode(file)->i_sb); old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) { if (is_sync_kiocb(iocb)) {
file_start_write(real.file); file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
ovl_iocb_to_rwf(iocb->ki_flags)); ovl_iocb_to_rwf(ifl));
file_end_write(real.file); file_end_write(real.file);
/* Update size */ /* Update size */
ovl_copyattr(ovl_inode_real(inode), inode); ovl_copyattr(ovl_inode_real(inode), inode);
...@@ -370,6 +374,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) ...@@ -370,6 +374,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
real.flags = 0; real.flags = 0;
aio_req->orig_iocb = iocb; aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, real.file); kiocb_clone(&aio_req->iocb, iocb, real.file);
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete; aio_req->iocb.ki_complete = ovl_aio_rw_complete;
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
if (ret != -EIOCBQUEUED) if (ret != -EIOCBQUEUED)
...@@ -433,6 +438,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -433,6 +438,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct cred *old_cred; const struct cred *old_cred;
int ret; int ret;
if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb)))
return 0;
ret = ovl_real_fdget_meta(file, &real, !datasync); ret = ovl_real_fdget_meta(file, &real, !datasync);
if (ret) if (ret)
return ret; return ret;
......
...@@ -17,6 +17,7 @@ struct ovl_config { ...@@ -17,6 +17,7 @@ struct ovl_config {
bool nfs_export; bool nfs_export;
int xino; int xino;
bool metacopy; bool metacopy;
bool ovl_volatile;
}; };
struct ovl_sb { struct ovl_sb {
...@@ -90,6 +91,11 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb) ...@@ -90,6 +91,11 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
return (struct ovl_fs *)sb->s_fs_info; return (struct ovl_fs *)sb->s_fs_info;
} }
/*
 * Report whether sync requests should be carried out for this overlay
 * instance: false when the "volatile" config flag is set, true otherwise.
 */
static inline bool ovl_should_sync(struct ovl_fs *ofs)
{
	return ofs->config.ovl_volatile ? false : true;
}
/* private information held for every overlayfs dentry */ /* private information held for every overlayfs dentry */
struct ovl_entry { struct ovl_entry {
union { union {
......
...@@ -863,6 +863,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, ...@@ -863,6 +863,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
return 0; return 0;
if (!ovl_should_sync(OVL_FS(dentry->d_sb)))
return 0;
/* /*
* Need to check if we started out being a lower dir, but got copied up * Need to check if we started out being a lower dir, but got copied up
*/ */
......
...@@ -264,6 +264,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait) ...@@ -264,6 +264,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
if (!ovl_upper_mnt(ofs)) if (!ovl_upper_mnt(ofs))
return 0; return 0;
if (!ovl_should_sync(ofs))
return 0;
/* /*
* Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
* All the super blocks will be iterated, including upper_sb. * All the super blocks will be iterated, including upper_sb.
...@@ -362,6 +364,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) ...@@ -362,6 +364,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ofs->config.metacopy != ovl_metacopy_def) if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s", seq_printf(m, ",metacopy=%s",
ofs->config.metacopy ? "on" : "off"); ofs->config.metacopy ? "on" : "off");
if (ofs->config.ovl_volatile)
seq_puts(m, ",volatile");
return 0; return 0;
} }
...@@ -376,10 +380,12 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) ...@@ -376,10 +380,12 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
if (*flags & SB_RDONLY && !sb_rdonly(sb)) { if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
upper_sb = ovl_upper_mnt(ofs)->mnt_sb; upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
if (ovl_should_sync(ofs)) {
down_read(&upper_sb->s_umount); down_read(&upper_sb->s_umount);
ret = sync_filesystem(upper_sb); ret = sync_filesystem(upper_sb);
up_read(&upper_sb->s_umount); up_read(&upper_sb->s_umount);
} }
}
return ret; return ret;
} }
...@@ -411,6 +417,7 @@ enum { ...@@ -411,6 +417,7 @@ enum {
OPT_XINO_AUTO, OPT_XINO_AUTO,
OPT_METACOPY_ON, OPT_METACOPY_ON,
OPT_METACOPY_OFF, OPT_METACOPY_OFF,
OPT_VOLATILE,
OPT_ERR, OPT_ERR,
}; };
...@@ -429,6 +436,7 @@ static const match_table_t ovl_tokens = { ...@@ -429,6 +436,7 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_AUTO, "xino=auto"}, {OPT_XINO_AUTO, "xino=auto"},
{OPT_METACOPY_ON, "metacopy=on"}, {OPT_METACOPY_ON, "metacopy=on"},
{OPT_METACOPY_OFF, "metacopy=off"}, {OPT_METACOPY_OFF, "metacopy=off"},
{OPT_VOLATILE, "volatile"},
{OPT_ERR, NULL} {OPT_ERR, NULL}
}; };
...@@ -573,6 +581,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) ...@@ -573,6 +581,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
metacopy_opt = true; metacopy_opt = true;
break; break;
case OPT_VOLATILE:
config->ovl_volatile = true;
break;
default: default:
pr_err("unrecognized mount option \"%s\" or missing value\n", pr_err("unrecognized mount option \"%s\" or missing value\n",
p); p);
...@@ -595,6 +607,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) ...@@ -595,6 +607,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->index = false; config->index = false;
} }
if (!config->upperdir && config->ovl_volatile) {
pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
config->ovl_volatile = false;
}
err = ovl_parse_redirect_mode(config, config->redirect_mode); err = ovl_parse_redirect_mode(config, config->redirect_mode);
if (err) if (err)
return err; return err;
...@@ -1203,6 +1220,45 @@ static int ovl_check_rename_whiteout(struct dentry *workdir) ...@@ -1203,6 +1220,45 @@ static int ovl_check_rename_whiteout(struct dentry *workdir)
return err; return err;
} }
/*
 * Look up @name under @parent and, if the lookup yields a dentry without an
 * inode, create it with file type/mode @mode.
 *
 * The parent's inode lock is held across the lookup and the creation.  The
 * caller's reference on @parent is dropped before returning, whether or not
 * the call succeeds.
 *
 * Returns the child dentry, or the error pointer produced by the lookup or
 * by ovl_create_real().
 */
static struct dentry *ovl_lookup_or_create(struct dentry *parent,
					   const char *name, umode_t mode)
{
	struct inode *dir = parent->d_inode;
	struct dentry *found;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	found = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(found) || found->d_inode)
		goto unlock;

	/* Lookup succeeded but nothing exists under that name yet. */
	found = ovl_create_real(dir, found, OVL_CATTR(mode));
unlock:
	inode_unlock(dir);
	dput(parent);

	return found;
}
/*
* Creates $workdir/work/incompat/volatile/dirty file if it is not already
* present.
*/
static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
{
unsigned int ctr;
struct dentry *d = dget(ofs->workbasedir);
static const char *const volatile_path[] = {
OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
};
const char *const *name = volatile_path;
for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
if (IS_ERR(d))
return PTR_ERR(d);
}
dput(d);
return 0;
}
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
struct path *workpath) struct path *workpath)
{ {
...@@ -1286,6 +1342,18 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, ...@@ -1286,6 +1342,18 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
goto out; goto out;
} }
/*
* For volatile mount, create a incompat/volatile/dirty file to keep
* track of it.
*/
if (ofs->config.ovl_volatile) {
err = ovl_create_volatile_dirty(ofs);
if (err < 0) {
pr_err("Failed to create volatile/dirty file.\n");
goto out;
}
}
/* Check if upper/work fs supports file handles */ /* Check if upper/work fs supports file handles */
fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
if (ofs->config.index && !fh_type) { if (ofs->config.index && !fh_type) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment