Commit 8408fa57 authored by Kent Overstreet

bcachefs: BCH_IOCTL_FSCK_OFFLINE

This adds a new ioctl for running fsck on a list of devices.

Normally, if we wish to use the kernel's implementation of fsck we'd run
it at mount time with -o fsck. This ioctl lets us run fsck without
mounting, so that userspace bcachefs-tools can transparently switch to
the kernel's implementation of fsck when appropriate - primarily if the
kernel version of bcachefs better matches the filesystem on disk.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 7f391b2f
...@@ -83,6 +83,8 @@ struct bch_ioctl_incremental { ...@@ -83,6 +83,8 @@ struct bch_ioctl_incremental {
#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2) #define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
#define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline)
/* ioctl below act on a particular file, not the filesystem as a whole: */ /* ioctl below act on a particular file, not the filesystem as a whole: */
#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *) #define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
...@@ -386,4 +388,15 @@ struct bch_ioctl_subvolume { ...@@ -386,4 +388,15 @@ struct bch_ioctl_subvolume {
#define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0) #define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0)
#define BCH_SUBVOL_SNAPSHOT_RO (1U << 1) #define BCH_SUBVOL_SNAPSHOT_RO (1U << 1)
/*
 * BCH_IOCTL_FSCK_OFFLINE: run fsck from the 'bcachefs fsck' userspace command,
 * but with the kernel's implementation of fsck:
 *
 * @flags:	must be 0; the ioctl handler rejects anything else with -EINVAL
 * @opts:	userspace pointer to a NUL-terminated option string that is
 *		parsed as mount options, or 0 for no options
 * @nr_devs:	number of entries in @devs
 * @devs:	userspace pointers to NUL-terminated strings, one per device,
 *		handed to the filesystem open path
 *
 * @devs is a C99 flexible array member rather than a zero-length array;
 * sizeof(struct bch_ioctl_fsck_offline) is unchanged (header only).
 */
struct bch_ioctl_fsck_offline {
	__u64			flags;
	__u64			opts;		/* string */
	__u64			nr_devs;
	__u64			devs[];
};
#endif /* _BCACHEFS_IOCTL_H */ #endif /* _BCACHEFS_IOCTL_H */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/ioctl.h> #include <linux/ioctl.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/major.h> #include <linux/major.h>
#include <linux/poll.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -32,12 +33,15 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long ...@@ -32,12 +33,15 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long
struct thread_with_file { struct thread_with_file {
struct task_struct *task; struct task_struct *task;
int ret; int ret;
bool done;
}; };
static void thread_with_file_exit(struct thread_with_file *thr) static void thread_with_file_exit(struct thread_with_file *thr)
{ {
if (thr->task) {
kthread_stop(thr->task); kthread_stop(thr->task);
put_task_struct(thr->task); put_task_struct(thr->task);
}
} }
__printf(4, 0) __printf(4, 0)
...@@ -194,8 +198,208 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg ...@@ -194,8 +198,208 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
} }
#endif #endif
/*
 * State for one offline fsck run, shared between the kthread that opens the
 * filesystem and the file descriptor userspace reads the log from.
 */
struct fsck_thread {
	/* Generic thread + file bookkeeping; container_of() target */
	struct thread_with_file	thr;
	/* NOTE(review): not referenced by the code visible here - confirm it is needed */
	struct printbuf		buf;
	/* Array of nr_devs kernel copies of the device strings */
	char			**devs;
	size_t			nr_devs;
	/* Parsed options; log_output is pointed at @output below */
	struct bch_opts		opts;
	/* Log sink: output.buf is drained under output.lock, readers sleep on output.wait */
	struct log_output	output;
	/* Staging buffer the read path copies to userspace from */
	DARRAY(char)		output2;
};
/*
 * Tear down a fsck_thread: run thread_with_file_exit() on the embedded
 * thread_with_file, then release every device string, both output buffers,
 * the device array and finally the fsck_thread itself.
 */
static void bch2_fsck_thread_free(struct fsck_thread *thr)
{
	/* Must come first: the buffers freed below are what the thread logs into */
	thread_with_file_exit(&thr->thr);

	if (thr->devs) {
		size_t idx = 0;

		while (idx < thr->nr_devs)
			kfree(thr->devs[idx++]);
	}

	darray_exit(&thr->output2);
	printbuf_exit(&thr->output.buf);

	kfree(thr->devs);
	kfree(thr);
}
/*
 * ->release() for the fsck file: file->private_data points at the embedded
 * thread_with_file, from which the owning fsck_thread is recovered and freed.
 * Always returns 0.
 */
static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
{
	struct thread_with_file *twf = file->private_data;

	bch2_fsck_thread_free(container_of(twf, struct fsck_thread, thr));
	return 0;
}
static bool fsck_thread_ready(struct fsck_thread *thr)
{
return thr->output.buf.pos ||
thr->output2.nr ||
thr->thr.done;
}
/*
 * ->read() for the fsck file: hands the log output produced by the fsck
 * thread to userspace.
 *
 * Returns the number of bytes copied, 0 at end of output (thread finished and
 * nothing left to read), -EAGAIN for a non-blocking read with nothing ready,
 * the wait_event_interruptible() error if the wait is interrupted, -EFAULT if
 * nothing could be copied to @buf, or the darray_make_room() error if the
 * staging buffer could not be grown before any bytes were copied.
 */
static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
				     size_t len, loff_t *ppos)
{
	struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
	size_t copied = 0, b;
	int ret = 0;

	if ((file->f_flags & O_NONBLOCK) &&
	    !fsck_thread_ready(thr))
		return -EAGAIN;

	ret = wait_event_interruptible(thr->output.wait,
				       fsck_thread_ready(thr));
	if (ret)
		return ret;

	/*
	 * Only signal EOF once the thread is done AND all of its output has
	 * been consumed; returning 0 as soon as ->done is set would drop
	 * whatever is still sitting in the buffers.
	 */
	if (thr->thr.done &&
	    !thr->output.buf.pos &&
	    !thr->output2.nr)
		return 0;

	while (len) {
		/* Grow the staging buffer outside the spinlock */
		ret = darray_make_room(&thr->output2, thr->output.buf.pos);
		if (ret)
			break;

		/* Move as much as fits from the log printbuf into the staging buffer */
		spin_lock_irq(&thr->output.lock);
		b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);

		memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
		memmove(thr->output.buf.buf,
			thr->output.buf.buf + b,
			thr->output.buf.pos - b);

		thr->output2.nr += b;
		thr->output.buf.pos -= b;
		spin_unlock_irq(&thr->output.lock);

		b = min(len, thr->output2.nr);
		if (!b)
			break;

		/* copy_to_user() returns the number of bytes NOT copied */
		b -= copy_to_user(buf, thr->output2.data, b);
		if (!b) {
			ret = -EFAULT;
			break;
		}

		copied	+= b;
		buf	+= b;
		len	-= b;

		/* Shift the unread remainder to the front of the staging buffer */
		memmove(thr->output2.data,
			thr->output2.data + b,
			thr->output2.nr - b);
		thr->output2.nr -= b;
	}

	/* A partial read wins over a late error */
	return copied ?: ret;
}
/*
 * ->poll() for the fsck file.
 *
 * EPOLLIN is reported whenever a read would not block (output pending, or the
 * thread has finished so read() returns end of output). EPOLLHUP is reported
 * only once the thread has finished, so pollers do not see a hangup while
 * fsck is still running and producing output.
 */
static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
{
	struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
	__poll_t mask = 0;

	poll_wait(file, &thr->output.wait, wait);

	if (fsck_thread_ready(thr))
		mask |= EPOLLIN;
	if (thr->thr.done)
		mask |= EPOLLHUP;

	return mask;
}
/*
 * File operations for the fsck file: a read-only, pollable, non-seekable
 * stream; releasing the file frees the fsck_thread.
 */
static const struct file_operations fsck_thread_ops = {
	.llseek		= no_llseek,
	.read		= bch2_fsck_thread_read,
	.poll		= bch2_fsck_thread_poll,
	.release	= bch2_fsck_thread_release,
};
/*
 * Thread body for offline fsck: opens the filesystem on the requested devices
 * with the parsed options, records the open result in thr->thr.ret, stops the
 * filesystem again if the open succeeded, then marks the thread done and
 * wakes any reader/poller. The kthread exit code is always 0.
 *
 * NOTE(review): presumably fsck itself runs inside bch2_fs_open() when the
 * options request it - confirm against the option parsing in userspace.
 */
static int bch2_fsck_offline_thread_fn(void *arg)
{
	struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
	struct bch_fs *c;
	int err;

	c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
	err = PTR_ERR_OR_ZERO(c);

	thr->thr.ret = err;
	if (err == 0)
		bch2_fs_stop(c);

	thr->thr.done = true;
	wake_up(&thr->output.wait);
	return 0;
}
/*
 * BCH_IOCTL_FSCK_OFFLINE handler: copy the device list and option string in
 * from userspace, then start a "bch-fsck" thread whose log output is exposed
 * through a new file (see fsck_thread_ops).
 *
 * Returns the value of run_thread_with_file() on success, or a negative error:
 *   -EFAULT	@user_arg or its device array could not be read
 *   -EINVAL	unknown flags, or nr_devs does not fit in size_t
 *   -EPERM	caller lacks CAP_SYS_ADMIN
 *   -ENOMEM	allocation failure
 *   or whatever strndup_user() / bch2_parse_mount_opts() /
 *   run_thread_with_file() report.
 */
static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
{
	struct bch_ioctl_fsck_offline arg;
	struct fsck_thread *thr = NULL;
	u64 *devs = NULL;
	long ret = 0;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	if (arg.flags)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * nr_devs is a u64 from userspace but every allocation and copy below
	 * takes a size_t: refuse values that would be silently truncated
	 * (only possible where size_t is narrower than 64 bits).
	 */
	if ((size_t) arg.nr_devs != arg.nr_devs)
		return -EINVAL;

	if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
	    !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
	    !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
		ret = -ENOMEM;
		goto err;
	}

	thr->nr_devs = arg.nr_devs;
	thr->output.buf	= PRINTBUF;
	thr->output.buf.atomic++;
	spin_lock_init(&thr->output.lock);
	init_waitqueue_head(&thr->output.wait);
	darray_init(&thr->output2);

	/* A faulting user pointer is -EFAULT, matching the copy of @arg above */
	if (copy_from_user(devs, &user_arg->devs[0], sizeof(user_arg->devs[0]) * arg.nr_devs)) {
		ret = -EFAULT;
		goto err;
	}

	for (size_t i = 0; i < thr->nr_devs; i++) {
		char *dev = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);

		/*
		 * Only store valid pointers: the error path kfree()s every
		 * thr->devs[] slot, and kfree() must never see an ERR_PTR.
		 * Untouched slots stay NULL from kcalloc().
		 */
		ret = PTR_ERR_OR_ZERO(dev);
		if (ret)
			goto err;

		thr->devs[i] = dev;
	}

	if (arg.opts) {
		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);

		ret =   PTR_ERR_OR_ZERO(optstr) ?:
			bch2_parse_mount_opts(NULL, &thr->opts, optstr);
		/* optstr is only freed when it is a real allocation, not an ERR_PTR */
		if (!IS_ERR(optstr))
			kfree(optstr);

		if (ret)
			goto err;
	}

	/* Route the filesystem's log messages into our output buffer */
	opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);

	ret = run_thread_with_file(&thr->thr,
				   &fsck_thread_ops,
				   bch2_fsck_offline_thread_fn,
				   "bch-fsck");
err:
	if (ret < 0) {
		if (thr)
			bch2_fsck_thread_free(thr);
		pr_err("ret %s", bch2_err_str(ret));
	}
	kfree(devs);
	return ret;
}
static long bch2_global_ioctl(unsigned cmd, void __user *arg) static long bch2_global_ioctl(unsigned cmd, void __user *arg)
{ {
long ret;
switch (cmd) { switch (cmd) {
#if 0 #if 0
case BCH_IOCTL_ASSEMBLE: case BCH_IOCTL_ASSEMBLE:
...@@ -203,9 +407,18 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg) ...@@ -203,9 +407,18 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
case BCH_IOCTL_INCREMENTAL: case BCH_IOCTL_INCREMENTAL:
return bch2_ioctl_incremental(arg); return bch2_ioctl_incremental(arg);
#endif #endif
case BCH_IOCTL_FSCK_OFFLINE: {
ret = bch2_ioctl_fsck_offline(arg);
break;
}
default: default:
return -ENOTTY; ret = -ENOTTY;
break;
} }
if (ret < 0)
ret = bch2_err_class(ret);
return ret;
} }
static long bch2_ioctl_query_uuid(struct bch_fs *c, static long bch2_ioctl_query_uuid(struct bch_fs *c,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment