Commit 8dde2086 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix max_inline mount option limit on 64k page system

 - lockdep fixes:
     - update bdev time in a safer way
     - move bdev put outside of sb write section when removing device
     - fix possible deadlock when mounting seed/sprout filesystem

 - zoned mode: fix split extent accounting

 - minor include fixup

* tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zoned: fix double counting of split ordered extent
  btrfs: fix lockdep warning while mounting sprout fs
  btrfs: delay blkdev_put until after the device remove
  btrfs: update the bdev time directly when closing
  btrfs: use correct header for div_u64 in misc.h
  btrfs: fix upper limit for max_inline for page size 64K
parents ae79394a f79645df
......@@ -3314,6 +3314,30 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
/*
* Flag our filesystem as having big metadata blocks if they are bigger
* than the page size.
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
btrfs_info(fs_info,
"flagging fs with big metadata feature");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
/* Set up fs_info before parsing mount options */
nodesize = btrfs_super_nodesize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
stripesize = sectorsize;
fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
ret = btrfs_parse_options(fs_info, options, sb->s_flags);
if (ret) {
err = ret;
......@@ -3340,30 +3364,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
btrfs_info(fs_info, "has skinny extents");
/*
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
btrfs_info(fs_info,
"flagging fs with big metadata feature");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
nodesize = btrfs_super_nodesize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
stripesize = sectorsize;
fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
/* Cache block sizes */
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
/*
* mixed block groups end up with duplicate but slightly offset
* extent buffers for the same range. It leads to corruptions
......
......@@ -3223,6 +3223,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_vol_args_v2 *vol_args;
struct block_device *bdev = NULL;
fmode_t mode;
int ret;
bool cancel = false;
......@@ -3255,9 +3257,9 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
/* Exclusive operation is now claimed */
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode);
else
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
btrfs_exclop_finish(fs_info);
......@@ -3273,6 +3275,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
kfree(vol_args);
err_drop:
mnt_drop_write_file(file);
if (bdev)
blkdev_put(bdev, mode);
return ret;
}
......@@ -3281,6 +3285,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_vol_args *vol_args;
struct block_device *bdev = NULL;
fmode_t mode;
int ret;
bool cancel;
......@@ -3302,7 +3308,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
cancel);
if (ret == 0) {
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
if (!ret)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
btrfs_exclop_finish(fs_info);
......@@ -3311,7 +3317,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
kfree(vol_args);
out_drop_write:
mnt_drop_write_file(file);
if (bdev)
blkdev_put(bdev, mode);
return ret;
}
......
......@@ -5,7 +5,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
......
......@@ -1049,6 +1049,7 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
u64 len)
{
struct inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
u64 file_offset = ordered->file_offset + pos;
u64 disk_bytenr = ordered->disk_bytenr + pos;
u64 num_bytes = len;
......@@ -1066,6 +1067,13 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
else
type = __ffs(flags_masked);
/*
* The splitting extent is already counted and will be added again
* in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
* double counting.
*/
percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
fs_info->delalloc_batch);
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
WARN_ON_ONCE(1);
ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
......
......@@ -558,6 +558,8 @@ static int btrfs_free_stale_devices(const char *path,
struct btrfs_device *device, *tmp_device;
int ret = 0;
lockdep_assert_held(&uuid_mutex);
if (path)
ret = -ENOENT;
......@@ -988,11 +990,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
struct btrfs_device *orig_dev;
int ret = 0;
lockdep_assert_held(&uuid_mutex);
fs_devices = alloc_fs_devices(orig->fsid, NULL);
if (IS_ERR(fs_devices))
return fs_devices;
mutex_lock(&orig->device_list_mutex);
fs_devices->total_devices = orig->total_devices;
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
......@@ -1024,10 +1027,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
device->fs_devices = fs_devices;
fs_devices->num_devices++;
}
mutex_unlock(&orig->device_list_mutex);
return fs_devices;
error:
mutex_unlock(&orig->device_list_mutex);
free_fs_devices(fs_devices);
return ERR_PTR(ret);
}
......@@ -1869,15 +1870,17 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
* Function to update ctime/mtime for a given device path.
* Mainly used for ctime/mtime based probe like libblkid.
*/
static void update_dev_time(const char *path_name)
static void update_dev_time(struct block_device *bdev)
{
struct file *filp;
struct inode *inode = bdev->bd_inode;
struct timespec64 now;
filp = filp_open(path_name, O_RDWR, 0);
if (IS_ERR(filp))
/* Shouldn't happen but just in case. */
if (!inode)
return;
file_update_time(filp);
filp_close(filp, NULL);
now = current_time(inode);
generic_update_time(inode, &now, S_MTIME | S_CTIME);
}
static int btrfs_rm_dev_item(struct btrfs_device *device)
......@@ -2053,11 +2056,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
/* Update ctime/mtime for device path for libblkid */
update_dev_time(device_path);
update_dev_time(bdev);
}
int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
u64 devid)
u64 devid, struct block_device **bdev, fmode_t *mode)
{
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
......@@ -2171,15 +2174,26 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
mutex_unlock(&fs_devices->device_list_mutex);
/*
* at this point, the device is zero sized and detached from
* the devices list. All that's left is to zero out the old
* supers and free the device.
* At this point, the device is zero sized and detached from the
* devices list. All that's left is to zero out the old supers and
* free the device.
*
* We cannot call btrfs_close_bdev() here because we're holding the sb
* write lock, and blkdev_put() will pull in the ->open_mutex on the
* block device and it's dependencies. Instead just flush the device
* and let the caller do the final blkdev_put.
*/
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
btrfs_scratch_superblocks(fs_info, device->bdev,
device->name->str);
if (device->bdev) {
sync_blockdev(device->bdev);
invalidate_bdev(device->bdev);
}
}
btrfs_close_bdev(device);
*bdev = device->bdev;
*mode = device->mode;
synchronize_rcu();
btrfs_free_device(device);
......@@ -2706,7 +2720,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_forget_devices(device_path);
/* Update ctime/mtime for blkid or udev */
update_dev_time(device_path);
update_dev_time(bdev);
return ret;
......
......@@ -472,7 +472,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u8 *uuid);
void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
const char *device_path, u64 devid);
const char *device_path, u64 devid,
struct block_device **bdev, fmode_t *mode);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment