Commit 1263a7bf authored by Linus Torvalds

Merge tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - followup fix for direct io and fsync under some conditions, reported
   by QEMU users

 - fix a potential leak when disabling quotas while some extent tracking
   work can still happen

 - in zoned mode handle unexpected change of zone write pointer in
   RAID1-like block groups, turn the zones to read-only

* tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race between direct IO write and fsync when using same fd
  btrfs: zoned: handle broken write pointer on zones
  btrfs: qgroup: don't use extent changeset when not needed
parents d8abb73f cd9253c2
@@ -459,7 +459,6 @@ struct btrfs_file_private {
 	void *filldir_buf;
 	u64 last_index;
 	struct extent_state *llseek_cached_state;
-	bool fsync_skip_inode_lock;
 };

 static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
@@ -864,13 +864,6 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_ERR_OR_NULL(dio)) {
 		ret = PTR_ERR_OR_ZERO(dio);
 	} else {
-		struct btrfs_file_private stack_private = { 0 };
-		struct btrfs_file_private *private;
-		const bool have_private = (file->private_data != NULL);
-
-		if (!have_private)
-			file->private_data = &stack_private;
-
 		/*
 		 * If we have a synchronous write, we must make sure the fsync
 		 * triggered by the iomap_dio_complete() call below doesn't
@@ -879,13 +872,10 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 		 * partial writes due to the input buffer (or parts of it) not
 		 * being already faulted in.
 		 */
-		private = file->private_data;
-		private->fsync_skip_inode_lock = true;
+		ASSERT(current->journal_info == NULL);
+		current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB;
 		ret = iomap_dio_complete(dio);
-		private->fsync_skip_inode_lock = false;
-
-		if (!have_private)
-			file->private_data = NULL;
+		current->journal_info = NULL;
 	}

 	/* No increment (+=) because iomap returns a cumulative value. */
@@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
  */
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct btrfs_file_private *private = file->private_data;
 	struct dentry *dentry = file_dentry(file);
 	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
 	struct btrfs_root *root = inode->root;
@@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	int ret = 0, err;
 	u64 len;
 	bool full_sync;
-	const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
+	bool skip_ilock = false;
+
+	if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
+		skip_ilock = true;
+		current->journal_info = NULL;
+		lockdep_assert_held(&inode->vfs_inode.i_rwsem);
+	}

 	trace_btrfs_sync_file(file, datasync);
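The two hunks above are the two sides of one handshake: btrfs_direct_write() parks a sentinel pointer in the task-local current->journal_info slot before completing the write, and btrfs_sync_file() checks for it to learn that the inode lock is already held by the same task. Below is a minimal userspace sketch of that pattern; the names (task_journal_info, do_dio_write, do_fsync) are invented stand-ins for the kernel pieces, not kernel APIs.

/*
 * Sketch of the sentinel-in-a-task-local-slot pattern. DIO_WRITE_STUB is
 * never dereferenced, only compared, mirroring BTRFS_TRANS_DIO_WRITE_STUB.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

#define DIO_WRITE_STUB ((void *)1)

/* Stand-in for current->journal_info: one pointer-sized slot per task. */
static _Thread_local void *task_journal_info;

/* Non-recursive lock standing in for the inode's i_rwsem. */
static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;

static void do_fsync(void)
{
	/* Consumer side: a flagged task already holds the inode lock. */
	int skip_ilock = (task_journal_info == DIO_WRITE_STUB);

	if (skip_ilock)
		task_journal_info = NULL;
	else
		pthread_mutex_lock(&inode_lock);

	printf("fsync ran, skip_ilock=%d\n", skip_ilock);

	if (!skip_ilock)
		pthread_mutex_unlock(&inode_lock);
}

static void do_dio_write(void)
{
	pthread_mutex_lock(&inode_lock);

	/* Setter side: flag the task before completion, which may fsync. */
	assert(task_journal_info == NULL);
	task_journal_info = DIO_WRITE_STUB;
	do_fsync();		/* would self-deadlock without the flag */
	task_journal_info = NULL;

	pthread_mutex_unlock(&inode_lock);
}

int main(void)
{
	do_dio_write();
	return 0;
}

A per-task slot avoids the race the pull message refers to: the earlier file->private_data flag was shared by every user of the file, so a concurrent fsync on the same fd could observe it and wrongly skip taking the lock.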
@@ -4346,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
 	int ret;

 	if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
-		extent_changeset_init(&changeset);
 		return clear_record_extent_bits(&inode->io_tree, start,
 						start + len - 1,
						EXTENT_QGROUP_RESERVED, NULL);
+						EXTENT_QGROUP_RESERVED, NULL);
 	}

 	/* In release case, we shouldn't have @reserved */
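This is the leak fix from the pull message: the stack changeset records the cleared ranges in an allocated list, and this early-return path never released it; since the qgroups-disabled path does not care which ranges changed, passing NULL lets clear_record_extent_bits() skip recording entirely. A sketch of the optional out-parameter convention this relies on, with invented names (range_set, clear_bits_record) used purely for illustration:

/*
 * Sketch: a callee that records results only when the caller passes a
 * collector. With NULL, nothing is allocated, so nothing can leak.
 */
#include <stdlib.h>

struct range { unsigned long start, end; struct range *next; };
struct range_set { struct range *head; };

static int clear_bits_record(unsigned long start, unsigned long end,
			     struct range_set *changeset)
{
	if (changeset) {
		struct range *r = malloc(sizeof(*r));

		if (!r)
			return -1;
		r->start = start;
		r->end = end;
		r->next = changeset->head;
		changeset->head = r;	/* caller must free this list */
	}
	/* ... the actual bit clearing would happen here ... */
	return 0;
}

int main(void)
{
	/* Like the qgroups-disabled path: no collector, no allocation. */
	return clear_bits_record(0, 4095, NULL);
}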
@@ -27,6 +27,12 @@ struct btrfs_root_item;
 struct btrfs_root;
 struct btrfs_path;

+/*
+ * Signal that a direct IO write is in progress, to avoid deadlock for sync
+ * direct IO writes when fsync is called during the direct IO write path.
+ */
+#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1)
+
 /* Radix-tree tag for roots that are part of the trasaction. */
 #define BTRFS_ROOT_TRANS_TAG 0
@@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}

+	bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
 	if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
 		btrfs_err(bg->fs_info,
 			  "zoned: cannot recover write pointer for zone %llu",
@@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 	}

 	bg->alloc_offset = zone_info[0].alloc_offset;
-	bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
 	return 0;
 }
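Note the hunk both hoists the zone_capacity computation above the early returns (so the recovery code added later can rely on it being set) and switches min() to min_not_zero(): a missing device reports a capacity of 0, which plain min() would wrongly select. A local reimplementation of the min_not_zero() semantics for illustration (the kernel macro itself is type-generic):

/* Sketch of min_not_zero(): prefer the smaller value, but never pick a
 * zero (i.e. missing) capacity unless both are zero. */
#include <assert.h>

static unsigned long long min_not_zero_ull(unsigned long long x,
					   unsigned long long y)
{
	if (x == 0)
		return y;
	if (y == 0)
		return x;
	return x < y ? x : y;
}

int main(void)
{
	assert(min_not_zero_ull(100, 200) == 100);	/* both zones usable */
	assert(min_not_zero_ull(0, 200) == 200);	/* one device missing */
	assert(min_not_zero_ull(100, 0) == 100);
	assert(min_not_zero_ull(0, 0) == 0);		/* both missing */
	return 0;
}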
@@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}

+	/* In case a device is missing we have a cap of 0, so don't use it. */
+	bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
 	for (i = 0; i < map->num_stripes; i++) {
 		if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
 		    zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
 			if (test_bit(0, active))
 				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
 		}
-
-		/* In case a device is missing we have a cap of 0, so don't use it. */
-		bg->zone_capacity = min_not_zero(zone_info[0].capacity,
-						 zone_info[1].capacity);
 	}

 	if (zone_info[0].alloc_offset != WP_MISSING_DEV)
@@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	unsigned long *active = NULL;
 	u64 last_alloc = 0;
 	u32 num_sequential = 0, num_conventional = 0;
+	u64 profile;

 	if (!btrfs_is_zoned(fs_info))
 		return 0;
@@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		}
 	}

-	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	switch (profile) {
 	case 0: /* single */
 		ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
 		break;
@@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}

+	if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
+	    profile != BTRFS_BLOCK_GROUP_RAID10) {
+		/*
+		 * Detected broken write pointer. Make this block group
+		 * unallocatable by setting the allocation pointer at the end of
+		 * allocatable region. Relocating this block group will fix the
+		 * mismatch.
+		 *
+		 * Currently, we cannot handle RAID0 or RAID10 case like this
+		 * because we don't have a proper zone_capacity value. But,
+		 * reading from this block group won't work anyway by a missing
+		 * stripe.
+		 */
+		cache->alloc_offset = cache->zone_capacity;
+		ret = 0;
+	}
+
 out:
 	/* Reject non SINGLE data profiles without RST */
 	if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
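The recovery works because a zoned block group can only append at its write pointer: free space is zone_capacity minus alloc_offset, so parking the offset at the capacity leaves nothing to allocate while reads keep working, and relocating the block group later rewrites the data elsewhere. A minimal sketch of that invariant; the struct and helper names here are invented for illustration, not the kernel's:

/* Sketch: making a zoned block group unallocatable by exhausting it. */
#include <assert.h>

struct zbg {
	unsigned long long alloc_offset;	/* next append position */
	unsigned long long zone_capacity;	/* usable bytes in the zone */
};

static unsigned long long zbg_free_bytes(const struct zbg *bg)
{
	return bg->zone_capacity - bg->alloc_offset;
}

int main(void)
{
	struct zbg bg = { .alloc_offset = 4096, .zone_capacity = 1ULL << 20 };

	assert(zbg_free_bytes(&bg) > 0);	/* still allocatable */

	/* The -EIO recovery path: park the write pointer at the end. */
	bg.alloc_offset = bg.zone_capacity;
	assert(zbg_free_bytes(&bg) == 0);	/* nothing left to allocate */
	return 0;
}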