Commit d38c3fa6 authored by Linus Torvalds

Merge tag 'for-5.3-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - a tiny race window when 2 transactions abort at the same time can
   accidentally lead to a commit

 - regression fix, possible deadlock during fiemap

 - fix for an old bug when incremental send can fail on a file that has
   been deduplicated in a special way

* tag 'for-5.3-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix deadlock between fiemap and transaction commits
  Btrfs: fix race leading to fs corruption after transaction abort
  Btrfs: fix incremental send failure after deduplication
parents 97b00aff a6d155d2
...@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, ...@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
ulist_init(roots); ulist_init(roots);
ulist_init(tmp); ulist_init(tmp);
trans = btrfs_attach_transaction(root); trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
......
...@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx, ...@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
{ {
int ret = 0; int ret = 0;
if (sctx->cur_ino != sctx->cmp_key->objectid) {
if (result == BTRFS_COMPARE_TREE_CHANGED) {
struct extent_buffer *leaf_l;
struct extent_buffer *leaf_r;
struct btrfs_file_extent_item *ei_l;
struct btrfs_file_extent_item *ei_r;
leaf_l = sctx->left_path->nodes[0];
leaf_r = sctx->right_path->nodes[0];
ei_l = btrfs_item_ptr(leaf_l,
sctx->left_path->slots[0],
struct btrfs_file_extent_item);
ei_r = btrfs_item_ptr(leaf_r,
sctx->right_path->slots[0],
struct btrfs_file_extent_item);
/* /*
* We may have found an extent item that has changed * We have found an extent item that changed without the inode item
* only its disk_bytenr field and the corresponding * having changed. This can happen either after relocation (where the
* inode item was not updated. This case happens due to * disk_bytenr of an extent item is replaced at
* very specific timings during relocation when a leaf * relocation.c:replace_file_extents()) or after deduplication into a
* that contains file extent items is COWed while * file in both the parent and send snapshots (where an extent item can
* relocation is ongoing and its in the stage where it * get modified or replaced with a new one). Note that deduplication
* updates data pointers. So when this happens we can * updates the inode item, but it only changes the iversion (sequence
* safely ignore it since we know it's the same extent, * field in the inode item) of the inode, so if a file is deduplicated
* but just at different logical and physical locations * the same amount of times in both the parent and send snapshots, its
* (when an extent is fully replaced with a new one, we * iversion becomes the same in both snapshots, whence the inode item is
* know the generation number must have changed too, * the same on both snapshots.
* since snapshot creation implies committing the current */
* transaction, and the inode item must have been updated if (sctx->cur_ino != sctx->cmp_key->objectid)
* as well).
* This replacement of the disk_bytenr happens at
* relocation.c:replace_file_extents() through
* relocation.c:btrfs_reloc_cow_block().
*/
if (btrfs_file_extent_generation(leaf_l, ei_l) ==
btrfs_file_extent_generation(leaf_r, ei_r) &&
btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
btrfs_file_extent_compression(leaf_l, ei_l) ==
btrfs_file_extent_compression(leaf_r, ei_r) &&
btrfs_file_extent_encryption(leaf_l, ei_l) ==
btrfs_file_extent_encryption(leaf_r, ei_r) &&
btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
btrfs_file_extent_type(leaf_l, ei_l) ==
btrfs_file_extent_type(leaf_r, ei_r) &&
btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
btrfs_file_extent_offset(leaf_l, ei_l) ==
btrfs_file_extent_offset(leaf_r, ei_r) &&
btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
btrfs_file_extent_num_bytes(leaf_r, ei_r))
return 0; return 0;
}
inconsistent_snapshot_error(sctx, result, "extent");
return -EIO;
}
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED) if (result != BTRFS_COMPARE_TREE_DELETED)
......
...@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { ...@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START | [TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH | __TRANS_ATTACH |
__TRANS_JOIN), __TRANS_JOIN |
__TRANS_JOIN_NOSTART),
[TRANS_STATE_UNBLOCKED] = (__TRANS_START | [TRANS_STATE_UNBLOCKED] = (__TRANS_START |
__TRANS_ATTACH | __TRANS_ATTACH |
__TRANS_JOIN | __TRANS_JOIN |
__TRANS_JOIN_NOLOCK), __TRANS_JOIN_NOLOCK |
__TRANS_JOIN_NOSTART),
[TRANS_STATE_COMPLETED] = (__TRANS_START | [TRANS_STATE_COMPLETED] = (__TRANS_START |
__TRANS_ATTACH | __TRANS_ATTACH |
__TRANS_JOIN | __TRANS_JOIN |
__TRANS_JOIN_NOLOCK), __TRANS_JOIN_NOLOCK |
__TRANS_JOIN_NOSTART),
}; };
void btrfs_put_transaction(struct btrfs_transaction *transaction) void btrfs_put_transaction(struct btrfs_transaction *transaction)
...@@ -543,7 +546,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, ...@@ -543,7 +546,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
ret = join_transaction(fs_info, type); ret = join_transaction(fs_info, type);
if (ret == -EBUSY) { if (ret == -EBUSY) {
wait_current_trans(fs_info); wait_current_trans(fs_info);
if (unlikely(type == TRANS_ATTACH)) if (unlikely(type == TRANS_ATTACH ||
type == TRANS_JOIN_NOSTART))
ret = -ENOENT; ret = -ENOENT;
} }
} while (ret == -EBUSY); } while (ret == -EBUSY);
...@@ -659,6 +663,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root ...@@ -659,6 +663,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
BTRFS_RESERVE_NO_FLUSH, true); BTRFS_RESERVE_NO_FLUSH, true);
} }
/*
* Join the currently running transaction, if there is one, without ever
* starting a new one.
*
* Similar to regular join but it never starts a transaction when none is
* running or after waiting for the current one to finish.
*
* This is a thin wrapper around start_transaction() that requests 0 items,
* type TRANS_JOIN_NOSTART and BTRFS_RESERVE_NO_FLUSH.
*
* Returns the result of start_transaction(); callers are expected to test it
* with IS_ERR(). When join_transaction() reports -EBUSY for this type,
* start_transaction() waits for the current transaction and then gives up
* with -ENOENT instead of retrying.
*
* NOTE(review): the meaning of the trailing 'true' argument is not visible
* here -- confirm against start_transaction()'s prototype.
*/
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOSTART,
BTRFS_RESERVE_NO_FLUSH, true);
}
/* /*
* btrfs_attach_transaction() - catch the running transaction * btrfs_attach_transaction() - catch the running transaction
* *
...@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
} }
} else { } else {
spin_unlock(&fs_info->trans_lock); spin_unlock(&fs_info->trans_lock);
/*
* The previous transaction was aborted and was already removed
* from the list of transactions at fs_info->trans_list. So we
* abort to prevent writing a new superblock that reflects a
* corrupt state (pointing to trees with unwritten nodes/leafs).
*/
if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
ret = -EROFS;
goto cleanup_transaction;
}
} }
extwriter_counter_dec(cur_trans, trans->type); extwriter_counter_dec(cur_trans, trans->type);
......
...@@ -94,11 +94,13 @@ struct btrfs_transaction { ...@@ -94,11 +94,13 @@ struct btrfs_transaction {
#define __TRANS_JOIN (1U << 11) #define __TRANS_JOIN (1U << 11)
#define __TRANS_JOIN_NOLOCK (1U << 12) #define __TRANS_JOIN_NOLOCK (1U << 12)
#define __TRANS_DUMMY (1U << 13) #define __TRANS_DUMMY (1U << 13)
#define __TRANS_JOIN_NOSTART (1U << 14)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH (__TRANS_ATTACH) #define TRANS_ATTACH (__TRANS_ATTACH)
#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART)
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH) #define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
...@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( ...@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
int min_factor); int min_factor);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier( struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root); struct btrfs_root *root);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment