Commit 6dec9f40 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "We don't have any big feature updates this time, there are lots of
  small enhacements or fixes. A highlight perhaps is the parallel fsync
  performance improvements, numbers below.

  Regarding the dio/iomap that was reverted last time, the required API
  changes are likely to land in the upcoming cycle, the btrfs part will
  be updated afterwards.

  User visible changes:

   - new mount option rescue= to group all recovery-related mount
     options so we don't have many specific options, currently
     introducing only aliases for existing options, future extensions
     are in development to allow read-only mount with partially damaged
     structures:
      - usebackuproot is an alias for rescue=usebackuproot
      - nologreplay is an alias for rescue=nologreplay

   - start deprecation of mount option inode_cache, removal scheduled to
     v5.11

   - removed deprecated mount options alloc_start and subvolrootid

   - device stats corruption counter gets incremented when a checksum
     mismatch is found

   - qgroup information exported in /sys/fs/btrfs/<UUID>/qgroups/<id>
     using sysfs

   - add link /sys/fs/btrfs/<UUID>/bdi pointing to the associated
     backing dev info

   - FS_INFO ioctl enhancements:
      - add flags to request/describe newly added items
      - new item: numeric checksum type and checksum size
      - new item: generation
      - new item: metadata_uuid

   - seed device: with one new read-write device added, print the new
     device information in /proc/mounts

   - balance: detect cancellation by Ctrl-C in existing cancellation
     points

  Performance improvements:

   - optimized versions of various helpers on little-endian
     architectures, where we don't have to do LE/BE conversion from
     on-disk format

   - tree-log/fsync optimizations leading to lower max latency reported
     by dbench, reduced by about 12%

   - all chunk tree leaves are prefetched at mount time, can improve
     mount time on large (terabyte-sized) filesystems

   - speed up parallel fsync of files with reflinked/deduped extents,
     with jobs 16 to 1024 the throughput gets improved roughly by 50% on
     average and runtime decreased roughly by 30% on average, notable
     outlier is 128 jobs with +121.2% on throughput and -54.6% runtime

   - another speed up of parallel fsync, reduce number of checksum tree
     lookups and contention, the improvements start to show up with 2
     tasks with +20% throughput and -16% runtime up to 64 with +200%
     throughput and -66% runtime

  Core:

   - umount-time qgroup leak checker

   - qgroups
      - add a way to unreserve partial range after failure, avoiding
        some EDQUOT errors
      - improved flushing logic when EDQUOT is hit

   - possible EINTR interruption caused by failed reservations after
     transaction start is better handled and documented

   - transaction abort errors are unified to EROFS in case it's not the
     original reason of abort or we don't have other way to determine
     the reason

  Fixes:

   - make truncate succeed on a NOCOW file even if data space is
     exhausted

   - fix cancelling balance on filesystem with exhausted metadata space

   - anon block device:
      - preallocate anon bdev when subvolume is created to report
        failure early
      - shorten time the anon bdev id is allocated
      - don't allocate anon bdev for internal roots

   - minor memory leak in ref-verify

   - refuse invalid combinations of compression and NOCOW file flags

   - lockdep fixes, updating the device locks

   - remove obsolete fallback logic for block group profile adjustments
     when switching from 1 to more devices, causing allocation of
     unwanted block groups

  Other cleanups, refactoring, simplifications:

   - conversions from struct inode to struct btrfs_inode in internal
     functions

   - removal of unused struct members"

* tag 'for-5.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (151 commits)
  btrfs: do not set the full sync flag on the inode during page release
  btrfs: release old extent maps during page release
  btrfs: fix race between page release and a fast fsync
  btrfs: open-code remount flag setting in btrfs_remount
  btrfs: if we're restriping, use the target restripe profile
  btrfs: don't adjust bg flags and use default allocation profiles
  btrfs: fix lockdep splat from btrfs_dump_space_info
  btrfs: move the chunk_mutex in btrfs_read_chunk_tree
  btrfs: open device without device_list_mutex
  btrfs: sysfs: use NOFS for device creation
  btrfs: return EROFS for BTRFS_FS_STATE_ERROR cases
  btrfs: document special case error codes for fs errors
  btrfs: don't WARN if we abort a transaction with EROFS
  btrfs: reduce contention on log trees when logging checksums
  btrfs: remove done label in writepage_delalloc
  btrfs: add comments for btrfs_reserve_flush_enum
  btrfs: relocation: review the call sites which can be interrupted by signal
  btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree
  btrfs: relocation: allow signal to cancel balance
  btrfs: raid56: remove out label in __raid56_parity_recover
  ...
parents 92b7e492 5e548b32
This diff is collapsed.
......@@ -114,8 +114,7 @@ struct btrfs_block_group {
/* For block groups in the same raid type */
struct list_head list;
/* Usage count */
atomic_t count;
refcount_t refs;
/*
* List of struct btrfs_free_clusters for this block group.
......
......@@ -151,6 +151,17 @@ struct btrfs_inode {
*/
u64 last_unlink_trans;
/*
* The id/generation of the last transaction where this inode was
* either the source or the destination of a clone/dedupe operation.
* Used when logging an inode to know if there are shared extents that
* need special care when logging checksum items, to avoid duplicate
* checksum items in a log (which can lead to a corruption where we end
* up with missing checksum ranges after log replay).
* Protected by the vfs inode lock.
*/
u64 last_reflink_trans;
/*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
......
......@@ -631,10 +631,8 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
int pass;
selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
if (NULL == selected_super) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!selected_super)
return -ENOMEM;
}
list_for_each_entry(device, dev_head, dev_list) {
int i;
......@@ -795,7 +793,6 @@ static int btrfsic_process_superblock_dev_mirror(
if (NULL == superblock_tmp) {
superblock_tmp = btrfsic_block_alloc();
if (NULL == superblock_tmp) {
pr_info("btrfsic: error, kmalloc failed!\n");
ret = -1;
goto out;
}
......@@ -921,9 +918,7 @@ static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
struct btrfsic_stack_frame *sf;
sf = kzalloc(sizeof(*sf), GFP_NOFS);
if (NULL == sf)
pr_info("btrfsic: alloc memory failed!\n");
else
if (sf)
sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
return sf;
}
......@@ -1313,7 +1308,6 @@ static int btrfsic_create_link_to_next_block(
if (NULL == l) {
l = btrfsic_block_link_alloc();
if (NULL == l) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(next_block_ctx);
*next_blockp = NULL;
return -1;
......@@ -1470,7 +1464,6 @@ static int btrfsic_handle_extent_data(
mirror_num,
&block_was_created);
if (NULL == next_block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&next_block_ctx);
return -1;
}
......@@ -2013,7 +2006,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
block = btrfsic_block_alloc();
if (NULL == block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&block_ctx);
goto continue_loop;
}
......@@ -2234,7 +2226,6 @@ static int btrfsic_process_written_superblock(
mirror_num,
&was_created);
if (NULL == next_block) {
pr_info("btrfsic: error, kmalloc failed!\n");
btrfsic_release_block_ctx(&tmp_next_block_ctx);
return -1;
}
......@@ -2542,10 +2533,8 @@ static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
&state->block_link_hashtable);
if (NULL == l) {
l = btrfsic_block_link_alloc();
if (NULL == l) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!l)
return NULL;
}
l->block_ref_to = next_block;
l->block_ref_from = from_block;
......@@ -2589,10 +2578,9 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
struct btrfsic_dev_state *dev_state;
block = btrfsic_block_alloc();
if (NULL == block) {
pr_info("btrfsic: error, kmalloc failed!\n");
if (!block)
return NULL;
}
dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
if (NULL == dev_state) {
pr_info("btrfsic: error, lookup dev_state failed!\n");
......@@ -2797,10 +2785,8 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
return -1;
}
state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
pr_info("btrfs check-integrity: allocation failed!\n");
if (!state)
return -ENOMEM;
}
if (!btrfsic_is_initialized) {
mutex_init(&btrfsic_mutex);
......@@ -2829,7 +2815,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
ds = btrfsic_dev_state_alloc();
if (NULL == ds) {
pr_info("btrfs check-integrity: kmalloc() failed!\n");
mutex_unlock(&btrfsic_mutex);
return -ENOMEM;
}
......
......@@ -172,18 +172,17 @@ static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
}
static int check_compressed_csum(struct btrfs_inode *inode,
struct compressed_bio *cb,
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
u64 disk_start)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int ret;
struct page *page;
unsigned long i;
char *kaddr;
u8 csum[BTRFS_CSUM_SIZE];
struct compressed_bio *cb = bio->bi_private;
u8 *cb_sum = cb->sums;
if (inode->flags & BTRFS_INODE_NODATASUM)
......@@ -201,15 +200,15 @@ static int check_compressed_csum(struct btrfs_inode *inode,
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
ret = -EIO;
goto fail;
if (btrfs_io_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_io_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
cb_sum += csum_size;
}
ret = 0;
fail:
return ret;
return 0;
}
/* when we finish reading compressed pages from the disk, we
......@@ -244,7 +243,6 @@ static void end_compressed_bio_read(struct bio *bio)
* Record the correct mirror_num in cb->orig_bio so that
* read-repair can work properly.
*/
ASSERT(btrfs_io_bio(cb->orig_bio));
btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
cb->mirror_num = mirror;
......@@ -256,7 +254,7 @@ static void end_compressed_bio_read(struct bio *bio)
goto csum_failed;
inode = cb->inode;
ret = check_compressed_csum(BTRFS_I(inode), cb,
ret = check_compressed_csum(BTRFS_I(inode), bio,
(u64)bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
......@@ -405,7 +403,7 @@ static void end_compressed_bio_write(struct bio *bio)
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
......@@ -413,7 +411,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
struct compressed_bio *cb;
unsigned long bytes_left;
......@@ -421,7 +419,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct page *page;
u64 first_byte = disk_start;
blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
WARN_ON(!PAGE_ALIGNED(start));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
......@@ -429,7 +427,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
......@@ -455,7 +453,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
int submit = 0;
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->mapping = inode->vfs_inode.i_mapping;
if (bio->bi_iter.bi_size)
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0);
......
......@@ -8,6 +8,8 @@
#include <linux/sizes.h>
struct btrfs_inode;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
* reasonable, so we limit the total size in ram of a compressed extent to
......@@ -88,7 +90,7 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
......
......@@ -1501,6 +1501,22 @@ static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
return 0;
}
#ifdef __LITTLE_ENDIAN
/*
* Compare two keys, on little-endian the disk order is same as CPU order and
* we can avoid the conversion.
*/
static int comp_keys(const struct btrfs_disk_key *disk_key,
const struct btrfs_key *k2)
{
const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
return btrfs_comp_cpu_keys(k1, k2);
}
#else
/*
* compare two keys in a memcmp fashion
*/
......@@ -1513,6 +1529,7 @@ static int comp_keys(const struct btrfs_disk_key *disk,
return btrfs_comp_cpu_keys(&k1, k2);
}
#endif
/*
* same as comp_keys only with two btrfs_key's
......
This diff is collapsed.
......@@ -237,10 +237,10 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
return 0;
}
int btrfs_check_data_free_space(struct inode *inode,
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
/* align the range */
......@@ -248,14 +248,14 @@ int btrfs_check_data_free_space(struct inode *inode,
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
ret = btrfs_alloc_data_chunk_ondemand(inode, len);
if (ret < 0)
return ret;
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
if (ret < 0)
btrfs_free_reserved_data_space_noquota(inode, start, len);
btrfs_free_reserved_data_space_noquota(fs_info, len);
else
ret = 0;
return ret;
......@@ -269,16 +269,12 @@ int btrfs_check_data_free_space(struct inode *inode,
* which we can't sleep and is sure it won't affect qgroup reserved space.
* Like clear_bit_hook().
*/
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_space_info *data_sinfo;
/* Make sure the range is aligned to sectorsize */
len = round_up(start + len, fs_info->sectorsize) -
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
data_sinfo = fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
......@@ -293,17 +289,17 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
* This one will handle the per-inode data rsv map for accurate reserved
* space framework.
*/
void btrfs_free_reserved_data_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
/* Make sure the range is aligned to sectorsize */
len = round_up(start + len, root->fs_info->sectorsize) -
round_down(start, root->fs_info->sectorsize);
start = round_down(start, root->fs_info->sectorsize);
len = round_up(start + len, fs_info->sectorsize) -
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(inode, start, len);
btrfs_free_reserved_data_space_noquota(fs_info, len);
btrfs_qgroup_free_data(inode, reserved, start, len);
}
......@@ -557,7 +553,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
* Return 0 for success
* Return <0 for error(-ENOSPC or -EQUOT)
*/
int btrfs_delalloc_reserve_space(struct inode *inode,
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len)
{
int ret;
......@@ -565,7 +561,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
ret = btrfs_delalloc_reserve_metadata(inode, len);
if (ret < 0)
btrfs_free_reserved_data_space(inode, *reserved, start, len);
return ret;
......@@ -583,10 +579,10 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
* list if there are no delalloc bytes left.
* Also it will handle the qgroup reserved space.
*/
void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free)
{
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
btrfs_delalloc_release_metadata(inode, len, qgroup_free);
btrfs_free_reserved_data_space(inode, reserved, start, len);
}
......@@ -6,18 +6,18 @@
struct extent_changeset;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct inode *inode,
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
void btrfs_free_reserved_data_space(struct inode *inode,
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct inode *inode,
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
struct extent_changeset *reserved,
u64 start, u64 len, bool qgroup_free);
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
u64 len);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free);
int btrfs_delalloc_reserve_space(struct inode *inode,
int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
#endif /* BTRFS_DELALLOC_SPACE_H */
......@@ -1116,6 +1116,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
mutex_init(&root->log_mutex);
mutex_init(&root->ordered_extent_mutex);
mutex_init(&root->delalloc_mutex);
init_waitqueue_head(&root->qgroup_flush_wait);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
......@@ -1141,10 +1142,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
if (!dummy)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
......@@ -1395,7 +1392,12 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
goto out;
}
static int btrfs_init_fs_root(struct btrfs_root *root)
/*
* Initialize subvolume root in-memory structure
*
* @anon_dev: anonymous device to attach to the root, if zero, allocate new
*/
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{
int ret;
unsigned int nofs_flag;
......@@ -1428,9 +1430,20 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
spin_lock_init(&root->ino_cache_lock);
init_waitqueue_head(&root->ino_cache_wait);
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
/*
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
if (is_fstree(root->root_key.objectid) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
} else {
root->anon_dev = anon_dev;
}
}
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
......@@ -1534,8 +1547,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
}
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
/*
* Get an in-memory reference of a root structure.
*
* For essential trees like root/extent tree, we grab it from fs_info directly.
* For subvolume trees, we check the cached filesystem roots first. If not
* found, then read it from disk and add it to cached fs roots.
*
* Caller should release the root by calling btrfs_put_root() after the usage.
*
* NOTE: Reloc and log trees can't be read by this function as they share the
* same root objectid.
*
* @objectid: root id
* @anon_dev: preallocated anonymous block device number for new roots,
* pass 0 for new allocation.
* @check_ref: whether to check root item references, If true, return -ENOENT
* for orphan roots
*/
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev,
bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
......@@ -1564,6 +1596,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
/* Shouldn't get preallocated anon_dev for cached roots */
ASSERT(!anon_dev);
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
......@@ -1583,7 +1617,7 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
goto fail;
}
ret = btrfs_init_fs_root(root);
ret = btrfs_init_fs_root(root, anon_dev);
if (ret)
goto fail;
......@@ -1616,6 +1650,33 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
return ERR_PTR(ret);
}
/*
* Get in-memory reference of a root structure
*
* @objectid: tree objectid
* @check_ref: if set, verify that the tree exists and the item has at least
* one reference
*/
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref)
{
return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
}
/*
* Get in-memory reference of a root structure, created as new, optionally pass
* the anonymous block device id
*
* @objectid: tree objectid
* @anon_dev: if zero, allocate a new anonymous block device or use the
* parameter value
*/
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev)
{
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
{
struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
......@@ -1749,7 +1810,6 @@ static int transaction_kthread(void *arg)
now = ktime_get_seconds();
if (cur->state < TRANS_STATE_COMMIT_START &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < fs_info->commit_interval)) {
spin_unlock(&fs_info->trans_lock);
......@@ -2001,8 +2061,7 @@ void btrfs_put_root(struct btrfs_root *root)
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
btrfs_drew_lock_destroy(&root->snapshot_lock);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
free_root_extent_buffers(root);
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
#ifdef CONFIG_BTRFS_DEBUG
......@@ -4058,6 +4117,11 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
ASSERT(list_empty(&fs_info->delayed_iputs));
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
if (btrfs_check_quota_leak(fs_info)) {
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
btrfs_err(fs_info, "qgroup reserved space leaked");
}
btrfs_free_qgroup_config(fs_info);
ASSERT(list_empty(&fs_info->delalloc_roots));
......
......@@ -67,6 +67,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
......
......@@ -233,14 +233,11 @@ bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
struct extent_state **cached_state);
/* This should be reworked in the future and put elsewhere. */
int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec);
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start);
int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec);
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);
......
......@@ -5298,7 +5298,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out;
}
trans = btrfs_start_transaction(tree_root, 0);
/*
* Use join to avoid potential EINTR from transaction start. See
* wait_reserve_ticket and the whole reservation callchain.
*/
if (for_reloc)
trans = btrfs_join_transaction(tree_root);
else
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out_free;
......@@ -5466,6 +5473,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
}
}
/*
* This subvolume is going to be completely dropped, and won't be
* recorded as dirty roots, thus pertrans meta rsv will not be freed at
* commit transaction time. So free it here manually.
*/
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
btrfs_qgroup_free_meta_all_pertrans(root);
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
btrfs_add_dropped_root(trans, root);
else
......
This diff is collapsed.
......@@ -204,7 +204,7 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
u64 start, u64 len);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
......@@ -277,7 +277,7 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
......
......@@ -522,10 +522,10 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
* means this bio can contains potentially discontigous bio vecs
* so the logical offset of each should be calculated separately.
*/
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
......
This diff is collapsed.
......@@ -1334,8 +1334,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_zero_remaining_pages(io_ctl);
/* Everything is written out, now we dirty the pages in the file. */
ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
i_size_read(inode), &cached_state);
ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
io_ctl->num_pages, 0, i_size_read(inode),
&cached_state);
if (ret)
goto out_nospc;
......@@ -2703,8 +2704,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
* pointed to by the cluster, someone else raced in and freed the
* cluster already. In that case, we just return without changing anything
*/
static int
__btrfs_return_cluster_to_free_space(
static void __btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
......@@ -2756,7 +2756,6 @@ __btrfs_return_cluster_to_free_space(
out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
return 0;
}
static void __btrfs_remove_free_space_cache_locked(
......@@ -2907,12 +2906,11 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
* Otherwise, it'll get a reference on the block group pointed to by the
* cluster and remove the cluster from it.
*/
int btrfs_return_cluster_to_free_space(
void btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl;
int ret;
/* first, get a safe pointer to the block group */
spin_lock(&cluster->lock);
......@@ -2920,28 +2918,27 @@ int btrfs_return_cluster_to_free_space(
block_group = cluster->block_group;
if (!block_group) {
spin_unlock(&cluster->lock);
return 0;
return;
}
} else if (cluster->block_group != block_group) {
/* someone else has already freed it don't redo their work */
spin_unlock(&cluster->lock);
return 0;
return;
}
atomic_inc(&block_group->count);
btrfs_get_block_group(block_group);
spin_unlock(&cluster->lock);
ctl = block_group->free_space_ctl;
/* now return any extents the cluster had on it */
spin_lock(&ctl->tree_lock);
ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
__btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&ctl->tree_lock);
btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
/* finally drop our ref */
btrfs_put_block_group(block_group);
return ret;
}
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
......@@ -3358,7 +3355,7 @@ int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
list_del_init(&entry->list);
if (!ret) {
atomic_inc(&block_group->count);
btrfs_get_block_group(block_group);
list_add_tail(&cluster->block_group_list,
&block_group->cluster_list);
cluster->block_group = block_group;
......
......@@ -136,7 +136,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
void btrfs_return_cluster_to_free_space(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
......
......@@ -495,7 +495,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_SIZE;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc);
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, 0,
prealloc);
if (ret)
goto out_put;
......
This diff is collapsed.
......@@ -164,8 +164,11 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
/* Check if @flags are a supported and valid set of FS_*_FL flags */
static int check_fsflags(unsigned int flags)
/*
* Check if @flags are a supported and valid set of FS_*_FL flags and that
* the old and new flags are not conflicting
*/
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
FS_NOATIME_FL | FS_NODUMP_FL | \
......@@ -174,9 +177,19 @@ static int check_fsflags(unsigned int flags)
FS_NOCOW_FL))
return -EOPNOTSUPP;
/* COMPR and NOCOMP on new/old are valid */
if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
return -EINVAL;
if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
return -EINVAL;
/* NOCOW and compression options are mutually exclusive */
if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
return -EINVAL;
if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
return -EINVAL;
return 0;
}
......@@ -190,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
u32 binode_flags = binode->flags;
u32 binode_flags;
if (!inode_owner_or_capable(inode))
return -EPERM;
......@@ -201,22 +214,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
return -EFAULT;
ret = check_fsflags(fsflags);
if (ret)
return ret;
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(inode);
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
if (ret)
goto out_unlock;
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
goto out_unlock;
binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
else
......@@ -566,6 +580,7 @@ static noinline int create_subvol(struct inode *dir,
struct inode *inode;
int ret;
int err;
dev_t anon_dev = 0;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
......@@ -578,6 +593,10 @@ static noinline int create_subvol(struct inode *dir,
if (ret)
goto fail_free;
ret = get_anon_bdev(&anon_dev);
if (ret < 0)
goto fail_free;
/*
* Don't create subvolume whose level is not zero. Or qgroup will be
* screwed up since it assumes subvolume qgroup's level to be 0.
......@@ -660,12 +679,15 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
key.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, objectid, true);
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
if (IS_ERR(new_root)) {
free_anon_bdev(anon_dev);
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
goto fail;
}
/* Freeing will be done in btrfs_put_root() of new_root */
anon_dev = 0;
btrfs_record_root_in_trans(trans, new_root);
......@@ -735,6 +757,8 @@ static noinline int create_subvol(struct inode *dir,
return ret;
fail_free:
if (anon_dev)
free_anon_bdev(anon_dev);
kfree(root_item);
return ret;
}
......@@ -762,6 +786,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!pending_snapshot)
return -ENOMEM;
ret = get_anon_bdev(&pending_snapshot->anon_dev);
if (ret < 0)
goto free_pending;
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
GFP_KERNEL);
pending_snapshot->path = btrfs_alloc_path();
......@@ -823,10 +850,16 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
d_instantiate(dentry, inode);
ret = 0;
pending_snapshot->anon_dev = 0;
fail:
/* Prevent double freeing of anon_dev */
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
kfree(pending_snapshot);
......@@ -1243,7 +1276,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
if (ret)
......@@ -1265,7 +1298,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
while (1) {
lock_extent_bits(tree, page_start, page_end,
&cached_state);
ordered = btrfs_lookup_ordered_extent(inode,
ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
page_start);
unlock_extent_cached(tree, page_start, page_end,
&cached_state);
......@@ -1333,7 +1366,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, data_reserved,
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start_index << PAGE_SHIFT,
(page_cnt - i_done) << PAGE_SHIFT, true);
}
......@@ -1361,7 +1394,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
unlock_page(pages[i]);
put_page(pages[i]);
}
btrfs_delalloc_release_space(inode, data_reserved,
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
......@@ -3198,11 +3231,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u64 flags_in;
int ret = 0;
fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
if (!fi_args)
return -ENOMEM;
fi_args = memdup_user(arg, sizeof(*fi_args));
if (IS_ERR(fi_args))
return PTR_ERR(fi_args);
flags_in = fi_args->flags;
memset(fi_args, 0, sizeof(*fi_args));
rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
......@@ -3218,6 +3255,23 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
}
if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
fi_args->generation = fs_info->generation;
fi_args->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
}
if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
sizeof(fi_args->metadata_uuid));
fi_args->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
}
if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
ret = -EFAULT;
......
......@@ -15,6 +15,7 @@
#include "disk-io.h"
#include "compression.h"
#include "delalloc-space.h"
#include "qgroup.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
......@@ -152,23 +153,39 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
/* allocate and add a new ordered_extent into the per-inode tree.
/*
* Allocate and add a new ordered_extent into the per-inode tree.
*
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type, int dio,
int compress_type)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ret;
ret = 0;
} else {
/*
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
if (ret < 0)
return ret;
}
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
if (!entry)
return -ENOMEM;
......@@ -178,9 +195,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->num_bytes = num_bytes;
entry->disk_num_bytes = disk_num_bytes;
entry->bytes_left = num_bytes;
entry->inode = igrab(inode);
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
......@@ -197,10 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
INIT_LIST_HEAD(&entry->log_list);
INIT_LIST_HEAD(&entry->trans_list);
trace_btrfs_ordered_extent_add(inode, entry);
trace_btrfs_ordered_extent_add(&inode->vfs_inode, entry);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
......@@ -228,14 +244,14 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, 1);
spin_unlock(&inode->lock);
return 0;
}
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type)
{
......@@ -244,7 +260,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type)
{
......@@ -253,7 +269,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type,
int compress_type)
......@@ -291,12 +307,12 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
* file_offset is updated to one byte past the range that is recorded as
* complete. This allows you to walk forward in the file.
*/
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size, int uptodate)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_inode_tree *tree;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
int ret;
......@@ -305,7 +321,6 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
u64 dec_start;
u64 to_dec;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, *file_offset);
if (!node) {
......@@ -429,8 +444,6 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (refcount_dec_and_test(&entry->refs)) {
ASSERT(list_empty(&entry->log_list));
ASSERT(list_empty(&entry->trans_list));
ASSERT(list_empty(&entry->root_extent_list));
ASSERT(RB_EMPTY_NODE(&entry->rb_node));
if (entry->inode)
......@@ -698,14 +711,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* find an ordered extent corresponding to file_offset. return NULL if
* nothing is found, otherwise take a reference on the extent and return it
*/
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
tree = &inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
......@@ -803,7 +816,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int index = 0;
ordered = btrfs_lookup_ordered_extent(inode, offset);
ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), offset);
if (!ordered)
return 0;
......
......@@ -92,6 +92,9 @@ struct btrfs_ordered_extent {
/* compression algorithm */
int compress_type;
/* Qgroup reserved space */
int qgroup_rsv;
/* reference count */
refcount_t refs;
......@@ -101,12 +104,6 @@ struct btrfs_ordered_extent {
/* list of checksums for insertion when the extent io is done */
struct list_head list;
/* If we need to wait on this to be done */
struct list_head log_list;
/* If the transaction needs to wait on this ordered extent */
struct list_head trans_list;
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait;
......@@ -150,23 +147,23 @@ void btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type);
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type,
int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
......
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/kobject.h>
#include "ulist.h"
#include "delayed-ref.h"
......@@ -223,8 +224,18 @@ struct btrfs_qgroup {
*/
u64 old_refcnt;
u64 new_refcnt;
/*
* Sysfs kobjectid
*/
struct kobject kobj;
};
static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
{
return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
}
/*
* For qgroup event trace points only
*/
......@@ -344,12 +355,12 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
#endif
/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct inode *inode,
int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for pertrans and prealloc type */
......@@ -399,7 +410,7 @@ void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
*/
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode);
/* btrfs_qgroup_swapped_blocks related functions */
void btrfs_qgroup_init_swapped_blocks(
......@@ -415,5 +426,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
#endif
......@@ -1083,7 +1083,6 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
unsigned long bio_max_len)
{
struct bio *last = bio_list->tail;
u64 last_end = 0;
int ret;
struct bio *bio;
struct btrfs_bio_stripe *stripe;
......@@ -1098,15 +1097,14 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
last_end = (u64)last->bi_iter.bi_sector << 9;
u64 last_end = (u64)last->bi_iter.bi_sector << 9;
last_end += last->bi_iter.bi_size;
/*
* we can't merge these if they are from different
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
!last->bi_status &&
if (last_end == disk_start && !last->bi_status &&
last->bi_disk == stripe->dev->bdev->bd_disk &&
last->bi_partno == stripe->dev->bdev->bd_partno) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
......@@ -1117,6 +1115,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* put a new bio on the list */
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
btrfs_io_bio(bio)->device = stripe->dev;
bio->bi_iter.bi_size = 0;
bio_set_dev(bio, stripe->dev->bdev);
bio->bi_iter.bi_sector = disk_start >> 9;
......@@ -1325,11 +1324,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
bio->bi_opf = REQ_OP_WRITE;
......@@ -1354,7 +1349,6 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
u64 physical = bio->bi_iter.bi_sector;
u64 stripe_start;
int i;
struct btrfs_bio_stripe *stripe;
......@@ -1362,9 +1356,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
for (i = 0; i < rbio->bbio->num_stripes; i++) {
stripe = &rbio->bbio->stripes[i];
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
if (in_range(physical, stripe->physical, rbio->stripe_len) &&
stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
......@@ -1382,18 +1374,14 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
u64 logical = bio->bi_iter.bi_sector;
u64 stripe_start;
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
int i;
logical <<= 9;
for (i = 0; i < rbio->nr_data; i++) {
stripe_start = rbio->bbio->raid_map[i];
if (logical >= stripe_start &&
logical < stripe_start + rbio->stripe_len) {
u64 stripe_start = rbio->bbio->raid_map[i];
if (in_range(logical, stripe_start, rbio->stripe_len))
return i;
}
}
return -1;
}
......@@ -1567,11 +1555,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
bio->bi_opf = REQ_OP_READ;
......@@ -1878,11 +1862,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
}
/* make sure our ps and qs are in order */
if (faila > failb) {
int tmp = failb;
failb = faila;
faila = tmp;
}
if (faila > failb)
swap(faila, failb);
/* if the q stripe is failed, do a pstripe reconstruction
* from the xors.
......@@ -2102,7 +2083,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
*/
if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
__raid_recover_end_io(rbio);
goto out;
return 0;
} else {
goto cleanup;
}
......@@ -2113,11 +2094,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
bio->bi_opf = REQ_OP_READ;
......@@ -2126,7 +2103,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
submit_bio(bio);
}
out:
return 0;
cleanup:
......@@ -2482,11 +2459,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
atomic_set(&rbio->stripes_pending, nr_data);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
bio->bi_opf = REQ_OP_WRITE;
......@@ -2664,11 +2637,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
* not to touch it after that
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while (1) {
bio = bio_list_pop(&bio_list);
if (!bio)
break;
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
bio->bi_opf = REQ_OP_READ;
......
......@@ -286,6 +286,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
exist_re = insert_root_entry(&exist->roots, re);
if (exist_re)
kfree(re);
} else {
kfree(re);
}
kfree(be);
return exist;
......
......@@ -68,8 +68,8 @@ static int copy_inline_to_page(struct inode *inode,
* reservation here. Also we must not do the reservation while holding
* a transaction open, otherwise we would deadlock.
*/
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, file_offset,
block_size);
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
file_offset, block_size);
if (ret)
goto out;
......@@ -84,7 +84,8 @@ static int copy_inline_to_page(struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, file_offset, range_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, NULL);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), file_offset, range_end,
0, NULL);
if (ret)
goto out_unlock;
......@@ -133,8 +134,8 @@ static int copy_inline_to_page(struct inode *inode,
put_page(page);
}
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, file_offset,
block_size, true);
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
file_offset, block_size, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), block_size);
out:
extent_changeset_free(data_reserved);
......@@ -336,6 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
while (1) {
u64 next_key_min_offset = key.offset + 1;
struct btrfs_file_extent_item *extent;
u64 extent_gen;
int type;
u32 size;
struct btrfs_key new_key;
......@@ -384,6 +386,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
extent_gen = btrfs_file_extent_generation(leaf, extent);
comp = btrfs_file_extent_compression(leaf, extent);
type = btrfs_file_extent_type(leaf, extent);
if (type == BTRFS_FILE_EXTENT_REG ||
......@@ -488,6 +491,19 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
btrfs_release_path(path);
/*
* If this is a new extent update the last_reflink_trans of both
* inodes. This is used by fsync to make sure it does not log
* multiple checksum items with overlapping ranges. For older
* extents we don't need to do it since inode logging skips the
* checksums for older extents. Also ignore holes and inline
* extents because they don't have checksums in the csum tree.
*/
if (extent_gen == trans->transid && disko > 0) {
BTRFS_I(src)->last_reflink_trans = trans->transid;
BTRFS_I(inode)->last_reflink_trans = trans->transid;
}
last_dest_end = ALIGN(new_key.offset + datal,
fs_info->sectorsize);
ret = clone_finish_inode_update(trans, inode, last_dest_end,
......
......@@ -1686,12 +1686,20 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
btrfs_unlock_up_safe(path, 0);
}
min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
/*
* In merge_reloc_root(), we modify the upper level pointer to swap the
* tree blocks between reloc tree and subvolume tree. Thus for tree
* block COW, we COW at most from level 1 to root level for each tree.
*
* Thus the needed metadata size is at most root_level * nodesize,
* and * 2 since we have two trees to COW.
*/
min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
memset(&next_key, 0, sizeof(next_key));
while (1) {
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
err = ret;
goto out;
......@@ -2571,58 +2579,50 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
return err;
}
static noinline_for_stack
int prealloc_file_extent_cluster(struct inode *inode,
struct file_extent_cluster *cluster)
static noinline_for_stack int prealloc_file_extent_cluster(
struct btrfs_inode *inode,
struct file_extent_cluster *cluster)
{
u64 alloc_hint = 0;
u64 start;
u64 end;
u64 offset = BTRFS_I(inode)->index_cnt;
u64 offset = inode->index_cnt;
u64 num_bytes;
int nr = 0;
int nr;
int ret = 0;
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset;
struct extent_changeset *data_reserved = NULL;
u64 cur_offset = prealloc_start;
BUG_ON(cluster->start != cluster->boundary[0]);
inode_lock(inode);
ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
prealloc_end + 1 - prealloc_start);
ret = btrfs_alloc_data_chunk_ondemand(inode,
prealloc_end + 1 - prealloc_start);
if (ret)
goto out;
return ret;
cur_offset = prealloc_start;
while (nr < cluster->nr) {
inode_lock(&inode->vfs_inode);
for (nr = 0; nr < cluster->nr; nr++) {
start = cluster->boundary[nr] - offset;
if (nr + 1 < cluster->nr)
end = cluster->boundary[nr + 1] - 1 - offset;
else
end = cluster->end - offset;
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&inode->io_tree, start, end);
num_bytes = end + 1 - start;
if (cur_offset < start)
btrfs_free_reserved_data_space(inode, data_reserved,
cur_offset, start - cur_offset);
ret = btrfs_prealloc_file_range(inode, 0, start,
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
cur_offset = end + 1;
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
unlock_extent(&inode->io_tree, start, end);
if (ret)
break;
nr++;
}
inode_unlock(&inode->vfs_inode);
if (cur_offset < prealloc_end)
btrfs_free_reserved_data_space(inode, data_reserved,
cur_offset, prealloc_end + 1 - cur_offset);
out:
inode_unlock(inode);
extent_changeset_free(data_reserved);
btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
prealloc_end + 1 - cur_offset);
return ret;
}
......@@ -2664,7 +2664,8 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
*/
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
{
return atomic_read(&fs_info->balance_cancel_req);
return atomic_read(&fs_info->balance_cancel_req) ||
fatal_signal_pending(current);
}
ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
......@@ -2690,7 +2691,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!ra)
return -ENOMEM;
ret = prealloc_file_extent_cluster(inode, cluster);
ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
if (ret)
goto out;
......@@ -2762,8 +2763,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
nr++;
}
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
page_end, 0, NULL);
if (ret) {
unlock_page(page);
put_page(page);
......@@ -3872,9 +3873,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)
* cloning checksum properly handles the nodatasum extents.
* it also saves CPU time to re-calculate the checksum.
*/
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered;
int ret;
......@@ -3885,7 +3886,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
disk_bytenr = file_pos + inode->index_cnt;
ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0);
if (ret)
......
This diff is collapsed.
......@@ -468,8 +468,8 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
cache->start, cache->length, cache->used, cache->pinned,
cache->reserved, cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
......
This diff is collapsed.
This diff is collapsed.
......@@ -36,4 +36,11 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
void btrfs_sysfs_update_devid(struct btrfs_device *device);
int btrfs_sysfs_add_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup);
void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info);
int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_del_one_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *qgroup);
#endif
......@@ -60,8 +60,6 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
if (i >= num_extents)
goto invalid;
if (i >= num_extents ||
extent_start != extents[i].start ||
offset - extent_start != extents[i].length)
......
......@@ -954,8 +954,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
btrfs_test_inode_set_ops(inode);
/* [BTRFS_MAX_EXTENT_SIZE] */
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), 0,
BTRFS_MAX_EXTENT_SIZE - 1, 0, NULL);
if (ret) {
test_err("btrfs_set_extent_delalloc returned %d", ret);
goto out;
......@@ -968,7 +968,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE,
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
0, NULL);
if (ret) {
......@@ -999,7 +999,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
}
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1)
+ sectorsize - 1,
0, NULL);
......@@ -1017,7 +1017,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
0, NULL);
......@@ -1035,7 +1035,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/*
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
if (ret) {
......@@ -1069,7 +1069,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
* Refill the hole again just for good measure, because I thought it
* might fail and I'd rather satisfy my paranoia at this point.
*/
ret = btrfs_set_extent_delalloc(inode,
ret = btrfs_set_extent_delalloc(BTRFS_I(inode),
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL);
if (ret) {
......
......@@ -937,7 +937,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (TRANS_ABORTED(trans) ||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
wake_up_process(info->transaction_kthread);
err = -EIO;
if (TRANS_ABORTED(trans))
err = trans->aborted;
else
err = -EROFS;
}
kmem_cache_free(btrfs_trans_handle_cachep, trans);
......@@ -1630,7 +1633,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
btrfs_abort_transaction(trans, ret);
......@@ -2351,7 +2354,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
spin_lock(&fs_info->trans_lock);
list_del_init(&cur_trans->list);
......
......@@ -151,18 +151,20 @@ struct btrfs_pending_snapshot {
struct btrfs_block_rsv block_rsv;
/* extra metadata reservation for relocation */
int error;
/* Preallocated anonymous block device number */
dev_t anon_dev;
bool readonly;
struct list_head list;
};
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode)
struct btrfs_inode *inode)
{
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
spin_unlock(&BTRFS_I(inode)->lock);
spin_lock(&inode->lock);
inode->last_trans = trans->transaction->transid;
inode->last_sub_trans = inode->root->log_transid;
inode->last_log_commit = inode->root->last_log_commit;
spin_unlock(&inode->lock);
}
/*
......@@ -208,20 +210,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
int wait_for_unblock);
/*
* Try to commit transaction asynchronously, so this is safe to call
* even holding a spinlock.
*
* It's done by informing transaction_kthread to commit transaction without
* waiting for commit interval.
*/
static inline void btrfs_commit_transaction_locksafe(
struct btrfs_fs_info *fs_info)
{
set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags);
wake_up_process(fs_info->transaction_kthread);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info);
......
......@@ -133,10 +133,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
ret = 0;
}
done:
if (ret != -EAGAIN) {
if (ret != -EAGAIN)
memset(&root->defrag_progress, 0,
sizeof(root->defrag_progress));
root->defrag_trans_start = trans->transid;
}
return ret;
}
This diff is collapsed.
This diff is collapsed.
......@@ -288,7 +288,7 @@ struct btrfs_fs_devices {
*/
struct btrfs_io_bio {
unsigned int mirror_num;
unsigned int stripe_index;
struct btrfs_device *device;
u64 logical;
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
......
This diff is collapsed.
......@@ -243,6 +243,18 @@ struct btrfs_ioctl_dev_info_args {
__u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
};
/*
* Retrieve information about the filesystem
*/
/* Request information about checksum type and size */
#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0)
/* Request information about filesystem generation */
#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1)
/* Request information about filesystem metadata UUID */
#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2)
struct btrfs_ioctl_fs_info_args {
__u64 max_id; /* out */
__u64 num_devices; /* out */
......@@ -250,8 +262,13 @@ struct btrfs_ioctl_fs_info_args {
__u32 nodesize; /* out */
__u32 sectorsize; /* out */
__u32 clone_alignment; /* out */
__u32 reserved32;
__u64 reserved[122]; /* pad to 1k */
/* See BTRFS_FS_INFO_FLAG_* */
__u16 csum_type; /* out */
__u16 csum_size; /* out */
__u64 flags; /* in/out */
__u64 generation; /* out */
__u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */
__u8 reserved[944]; /* pad to 1k */
};
/*
......
......@@ -913,9 +913,9 @@ struct btrfs_free_space_info {
#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
#define BTRFS_QGROUP_LEVEL_SHIFT 48
static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
{
return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT);
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment