Commit cc423f63 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Mainly core changes, refactoring and optimizations.

  Performance is improved in some areas, overall there may be a
  cumulative improvement due to refactoring that removed lookups in the
  IO path or simplified IO submission tracking.

  Core:

   - submit IO synchronously for fast checksums (crc32c and xxhash),
     remove high priority worker kthread

   - read extent buffer in one go, simplify IO tracking, bio submission
     and locking

   - remove additional tracking of redirtied extent buffers, originally
     added for zoned mode but actually not needed

   - track ordered extent pointer in bio to avoid rbtree lookups during
     IO

   - scrub, use recovered data stripes as cache to avoid unnecessary
     read

   - in zoned mode, optimize logical to physical mappings of extents

   - remove PageError handling, not set by VFS nor writeback

   - cleanups, refactoring, better structure packing

   - lots of error handling improvements

   - more assertions, lockdep annotations

   - print assertion failure with the exact line where it happens

   - tracepoint updates

   - more debugging prints

  Performance:

   - speedup in fsync(), better tracking of inode logged status can
     avoid transaction commit

   - IO path structures track logical offsets in data structures and
     does not need to look it up

  User visible changes:

   - don't commit transaction for every created subvolume, this can
     reduce time when many subvolumes are created in a batch

   - print affected files when relocation fails

   - trigger orphan file cleanup during START_SYNC ioctl

  Notable fixes:

   - fix crash when disabling quota and relocation

   - fix crashes when removing roots from drity list

   - fix transacion abort during relocation when converting from newer
     profiles not covered by fallback

   - in zoned mode, stop reclaiming block groups if filesystem becomes
     read-only

   - fix rare race condition in tree mod log rewind that can miss some
     btree node slots

   - with enabled fsverity, drop up-to-date page bit in case the
     verification fails"

* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
  btrfs: fix race between quota disable and relocation
  btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
  btrfs: fix race when deleting free space root from the dirty cow roots list
  btrfs: fix race when deleting quota root from the dirty cow roots list
  btrfs: tracepoints: also show actual number of the outstanding extents
  btrfs: update i_version in update_dev_time
  btrfs: make btrfs_compressed_bioset static
  btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
  btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
  btrfs: scrub: remove scrub_ctx::csum_list member
  btrfs: do not BUG_ON after failure to migrate space during truncation
  btrfs: do not BUG_ON on failure to get dir index for new snapshot
  btrfs: send: do not BUG_ON() on unexpected symlink data extent
  btrfs: do not BUG_ON() when dropping inode items from log root
  btrfs: replace BUG_ON() at split_item() with proper error handling
  btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
  btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
  btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
  btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
  btrfs: abort transaction at update_ref_for_cow() when ref count is zero
  ...
parents e940efa9 8a4a0b2a
......@@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->pending) > wq->thresh * 2;
}
static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
struct btrfs_fs_info *fs_info)
{
wq->fs_info = fs_info;
atomic_set(&wq->pending, 0);
INIT_LIST_HEAD(&wq->ordered_list);
spin_lock_init(&wq->list_lock);
spin_lock_init(&wq->thres_lock);
}
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name, unsigned int flags,
int limit_active, int thresh)
......@@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
if (!ret)
return NULL;
ret->fs_info = fs_info;
btrfs_init_workqueue(ret, fs_info);
ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
......@@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
return NULL;
}
INIT_LIST_HEAD(&ret->ordered_list);
spin_lock_init(&ret->list_lock);
spin_lock_init(&ret->thres_lock);
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}
struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags)
{
struct btrfs_workqueue *ret;
ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
btrfs_init_workqueue(ret, fs_info);
/* Ordered workqueues don't allow @max_active adjustments. */
ret->limit_active = 1;
ret->current_active = 1;
ret->thresh = NO_THRESHOLD;
ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
}
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}
......
......@@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
unsigned int flags,
int limit_active,
int thresh);
struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
......
......@@ -27,6 +27,17 @@ struct btrfs_failed_bio {
atomic_t repair_count;
};
/* Is this a data path I/O that needs storage layer checksum and repair? */
static inline bool is_data_bbio(struct btrfs_bio *bbio)
{
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
}
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
{
return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
}
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
......@@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
return bbio;
}
static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered;
int ret;
ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
return BLK_STS_IOERR;
ret = btrfs_extract_ordered_extent(bbio, ordered);
btrfs_put_ordered_extent(ordered);
return errno_to_blk_status(ret);
}
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
u64 map_length, bool use_append)
......@@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
bbio->file_offset = orig_bbio->file_offset;
if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
orig_bbio->file_offset += map_length;
orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
}
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}
/* Free a bio that was never submitted to the underlying device. */
static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio))
btrfs_put_ordered_extent(bbio->ordered);
bio_put(&bbio->bio);
}
static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio)) {
struct btrfs_ordered_extent *ordered = bbio->ordered;
bbio->end_io(bbio);
btrfs_put_ordered_extent(ordered);
} else {
bbio->end_io(bbio);
}
}
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
__btrfs_bio_end_io(bbio);
}
static void btrfs_orig_write_end_io(struct bio *bio);
static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
......@@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
if (bbio->bio.bi_status)
btrfs_bbio_propagate_error(bbio, orig_bbio);
bio_put(&bbio->bio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
if (atomic_dec_and_test(&bbio->pending_ios))
bbio->end_io(bbio);
__btrfs_bio_end_io(bbio);
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
......@@ -327,7 +352,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
/* Metadata reads are checked and repaired by the submitter. */
if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
btrfs_orig_bbio_end_io(bbio);
......@@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_orig_bbio_end_io(bbio);
}
......@@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
!(bbio->bio.bi_opf & REQ_META))
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
......@@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
/* Do not leak our private flag into the block layer. */
bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap->dev, bio);
......@@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)
static bool should_async_write(struct btrfs_bio *bbio)
{
/*
* If the I/O is not issued by fsync and friends, (->sync_writers != 0),
* then try to defer the submission to a workqueue to parallelize the
* checksum calculation.
*/
if (atomic_read(&bbio->inode->sync_writers))
/* Submit synchronously if the checksum implementation is fast. */
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
* Submit metadata writes synchronously if the checksum implementation
* is fast, or we are on a zoned device that wants I/O to be submitted
* in order.
* Try to defer the submission to a workqueue to parallelize the
* checksum calculation unless the I/O is issued synchronously.
*/
if (bbio->bio.bi_opf & REQ_META) {
struct btrfs_fs_info *fs_info = bbio->fs_info;
if (op_is_sync(bbio->bio.bi_opf))
return false;
if (btrfs_is_zoned(fs_info))
return false;
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;
}
/* Zoned devices require I/O to be submitted in order. */
if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
return false;
return true;
}
......@@ -622,10 +638,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
run_one_async_free);
if (op_is_sync(bbio->bio.bi_opf))
btrfs_queue_work(fs_info->hipri_workers, &async->work);
else
btrfs_queue_work(fs_info->workers, &async->work);
btrfs_queue_work(fs_info->workers, &async->work);
return true;
}
......@@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
bool use_append = btrfs_use_zone_append(bbio);
......@@ -645,8 +658,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
int error;
btrfs_bio_counter_inc_blocked(fs_info);
error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (error) {
ret = errno_to_blk_status(error);
goto fail;
......@@ -665,7 +678,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Save the iter for the end_io handler and preload the checksums for
* data reads.
*/
if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
......@@ -676,9 +689,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
if (use_append) {
bio->bi_opf &= ~REQ_OP_WRITE;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
ret = btrfs_bio_extract_ordered_extent(bbio);
if (ret)
goto fail_put_bio;
}
/*
......@@ -695,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
} else if (use_append) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
goto fail_put_bio;
}
}
......@@ -704,7 +718,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
fail_put_bio:
if (map_length < length)
bio_put(bio);
btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(orig_bbio, ret);
......
......@@ -39,8 +39,8 @@ struct btrfs_bio {
union {
/*
* Data checksumming and original I/O information for internal
* use in the btrfs_submit_bio machinery.
* For data reads: checksumming and original I/O information.
* (for internal use in the btrfs_submit_bio machinery only)
*/
struct {
u8 *csum;
......@@ -48,7 +48,20 @@ struct btrfs_bio {
struct bvec_iter saved_iter;
};
/* For metadata parentness verification. */
/*
* For data writes:
* - ordered extent covering the bio
* - pointer to the checksums for this bio
* - original physical address from the allocator
* (for zone append only)
*/
struct {
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sums;
u64 orig_physical;
};
/* For metadata reads: parentness verification. */
struct btrfs_tree_parent_check parent_check;
};
......@@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
struct btrfs_fs_info *fs_info,
btrfs_bio_end_io_t end_io, void *private);
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}
/* Bio only refers to one ordered extent. */
#define REQ_BTRFS_ONE_ORDERED REQ_DRV
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
......
......@@ -95,14 +95,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
}
allowed &= flags;
if (allowed & BTRFS_BLOCK_GROUP_RAID6)
/* Select the highest-redundancy RAID level. */
if (allowed & BTRFS_BLOCK_GROUP_RAID1C4)
allowed = BTRFS_BLOCK_GROUP_RAID1C4;
else if (allowed & BTRFS_BLOCK_GROUP_RAID6)
allowed = BTRFS_BLOCK_GROUP_RAID6;
else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3)
allowed = BTRFS_BLOCK_GROUP_RAID1C3;
else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
allowed = BTRFS_BLOCK_GROUP_RAID5;
else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
allowed = BTRFS_BLOCK_GROUP_RAID10;
else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
allowed = BTRFS_BLOCK_GROUP_RAID1;
else if (allowed & BTRFS_BLOCK_GROUP_DUP)
allowed = BTRFS_BLOCK_GROUP_DUP;
else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
allowed = BTRFS_BLOCK_GROUP_RAID0;
......@@ -1633,11 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
trace_btrfs_add_unused_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&bg->bg_list)) {
btrfs_get_block_group(bg);
trace_btrfs_add_unused_block_group(bg);
list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
} else {
/* Pull out the block group from the reclaim_bgs list. */
list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
}
spin_unlock(&fs_info->unused_bgs_lock);
}
......@@ -1791,8 +1801,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
spin_unlock(&bg->lock);
/* Get out fast, in case we're unmounting the filesystem */
if (btrfs_fs_closing(fs_info)) {
/*
* Get out fast, in case we're read-only or unmounting the
* filesystem. It is OK to drop block groups from the list even
* for the read-only case. As we did sb_start_write(),
* "mount -o remount,ro" won't happen and read-only filesystem
* means it is forced read-only due to a fatal error. So, it
* never gets back to read-write to let us reclaim again.
*/
if (btrfs_need_cleaner_sleep(fs_info)) {
up_write(&space_info->groups_sem);
goto next;
}
......@@ -1823,11 +1840,27 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
next:
if (ret)
btrfs_mark_bg_to_reclaim(bg);
btrfs_put_block_group(bg);
mutex_unlock(&fs_info->reclaim_bgs_lock);
/*
* Reclaiming all the block groups in the list can take really
* long. Prioritize cleaning up unused block groups.
*/
btrfs_delete_unused_bgs(fs_info);
/*
* If we are interrupted by a balance, we can just bail out. The
* cleaner thread restart again if necessary.
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
goto end;
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
end:
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
......@@ -3521,9 +3554,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
set_extent_bit(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
EXTENT_DIRTY, NULL);
}
spin_lock(&trans->transaction->dirty_bgs_lock);
......
......@@ -162,7 +162,14 @@ struct btrfs_block_group {
*/
struct list_head cluster_list;
/* For delayed block group creation or deletion of empty block groups */
/*
* Used for several lists:
*
* 1) struct btrfs_fs_info::unused_bgs
* 2) struct btrfs_fs_info::reclaim_bgs
* 3) struct btrfs_transaction::deleted_bgs
* 4) struct btrfs_trans_handle::new_bgs
*/
struct list_head bg_list;
/* For read-only block groups */
......
......@@ -541,3 +541,22 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv)
{
u64 needed_bytes;
int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
btrfs_calc_metadata_size(fs_info, 1);
spin_lock(&rsv->lock);
if (rsv->reserved < needed_bytes)
ret = -ENOSPC;
else
ret = 0;
spin_unlock(&rsv->lock);
return ret;
}
......@@ -82,6 +82,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u32 blocksize)
......
......@@ -116,9 +116,6 @@ struct btrfs_inode {
unsigned long runtime_flags;
/* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
......@@ -335,7 +332,7 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
if (btrfs_is_free_space_inode(inode))
return;
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
mod);
mod, inode->outstanding_extents);
}
/*
......@@ -407,30 +404,12 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
return true;
}
/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
return (flags | ((u64)ro_flags << 32));
}
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
u32 *flags, u32 *ro_flags)
{
*flags = (u32)inode_item_flags;
*ro_flags = (u32)(inode_item_flags >> 32);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
u32 pgoff, u8 *csum, const u8 * const csum_expected);
int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
struct btrfs_ordered_extent *ordered);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
......
......@@ -1459,13 +1459,13 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_fs_info *fs_info = state->fs_info;
int ret;
u64 length;
struct btrfs_io_context *multi = NULL;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap, *map;
struct btrfs_device *device;
length = len;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
bytenr, &length, &multi, mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, bytenr, &length, &bioc,
NULL, &mirror_num, 0);
if (ret) {
block_ctx_out->start = 0;
block_ctx_out->dev_bytenr = 0;
......@@ -1478,21 +1478,26 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
return ret;
}
device = multi->stripes[0].dev;
if (bioc)
map = &bioc->stripes[0];
else
map = &smap;
device = map->dev;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
!device->bdev || !device->name)
block_ctx_out->dev = NULL;
else
block_ctx_out->dev = btrfsic_dev_state_lookup(
device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->dev_bytenr = map->physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
block_ctx_out->datav = NULL;
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
kfree(multi);
kfree(bioc);
if (NULL == block_ctx_out->dev) {
ret = -ENXIO;
pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
......@@ -1565,7 +1570,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
REQ_OP_READ, GFP_NOFS);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio->bi_iter.bi_sector = dev_bytenr >> SECTOR_SHIFT;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
......
......@@ -37,7 +37,7 @@
#include "file-item.h"
#include "super.h"
struct bio_set btrfs_compressed_bioset;
static struct bio_set btrfs_compressed_bioset;
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
......@@ -211,8 +211,6 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
for (i = 0; i < ret; i++) {
struct folio *folio = fbatch.folios[i];
if (errno)
folio_set_error(folio);
btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
}
......@@ -226,13 +224,8 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
struct compressed_bio *cb =
container_of(work, struct compressed_bio, write_end_work);
/*
* Ok, we're the last bio for this extent, step one is to call back
* into the FS and do all the end_io operations.
*/
btrfs_writepage_endio_finish_ordered(cb->bbio.inode, NULL,
cb->start, cb->start + cb->len - 1,
cb->bbio.bio.bi_status == BLK_STS_OK);
btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
cb->bbio.bio.bi_status == BLK_STS_OK);
if (cb->writeback)
end_compressed_writeback(cb);
......@@ -281,32 +274,31 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
unsigned int compressed_len,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback)
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct compressed_bio *cb;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
write_flags |= REQ_BTRFS_ONE_ORDERED;
ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize));
ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize));
cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE | write_flags,
cb = alloc_compressed_bio(inode, ordered->file_offset,
REQ_OP_WRITE | write_flags,
end_compressed_bio_write);
cb->start = start;
cb->len = len;
cb->start = ordered->file_offset;
cb->len = ordered->num_bytes;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->compressed_len = ordered->disk_num_bytes;
cb->writeback = writeback;
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
cb->bbio.bio.bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_pages(cb);
btrfs_submit_bio(&cb->bbio, 0);
......@@ -421,7 +413,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/
if (!em || cur < em->start ||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> 9) != orig_bio->bi_iter.bi_sector) {
(em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
......@@ -472,7 +464,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
......@@ -538,7 +530,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
if (memstall)
psi_memstall_leave(&pflags);
btrfs_submit_bio(&cb->bbio, mirror_num);
btrfs_submit_bio(&cb->bbio, 0);
return;
out_free_compressed_pages:
......
......@@ -10,6 +10,7 @@
#include "bio.h"
struct btrfs_inode;
struct btrfs_ordered_extent;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
......@@ -86,14 +87,12 @@ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
unsigned int compressed_len,
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
......
This diff is collapsed.
......@@ -541,6 +541,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
......@@ -633,7 +635,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
return btrfs_insert_empty_items(trans, root, path, &batch);
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
......@@ -686,7 +687,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
int btrfs_leaf_free_space(struct extent_buffer *leaf);
int btrfs_leaf_free_space(const struct extent_buffer *leaf);
static inline int is_fstree(u64 rootid)
{
......@@ -702,6 +703,7 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}
u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
......
......@@ -1040,7 +1040,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
clear_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, cached_state);
set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state);
set_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DEFRAG, cached_state);
/* Update the page status */
for (i = start_index - first_index; i <= last_index - first_index; i++) {
......
......@@ -407,7 +407,6 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
}
......@@ -507,6 +506,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
{
struct btrfs_delayed_ref_head *head;
lockdep_assert_held(&delayed_refs->lock);
again:
head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
true);
......@@ -531,7 +531,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
href_node);
}
head->processing = 1;
head->processing = true;
WARN_ON(delayed_refs->num_heads_ready == 0);
delayed_refs->num_heads_ready--;
delayed_refs->run_delayed_start = head->bytenr +
......@@ -549,31 +549,35 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&head->href_node);
atomic_dec(&delayed_refs->num_entries);
delayed_refs->num_heads--;
if (head->processing == 0)
if (!head->processing)
delayed_refs->num_heads_ready--;
}
/*
* Helper to insert the ref_node to the tail or merge with tail.
*
* Return 0 for insert.
* Return >0 for merge.
* Return false if the ref was inserted.
* Return true if the ref was merged into an existing one (and therefore can be
* freed by the caller).
*/
static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
{
struct btrfs_delayed_ref_node *exist;
int mod;
int ret = 0;
spin_lock(&href->lock);
exist = tree_insert(&href->ref_tree, ref);
if (!exist)
goto inserted;
if (!exist) {
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
spin_unlock(&href->lock);
return false;
}
/* Now we are sure we can merge */
ret = 1;
if (exist->action == ref->action) {
mod = ref->ref_mod;
} else {
......@@ -600,13 +604,7 @@ static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
if (exist->ref_mod == 0)
drop_delayed_ref(root, href, exist);
spin_unlock(&href->lock);
return ret;
inserted:
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
spin_unlock(&href->lock);
return ret;
return true;
}
/*
......@@ -699,34 +697,38 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
bool is_system)
{
int count_mod = 1;
int must_insert_reserved = 0;
bool must_insert_reserved = false;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
/*
* The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
if (action == BTRFS_UPDATE_DELAYED_HEAD)
switch (action) {
case BTRFS_UPDATE_DELAYED_HEAD:
count_mod = 0;
else if (action == BTRFS_DROP_DELAYED_REF)
break;
case BTRFS_DROP_DELAYED_REF:
/*
* The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
count_mod = -1;
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
* accounting when the extent is finally added, or if a later
* modification deletes the delayed ref without ever inserting the
* extent into the extent allocation tree. ref->must_insert_reserved
* is the flag used to record that accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
*/
if (action == BTRFS_ADD_DELAYED_EXTENT)
must_insert_reserved = 1;
else
must_insert_reserved = 0;
break;
case BTRFS_ADD_DELAYED_EXTENT:
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update the
* reserved accounting when the extent is finally added, or if a
* later modification deletes the delayed ref without ever
* inserting the extent into the extent allocation tree.
* ref->must_insert_reserved is the flag used to record that
* accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not
* required.
*/
must_insert_reserved = true;
break;
}
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
......@@ -738,7 +740,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
head_ref->ref_tree = RB_ROOT_CACHED;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
head_ref->processing = 0;
head_ref->processing = false;
head_ref->total_ref_mod = count_mod;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
......@@ -763,11 +765,11 @@ static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
int action, int *qrecord_inserted_ret)
int action, bool *qrecord_inserted_ret)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
int qrecord_inserted = 0;
bool qrecord_inserted = false;
delayed_refs = &trans->transaction->delayed_refs;
......@@ -777,7 +779,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
delayed_refs, qrecord))
kfree(qrecord);
else
qrecord_inserted = 1;
qrecord_inserted = true;
}
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
......@@ -853,8 +855,6 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
ref->type = ref_type;
RB_CLEAR_NODE(&ref->ref_node);
......@@ -875,11 +875,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
bool qrecord_inserted;
bool is_system;
bool merged;
int action = generic_ref->action;
int level = generic_ref->tree_ref.level;
int ret;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
......@@ -935,7 +935,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
......@@ -947,7 +947,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
if (ret > 0)
if (merged)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
if (qrecord_inserted)
......@@ -968,9 +968,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
bool qrecord_inserted;
int action = generic_ref->action;
int ret;
bool merged;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
......@@ -1027,7 +1027,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
......@@ -1039,7 +1039,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
if (ret > 0)
if (merged)
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
......
......@@ -48,9 +48,6 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
};
struct btrfs_delayed_extent_op {
......@@ -70,20 +67,26 @@ struct btrfs_delayed_extent_op {
struct btrfs_delayed_ref_head {
u64 bytenr;
u64 num_bytes;
refcount_t refs;
/*
* For insertion into struct btrfs_delayed_ref_root::href_root.
* Keep it in the same cache line as 'bytenr' for more efficient
* searches in the rbtree.
*/
struct rb_node href_node;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
*/
struct mutex mutex;
refcount_t refs;
/* Protects 'ref_tree' and 'ref_add_list'. */
spinlock_t lock;
struct rb_root_cached ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
/*
......@@ -113,10 +116,10 @@ struct btrfs_delayed_ref_head {
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
unsigned int is_system:1;
unsigned int processing:1;
bool must_insert_reserved;
bool is_data;
bool is_system;
bool processing;
};
struct btrfs_delayed_tree_ref {
......@@ -337,7 +340,7 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(refcount_read(&ref->refs) == 0);
if (refcount_dec_and_test(&ref->refs)) {
WARN_ON(ref->in_tree);
WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
......
......@@ -41,7 +41,7 @@
* All new writes will be written to both target and source devices, so even
* if replace gets canceled, sources device still contains up-to-date data.
*
* Location: handle_ops_on_dev_replace() from __btrfs_map_block()
* Location: handle_ops_on_dev_replace() from btrfs_map_block()
* Start: btrfs_dev_replace_start()
* End: btrfs_dev_replace_finishing()
* Content: Latest data/metadata
......@@ -795,8 +795,8 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
while (!find_first_extent_bit(&srcdev->alloc_state, start,
&found_start, &found_end,
CHUNK_ALLOCATED, &cached_state)) {
ret = set_extent_bits(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED);
ret = set_extent_bit(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED, NULL);
if (ret)
break;
start = found_end + 1;
......
......@@ -73,6 +73,23 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
return &discard_ctl->discard_list[block_group->discard_index];
}
/*
* Determine if async discard should be running.
*
* @discard_ctl: discard control
*
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
*/
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
struct btrfs_fs_info,
discard_ctl);
return (!(fs_info->sb->s_flags & SB_RDONLY) &&
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
......@@ -544,23 +561,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
spin_unlock(&discard_ctl->lock);
}
/*
* Determine if async discard should be running.
*
* @discard_ctl: discard control
*
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
*/
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
struct btrfs_fs_info,
discard_ctl);
return (!(fs_info->sb->s_flags & SB_RDONLY) &&
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
/*
* Recalculate the base delay.
*
......
......@@ -24,7 +24,6 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group);
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override);
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
......
This diff is collapsed.
......@@ -31,8 +31,6 @@ struct btrfs_tree_parent_check;
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
struct btrfs_tree_parent_check *check);
struct extent_buffer *btrfs_find_create_tree_block(
......@@ -84,9 +82,8 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror);
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif
......
......@@ -532,6 +532,16 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
return next;
}
/*
* Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
* unset the EXTENT_NOWAIT bit.
*/
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
*bits &= EXTENT_NOWAIT - 1;
}
/*
* Clear some bits on a range in the tree. This may require splitting or
* inserting elements in the tree, so the gfp mask is used to indicate which
......@@ -546,7 +556,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
*/
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state,
gfp_t mask, struct extent_changeset *changeset)
struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *cached;
......@@ -556,7 +566,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear = 0;
int wake;
int delete = (bits & EXTENT_CLEAR_ALL_BITS);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
......@@ -953,7 +965,8 @@ bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
/*
* Set some bits on a range in the tree. This may require allocations or
* sleeping, so the gfp mask is used to indicate what is allowed.
* sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
* GFP_NOWAIT.
*
* If any of the exclusive bits are set, this will fail with -EEXIST if some
* part of the range already has the desired bits set. The extent_state of the
......@@ -968,7 +981,7 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u64 *failed_start,
struct extent_state **failed_state,
struct extent_state **cached_state,
struct extent_changeset *changeset, gfp_t mask)
struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
......@@ -978,7 +991,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
u32 exclusive_bits = (bits & EXTENT_LOCKED);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
......@@ -1188,10 +1203,10 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
}
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state, gfp_t mask)
u32 bits, struct extent_state **cached_state)
{
return __set_extent_bit(tree, start, end, bits, NULL, NULL,
cached_state, NULL, mask);
cached_state, NULL);
}
/*
......@@ -1687,8 +1702,7 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
changeset, GFP_NOFS);
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -1700,8 +1714,7 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
return __clear_extent_bit(tree, start, end, bits, NULL, GFP_NOFS,
changeset);
return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -1711,7 +1724,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
NULL, cached, NULL, GFP_NOFS);
NULL, cached, NULL);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
......@@ -1733,7 +1746,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
&failed_state, cached_state, NULL, GFP_NOFS);
&failed_state, cached_state, NULL);
while (err == -EEXIST) {
if (failed_start != start)
clear_extent_bit(tree, start, failed_start - 1,
......@@ -1743,7 +1756,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
&failed_state);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, &failed_state,
cached_state, NULL, GFP_NOFS);
cached_state, NULL);
}
return err;
}
......
......@@ -43,6 +43,15 @@ enum {
* want the extent states to go away.
*/
ENUM_BIT(EXTENT_CLEAR_ALL_BITS),
/*
* This must be last.
*
* Bit not representing a state but a request for NOWAIT semantics,
* e.g. when allocating memory, and must be masked out from the other
* bits.
*/
ENUM_BIT(EXTENT_NOWAIT)
};
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
......@@ -127,22 +136,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached, gfp_t mask,
u32 bits, struct extent_state **cached,
struct extent_changeset *changeset);
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits,
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, bits, cached,
GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, bits, cached, NULL);
}
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
......@@ -154,31 +161,13 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state, gfp_t mask);
static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOWAIT);
}
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOFS);
}
u32 bits, struct extent_state **cached_state);
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, mask);
cached_state, NULL);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
......@@ -193,29 +182,6 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
u64 end, u32 extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | extra_bits,
cached_state, GFP_NOFS);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_DEFRAG,
cached_state, GFP_NOFS);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
......
This diff is collapsed.
......@@ -141,7 +141,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags, int level);
struct extent_buffer *eb, u64 flags);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
......
This diff is collapsed.
......@@ -29,6 +29,8 @@ enum {
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
EXTENT_BUFFER_NO_CHECK,
/* Indicate that extent buffer pages a being read */
EXTENT_BUFFER_READING,
};
/* these are flags for __process_pages_contig */
......@@ -38,7 +40,6 @@ enum {
ENUM_BIT(PAGE_START_WRITEBACK),
ENUM_BIT(PAGE_END_WRITEBACK),
ENUM_BIT(PAGE_SET_ORDERED),
ENUM_BIT(PAGE_SET_ERROR),
ENUM_BIT(PAGE_LOCK),
};
......@@ -79,7 +80,6 @@ struct extent_buffer {
struct btrfs_fs_info *fs_info;
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
int read_mirror;
struct rcu_head rcu_head;
pid_t lock_owner;
......@@ -89,7 +89,6 @@ struct extent_buffer {
struct rw_semaphore lock;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
struct list_head release_list;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
......@@ -179,7 +178,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
struct writeback_control *wbc);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
......@@ -262,10 +262,9 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
......
......@@ -364,8 +364,9 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
set_extent_bits_nowait(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits);
set_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1,
bits | EXTENT_NOWAIT, NULL);
}
}
......@@ -380,8 +381,9 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
struct btrfs_device *device = stripe->dev;
__clear_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits,
NULL, GFP_NOWAIT, NULL);
stripe->physical + stripe_size - 1,
bits | EXTENT_NOWAIT,
NULL, NULL);
}
}
......@@ -502,10 +504,10 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
RB_CLEAR_NODE(&em->rb_node);
}
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
static void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
{
lockdep_assert_held_write(&tree->lock);
......@@ -959,3 +961,95 @@ int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
return ret;
}
/*
* Split off the first pre bytes from the extent_map at [start, start + len],
* and set the block_start for it to new_logical.
*
* This function is used when an ordered_extent needs to be split.
*/
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
u64 new_logical)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
struct extent_map *split_pre = NULL;
struct extent_map *split_mid = NULL;
int ret = 0;
unsigned long flags;
ASSERT(pre != 0);
ASSERT(pre < len);
split_pre = alloc_extent_map();
if (!split_pre)
return -ENOMEM;
split_mid = alloc_extent_map();
if (!split_mid) {
ret = -ENOMEM;
goto out_free_pre;
}
lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
ret = -EIO;
goto out_unlock;
}
ASSERT(em->len == len);
ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
ASSERT(!list_empty(&em->list));
flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
/* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start;
split_pre->len = pre;
split_pre->orig_start = split_pre->start;
split_pre->block_start = new_logical;
split_pre->block_len = split_pre->len;
split_pre->orig_block_len = split_pre->block_len;
split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags;
split_pre->compress_type = em->compress_type;
split_pre->generation = em->generation;
replace_extent_mapping(em_tree, em, split_pre, 1);
/*
* Now we only have an extent_map at:
* [em->start, em->start + pre]
*/
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
split_mid->orig_start = split_mid->start;
split_mid->block_start = em->block_start + pre;
split_mid->block_len = split_mid->len;
split_mid->orig_block_len = split_mid->block_len;
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->compress_type = em->compress_type;
split_mid->generation = em->generation;
add_extent_mapping(em_tree, split_mid, 1);
/* Once for us */
free_extent_map(em);
/* Once for the tree */
free_extent_map(em);
out_unlock:
write_unlock(&em_tree->lock);
unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
free_extent_map(split_mid);
out_free_pre:
free_extent_map(split_pre);
return ret;
}
......@@ -90,10 +90,8 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified);
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
u64 new_logical);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
......
......@@ -94,8 +94,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY);
return set_extent_bit(&inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY, NULL);
}
/*
......@@ -438,9 +438,9 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset = bbio->file_offset + bio_offset;
set_extent_bits(&inode->io_tree, file_offset,
file_offset + sectorsize - 1,
EXTENT_NODATASUM);
set_extent_bit(&inode->io_tree, file_offset,
file_offset + sectorsize - 1,
EXTENT_NODATASUM, NULL);
} else {
btrfs_warn_rl(fs_info,
"csum hole found for disk bytenr range [%llu, %llu)",
......@@ -560,8 +560,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
goto fail;
}
sums->bytenr = start;
sums->len = (int)size;
sums->logical = start;
sums->len = size;
offset = bytes_to_csum_size(fs_info, start - key.offset);
......@@ -721,20 +721,17 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
*/
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
u64 offset = bbio->file_offset;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
int index;
unsigned int blockcount;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
int i;
unsigned nofs_flag;
......@@ -749,61 +746,17 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
sums->bytenr = bio->bi_iter.bi_sector << 9;
sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
index = 0;
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
if (!ordered) {
ordered = btrfs_lookup_ordered_extent(inode, offset);
/*
* The bio range is not covered by any ordered extent,
* must be a code logic error.
*/
if (unlikely(!ordered)) {
WARN(1, KERN_WARNING
"no ordered extent for root %llu ino %llu offset %llu\n",
inode->root->root_key.objectid,
btrfs_ino(inode), offset);
kvfree(sums);
return BLK_STS_IOERR;
}
}
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < blockcount; i++) {
if (!(bio->bi_opf & REQ_BTRFS_ONE_ORDERED) &&
!in_range(offset, ordered->file_offset,
ordered->num_bytes)) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
bytes_left = bio->bi_iter.bi_size - total_bytes;
nofs_flag = memalloc_nofs_save();
sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
bytes_left), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
if (!sums)
return BLK_STS_RESOURCE;
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
offset);
ASSERT(ordered); /* Logic error */
sums->bytenr = (bio->bi_iter.bi_sector << 9)
+ total_bytes;
index = 0;
}
data = bvec_kmap_local(&bvec);
crypto_shash_digest(shash,
data + (i * fs_info->sectorsize),
......@@ -811,15 +764,28 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->sums + index);
kunmap_local(data);
index += fs_info->csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
total_bytes += fs_info->sectorsize;
}
}
this_sum_bytes = 0;
bbio->sums = sums;
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
/*
* Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to
* record the updated logical address on Zone Append completion.
* Allocate just the structure with an empty sums array here for that case.
*/
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
{
bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
if (!bbio->sums)
return BLK_STS_RESOURCE;
bbio->sums->len = bbio->bio.bi_iter.bi_size;
bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
return 0;
}
......@@ -1086,7 +1052,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
again:
next_offset = (u64)-1;
found_next = 0;
bytenr = sums->bytenr + total_bytes;
bytenr = sums->logical + total_bytes;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = bytenr;
file_key.type = BTRFS_EXTENT_CSUM_KEY;
......
......@@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
......
......@@ -1651,7 +1651,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written, num_sync;
const bool sync = iocb_is_dsync(iocb);
/*
* If the fs flips readonly due to some impossible error, although we
......@@ -1664,9 +1663,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (encoded && (iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
if (sync)
atomic_inc(&inode->sync_writers);
if (encoded) {
num_written = btrfs_encoded_write(iocb, from, encoded);
num_sync = encoded->len;
......@@ -1686,9 +1682,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
num_written = num_sync;
}
if (sync)
atomic_dec(&inode->sync_writers);
current->backing_dev_info = NULL;
return num_written;
}
......@@ -1733,9 +1726,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
* several segments of stripe length (currently 64K).
*/
blk_start_plug(&plug);
atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = btrfs_fdatawrite_range(inode, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
blk_finish_plug(&plug);
return ret;
......@@ -3709,7 +3700,8 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
if (ret)
......
This diff is collapsed.
......@@ -101,8 +101,6 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_block_group *block_group);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct inode *inode);
......
......@@ -1280,7 +1280,10 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
goto abort;
btrfs_global_root_delete(free_space_root);
spin_lock(&fs_info->trans_lock);
list_del(&free_space_root->dirty_list);
spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(free_space_root->node);
btrfs_clear_buffer_dirty(trans, free_space_root->node);
......
......@@ -543,7 +543,6 @@ struct btrfs_fs_info {
* A third pool does submit_bio to avoid deadlocking with the other two.
*/
struct btrfs_workqueue *workers;
struct btrfs_workqueue *hipri_workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct workqueue_struct *endio_workers;
......@@ -577,6 +576,7 @@ struct btrfs_fs_info {
s32 dirty_metadata_batch;
s32 delalloc_batch;
/* Protected by 'trans_lock'. */
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
......@@ -643,7 +643,6 @@ struct btrfs_fs_info {
*/
refcount_t scrub_workers_refcnt;
struct workqueue_struct *scrub_workers;
struct workqueue_struct *scrub_wr_completion_workers;
struct btrfs_subpage_info *subpage_info;
struct btrfs_discard_ctl discard_ctl;
......@@ -854,7 +853,7 @@ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info,
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
return fs_info->zone_size > 0;
return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0;
}
/*
......
......@@ -60,6 +60,22 @@ struct btrfs_truncate_control {
bool clear_extent_range;
};
/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
return (flags | ((u64)ro_flags << 32));
}
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
u32 *flags, u32 *ro_flags)
{
*flags = (u32)inode_item_flags;
*ro_flags = (u32)(inode_item_flags >> 32);
}
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_truncate_control *control);
......
This diff is collapsed.
This diff is collapsed.
......@@ -57,8 +57,8 @@
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
/* Longest entry: btrfs-free-space-00 */
char names[BTRFS_MAX_LEVEL][20];
/* Longest entry: btrfs-block-group-00 */
char names[BTRFS_MAX_LEVEL][24];
struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
......@@ -72,6 +72,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
{ .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
{ .id = 0, DEFINE_NAME("tree") },
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment