Commit f3cdc8ae authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Highlights:

   - speedup dead root detection during orphan cleanup, eg. when there
     are many deleted subvolumes waiting to be cleaned, the trees are
     now looked up in a radix tree instead of an O(N^2) search

   - snapshot creation with inherited qgroup will mark the qgroup
     inconsistent, requires a rescan

   - send will emit file capabilities after chown, this produces a
     stream that does not need postprocessing to set the capabilities
     again

   - direct io ported to iomap infrastructure, cleaned up and simplified
     code, notably removing last use of struct buffer_head in btrfs code

  Core changes:

   - factor out backreference iteration, to be used by ordinary
     backreferences and relocation code

   - improved global block reserve utilization
      * better logic to serialize requests
      * increased maximum available for unlink
      * improved handling on large pages (64K)

   - direct io cleanups and fixes
      * simplify layering, where cloned bios were unnecessarily created
        for some cases
      * error handling fixes (submit, endio)
      * remove repair worker thread, used to avoid deadlocks during
        repair

   - refactored block group reading code, preparatory work for new type
     of block group storage that should improve mount time on large
     filesystems

  Cleanups:

   - cleaned up (and slightly sped up) set/get helpers for metadata data
     structure members

   - root bit REF_COWS got renamed to SHAREABLE to reflect that the
     blocks of the tree get shared either among subvolumes or with the
     relocation trees

  Fixes:

   - when subvolume deletion fails due to ENOSPC, the filesystem is not
     turned read-only

   - device scan deals with devices from other filesystems that changed
     ownership due to overwrite (mkfs)

   - fix a race between scrub and block group removal/allocation

   - fix long standing bug of a runaway balance operation, printing the
     same line to the syslog, caused by a stale status bit on a reloc
     tree that prevented progress

   - fix corrupt log due to concurrent fsync of inodes with shared
     extents

   - fix space underflow for NODATACOW and buffered writes when it for
     some reason needs to fall back to COW mode"

* tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits)
  btrfs: fix space_info bytes_may_use underflow during space cache writeout
  btrfs: fix space_info bytes_may_use underflow after nocow buffered write
  btrfs: fix wrong file range cleanup after an error filling dealloc range
  btrfs: remove redundant local variable in read_block_for_search
  btrfs: open code key_search
  btrfs: split btrfs_direct_IO to read and write part
  btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK
  fs: remove dio_end_io()
  btrfs: switch to iomap_dio_rw() for dio
  iomap: remove lockdep_assert_held()
  iomap: add a filesystem hook for direct I/O bio submission
  fs: export generic_file_buffered_read()
  btrfs: turn space cache writeout failure messages into debug messages
  btrfs: include error on messages about failure to write space/inode caches
  btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks()
  btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
  btrfs: make checksum item extension more efficient
  btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents
  btrfs: unexport btrfs_compress_set_level()
  btrfs: simplify iget helpers
  ...
parents 8eeae5ba 2166e5ed
......@@ -80,6 +80,7 @@ ForEachMacros:
- 'ax25_uid_for_each'
- '__bio_for_each_bvec'
- 'bio_for_each_bvec'
- 'bio_for_each_bvec_all'
- 'bio_for_each_integrity_vec'
- '__bio_for_each_segment'
- 'bio_for_each_segment'
......
......@@ -129,6 +129,7 @@ Usage of helpers:
::
bio_for_each_segment_all()
bio_for_each_bvec_all()
bio_first_bvec_all()
bio_first_page_all()
bio_last_bvec_all()
......@@ -143,4 +144,5 @@ Usage of helpers:
bio_vec' will contain a multi-page IO vector during the iteration::
bio_for_each_bvec()
bio_for_each_bvec_all()
rq_for_each_bvec()
......@@ -14,6 +14,7 @@ config BTRFS_FS
select LZO_DECOMPRESS
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
......
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include <linux/btrfs.h>
#include "ulist.h"
#include "disk-io.h"
#include "extent_io.h"
struct inode_fs_paths {
......@@ -78,4 +79,300 @@ struct prelim_ref {
u64 wanted_disk_byte;
};
/*
 * Iterate backrefs of one extent.
 *
 * Now it only supports iteration of tree block in commit root.
 *
 * An iterator is obtained from btrfs_backref_iter_alloc() and destroyed with
 * btrfs_backref_iter_free(), which frees both @path and the iterator itself.
 * btrfs_backref_iter_release() resets every position field below to zero and
 * releases @path so the iterator can be started again.
 */
struct btrfs_backref_iter {
/* Zeroed by btrfs_backref_iter_release(); presumably the bytenr passed to btrfs_backref_iter_start() - confirm */
u64 bytenr;
/* Search path; path->nodes[0] is the buffer returned by btrfs_backref_get_eb() */
struct btrfs_path *path;
struct btrfs_fs_info *fs_info;
/*
 * Key of the current item. Its type is what distinguishes inline refs
 * (EXTENT_ITEM / METADATA_ITEM) in btrfs_backref_iter_is_inline_ref().
 */
struct btrfs_key cur_key;
/*
 * Positions inside the current item. The distance cur_ptr - item_ptr is
 * compared against sizeof(struct btrfs_extent_item) by
 * btrfs_backref_has_tree_block_info(), so item_ptr presumably marks the item
 * start, cur_ptr the current position and end_ptr the item end - confirm
 * against the iterator implementation.
 */
u32 item_ptr;
u32 cur_ptr;
u32 end_ptr;
};
struct btrfs_backref_iter *btrfs_backref_iter_alloc(
struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
/*
 * Free a backref iterator together with its path.
 *
 * A NULL @iter is accepted and ignored.
 */
static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
{
	if (iter) {
		btrfs_free_path(iter->path);
		kfree(iter);
	}
}
/*
 * Return the extent buffer at level 0 of the iterator's path, or NULL when
 * no iterator is given.
 */
static inline struct extent_buffer *btrfs_backref_get_eb(
		struct btrfs_backref_iter *iter)
{
	return iter ? iter->path->nodes[0] : NULL;
}
/*
* For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
* is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
*
* This helper determines if that's the case.
*/
static inline bool btrfs_backref_has_tree_block_info(
struct btrfs_backref_iter *iter)
{
if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item))
return true;
return false;
}
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
/*
 * Return true when the current key is an EXTENT_ITEM or METADATA_ITEM, i.e.
 * the iterator is positioned on an item holding inline refs.
 */
static inline bool btrfs_backref_iter_is_inline_ref(
		struct btrfs_backref_iter *iter)
{
	switch (iter->cur_key.type) {
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		return true;
	default:
		return false;
	}
}
static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
{
iter->bytenr = 0;
iter->item_ptr = 0;
iter->cur_ptr = 0;
iter->end_ptr = 0;
btrfs_release_path(iter->path);
memset(&iter->cur_key, 0, sizeof(iter->cur_key));
}
/*
* Backref cache related structures
*
* The whole objective of backref_cache is to build a bi-directional map
* of tree blocks (represented by backref_node) and all their parents.
*/
/*
 * Represent a tree block in the backref cache.
 *
 * Nodes are connected to their parents and children through
 * btrfs_backref_edge structures hanging off the @upper and @lower lists.
 */
struct btrfs_backref_node {
struct {
struct rb_node rb_node;
u64 bytenr;
}; /* Use rb_simple_node for search/insert */
/* NOTE(review): presumably the block's bytenr after it has been COWed - confirm */
u64 new_bytenr;
/* Objectid of tree block owner, can be not uptodate */
u64 owner;
/* Link to pending, changed or detached list */
struct list_head list;
/* List of upper level edges, which link this node to its parents */
struct list_head upper;
/* List of lower level edges, which link this node to its children */
struct list_head lower;
/* NULL if this node is not tree root; put by btrfs_backref_free_node() */
struct btrfs_root *root;
/* Extent buffer got by COWing the block */
struct extent_buffer *eb;
/* Level of the tree block */
unsigned int level:8;
/* Is the block in a non-shareable tree */
unsigned int cowonly:1;
/* 1 if no child node is in the cache */
unsigned int lowest:1;
/* Is the extent buffer locked */
unsigned int locked:1;
/* Has the block been processed */
unsigned int processed:1;
/* Have backrefs of this block been checked */
unsigned int checked:1;
/*
 * 1 if corresponding block has been COWed but some upper level block
 * pointers may not point to the new location
 */
unsigned int pending:1;
/* 1 if the backref node isn't connected to any other backref node */
unsigned int detached:1;
/*
 * For the generic purpose backref cache, where we only care whether the
 * node is a reloc root and do not care about the source subvolume id.
 */
unsigned int is_reloc_root:1;
};
/* Indexes into btrfs_backref_edge::list[] and btrfs_backref_edge::node[] */
#define LOWER 0
#define UPPER 1
/*
 * Represent an edge connecting upper and lower backref nodes.
 */
struct btrfs_backref_edge {
/*
 * list[LOWER] is linked to btrfs_backref_node::upper of lower level
 * node, and list[UPPER] is linked to btrfs_backref_node::lower of
 * upper level node.
 *
 * Also, build_backref_tree() uses list[UPPER] for pending edges, before
 * linking list[UPPER] to its upper level nodes.
 */
struct list_head list[2];
/*
 * Two related nodes: node[LOWER] is the lower level node and node[UPPER]
 * the node one level above it, as set by btrfs_backref_link_edge().
 */
struct btrfs_backref_node *node[2];
};
/*
 * Cache of backref nodes and the edges between them, set up with
 * btrfs_backref_init_cache() and torn down with btrfs_backref_release_cache().
 */
struct btrfs_backref_cache {
/* Red black tree of all backref nodes in the cache */
struct rb_root rb_root;
/* For passing backref nodes to btrfs_reloc_cow_block */
struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
/*
 * List of blocks that have been COWed but some block pointers in upper
 * level blocks may not reflect the new location
 */
struct list_head pending[BTRFS_MAX_LEVEL];
/* List of backref nodes with no child node */
struct list_head leaves;
/* List of blocks that have been COWed in current transaction */
struct list_head changed;
/* List of detached backref nodes */
struct list_head detached;
/* NOTE(review): presumably the id of the last transaction this cache was updated in - confirm */
u64 last_trans;
/* Decremented by btrfs_backref_free_node() */
int nr_nodes;
/* Decremented by btrfs_backref_free_edge() */
int nr_edges;
/* List of unchecked backref edges during backref cache build */
struct list_head pending_edge;
/* List of useless backref nodes during backref cache build */
struct list_head useless_node;
struct btrfs_fs_info *fs_info;
/*
 * Whether this cache is for relocation
 *
 * A relocation backref cache requires more info about reloc roots than
 * a generic backref cache does.
 */
unsigned int is_reloc;
};
void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
struct btrfs_backref_cache *cache, int is_reloc);
struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_cache *cache, u64 bytenr, int level);
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache);
#define LINK_LOWER (1 << 0)
#define LINK_UPPER (1 << 1)
static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
struct btrfs_backref_node *lower,
struct btrfs_backref_node *upper,
int link_which)
{
ASSERT(upper && lower && upper->level == lower->level + 1);
edge->node[LOWER] = lower;
edge->node[UPPER] = upper;
if (link_which & LINK_LOWER)
list_add_tail(&edge->list[LOWER], &lower->upper);
if (link_which & LINK_UPPER)
list_add_tail(&edge->list[UPPER], &upper->lower);
}
/*
 * Free a backref node: drop it from the cache's node count, put the root it
 * references and free the memory. A NULL @node is ignored.
 */
static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
					   struct btrfs_backref_node *node)
{
	if (!node)
		return;

	cache->nr_nodes--;
	btrfs_put_root(node->root);
	kfree(node);
}
/*
 * Free a backref edge and drop it from the cache's edge count.
 * A NULL @edge is ignored.
 */
static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
					   struct btrfs_backref_edge *edge)
{
	if (!edge)
		return;

	cache->nr_edges--;
	kfree(edge);
}
/*
 * Unlock the node's extent buffer if the node is marked as holding the lock,
 * and clear that mark.
 */
static inline void btrfs_backref_unlock_node_buffer(
		struct btrfs_backref_node *node)
{
	if (!node->locked)
		return;

	btrfs_tree_unlock(node->eb);
	node->locked = 0;
}
/*
 * Release the extent buffer attached to the node, unlocking it first when
 * needed. Does nothing if the node has no extent buffer.
 */
static inline void btrfs_backref_drop_node_buffer(
		struct btrfs_backref_node *node)
{
	if (!node->eb)
		return;

	btrfs_backref_unlock_node_buffer(node);
	free_extent_buffer(node->eb);
	node->eb = NULL;
}
/*
 * Drop the backref node from cache without cleaning up its children
 * edges.
 *
 * This can only be called on node without parent edges.
 * The children edges are still kept as is.
 */
static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
struct btrfs_backref_node *node)
{
/* A node that still has parent edges must not be dropped */
BUG_ON(!list_empty(&node->upper));
/* Unlock (if locked) and release the node's extent buffer, if any */
btrfs_backref_drop_node_buffer(node);
list_del(&node->list);
/* Only the list head is unlinked here; the child edges are not freed */
list_del(&node->lower);
/* Erase from the rb tree only when the node is actually linked into it */
if (!RB_EMPTY_NODE(&node->rb_node))
rb_erase(&node->rb_node, &tree->rb_root);
/* Decrements the cache's node count, puts node->root and frees the node */
btrfs_backref_free_node(tree, node);
}
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
u64 bytenr, int errno)
{
btrfs_panic(fs_info, errno,
"Inconsistency in backref cache found at offset %llu",
bytenr);
}
int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
struct btrfs_path *path,
struct btrfs_backref_iter *iter,
struct btrfs_key *node_key,
struct btrfs_backref_node *cur);
int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *start);
void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
#endif
This diff is collapsed.
......@@ -129,8 +129,17 @@ struct btrfs_block_group {
/* For read-only block groups */
struct list_head ro_list;
/*
* When non-zero it means the block group's logical address and its
* device extents can not be reused for future block group allocations
* until the counter goes down to 0. This is to prevent them from being
* reused while some task is still using the block group after it was
* deleted - we want to make sure they can only be reused for new block
* groups after that task is done with the deleted block group.
*/
atomic_t frozen;
/* For discard operations */
atomic_t trimming;
struct list_head discard_list;
int discard_index;
u64 discard_eligible_time;
......@@ -283,6 +292,9 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
cache->cached == BTRFS_CACHE_ERROR;
}
void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
......
......@@ -5,6 +5,7 @@
#include "block-rsv.h"
#include "space-info.h"
#include "transaction.h"
#include "block-group.h"
/*
* HOW DO BLOCK RESERVES WORK
......@@ -405,6 +406,8 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
else
block_rsv->full = 0;
if (block_rsv->size >= sinfo->total_bytes)
sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
......@@ -455,7 +458,7 @@ static struct btrfs_block_rsv *get_block_rsv(
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv = NULL;
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
(root == fs_info->csum_root && trans->adding_csums) ||
(root == fs_info->uuid_root))
block_rsv = trans->block_rsv;
......
......@@ -7,6 +7,7 @@
#define BTRFS_INODE_H
#include <linux/hash.h>
#include <linux/refcount.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
......@@ -27,7 +28,6 @@ enum {
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_READDIO_NEED_LOCK,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
};
......@@ -293,53 +293,25 @@ static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
return ret;
}
#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
struct btrfs_dio_private {
struct inode *inode;
unsigned long flags;
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
void *private;
/* number of bios pending for this dio */
atomic_t pending_bios;
/* IO errors */
int errors;
/* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
/*
* References to this structure. There is one reference per in-flight
* bio plus one while we're still setting up.
*/
refcount_t refs;
/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
/*
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
blk_status_t);
/* Array of checksums */
u8 csums[];
};
/*
 * Disable DIO read nolock optimization, so new dio readers will be forced
 * to grab i_mutex. It is used to avoid the endless truncate due to
 * nonlocked dio read.
 *
 * Sets BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime flags;
 * btrfs_inode_resume_unlocked_dio() clears it again.
 */
static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
/* Full barrier after setting the flag; presumably so later accesses by the caller are ordered after it - confirm */
smp_mb();
}
/*
 * Re-enable the DIO read nolock optimization by clearing
 * BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime flags.
 */
static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
{
/* Barrier issued before the atomic bit clear; presumably orders the caller's earlier accesses before the flag is dropped - confirm */
smp_mb__before_atomic();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
......
......@@ -194,11 +194,9 @@ static int check_compressed_csum(struct btrfs_inode *inode,
for (i = 0; i < cb->nr_pages; i++) {
page = cb->compressed_pages[i];
crypto_shash_init(shash);
kaddr = kmap_atomic(page);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
kunmap_atomic(kaddr);
crypto_shash_final(shash, (u8 *)&csum);
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
......@@ -1141,6 +1139,22 @@ static void put_workspace(int type, struct list_head *ws)
}
}
/*
 * Clamp @level to what the compression algorithm @type supports.
 *
 * A level of 0 selects the algorithm's default level; any other value is
 * capped at the algorithm's maximum level.
 */
static unsigned int btrfs_compress_set_level(int type, unsigned level)
{
	const struct btrfs_compress_op *ops = btrfs_compress_op[type];

	if (level == 0)
		return ops->default_level;

	return min(level, ops->max_level);
}
/*
* Given an address space and start and length, compress the bytes into @pages
* that are allocated on demand.
......@@ -1748,19 +1762,3 @@ unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
return level;
}
/*
 * Clamp @level to what the compression algorithm @type supports.
 *
 * A level of 0 selects the algorithm's default level; any other value is
 * capped at the algorithm's maximum level.
 */
unsigned int btrfs_compress_set_level(int type, unsigned level)
{
	const struct btrfs_compress_op *ops = btrfs_compress_op[type];

	if (level == 0)
		return ops->default_level;

	return min(level, ops->max_level);
}
......@@ -140,8 +140,6 @@ extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len);
unsigned int btrfs_compress_set_level(int type, unsigned level);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -358,16 +358,14 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, result);
crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
return 1;
......@@ -709,9 +707,7 @@ static void end_workqueue_bio(struct bio *bio)
else
wq = fs_info->endio_write_workers;
} else {
if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
wq = fs_info->endio_repair_workers;
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
else if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
......@@ -1135,9 +1131,12 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
if (!dummy)
if (!dummy) {
extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
extent_io_tree_init(fs_info, &root->log_csum_range,
IO_TREE_LOG_CSUM_RANGE, NULL);
}
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
......@@ -1275,12 +1274,13 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
* DON'T set REF_COWS for log trees
* DON'T set SHAREABLE bit for log trees.
*
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
* Log trees are not exposed to user space thus can't be snapshotted,
* and they go away before a real commit is actually done.
*
* They do store pointers to file data extents, and those reference
* counts still get updated (along with back refs to the log tree).
*/
leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
......@@ -1418,8 +1418,9 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
if (ret)
goto fail;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
set_bit(BTRFS_ROOT_REF_COWS, &root->state);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
......@@ -1524,6 +1525,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_info->uuid_root);
btrfs_put_root(fs_info->free_space_root);
btrfs_put_root(fs_info->fs_root);
btrfs_put_root(fs_info->data_reloc_root);
btrfs_check_leaked_roots(fs_info);
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
......@@ -1533,35 +1535,34 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
bool check_ref)
u64 objectid, bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
int ret;
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->tree_root);
if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->extent_root);
if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
return btrfs_grab_root(fs_info->chunk_root);
if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
if (objectid == BTRFS_DEV_TREE_OBJECTID)
return btrfs_grab_root(fs_info->dev_root);
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
return btrfs_grab_root(fs_info->csum_root);
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
return btrfs_grab_root(fs_info->quota_root) ?
fs_info->quota_root : ERR_PTR(-ENOENT);
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return btrfs_grab_root(fs_info->free_space_root) ?
fs_info->free_space_root : ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
......@@ -1570,7 +1571,10 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
return root;
}
root = btrfs_read_tree_root(fs_info->tree_root, location);
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(root))
return root;
......@@ -1590,7 +1594,7 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
}
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = location->objectid;
key.offset = objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
btrfs_free_path(path);
......@@ -1940,7 +1944,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
......@@ -1981,6 +1984,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
free_root_extent_buffers(info->quota_root);
free_root_extent_buffers(info->uuid_root);
free_root_extent_buffers(info->fs_root);
free_root_extent_buffers(info->data_reloc_root);
if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
free_root_extent_buffers(info->free_space_root);
......@@ -1993,6 +1997,7 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
btrfs_drew_lock_destroy(&root->snapshot_lock);
......@@ -2143,8 +2148,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_raid56_workers =
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
max_active, 4);
fs_info->endio_repair_workers =
btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
......@@ -2168,7 +2171,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
fs_info->endio_repair_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
......@@ -2290,6 +2292,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;
/*
* This tree can share blocks with some other fs tree during relocation
* and we need a proper setup by btrfs_get_fs_root
*/
root = btrfs_get_fs_root(tree_root->fs_info,
BTRFS_DATA_RELOC_TREE_OBJECTID, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->data_reloc_root = root;
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (!IS_ERR(root)) {
......@@ -2827,7 +2842,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
u64 generation;
u64 features;
u16 csum_type;
struct btrfs_key location;
struct btrfs_super_block *disk_super;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *tree_root;
......@@ -3241,11 +3255,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
}
location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = 0;
fs_info->fs_root = btrfs_get_fs_root(fs_info, &location, true);
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
btrfs_warn(fs_info, "failed to read fs tree: %d", err);
......@@ -3508,10 +3518,9 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
crypto_shash_init(shash);
crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, sb->csum);
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);
......
......@@ -25,7 +25,6 @@ enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_METADATA,
BTRFS_WQ_ENDIO_FREE_SPACE,
BTRFS_WQ_ENDIO_RAID56,
BTRFS_WQ_ENDIO_DIO_REPAIR,
};
static inline u64 btrfs_sb_offset(int mirror)
......@@ -67,8 +66,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
bool check_ref);
u64 objectid, bool check_ref);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
......
......@@ -64,24 +64,15 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
struct inode *inode;
struct btrfs_key key;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, root_objectid, true);
if (IS_ERR(root))
return ERR_CAST(root);
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
inode = btrfs_iget(sb, objectid, root);
btrfs_put_root(root);
if (IS_ERR(inode))
return ERR_CAST(inode);
......@@ -200,9 +191,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
found_key.offset, 0, 0);
}
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root));
return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
......
......@@ -44,6 +44,7 @@ enum {
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_INODE_FILE_EXTENT,
IO_TREE_LOG_CSUM_RANGE,
IO_TREE_SELFTEST,
};
......
......@@ -2114,22 +2114,6 @@ static u64 find_middle(struct rb_root *root)
}
#endif
/*
 * Convert a number of heads into a number of leaves: the bytes needed for
 * the heads' extent items divided by the data size of one leaf.
 */
static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
{
u64 num_bytes;
/* Per head: one extent item plus one inline ref */
num_bytes = heads * (sizeof(struct btrfs_extent_item) +
sizeof(struct btrfs_extent_inline_ref));
/* Without SKINNY_METADATA each head also accounts for a btrfs_tree_block_info */
if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
num_bytes += heads * sizeof(struct btrfs_tree_block_info);
/*
 * NOTE(review): the previous comment here said the value is multiplied by
 * 2 because leaves are never filled up all the way, but no such
 * multiplication is performed: the byte count is simply divided by the
 * leaf data size.
 */
return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
}
/*
* Takes the number of bytes to be csumm'ed and figures out how many leaves it
* would require to store the csums for that many bytes.
......@@ -2442,7 +2426,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(buf);
level = btrfs_header_level(buf);
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
return 0;
if (full_backref)
......@@ -2932,7 +2916,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
&trimmed);
list_del_init(&block_group->bg_list);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
btrfs_put_block_group(block_group);
if (ret) {
......@@ -3369,6 +3353,7 @@ static struct btrfs_block_group *btrfs_lock_cluster(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
__acquires(&cluster->refill_lock)
{
struct btrfs_block_group *used_bg = NULL;
......@@ -5501,8 +5486,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
*/
if (!for_reloc && !root_dropped)
btrfs_add_dead_root(root);
if (err && err != -EAGAIN)
btrfs_handle_fs_error(fs_info, err, NULL);
return err;
}
......
This diff is collapsed.
......@@ -66,6 +66,10 @@ struct btrfs_io_bio;
struct io_failure_record;
struct extent_io_tree;
typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num,
unsigned long bio_flags);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
......@@ -74,8 +78,7 @@ struct extent_io_ops {
* The following callbacks must be always defined, the function
* pointer will be called unconditionally.
*/
blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
submit_bio_hook_t *submit_bio_hook;
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
......@@ -209,7 +212,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
void free_extent_buffer(struct extent_buffer *eb);
......@@ -227,7 +230,7 @@ static inline int num_extent_pages(const struct extent_buffer *eb)
(eb->start >> PAGE_SHIFT);
}
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
......@@ -240,37 +243,37 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer_full(struct extent_buffer *dst,
struct extent_buffer *src);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
const struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len);
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
unsigned long pos);
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
void clear_extent_buffer_dirty(const struct extent_buffer *eb);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(const struct extent_buffer *eb,
unsigned long offset, unsigned long min_len,
char **map, unsigned long *map_start,
unsigned long *map_len);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
......@@ -289,7 +292,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
......@@ -311,12 +314,12 @@ struct io_failure_record {
};
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int fail_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct bio *failed_bio, u64 phy_offset,
struct page *page, unsigned int pgoff,
u64 start, u64 end, int failed_mirror,
submit_bio_hook_t *submit_bio_hook);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
......
......@@ -242,11 +242,13 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
/**
* btrfs_lookup_bio_sums - Look up checksums for a bio.
* @inode: inode that the bio is for.
* @bio: bio embedded in btrfs_io_bio.
* @bio: bio to look up.
* @offset: Unless (u64)-1, look up checksums for this offset in the file.
* If (u64)-1, use the page offsets from the bio instead.
* @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If
* NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead.
* @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
* checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
* NULL, the checksum buffer is allocated and returned in
* btrfs_io_bio(bio)->csum instead.
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
......@@ -256,7 +258,6 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio_vec bvec;
struct bvec_iter iter;
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_path *path;
......@@ -277,6 +278,8 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
GFP_NOFS);
......@@ -598,13 +601,12 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
index = 0;
}
crypto_shash_init(shash);
data = kmap_atomic(bvec.bv_page);
crypto_shash_update(shash, data + bvec.bv_offset
crypto_shash_digest(shash, data + bvec.bv_offset
+ (i * fs_info->sectorsize),
fs_info->sectorsize);
fs_info->sectorsize,
sums->sums + index);
kunmap_atomic(data);
crypto_shash_final(shash, (char *)(sums->sums + index));
index += csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
......@@ -869,7 +871,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
goto fail_unlock;
goto out;
if (ret == -EFBIG) {
u32 item_size;
......@@ -887,10 +889,12 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
if (ret == 1)
if (ret < 0) {
goto out;
} else if (ret > 0) {
found_next = 1;
if (ret != 0)
goto insert;
}
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
......@@ -905,14 +909,27 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
}
/*
* at this point, we know the tree has an item, but it isn't big
* enough yet to put our csum in. Grow it
* At this point, we know the tree has a checksum item that ends at an
* offset matching the start of the checksum range we want to insert.
* We try to extend that item as much as possible and then add as many
* checksums to it as they fit.
*
* First check if the leaf has enough free space for at least one
* checksum. If it has, go directly to the item extension code; otherwise
* release the path and do a search for insertion before the extension.
*/
if (btrfs_leaf_free_space(leaf) >= csum_size) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
csum_offset = (bytenr - found_key.offset) >>
fs_info->sb->s_blocksize_bits;
goto extend_csum;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
if (ret < 0)
goto fail_unlock;
goto out;
if (ret > 0) {
if (path->slots[0] == 0)
......@@ -931,19 +948,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto insert;
}
extend_csum:
if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
csum_size) {
int extend_nr;
u64 tmp;
u32 diff;
u32 free_space;
if (btrfs_leaf_free_space(leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
free_space = btrfs_leaf_free_space(leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
......@@ -954,7 +965,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
diff = min(free_space, diff);
diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
diff /= csum_size;
diff *= csum_size;
......@@ -985,9 +996,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
ins_size);
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
goto out;
if (WARN_ON(ret != 0))
goto fail_unlock;
goto out;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
......@@ -1017,9 +1028,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
return ret;
fail_unlock:
goto out;
}
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
......
......@@ -275,26 +275,18 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *inode_root;
struct inode *inode;
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
int ret;
/* get the inode */
key.objectid = defrag->root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
inode_root = btrfs_get_fs_root(fs_info, &key, true);
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root)) {
ret = PTR_ERR(inode_root);
goto cleanup;
}
key.objectid = defrag->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root);
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
btrfs_put_root(inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
......@@ -775,7 +767,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root == fs_info->tree_root);
while (1) {
recow = 0;
......@@ -1817,21 +1809,61 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
return num_written ? num_written : ret;
}
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
/*
 * Check that a direct IO request is aligned to the filesystem sector size.
 *
 * @fs_info: filesystem info; only fs_info->sectorsize is read
 * @iter:    iterator describing the memory buffers of the request
 * @offset:  file offset of the request
 *
 * Returns 0 when both the file offset and the value reported by
 * iov_iter_alignment() have no bits set below the sector size, -EINVAL
 * otherwise.
 */
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
			       const struct iov_iter *iter, loff_t offset)
{
	const unsigned int sector_mask = fs_info->sectorsize - 1;

	/* The iterator is only inspected once the file offset is aligned. */
	if ((offset & sector_mask) ||
	    (iov_iter_alignment(iter) & sector_mask))
		return -EINVAL;

	return 0;
}
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t pos;
ssize_t written;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
loff_t pos = iocb->ki_pos;
ssize_t written = 0;
ssize_t written_buffered;
loff_t endbyte;
int err;
size_t count = 0;
bool relock = false;
written = generic_file_direct_write(iocb, from);
if (check_direct_IO(fs_info, from, pos))
goto buffered;
count = iov_iter_count(from);
/*
* If the direct IO write extends beyond EOF, we need to update the isize,
* which is protected by i_mutex, so we cannot unlock i_mutex in this
* case.
*/
if (pos + count <= inode->i_size) {
inode_unlock(inode);
relock = true;
} else if (iocb->ki_flags & IOCB_NOWAIT) {
return -EAGAIN;
}
down_read(&BTRFS_I(inode)->dio_sem);
written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops,
is_sync_kiocb(iocb));
up_read(&BTRFS_I(inode)->dio_sem);
if (relock)
inode_lock(inode);
if (written < 0 || !iov_iter_count(from))
return written;
buffered:
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
......@@ -1970,7 +2002,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from);
num_written = btrfs_direct_write(iocb, from);
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
......@@ -3484,9 +3516,54 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp);
}
/*
 * Validate a direct IO read request.
 *
 * @fs_info: filesystem info, passed through to check_direct_IO()
 * @iter:    iterator describing the destination buffers
 * @offset:  file offset of the read
 *
 * Returns a negative errno when check_direct_IO() rejects the request,
 * -EINVAL when two segments of an iovec-backed iterator start at the same
 * base address, and 0 otherwise.
 */
static int check_direct_read(struct btrfs_fs_info *fs_info,
			     const struct iov_iter *iter, loff_t offset)
{
	int ret;
	int i, seg;

	ret = check_direct_IO(fs_info, iter, offset);
	if (ret < 0)
		return ret;

	/*
	 * iter->iov is only valid for iovec-backed iterators. For any other
	 * iterator type the field aliases a different kind of segment array,
	 * so the duplicate scan below must not touch it.
	 */
	if (!iter_is_iovec(iter))
		return 0;

	/*
	 * Reject requests where two segments share the same base address.
	 * NOTE(review): presumably reading into the same buffer twice would
	 * produce checksum mismatches - confirm against the read path.
	 */
	for (seg = 0; seg < iter->nr_segs; seg++)
		for (i = seg + 1; i < iter->nr_segs; i++)
			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
				return -EINVAL;
	return 0;
}
/*
 * Perform a direct IO read through iomap.
 *
 * @iocb: kiocb describing the file and position
 * @to:   destination iterator
 *
 * When check_direct_read() rejects the request, no direct IO is attempted
 * and 0 is returned; btrfs_file_read_iter() then continues with a buffered
 * read. Otherwise the inode lock is held shared around iomap_dio_rw() and
 * its result is returned unchanged.
 */
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t nread = 0;

	if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos) == 0) {
		inode_lock_shared(inode);
		nread = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops,
				     &btrfs_dops, is_sync_kiocb(iocb));
		inode_unlock_shared(inode);
	}

	return nread;
}
/*
 * ->read_iter implementation for btrfs regular files.
 *
 * For IOCB_DIRECT requests, btrfs_direct_read() runs first; a negative
 * result is returned as is. In every other case the request is handed to
 * generic_file_buffered_read(), passing along the direct read result (0 if
 * no direct read was done).
 * NOTE(review): the third argument is presumably the byte count already
 * read - confirm against generic_file_buffered_read().
 */
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t dio_read;

	if (!(iocb->ki_flags & IOCB_DIRECT))
		return generic_file_buffered_read(iocb, to, 0);

	dio_read = btrfs_direct_read(iocb, to);
	if (dio_read < 0)
		return dio_read;

	return generic_file_buffered_read(iocb, to, dio_read);
}
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
......
......@@ -82,7 +82,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
inode = btrfs_iget_path(fs_info->sb, &location, root, path);
inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
......@@ -1190,13 +1190,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
if (block_group) {
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
block_group->start);
#endif
}
if (block_group)
btrfs_debug(root->fs_info,
"failed to write free space cache for block group %llu error %d",
block_group->start, ret);
}
btrfs_update_inode(trans, root, inode);
......@@ -1415,11 +1412,9 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
block_group, &block_group->io_ctl, trans);
if (ret) {
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(fs_info,
"failed to write free space cache for block group %llu",
block_group->start);
#endif
btrfs_debug(fs_info,
"failed to write free space cache for block group %llu error %d",
block_group->start, ret);
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
......@@ -3762,46 +3757,6 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
return ret;
}
/*
* Take a trimming reference on @cache by atomically incrementing
* cache->trimming. Paired with btrfs_put_block_group_trimming().
*/
void btrfs_get_block_group_trimming(struct btrfs_block_group *cache)
{
atomic_inc(&cache->trimming);
}
/*
* Drop a trimming reference on @block_group.
*
* If this drops block_group->trimming to zero and the block group is
* flagged as removed, the extent map found at block_group->start is removed
* from fs_info->mapping_tree and the block group's free space cache entries
* are freed.
*/
void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
/* Decrement the counter and sample ->removed under block_group->lock */
spin_lock(&block_group->lock);
cleanup = (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed);
spin_unlock(&block_group->lock);
if (cleanup) {
/* chunk_mutex is taken first, then the mapping tree write lock */
mutex_lock(&fs_info->chunk_mutex);
em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->start,
1);
BUG_ON(!em); /* logic error, can't happen */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
/* once for us and once for the tree */
free_extent_map(em);
free_extent_map(em);
/*
* We've left one free space entry and other tasks trimming
* this block group have left 1 entry each one. Free them.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
}
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
......@@ -3816,7 +3771,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
......@@ -3829,7 +3784,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
if (rem)
reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
out:
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
......@@ -3846,11 +3801,11 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
......@@ -3868,13 +3823,13 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
async);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
......@@ -4035,11 +3990,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
inode->i_size, true);
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(fs_info,
"failed to write free ino cache for root %llu",
root->root_key.objectid);
#endif
btrfs_debug(fs_info,
"failed to write free ino cache for root %llu error %d",
root->root_key.objectid, ret);
}
return ret;
......
This diff is collapsed.
......@@ -660,7 +660,7 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
key.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, &key, true);
new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
......@@ -748,9 +748,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
bool snapshot_force_cow = false;
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
if (atomic_read(&root->nr_swapfiles)) {
......@@ -771,27 +770,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
/*
* Force new buffered writes to reserve space even when NOCOW is
* possible. This is to avoid later writeback (running dealloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
btrfs_drew_read_lock(&root->snapshot_lock);
ret = btrfs_start_delalloc_snapshot(root);
if (ret)
goto dec_and_free;
/*
* All previous writes have started writeback in NOCOW mode, so now
* we force future writes to fallback to COW mode during snapshot
* creation.
*/
atomic_inc(&root->snapshot_force_cow);
snapshot_force_cow = true;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
/*
......@@ -806,7 +784,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->block_rsv, 8,
false);
if (ret)
goto dec_and_free;
goto free_pending;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
......@@ -848,11 +826,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
fail:
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (snapshot_force_cow)
atomic_dec(&root->snapshot_force_cow);
btrfs_drew_read_unlock(&root->snapshot_lock);
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
......@@ -983,6 +956,45 @@ static noinline int btrfs_mksubvol(const struct path *parent,
return error;
}
/*
 * Create a snapshot of @root named @name under @parent.
 *
 * Wraps btrfs_mksubvol() with the steps needed to keep NOCOW writes from
 * racing with snapshot creation: the root's snapshot_lock is held for read
 * throughout, delalloc is started, and snapshot_force_cow is raised while
 * ordered extents are waited for and the snapshot is created.
 *
 * Returns 0 on success, or the error from btrfs_start_delalloc_snapshot()
 * or btrfs_mksubvol().
 */
static noinline int btrfs_mksnapshot(const struct path *parent,
				     const char *name, int namelen,
				     struct btrfs_root *root,
				     bool readonly,
				     struct btrfs_qgroup_inherit *inherit)
{
	int ret;

	/*
	 * Make new buffered writes reserve space even if NOCOW would be
	 * possible, so that later writeback (running delalloc) does not fall
	 * back to COW mode and unexpectedly fail with ENOSPC.
	 */
	btrfs_drew_read_lock(&root->snapshot_lock);

	ret = btrfs_start_delalloc_snapshot(root);
	if (ret)
		goto out_unlock;

	/*
	 * Every earlier write has started writeback in NOCOW mode by now, so
	 * force all future writes to fall back to COW mode for the duration of
	 * the snapshot creation.
	 */
	atomic_inc(&root->snapshot_force_cow);

	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

	ret = btrfs_mksubvol(parent, name, namelen,
			     root, readonly, inherit);

	/* Only reached once the counter was raised above. */
	atomic_dec(&root->snapshot_force_cow);
out_unlock:
	btrfs_drew_read_unlock(&root->snapshot_lock);
	return ret;
}
/*
* When we're defragging a range, we don't want to kick it off again
* if it is really just waiting for delalloc to send it down.
......@@ -1762,7 +1774,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
*/
ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
ret = btrfs_mksnapshot(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
readonly, inherit);
}
......@@ -2127,10 +2139,7 @@ static noinline int search_ioctl(struct inode *inode,
/* search the root of the inode that was passed */
root = btrfs_grab_root(BTRFS_I(inode)->root);
} else {
key.objectid = sk->tree_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(info, &key, true);
root = btrfs_get_fs_root(info, sk->tree_id, true);
if (IS_ERR(root)) {
btrfs_free_path(path);
return PTR_ERR(root);
......@@ -2263,10 +2272,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
key.objectid = tree_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(info, &key, true);
root = btrfs_get_fs_root(info, tree_id, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
root = NULL;
......@@ -2359,10 +2365,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
if (dirid != upper_limit.objectid) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
key.objectid = treeid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, treeid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
......@@ -2421,7 +2424,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
goto out_put;
}
temp_inode = btrfs_iget(sb, &key2, root);
temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out_put;
......@@ -2608,9 +2611,7 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
/* Get root_item of inode's subvolume */
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, key.objectid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
......@@ -3278,7 +3279,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_dir_item *di;
struct btrfs_trans_handle *trans;
struct btrfs_path *path = NULL;
struct btrfs_key location;
struct btrfs_disk_key disk_key;
u64 objectid = 0;
u64 dir_id;
......@@ -3299,11 +3299,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
if (!objectid)
objectid = BTRFS_FS_TREE_OBJECTID;
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, &location, true);
new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
goto out;
......
......@@ -410,6 +410,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* The rwlock is held for write upon exit.
*/
void btrfs_tree_lock(struct extent_buffer *eb)
__acquires(&eb->lock)
{
u64 start_ns = 0;
......
......@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/rbtree.h>
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
......@@ -58,4 +59,57 @@ static inline bool has_single_bit_set(u64 n)
return is_power_of_two_u64(n);
}
/*
* Simple bytenr based rb_tree related structures
*
* Any structure that wants to use bytenr as its single search index should
* start with these members.
*/
struct rb_simple_node {
/* Links the entry into the rb tree */
struct rb_node rb_node;
/* Search key compared by rb_simple_search() and rb_simple_insert() */
u64 bytenr;
};
/*
 * Look up the entry keyed by @bytenr in a tree of rb_simple_node based
 * entries.
 *
 * Returns the matching rb_node, or NULL when no entry has that bytenr.
 */
static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
{
	struct rb_node *cur;

	for (cur = root->rb_node; cur; ) {
		struct rb_simple_node *item;

		item = rb_entry(cur, struct rb_simple_node, rb_node);
		if (bytenr == item->bytenr)
			return cur;

		/* Smaller keys live in the left subtree, larger in the right. */
		cur = (bytenr < item->bytenr) ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}
/*
 * Insert @node, keyed by @bytenr, into a tree of rb_simple_node based
 * entries.
 *
 * Returns NULL when the node was linked into the tree. When an entry with
 * the same bytenr already exists, that existing rb_node is returned and
 * the tree is left unchanged.
 */
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
					       struct rb_node *node)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	/* Descend to the empty slot where the new node belongs. */
	while (*link) {
		struct rb_simple_node *item;

		parent = *link;
		item = rb_entry(parent, struct rb_simple_node, rb_node);
		if (bytenr == item->bytenr)
			return parent;

		link = (bytenr < item->bytenr) ? &parent->rb_left :
						 &parent->rb_right;
	}

	rb_link_node(node, parent, link);
	rb_insert_color(node, root);

	return NULL;
}
#endif
......@@ -408,19 +408,14 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *parent_root)
{
struct super_block *sb = root->fs_info->sb;
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
parent_inode = btrfs_iget(sb, &key, parent_root);
parent_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, parent_root);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
child_inode = btrfs_iget(sb, &key, root);
child_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, root);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
......
......@@ -2622,6 +2622,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
bool need_rescan = false;
u32 level_size = 0;
u64 nums;
......@@ -2765,6 +2766,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
goto unlock;
}
++i_qgroups;
/*
* If we're doing a snapshot, and adding the snapshot to a new
* qgroup, the numbers are guaranteed to be incorrect.
*/
if (srcid)
need_rescan = true;
}
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
......@@ -2784,6 +2792,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->rfer = src->rfer - level_size;
dst->rfer_cmpr = src->rfer_cmpr - level_size;
/* Manually tweaking numbers certainly needs a rescan */
need_rescan = true;
}
for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
struct btrfs_qgroup *src;
......@@ -2802,6 +2813,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
need_rescan = true;
}
unlock:
......@@ -2809,6 +2821,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (need_rescan)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
return ret;
}
......
This diff is collapsed.
......@@ -210,7 +210,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
......@@ -223,10 +222,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
while (1) {
u64 root_objectid;
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
err = ret;
......@@ -250,10 +248,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
root_key.objectid = key.offset;
root_objectid = key.offset;
key.offset++;
root = btrfs_get_fs_root(fs_info, &root_key, false);
root = btrfs_get_fs_root(fs_info, root_objectid, false);
err = PTR_ERR_OR_ZERO(root);
if (err && err != -ENOENT) {
break;
......@@ -270,7 +268,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
break;
}
err = btrfs_del_orphan_item(trans, tree_root,
root_key.objectid);
root_objectid);
btrfs_end_transaction(trans);
if (err) {
btrfs_handle_fs_error(fs_info, err,
......
......@@ -647,13 +647,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
struct inode_fs_paths *ipath = NULL;
struct btrfs_root *local_root;
struct btrfs_key root_key;
struct btrfs_key key;
root_key.objectid = root;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
local_root = btrfs_get_fs_root(fs_info, &root_key, true);
local_root = btrfs_get_fs_root(fs_info, root, true);
if (IS_ERR(local_root)) {
ret = PTR_ERR(local_root);
goto err;
......@@ -3046,7 +3042,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *scrub_dev,
int num, u64 base, u64 length)
int num, u64 base, u64 length,
struct btrfs_block_group *cache)
{
struct btrfs_path *path, *ppath;
struct btrfs_fs_info *fs_info = sctx->fs_info;
......@@ -3284,6 +3281,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
break;
}
/*
* If our block group was removed in the meanwhile, just
* stop scrubbing since there is no point in continuing.
* Continuing would prevent reusing its device extents
* for new block groups for a long time.
*/
spin_lock(&cache->lock);
if (cache->removed) {
spin_unlock(&cache->lock);
ret = 0;
goto out;
}
spin_unlock(&cache->lock);
extent = btrfs_item_ptr(l, slot,
struct btrfs_extent_item);
flags = btrfs_extent_flags(l, extent);
......@@ -3328,13 +3339,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
&extent_dev,
&extent_mirror_num);
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
extent_logical +
extent_len - 1,
&sctx->csum_list, 1);
if (ret)
goto out;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
extent_logical + extent_len - 1,
&sctx->csum_list, 1);
if (ret)
goto out;
}
ret = scrub_extent(sctx, map, extent_logical, extent_len,
extent_physical, extent_dev, flags,
......@@ -3457,7 +3469,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
map->stripes[i].physical == dev_offset) {
ret = scrub_stripe(sctx, map, scrub_dev, i,
chunk_offset, length);
chunk_offset, length, cache);
if (ret)
goto out;
}
......@@ -3554,6 +3566,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache)
goto skip;
/*
* Make sure that while we are scrubbing the corresponding block
* group doesn't get its logical address and its device extents
* reused for another block group, which can possibly be of a
* different type and different profile. We do this to prevent
* false error detections and crashes due to bogus attempts to
* repair extents.
*/
spin_lock(&cache->lock);
if (cache->removed) {
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
}
btrfs_freeze_block_group(cache);
spin_unlock(&cache->lock);
/*
* we need to call btrfs_inc_block_group_ro() with scrubs_paused,
* to avoid deadlock caused by:
......@@ -3609,6 +3638,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
break;
......@@ -3695,6 +3725,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
spin_unlock(&cache->lock);
}
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
if (ret)
break;
......
......@@ -23,6 +23,7 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
#include "xattr.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
......@@ -4545,6 +4546,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
struct posix_acl_xattr_header dummy_acl;
/* Capabilities are emitted by finish_inode_if_needed */
if (!strncmp(name, XATTR_NAME_CAPS, name_len))
return 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
......@@ -4801,17 +4806,12 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
struct inode *inode;
struct page *page;
char *addr;
struct btrfs_key key;
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
ssize_t ret = 0;
key.objectid = sctx->cur_ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root);
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -5107,6 +5107,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
/*
 * Emit the capability xattr (XATTR_NAME_CAPS) of the inode currently being
 * processed, sctx->cur_ino, into the send stream.
 *
 * The xattr is looked up in sctx->send_root. When it exists, its value is
 * copied out of the leaf and passed to send_set_xattr() together with the
 * current path of the inode.
 *
 * Returns 0 when the inode has no capability xattr or when the xattr was
 * emitted successfully, and a negative errno on failure.
 */
static int send_capabilities(struct send_ctx *sctx)
{
	const size_t caps_name_len = strlen(XATTR_NAME_CAPS);
	struct btrfs_path *path;
	struct btrfs_dir_item *xattr_item;
	struct extent_buffer *eb;
	struct fs_path *ino_path = NULL;
	char *value = NULL;
	unsigned long value_offset;
	int value_len;
	int ret = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	xattr_item = btrfs_lookup_xattr(NULL, sctx->send_root, path,
					sctx->cur_ino, XATTR_NAME_CAPS,
					caps_name_len, 0);
	if (IS_ERR(xattr_item)) {
		ret = PTR_ERR(xattr_item);
		goto cleanup;
	}
	/* No capability xattr on this inode, nothing to emit. */
	if (!xattr_item)
		goto cleanup;

	eb = path->nodes[0];
	value_len = btrfs_dir_data_len(eb, xattr_item);

	/* Allocate both objects first, then check them with a single test. */
	ino_path = fs_path_alloc();
	value = kmalloc(value_len, GFP_KERNEL);
	if (!(ino_path && value)) {
		ret = -ENOMEM;
		goto cleanup;
	}

	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, ino_path);
	if (ret < 0)
		goto cleanup;

	/* The value is read from right behind the item header and the name. */
	value_offset = (unsigned long)(xattr_item + 1);
	value_offset += btrfs_dir_name_len(eb, xattr_item);
	read_extent_buffer(eb, value, value_offset, value_len);

	ret = send_set_xattr(sctx, ino_path, XATTR_NAME_CAPS, caps_name_len,
			     value, value_len);

cleanup:
	kfree(value);
	fs_path_free(ino_path);
	btrfs_free_path(path);
	return ret;
}
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
......@@ -5972,6 +6030,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
ret = send_capabilities(sctx);
if (ret < 0)
goto out;
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
......@@ -7021,7 +7083,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root;
struct btrfs_key key;
struct send_ctx *sctx = NULL;
u32 i;
u64 *clone_sources_tmp = NULL;
......@@ -7143,11 +7204,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
for (i = 0; i < arg->clone_sources_count; i++) {
key.objectid = clone_sources_tmp[i];
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
clone_root = btrfs_get_fs_root(fs_info, &key, true);
clone_root = btrfs_get_fs_root(fs_info,
clone_sources_tmp[i], true);
if (IS_ERR(clone_root)) {
ret = PTR_ERR(clone_root);
goto out;
......@@ -7178,11 +7236,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
if (arg->parent_root) {
key.objectid = arg->parent_root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
sctx->parent_root = btrfs_get_fs_root(fs_info, &key, true);
sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
true);
if (IS_ERR(sctx->parent_root)) {
ret = PTR_ERR(sctx->parent_root);
goto out;
......
......@@ -626,6 +626,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket = NULL;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
struct btrfs_trans_handle *trans;
u64 bytes_needed;
u64 reclaim_bytes = 0;
......@@ -688,6 +689,11 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs_rsv->lock);
reclaim_bytes += delayed_refs_rsv->reserved;
spin_unlock(&delayed_refs_rsv->lock);
spin_lock(&trans_rsv->lock);
reclaim_bytes += trans_rsv->reserved;
spin_unlock(&trans_rsv->lock);
if (reclaim_bytes >= bytes_needed)
goto commit;
bytes_needed -= reclaim_bytes;
......@@ -856,6 +862,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
/*
 * Try to satisfy @ticket directly out of the global block reserve.
 *
 * @fs_info:    filesystem whose global_block_rsv is the donor
 * @space_info: space_info the ticket is queued on; stealing is refused when
 *              the global reserve belongs to a different space_info
 * @ticket:     the waiting reservation to satisfy
 *
 * The steal only happens when, after handing out ticket->bytes, the global
 * reserve would still hold at least div_factor(global_rsv->size, 1) bytes
 * (presumably one tenth of its size - confirm against div_factor()).
 *
 * On success the ticket is dequeued, its byte count is cleared, its waiter is
 * woken and space_info->tickets_id is bumped.
 *
 * Returns true if the ticket was satisfied, false otherwise.
 *
 * NOTE(review): this manipulates the ticket list and space_info counters
 * without taking space_info->lock itself; presumably the caller holds it -
 * confirm against maybe_fail_all_tickets().
 */
static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
				  struct btrfs_space_info *space_info,
				  struct reserve_ticket *ticket)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != space_info)
		return false;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, 1);
	if (global_rsv->reserved < min_bytes + ticket->bytes) {
		spin_unlock(&global_rsv->lock);
		return false;
	}
	global_rsv->reserved -= ticket->bytes;
	/*
	 * Dequeue through remove_ticket() rather than a bare list_del_init():
	 * the ticket's size was added to space_info->reclaim_size when it was
	 * queued, and that accounting has to be undone or the counter leaks.
	 * This must happen before ticket->bytes is cleared, since the ticket's
	 * byte count is the amount that was accounted.
	 */
	remove_ticket(space_info, ticket);
	ticket->bytes = 0;
	wake_up(&ticket->wait);
	space_info->tickets_id++;
	/* The reserve was just drained below its target, so it is not full. */
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	return true;
}
/*
* maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
* @fs_info - fs_info for this fs
......@@ -888,6 +922,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (ticket->steal &&
steal_from_global_rsv(fs_info, space_info, ticket))
return true;
/*
* may_commit_transaction will avoid committing the transaction
* if it doesn't feel like the space reclaimed by the commit
......@@ -1104,6 +1142,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
case BTRFS_RESERVE_FLUSH_ALL_STEAL:
wait_reserve_ticket(fs_info, space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
......@@ -1125,11 +1164,17 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
ret = ticket->error;
if (ticket->bytes || ticket->error) {
/*
* Need to delete here for priority tickets. For regular tickets
* either the async reclaim job deletes the ticket from the list
* or we delete it ourselves at wait_reserve_ticket().
* We were a priority ticket, so we need to delete ourselves
* from the list. Because we could have other priority tickets
* behind us that require less space, run
* btrfs_try_granting_tickets() to see if their reservations can
* now be made.
*/
remove_ticket(space_info, ticket);
if (!list_empty(&ticket->list)) {
remove_ticket(space_info, ticket);
btrfs_try_granting_tickets(fs_info, space_info);
}
if (!ret)
ret = -ENOSPC;
}
......@@ -1145,6 +1190,16 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
return ret;
}
/*
 * Tell whether @flush is one of the flush states that are handled by the
 * ordinary flushing code, i.e. BTRFS_RESERVE_FLUSH_ALL or
 * BTRFS_RESERVE_FLUSH_ALL_STEAL.
 */
static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
{
	switch (flush) {
	case BTRFS_RESERVE_FLUSH_ALL:
	case BTRFS_RESERVE_FLUSH_ALL_STEAL:
		return true;
	default:
		return false;
	}
}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
......@@ -1175,8 +1230,17 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
ret = -ENOSPC;
used = btrfs_space_info_used(space_info, true);
pending_tickets = !list_empty(&space_info->tickets) ||
!list_empty(&space_info->priority_tickets);
/*
* We don't want NO_FLUSH allocations to jump everybody, they can
* generally handle ENOSPC in a different way, so treat them the same as
* normal flushers when it comes to skipping pending tickets.
*/
if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
pending_tickets = !list_empty(&space_info->tickets) ||
!list_empty(&space_info->priority_tickets);
else
pending_tickets = !list_empty(&space_info->priority_tickets);
/*
* Carry on if we have enough space (short-circuit) OR call
......@@ -1198,12 +1262,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
ASSERT(space_info->reclaim_size >= 0);
ticket.bytes = orig_bytes;
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
if (flush == BTRFS_RESERVE_FLUSH_ALL) {
ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
if (flush == BTRFS_RESERVE_FLUSH_ALL ||
flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;
......
......@@ -78,6 +78,7 @@ struct btrfs_space_info {
/*
 * A pending metadata space reservation that could not be satisfied right
 * away and is queued on a space_info ticket list until it is granted or
 * failed.
 */
struct reserve_ticket {
/* Bytes still needed; set to the requested size, cleared once granted. */
u64 bytes;
/* Error reported back to the reserver; 0 while no failure was recorded. */
int error;
/* Set for BTRFS_RESERVE_FLUSH_ALL_STEAL: may be served from the global rsv. */
bool steal;
/* Entry in one of the space_info ticket lists. */
struct list_head list;
/* Woken when the ticket has been dealt with. */
wait_queue_head_t wait;
};
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -193,8 +193,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor);
unsigned int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
......
......@@ -957,10 +957,6 @@ static int check_dev_item(struct extent_buffer *leaf,
return 0;
}
/* Inode item error output has the same format as dir_item_err() */
#define inode_item_err(eb, slot, fmt, ...) \
dir_item_err(eb, slot, fmt, __VA_ARGS__)
static int check_inode_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
......
......@@ -35,7 +35,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
goto out;
path = btrfs_alloc_path();
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment