Commit 51eab603 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This includes a fairly large change from Josef around data writeback
  completion.  Before, the writeback wasn't completed until the metadata
  insertions for the extent were done, and this made for fairly large
  latency spikes on the last page of each ordered extent.

  We already had a separate mechanism for tracking pending metadata
  insertions, so Josef just needed to tweak things a little to end
  writeback earlier on the page.  Overall it makes us much friendly to
  memory reclaim and lowers latencies quite a lot for synchronous IO.

  Jan Schmidt has finished some background work required to track btree
  blocks as they go through changes in ownership.  It's the missing
  piece he needed for both btrfs send/receive and subvolume quotas.
  Neither of those are ready yet, but the new tracking code is included
  here.  Most of the time, the new code is off.  It is only used by
  scrub and other backref walkers.

  Stefan Behrens has added io failure tracking.  This includes counters
  for which drives are causing the most trouble so the admin (or an
  automated tool) can choose to kick them out.  We're tracking IO
  errors, crc errors, and generation checks we do on each metadata
  block.

  RAID5/6 did miss the cut this time because I'm having trouble with
  corruptions.  I'll nail it down next week and post as a beta testing
  before 3.6"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (58 commits)
  Btrfs: fix tree mod log rewinded level and rewinding of moved keys
  Btrfs: fix tree mod log del_ptr
  Btrfs: add tree_mod_dont_log helper
  Btrfs: add missing spin_lock for insertion into tree mod log
  Btrfs: add inodes before dropping the extent lock in find_all_leafs
  Btrfs: use delayed ref sequence numbers for all fs-tree updates
  Btrfs: fix false positive in check-integrity on unmount
  Btrfs: fix runtime warning in check-integrity check data mode
  Btrfs: set ioprio of scrub readahead to idle
  Btrfs: fix return code in drop_objectid_items
  Btrfs: check to see if the inode is in the log before fsyncing
  Btrfs: return value of btrfs_read_buffer is checked correctly
  Btrfs: read device stats on mount, write modified ones during commit
  Btrfs: add ioctl to get and reset the device stats
  Btrfs: add device counters for detected IO and checksum errors
  btrfs: Drop unused function btrfs_abort_devices()
  Btrfs: fix the same inode id problem when doing auto defragment
  Btrfs: fall back to non-inline if we don't have enough space
  Btrfs: fix how we deal with the orphan block rsv
  Btrfs: convert the inode bit field to use the actual bit operations
  ...
parents 419f4319 1e20932a
......@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
} else {
cache_no_acl(inode);
}
} else {
cache_no_acl(inode);
}
failed:
posix_acl_release(acl);
......
This diff is collapsed.
......@@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **roots);
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
......
......@@ -24,6 +24,20 @@
#include "ordered-data.h"
#include "delayed-inode.h"
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
/* in memory btrfs inode */
struct btrfs_inode {
/* which subvolume this inode belongs to */
......@@ -57,9 +71,6 @@ struct btrfs_inode {
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
/* for keeping track of orphaned inodes */
struct list_head i_orphan;
/* list of all the delalloc inodes in the FS. There are times we need
* to write all the delalloc pages to disk, and this list is used
* to walk them all.
......@@ -78,14 +89,13 @@ struct btrfs_inode {
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
u64 generation;
/* sequence number for NFS changes */
u64 sequence;
/*
* transid of the trans_handle that last modified this inode
*/
......@@ -144,23 +154,10 @@ struct btrfs_inode {
unsigned outstanding_extents;
unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
unsigned ordered_data_close:1;
unsigned orphan_meta_reserved:1;
unsigned dummy_inode:1;
unsigned in_defrag:1;
unsigned delalloc_meta_reserved:1;
/*
* always compress this one file
*/
unsigned force_compress:4;
unsigned force_compress;
struct btrfs_delayed_node *delayed_node;
......@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
return false;
}
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
mutex_lock(&root->log_mutex);
if (BTRFS_I(inode)->logged_trans == generation &&
BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
ret = 1;
mutex_unlock(&root->log_mutex);
return ret;
}
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
......@@ -823,6 +826,14 @@ struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
struct btrfs_dev_stats_item {
/*
* grow this item struct at the end for future enhancements and keep
* the existing values unchanged
*/
__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
......@@ -1129,6 +1140,15 @@ struct btrfs_fs_info {
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
......@@ -1375,7 +1395,7 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t orphan_lock;
struct list_head orphan_list;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
......@@ -1507,6 +1527,12 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* Persistantly stores the io stats in the device tree.
* One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
*/
#define BTRFS_DEV_STATS_KEY 249
/*
* string items are for debugging. They just store a short string of
* data in the FS
......@@ -2415,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
return btrfs_item_size(eb, e) - offset;
}
/* btrfs_dev_stats_item */
static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
read_extent_buffer(eb, &val,
offsetof(struct btrfs_dev_stats_item, values) +
((unsigned long)ptr) + (index * sizeof(u64)),
sizeof(val));
return val;
}
static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
struct btrfs_dev_stats_item *ptr,
int index, u64 val)
{
write_extent_buffer(eb, &val,
offsetof(struct btrfs_dev_stats_item, values) +
((unsigned long)ptr) + (index * sizeof(u64)),
sizeof(val));
}
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
......@@ -2496,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size, int for_cow);
u64 hint, u64 empty_size);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref, int for_cow);
u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
......@@ -2659,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
......@@ -3098,4 +3150,23 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/* delayed seq elem */
struct seq_list {
struct list_head list;
u64 seq;
u32 flags;
};
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
(s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
#endif
......@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata(
return ret;
} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
spin_lock(&BTRFS_I(inode)->lock);
if (BTRFS_I(inode)->delalloc_meta_reserved) {
BTRFS_I(inode)->delalloc_meta_reserved = 0;
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
......@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
btrfs_set_stack_inode_generation(inode_item,
BTRFS_I(inode)->generation);
btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
......@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
......
......@@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
......@@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
......@@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!need_ref_seq(for_cow, ref_root) &&
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......@@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!need_ref_seq(for_cow, ref_root) &&
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......
......@@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
struct seq_list {
struct list_head list;
u64 seq;
};
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
......@@ -229,25 +224,6 @@ btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
......
......@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
......@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->orphan_inodes, 0);
root->log_batch = 0;
root->log_transid = 0;
root->last_log_commit = 0;
......@@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL,
0, 0, 0, 0);
0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
......@@ -1914,11 +1914,14 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
......@@ -1931,12 +1934,14 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
fs_info->trans_no_join = 0;
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
......@@ -2001,7 +2006,8 @@ int open_ctree(struct super_block *sb,
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
......@@ -2353,6 +2359,13 @@ int open_ctree(struct super_block *sb,
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
ret);
goto fail_block_groups;
}
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
......@@ -2556,18 +2569,19 @@ int open_ctree(struct super_block *sb,
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
char b[BDEVNAME_SIZE];
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
printk_ratelimited(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
bdevname(bh->b_bdev, b));
"I/O error on %s\n", device->name);
/* note, we dont' set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
}
unlock_buffer(bh);
put_bh(bh);
......@@ -2682,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device,
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
}
/*
......@@ -2740,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
}
if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
if (!bio_flagged(bio, BIO_EOPNOTSUPP))
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
/* drop the reference from the wait == 0 run */
......@@ -2902,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
return ret;
}
/* Kill all outstanding I/O */
void btrfs_abort_devices(struct btrfs_root *root)
{
struct list_head *head;
struct btrfs_device *dev;
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
blk_abort_queue(dev->bdev->bd_disk->queue);
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
}
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
spin_lock(&fs_info->fs_roots_radix_lock);
......@@ -3671,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
return 0;
}
static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state)
{
struct super_block *sb = page->mapping->host->i_sb;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
btrfs_error(fs_info, -EIO,
"Error occured while writing out btree at %llu", start);
return -EIO;
}
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
......@@ -3689,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = {
.submit_bio_hook = btree_submit_bio_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
.writepage_io_failed_hook = btree_writepage_io_failed_hook,
};
......@@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
......
......@@ -3578,7 +3578,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
......@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode)
BTRFS_I(inode)->outstanding_extents--;
if (BTRFS_I(inode)->outstanding_extents == 0 &&
BTRFS_I(inode)->delalloc_meta_reserved) {
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
drop_inode_space = 1;
BTRFS_I(inode)->delalloc_meta_reserved = 0;
}
/*
* If we have more or the same amount of outsanding extents than we have
......@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
* Add an item to reserve for updating the inode when we complete the
* delalloc io.
*/
if (!BTRFS_I(inode)->delalloc_meta_reserved) {
if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
nr_extents++;
extra_reserve = 1;
}
......@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) {
BTRFS_I(inode)->delalloc_meta_reserved = 1;
set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
nr_extents--;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
......@@ -5217,7 +5218,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref, int for_cow)
u64 parent, int last_ref)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;
......@@ -5227,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BTRFS_DROP_DELAYED_REF, NULL, 0);
BUG_ON(ret); /* -ENOMEM */
}
......@@ -6249,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size, int for_cow)
u64 hint, u64 empty_size)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
......@@ -6297,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op, for_cow);
extent_op, 0);
BUG_ON(ret); /* -ENOMEM */
}
return buf;
......@@ -6715,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
......
This diff is collapsed.
......@@ -39,6 +39,7 @@
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_IOERR 8
#define EXTENT_BUFFER_DUMMY 9
/* these are flags for extent_clear_unlock_delalloc */
#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
......@@ -75,9 +76,6 @@ struct extent_io_ops {
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
......@@ -225,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
......@@ -265,6 +265,8 @@ void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len);
void free_extent_buffer(struct extent_buffer *eb);
......
......@@ -65,6 +65,21 @@ struct inode_defrag {
int cycled;
};
static int __compare_inode_defrag(struct inode_defrag *defrag1,
struct inode_defrag *defrag2)
{
if (defrag1->root > defrag2->root)
return 1;
else if (defrag1->root < defrag2->root)
return -1;
else if (defrag1->ino > defrag2->ino)
return 1;
else if (defrag1->ino < defrag2->ino)
return -1;
else
return 0;
}
/* pop a record for an inode into the defrag tree. The lock
* must be held already
*
......@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
int ret;
p = &root->fs_info->defrag_inodes.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (defrag->ino < entry->ino)
ret = __compare_inode_defrag(defrag, entry);
if (ret < 0)
p = &parent->rb_left;
else if (defrag->ino > entry->ino)
else if (ret > 0)
p = &parent->rb_right;
else {
/* if we're reinserting an entry for
......@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
goto exists;
}
}
BTRFS_I(inode)->in_defrag = 1;
set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
return;
......@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (btrfs_fs_closing(root->fs_info))
return 0;
if (BTRFS_I(inode)->in_defrag)
if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
return 0;
if (trans)
......@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag->root = root->root_key.objectid;
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
__btrfs_add_inode_defrag(inode, defrag);
else
kfree(defrag);
......@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
/*
* must be called with the defrag_inodes lock held
*/
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
u64 root, u64 ino,
struct rb_node **next)
{
struct inode_defrag *entry = NULL;
struct inode_defrag tmp;
struct rb_node *p;
struct rb_node *parent = NULL;
int ret;
tmp.ino = ino;
tmp.root = root;
p = info->defrag_inodes.rb_node;
while (p) {
parent = p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (ino < entry->ino)
ret = __compare_inode_defrag(&tmp, entry);
if (ret < 0)
p = parent->rb_left;
else if (ino > entry->ino)
else if (ret > 0)
p = parent->rb_right;
else
return entry;
}
if (next) {
while (parent && ino > entry->ino) {
while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
entry = rb_entry(parent, struct inode_defrag, rb_node);
}
......@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
u64 first_ino = 0;
u64 root_objectid = 0;
int num_defrag;
int defrag_batch = 1024;
......@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
n = NULL;
/* find an inode to defrag */
defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
first_ino, &n);
if (!defrag) {
if (n)
defrag = rb_entry(n, struct inode_defrag, rb_node);
else if (first_ino) {
if (n) {
defrag = rb_entry(n, struct inode_defrag,
rb_node);
} else if (root_objectid || first_ino) {
root_objectid = 0;
first_ino = 0;
continue;
} else {
......@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
/* remove it from the rbtree */
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
if (btrfs_fs_closing(fs_info))
......@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
goto next;
/* do a chunk of defrag */
BTRFS_I(inode)->in_defrag = 0;
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
range.start = defrag->last_offset;
num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
defrag_batch);
......@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
goto out;
}
BTRFS_I(inode)->sequence++;
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
......@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* flush down new bytes that may have been written if the
* application were using truncate to replace a file in place.
*/
if (BTRFS_I(inode)->ordered_data_close) {
BTRFS_I(inode)->ordered_data_close = 0;
if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(inode->i_mapping);
......@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_btrfs_sync_file(file, datasync);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
/* we wait first, since the writeback may change the inode */
/*
* we wait first, since the writeback may change the inode, also wait
* ordered range does a filemape_write_and_wait_range which is why we
* don't do it above like other file systems.
*/
root->log_batch++;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_wait_ordered_range(inode, start, end);
root->log_batch++;
/*
......@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* syncing
*/
smp_mb();
if (BTRFS_I(inode)->last_trans <=
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
mutex_unlock(&inode->i_mutex);
......
......@@ -33,6 +33,8 @@
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
......@@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
return 0;
}
/*
* Since we attach pinned extents after the fact we can have contiguous sections
* of free space that are split up in entries. This poses a problem with the
* tree logging stuff since it could have allocated across what appears to be 2
* entries since we would have merged the entries when adding the pinned extents
* back to the free space cache. So run through the space cache that we just
* loaded and merge contiguous entries. This will make the log replay stuff not
* blow up and it will make for nicer allocator behavior.
*/
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *e, *prev = NULL;
struct rb_node *n;
again:
spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
e = rb_entry(n, struct btrfs_free_space, offset_index);
if (!prev)
goto next;
if (e->bitmap || prev->bitmap)
goto next;
if (prev->offset + prev->bytes == e->offset) {
unlink_free_space(ctl, prev);
unlink_free_space(ctl, e);
prev->bytes += e->bytes;
kmem_cache_free(btrfs_free_space_cachep, e);
link_free_space(ctl, prev);
prev = NULL;
spin_unlock(&ctl->tree_lock);
goto again;
}
next:
prev = e;
}
spin_unlock(&ctl->tree_lock);
}
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
......@@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
......@@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
ret = filemap_write_and_wait(inode->i_mapping);
if (ret)
goto out;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
......
This diff is collapsed.
......@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
btrfs_update_iflags(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
......@@ -367,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root,
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0, 0);
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
......@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name)
if (dev->name) {
strncpy(di_args->path, dev->name, sizeof(di_args->path));
else
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
}
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
......@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
/*
......@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
up_read(&info->groups_sem);
}
user_dest = (struct btrfs_ioctl_space_info *)
user_dest = (struct btrfs_ioctl_space_info __user *)
(arg + sizeof(struct btrfs_ioctl_space_args));
if (copy_to_user(user_dest, dest_orig, alloc_size))
......@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
return ret;
}
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
void __user *arg, int reset_after_read)
{
struct btrfs_ioctl_get_dev_stats *sa;
int ret;
if (reset_after_read && !capable(CAP_SYS_ADMIN))
return -EPERM;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
ret = btrfs_get_dev_stats(root, sa, reset_after_read);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
kfree(sa);
return ret;
}
static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
int ret = 0;
......@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
}
}
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
......@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
......@@ -3291,6 +3322,7 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
mnt_drop_write(file->f_path.mnt);
return ret;
}
......@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(root, NULL);
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
......@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
return btrfs_ioctl_balance(file, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 0);
case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 1);
}
return -ENOTTY;
......
This diff is collapsed.
This diff is collapsed.
......@@ -74,6 +74,12 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
* has done its due diligence in updating
* the isize. */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
......@@ -113,6 +119,8 @@ struct btrfs_ordered_extent {
/* a per root list of all the pending ordered extents */
struct list_head root_extent_list;
struct btrfs_work work;
};
......@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size);
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
if (ret)
goto out;
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment