Commit 1f3c79a2 authored by Liu Hui's avatar Liu Hui Committed by Chris Mason

Btrfs: Fix free block discard calls down to the block layer

This is a patch to fix discard semantic to make Btrfs work with FTL and SSD.
We can improve FTL's performance by telling it which sectors are freed by file
system. But if we don't tell FTL the information of free sectors in proper
time, the transaction mechanism of Btrfs will be destroyed and Btrfs could not
roll back the previous transaction under the power loss condition.

There are some problems in the old implementation:
1, In __free_extent(), the pinned down extents should not be discarded.
2, In free_extents(), the free extents are all pinned, so they need to
be discarded in transaction committing time instead of free_extents().
3, The reserved extent used by log tree should be discard too.

This patch change discard behavior as follows:
1, For the extents which need to be free at once,
   we discard them in update_block_group().
2, Delay discarding the pinned extent in btrfs_finish_extent_commit()
   when committing transaction.
3, Remove discarding from free_extents() and __free_extent()
4, Add discard interface into btrfs_free_reserved_extent()
5, Discard sectors before updating the free space cache, otherwise,
   FTL will destroy file system data.
parent ec051c0f
...@@ -876,6 +876,38 @@ static void btrfs_issue_discard(struct block_device *bdev, ...@@ -876,6 +876,38 @@ static void btrfs_issue_discard(struct block_device *bdev,
} }
#endif #endif
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
int ret;
u64 map_length = num_bytes;
struct btrfs_multi_bio *multi = NULL;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
bytenr, &map_length, &multi, 0);
if (!ret) {
struct btrfs_bio_stripe *stripe = multi->stripes;
int i;
if (map_length > num_bytes)
map_length = num_bytes;
for (i = 0; i < multi->num_stripes; i++, stripe++) {
btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
map_length);
}
kfree(multi);
}
return ret;
#else
return 0;
#endif
}
static int noinline free_extents(struct btrfs_trans_handle *trans, static int noinline free_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, struct btrfs_root *extent_root,
struct list_head *del_list) struct list_head *del_list)
...@@ -1069,10 +1101,6 @@ static int noinline free_extents(struct btrfs_trans_handle *trans, ...@@ -1069,10 +1101,6 @@ static int noinline free_extents(struct btrfs_trans_handle *trans,
for (pos = cur, n = pos->next; pos != end; for (pos = cur, n = pos->next; pos != end;
pos = n, n = pos->next) { pos = n, n = pos->next) {
struct pending_extent_op *tmp; struct pending_extent_op *tmp;
#ifdef BIO_RW_DISCARD
u64 map_length;
struct btrfs_multi_bio *multi = NULL;
#endif
tmp = list_entry(pos, struct pending_extent_op, list); tmp = list_entry(pos, struct pending_extent_op, list);
/* /*
...@@ -1084,28 +1112,6 @@ static int noinline free_extents(struct btrfs_trans_handle *trans, ...@@ -1084,28 +1112,6 @@ static int noinline free_extents(struct btrfs_trans_handle *trans,
tmp->del); tmp->del);
BUG_ON(ret); BUG_ON(ret);
#ifdef BIO_RW_DISCARD
map_length = tmp->num_bytes;
ret = btrfs_map_block(&info->mapping_tree, READ,
tmp->bytenr, &map_length, &multi,
0);
if (!ret) {
struct btrfs_bio_stripe *stripe;
int i;
stripe = multi->stripes;
if (map_length > tmp->num_bytes)
map_length = tmp->num_bytes;
for (i = 0; i < multi->num_stripes;
i++, stripe++)
btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
map_length);
kfree(multi);
}
#endif
list_del_init(&tmp->list); list_del_init(&tmp->list);
unlock_extent(&info->extent_ins, tmp->bytenr, unlock_extent(&info->extent_ins, tmp->bytenr,
tmp->bytenr + tmp->num_bytes - 1, tmp->bytenr + tmp->num_bytes - 1,
...@@ -1965,6 +1971,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, ...@@ -1965,6 +1971,11 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
if (mark_free) { if (mark_free) {
int ret; int ret;
ret = btrfs_discard_extent(root, bytenr,
num_bytes);
WARN_ON(ret);
ret = btrfs_add_free_space(cache, bytenr, ret = btrfs_add_free_space(cache, bytenr,
num_bytes); num_bytes);
WARN_ON(ret); WARN_ON(ret);
...@@ -2104,8 +2115,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -2104,8 +2115,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
EXTENT_DIRTY); EXTENT_DIRTY);
if (ret) if (ret)
break; break;
ret = btrfs_discard_extent(root, start, end + 1 - start);
btrfs_update_pinned_extents(root, start, end + 1 - start, 0); btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS); clear_extent_dirty(unpin, start, end, GFP_NOFS);
if (need_resched()) { if (need_resched()) {
mutex_unlock(&root->fs_info->pinned_mutex); mutex_unlock(&root->fs_info->pinned_mutex);
cond_resched(); cond_resched();
...@@ -2113,7 +2128,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -2113,7 +2128,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
} }
} }
mutex_unlock(&root->fs_info->pinned_mutex); mutex_unlock(&root->fs_info->pinned_mutex);
return 0; return ret;
} }
static int finish_current_insert(struct btrfs_trans_handle *trans, static int finish_current_insert(struct btrfs_trans_handle *trans,
...@@ -2458,10 +2473,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, ...@@ -2458,10 +2473,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
if (refs == 0) { if (refs == 0) {
u64 super_used; u64 super_used;
u64 root_used; u64 root_used;
#ifdef BIO_RW_DISCARD
u64 map_length = num_bytes;
struct btrfs_multi_bio *multi = NULL;
#endif
if (pin) { if (pin) {
mutex_lock(&root->fs_info->pinned_mutex); mutex_lock(&root->fs_info->pinned_mutex);
...@@ -2496,25 +2507,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, ...@@ -2496,25 +2507,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
ret = update_block_group(trans, root, bytenr, num_bytes, 0, ret = update_block_group(trans, root, bytenr, num_bytes, 0,
mark_free); mark_free);
BUG_ON(ret); BUG_ON(ret);
#ifdef BIO_RW_DISCARD
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
bytenr, &map_length, &multi, 0);
if (!ret) {
struct btrfs_bio_stripe *stripe = multi->stripes;
int i;
if (map_length > num_bytes)
map_length = num_bytes;
for (i = 0; i < multi->num_stripes; i++, stripe++) {
btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
map_length);
}
kfree(multi);
}
#endif
} }
btrfs_free_path(path); btrfs_free_path(path);
finish_current_insert(trans, extent_root, 0); finish_current_insert(trans, extent_root, 0);
...@@ -3112,16 +3104,21 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, ...@@ -3112,16 +3104,21 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
{ {
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
int ret = 0;
cache = btrfs_lookup_block_group(root->fs_info, start); cache = btrfs_lookup_block_group(root->fs_info, start);
if (!cache) { if (!cache) {
printk(KERN_ERR "Unable to find block group for %Lu\n", start); printk(KERN_ERR "Unable to find block group for %Lu\n", start);
return -ENOSPC; return -ENOSPC;
} }
ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
put_block_group(cache); put_block_group(cache);
update_reserved_extents(root, start, len, 0); update_reserved_extents(root, start, len, 0);
return 0;
return ret;
} }
int btrfs_reserve_extent(struct btrfs_trans_handle *trans, int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment