Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
27263e28
Commit
27263e28
authored
Jan 16, 2012
by
Chris Mason
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'restriper' of
git://github.com/idryomov/btrfs-unstable
into integration
parents
64e05503
19a39dce
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1385 additions
and
98 deletions
+1385
-98
fs/btrfs/ctree.h
fs/btrfs/ctree.h
+193
-12
fs/btrfs/disk-io.c
fs/btrfs/disk-io.c
+15
-3
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.c
+103
-25
fs/btrfs/ioctl.c
fs/btrfs/ioctl.c
+210
-16
fs/btrfs/ioctl.h
fs/btrfs/ioctl.h
+54
-0
fs/btrfs/super.c
fs/btrfs/super.c
+9
-2
fs/btrfs/volumes.c
fs/btrfs/volumes.c
+751
-39
fs/btrfs/volumes.h
fs/btrfs/volumes.h
+50
-1
No files found.
fs/btrfs/ctree.h
View file @
27263e28
...
...
@@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
...
...
@@ -692,6 +695,54 @@ struct btrfs_root_ref {
__le16
name_len
;
}
__attribute__
((
__packed__
));
/*
 * Little-endian, packed form of one set of balance arguments.  Its
 * members mirror struct btrfs_balance_args field for field; the two are
 * converted by btrfs_disk_balance_args_to_cpu() and
 * btrfs_cpu_balance_args_to_disk().
 */
struct btrfs_disk_balance_args {
	/*
	 * profiles to operate on, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__le64 profiles;

	/* usage filter */
	__le64 usage;

	/* devid filter */
	__le64 devid;

	/* devid subset filter [pstart..pend) */
	__le64 pstart;
	__le64 pend;

	/* btrfs virtual address space subset filter [vstart..vend) */
	__le64 vstart;
	__le64 vend;

	/*
	 * profile to convert to, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__le64 target;

	/* BTRFS_BALANCE_ARGS_* */
	__le64 flags;

	/* not assigned by the conversion helpers, which zero it */
	__le64 unused[8];
} __attribute__ ((__packed__));
/*
 * store balance parameters to disk so that balance can be properly
 * resumed after crash or unmount
 *
 * One set of arguments is kept per block group type (data, metadata,
 * system); each is read/written as a whole through the
 * btrfs_balance_{data,meta,sys}() and btrfs_set_balance_*() helpers.
 */
struct btrfs_balance_item {
	/* BTRFS_BALANCE_* */
	__le64 flags;

	/* per-type balance arguments */
	struct btrfs_disk_balance_args data;
	struct btrfs_disk_balance_args meta;
	struct btrfs_disk_balance_args sys;

	__le64 unused[4];
} __attribute__ ((__packed__));
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
...
...
@@ -751,15 +802,33 @@ struct btrfs_csum_item {
}
__attribute__
((
__packed__
));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES 5
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
BTRFS_BLOCK_GROUP_SYSTEM | \
BTRFS_BLOCK_GROUP_METADATA)
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
* fs_info->avail_*_alloc_bits (in-memory) and balance item fields
* (on-disk). The corresponding on-disk bit in chunk.type is reserved
* to avoid remappings between two formats in future.
*/
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
struct
btrfs_block_group_item
{
__le64
used
;
__le64
chunk_objectid
;
...
...
@@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
struct
reloc_control
;
struct
btrfs_device
;
struct
btrfs_fs_devices
;
struct
btrfs_balance_control
;
struct
btrfs_delayed_root
;
struct
btrfs_fs_info
{
u8
fsid
[
BTRFS_FSID_SIZE
];
...
...
@@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
spinlock_t
ref_cache_lock
;
u64
total_ref_cache_size
;
/*
* these three are in extended format (availability of single
* chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
* types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
*/
u64
avail_data_alloc_bits
;
u64
avail_metadata_alloc_bits
;
u64
avail_system_alloc_bits
;
u64
data_alloc_profile
;
u64
metadata_alloc_profile
;
u64
system_alloc_profile
;
/* restriper state */
spinlock_t
balance_lock
;
struct
mutex
balance_mutex
;
atomic_t
balance_running
;
atomic_t
balance_pause_req
;
atomic_t
balance_cancel_req
;
struct
btrfs_balance_control
*
balance_ctl
;
wait_queue_head_t
balance_wait_q
;
unsigned
data_chunk_allocations
;
unsigned
metadata_ratio
;
...
...
@@ -1383,6 +1464,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* string items are for debugging. They just store a short string of
* data in the FS
...
...
@@ -1413,6 +1496,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
#define BTRFS_MOUNT_RECOVERY (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
...
...
@@ -2077,8 +2161,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS
(
backup_num_devices
,
struct
btrfs_root_backup
,
num_devices
,
64
);
/* struct btrfs_super_block */
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS
(
balance_flags
,
struct
btrfs_balance_item
,
flags
,
64
);
/*
 * Thin wrapper: invokes read_eb_member() on the 'data' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the destination the member is read into).
 */
static inline void btrfs_balance_data(struct extent_buffer *eb,
				      struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
/*
 * Thin wrapper: invokes write_eb_member() on the 'data' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the source the member is written from).
 */
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
/*
 * Thin wrapper: invokes read_eb_member() on the 'meta' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the destination the member is read into).
 */
static inline void btrfs_balance_meta(struct extent_buffer *eb,
				      struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
/*
 * Thin wrapper: invokes write_eb_member() on the 'meta' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the source the member is written from).
 */
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
/*
 * Thin wrapper: invokes read_eb_member() on the 'sys' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the destination the member is read into).
 */
static inline void btrfs_balance_sys(struct extent_buffer *eb,
				     struct btrfs_balance_item *bi,
				     struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
/*
 * Thin wrapper: invokes write_eb_member() on the 'sys' member of the
 * balance item @bi inside extent buffer @eb, passing @ba through
 * (presumably the source the member is written from).
 */
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
					 struct btrfs_balance_item *bi,
					 struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
/*
 * Convert one set of balance arguments from its little-endian form
 * (@disk) into CPU byte order (@cpu).
 *
 * @cpu is zeroed in full before any field is filled in, so members that
 * are not assigned here (the 'unused' array) come out as zero.
 */
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
			       struct btrfs_disk_balance_args *disk)
{
	memset(cpu, 0, sizeof(*cpu));

	/* filters */
	cpu->profiles = le64_to_cpu(disk->profiles);
	cpu->usage    = le64_to_cpu(disk->usage);
	cpu->devid    = le64_to_cpu(disk->devid);
	cpu->pstart   = le64_to_cpu(disk->pstart);
	cpu->pend     = le64_to_cpu(disk->pend);
	cpu->vstart   = le64_to_cpu(disk->vstart);
	cpu->vend     = le64_to_cpu(disk->vend);

	/* conversion target and BTRFS_BALANCE_ARGS_* flags */
	cpu->target   = le64_to_cpu(disk->target);
	cpu->flags    = le64_to_cpu(disk->flags);
}
/*
 * Convert one set of balance arguments from CPU byte order (@cpu) into
 * its little-endian form (@disk).
 *
 * @disk is zeroed in full before any field is filled in, so members
 * that are not assigned here (the 'unused' array) come out as zero.
 */
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
			       struct btrfs_balance_args *cpu)
{
	memset(disk, 0, sizeof(*disk));

	/* filters */
	disk->profiles = cpu_to_le64(cpu->profiles);
	disk->usage    = cpu_to_le64(cpu->usage);
	disk->devid    = cpu_to_le64(cpu->devid);
	disk->pstart   = cpu_to_le64(cpu->pstart);
	disk->pend     = cpu_to_le64(cpu->pend);
	disk->vstart   = cpu_to_le64(cpu->vstart);
	disk->vend     = cpu_to_le64(cpu->vend);

	/* conversion target and BTRFS_BALANCE_ARGS_* flags */
	disk->target   = cpu_to_le64(cpu->target);
	disk->flags    = cpu_to_le64(cpu->flags);
}
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS
(
super_bytenr
,
struct
btrfs_super_block
,
bytenr
,
64
);
BTRFS_SETGET_STACK_FUNCS
(
super_flags
,
struct
btrfs_super_block
,
flags
,
64
);
BTRFS_SETGET_STACK_FUNCS
(
super_generation
,
struct
btrfs_super_block
,
...
...
@@ -2500,6 +2662,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
}
static
inline
void
free_fs_info
(
struct
btrfs_fs_info
*
fs_info
)
{
kfree
(
fs_info
->
balance_ctl
);
kfree
(
fs_info
->
delayed_root
);
kfree
(
fs_info
->
extent_root
);
kfree
(
fs_info
->
tree_root
);
...
...
@@ -2510,6 +2673,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree
(
fs_info
->
super_for_commit
);
kfree
(
fs_info
);
}
/**
 * profile_is_valid - tests whether a given profile is valid and reduced
 * @flags: profile to validate
 * @extended: if true @flags is treated as an extended profile
 *
 * Block group type bits (BTRFS_BLOCK_GROUP_TYPE_MASK) are ignored.  Of
 * what remains, only BTRFS_BLOCK_GROUP_PROFILE_MASK bits are accepted,
 * plus BTRFS_AVAIL_ALLOC_BIT_SINGLE when @extended is set.
 *
 * Returns 1 if no disallowed bit is set and at most one profile bit is
 * set, 0 otherwise.
 */
static inline int profile_is_valid(u64 flags, int extended)
{
	u64 allowed = BTRFS_BLOCK_GROUP_PROFILE_MASK;
	u64 profile = flags & ~BTRFS_BLOCK_GROUP_TYPE_MASK;

	if (extended)
		allowed |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	if (profile & ~allowed)
		return 0;

	/* clearing the lowest set bit leaves nothing: zero or one bit set */
	return (profile & (profile - 1)) == 0;
}
/* root-item.c */
int
btrfs_find_root_ref
(
struct
btrfs_root
*
tree_root
,
...
...
fs/btrfs/disk-io.c
View file @
27263e28
...
...
@@ -2002,6 +2002,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_rwsem
(
&
fs_info
->
scrub_super_lock
);
fs_info
->
scrub_workers_refcnt
=
0
;
spin_lock_init
(
&
fs_info
->
balance_lock
);
mutex_init
(
&
fs_info
->
balance_mutex
);
atomic_set
(
&
fs_info
->
balance_running
,
0
);
atomic_set
(
&
fs_info
->
balance_pause_req
,
0
);
atomic_set
(
&
fs_info
->
balance_cancel_req
,
0
);
fs_info
->
balance_ctl
=
NULL
;
init_waitqueue_head
(
&
fs_info
->
balance_wait_q
);
sb
->
s_blocksize
=
4096
;
sb
->
s_blocksize_bits
=
blksize_bits
(
4096
);
sb
->
s_bdi
=
&
fs_info
->
bdi
;
...
...
@@ -2321,9 +2329,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info
->
generation
=
generation
;
fs_info
->
last_trans_committed
=
generation
;
fs_info
->
data_alloc_profile
=
(
u64
)
-
1
;
fs_info
->
metadata_alloc_profile
=
(
u64
)
-
1
;
fs_info
->
system_alloc_profile
=
fs_info
->
metadata_alloc_profile
;
ret
=
btrfs_init_space_info
(
fs_info
);
if
(
ret
)
{
...
...
@@ -2426,6 +2431,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if
(
!
err
)
err
=
btrfs_orphan_cleanup
(
fs_info
->
tree_root
);
up_read
(
&
fs_info
->
cleanup_work_sem
);
if
(
!
err
)
err
=
btrfs_recover_balance
(
fs_info
->
tree_root
);
if
(
err
)
{
close_ctree
(
tree_root
);
return
ERR_PTR
(
err
);
...
...
@@ -2975,6 +2984,9 @@ int close_ctree(struct btrfs_root *root)
fs_info
->
closing
=
1
;
smp_mb
();
/* pause restriper - we want to resume on mount */
btrfs_pause_balance
(
root
->
fs_info
);
btrfs_scrub_cancel
(
root
);
/* wait for any defraggers to finish */
...
...
fs/btrfs/extent-tree.c
View file @
27263e28
...
...
@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
struct
list_head
*
head
=
&
info
->
space_info
;
struct
btrfs_space_info
*
found
;
flags
&=
BTRFS_BLOCK_GROUP_DATA
|
BTRFS_BLOCK_GROUP_SYSTEM
|
BTRFS_BLOCK_GROUP_METADATA
;
flags
&=
BTRFS_BLOCK_GROUP_TYPE_MASK
;
rcu_read_lock
();
list_for_each_entry_rcu
(
found
,
head
,
list
)
{
...
...
@@ -2999,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
INIT_LIST_HEAD
(
&
found
->
block_groups
[
i
]);
init_rwsem
(
&
found
->
groups_sem
);
spin_lock_init
(
&
found
->
lock
);
found
->
flags
=
flags
&
(
BTRFS_BLOCK_GROUP_DATA
|
BTRFS_BLOCK_GROUP_SYSTEM
|
BTRFS_BLOCK_GROUP_METADATA
);
found
->
flags
=
flags
&
BTRFS_BLOCK_GROUP_TYPE_MASK
;
found
->
total_bytes
=
total_bytes
;
found
->
disk_total
=
total_bytes
*
factor
;
found
->
bytes_used
=
bytes_used
;
...
...
@@ -3022,20 +3019,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
static
void
set_avail_alloc_bits
(
struct
btrfs_fs_info
*
fs_info
,
u64
flags
)
{
u64
extra_flags
=
flags
&
(
BTRFS_BLOCK_GROUP_RAID0
|
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_RAID10
|
BTRFS_BLOCK_GROUP_DUP
);
if
(
extra_flags
)
{
u64
extra_flags
=
flags
&
BTRFS_BLOCK_GROUP_PROFILE_MASK
;
/* chunk -> extended profile */
if
(
extra_flags
==
0
)
extra_flags
=
BTRFS_AVAIL_ALLOC_BIT_SINGLE
;
if
(
flags
&
BTRFS_BLOCK_GROUP_DATA
)
fs_info
->
avail_data_alloc_bits
|=
extra_flags
;
if
(
flags
&
BTRFS_BLOCK_GROUP_METADATA
)
fs_info
->
avail_metadata_alloc_bits
|=
extra_flags
;
if
(
flags
&
BTRFS_BLOCK_GROUP_SYSTEM
)
fs_info
->
avail_system_alloc_bits
|=
extra_flags
;
}
}
/*
* @flags: available profiles in extended format (see ctree.h)
*
* Returns reduced profile in chunk format. If profile changing is in
* progress (either running or paused) picks the target profile (if it's
* already available), otherwise falls back to plain reducing.
*/
u64
btrfs_reduce_alloc_profile
(
struct
btrfs_root
*
root
,
u64
flags
)
{
/*
...
...
@@ -3046,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
u64
num_devices
=
root
->
fs_info
->
fs_devices
->
rw_devices
+
root
->
fs_info
->
fs_devices
->
missing_devices
;
/* pick restriper's target profile if it's available */
spin_lock
(
&
root
->
fs_info
->
balance_lock
);
if
(
root
->
fs_info
->
balance_ctl
)
{
struct
btrfs_balance_control
*
bctl
=
root
->
fs_info
->
balance_ctl
;
u64
tgt
=
0
;
if
((
flags
&
BTRFS_BLOCK_GROUP_DATA
)
&&
(
bctl
->
data
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
&&
(
flags
&
bctl
->
data
.
target
))
{
tgt
=
BTRFS_BLOCK_GROUP_DATA
|
bctl
->
data
.
target
;
}
else
if
((
flags
&
BTRFS_BLOCK_GROUP_SYSTEM
)
&&
(
bctl
->
sys
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
&&
(
flags
&
bctl
->
sys
.
target
))
{
tgt
=
BTRFS_BLOCK_GROUP_SYSTEM
|
bctl
->
sys
.
target
;
}
else
if
((
flags
&
BTRFS_BLOCK_GROUP_METADATA
)
&&
(
bctl
->
meta
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
&&
(
flags
&
bctl
->
meta
.
target
))
{
tgt
=
BTRFS_BLOCK_GROUP_METADATA
|
bctl
->
meta
.
target
;
}
if
(
tgt
)
{
spin_unlock
(
&
root
->
fs_info
->
balance_lock
);
flags
=
tgt
;
goto
out
;
}
}
spin_unlock
(
&
root
->
fs_info
->
balance_lock
);
if
(
num_devices
==
1
)
flags
&=
~
(
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_RAID0
);
if
(
num_devices
<
4
)
...
...
@@ -3065,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
if
((
flags
&
BTRFS_BLOCK_GROUP_RAID0
)
&&
((
flags
&
BTRFS_BLOCK_GROUP_RAID1
)
|
(
flags
&
BTRFS_BLOCK_GROUP_RAID10
)
|
(
flags
&
BTRFS_BLOCK_GROUP_DUP
)))
(
flags
&
BTRFS_BLOCK_GROUP_DUP
)))
{
flags
&=
~
BTRFS_BLOCK_GROUP_RAID0
;
}
out:
/* extended -> chunk profile */
flags
&=
~
BTRFS_AVAIL_ALLOC_BIT_SINGLE
;
return
flags
;
}
static
u64
get_alloc_profile
(
struct
btrfs_root
*
root
,
u64
flags
)
{
if
(
flags
&
BTRFS_BLOCK_GROUP_DATA
)
flags
|=
root
->
fs_info
->
avail_data_alloc_bits
&
root
->
fs_info
->
data_alloc_profile
;
flags
|=
root
->
fs_info
->
avail_data_alloc_bits
;
else
if
(
flags
&
BTRFS_BLOCK_GROUP_SYSTEM
)
flags
|=
root
->
fs_info
->
avail_system_alloc_bits
&
root
->
fs_info
->
system_alloc_profile
;
flags
|=
root
->
fs_info
->
avail_system_alloc_bits
;
else
if
(
flags
&
BTRFS_BLOCK_GROUP_METADATA
)
flags
|=
root
->
fs_info
->
avail_metadata_alloc_bits
&
root
->
fs_info
->
metadata_alloc_profile
;
flags
|=
root
->
fs_info
->
avail_metadata_alloc_bits
;
return
btrfs_reduce_alloc_profile
(
root
,
flags
);
}
...
...
@@ -3282,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int
wait_for_alloc
=
0
;
int
ret
=
0
;
flags
=
btrfs_reduce_alloc_profile
(
extent_root
,
flags
);
BUG_ON
(
!
profile_is_valid
(
flags
,
0
)
);
space_info
=
__find_space_info
(
extent_root
->
fs_info
,
flags
);
if
(
!
space_info
)
{
...
...
@@ -6792,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64
stripped
=
BTRFS_BLOCK_GROUP_RAID0
|
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_RAID10
;
if
(
root
->
fs_info
->
balance_ctl
)
{
struct
btrfs_balance_control
*
bctl
=
root
->
fs_info
->
balance_ctl
;
u64
tgt
=
0
;
/* pick restriper's target profile and return */
if
(
flags
&
BTRFS_BLOCK_GROUP_DATA
&&
bctl
->
data
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
{
tgt
=
BTRFS_BLOCK_GROUP_DATA
|
bctl
->
data
.
target
;
}
else
if
(
flags
&
BTRFS_BLOCK_GROUP_SYSTEM
&&
bctl
->
sys
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
{
tgt
=
BTRFS_BLOCK_GROUP_SYSTEM
|
bctl
->
sys
.
target
;
}
else
if
(
flags
&
BTRFS_BLOCK_GROUP_METADATA
&&
bctl
->
meta
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
{
tgt
=
BTRFS_BLOCK_GROUP_METADATA
|
bctl
->
meta
.
target
;
}
if
(
tgt
)
{
/* extended -> chunk profile */
tgt
&=
~
BTRFS_AVAIL_ALLOC_BIT_SINGLE
;
return
tgt
;
}
}
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
...
...
@@ -7466,6 +7524,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return
0
;
}
/*
 * Drop the profile carried by @flags from the avail_*_alloc_bits of
 * every block group type (data, metadata, system) that @flags names.
 *
 * @flags is in chunk format; a chunk with no profile bit set is
 * translated to the extended-format BTRFS_AVAIL_ALLOC_BIT_SINGLE first.
 */
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	/* chunk -> extended profile */
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits &= ~profile;

	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits &= ~profile;

	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits &= ~profile;
}
int
btrfs_remove_block_group
(
struct
btrfs_trans_handle
*
trans
,
struct
btrfs_root
*
root
,
u64
group_start
)
{
...
...
@@ -7476,6 +7550,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct
btrfs_key
key
;
struct
inode
*
inode
;
int
ret
;
int
index
;
int
factor
;
root
=
root
->
fs_info
->
extent_root
;
...
...
@@ -7491,6 +7566,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents
(
root
,
block_group
);
memcpy
(
&
key
,
&
block_group
->
key
,
sizeof
(
key
));
index
=
get_block_group_index
(
block_group
);
if
(
block_group
->
flags
&
(
BTRFS_BLOCK_GROUP_DUP
|
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_RAID10
))
...
...
@@ -7565,6 +7641,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* are still on the list after taking the semaphore
*/
list_del_init
(
&
block_group
->
list
);
if
(
list_empty
(
&
block_group
->
space_info
->
block_groups
[
index
]))
clear_avail_alloc_bits
(
root
->
fs_info
,
block_group
->
flags
);
up_write
(
&
block_group
->
space_info
->
groups_sem
);
if
(
block_group
->
cached
==
BTRFS_CACHE_STARTED
)
...
...
fs/btrfs/ioctl.c
View file @
27263e28
...
...
@@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if
(
!
capable
(
CAP_SYS_ADMIN
))
return
-
EPERM
;
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
if
(
root
->
fs_info
->
balance_ctl
)
{
printk
(
KERN_INFO
"btrfs: balance in progress
\n
"
);
ret
=
-
EINVAL
;
goto
out
;
}
vol_args
=
memdup_user
(
arg
,
sizeof
(
*
vol_args
));
if
(
IS_ERR
(
vol_args
))
return
PTR_ERR
(
vol_args
);
if
(
IS_ERR
(
vol_args
))
{
ret
=
PTR_ERR
(
vol_args
);
goto
out
;
}
vol_args
->
name
[
BTRFS_PATH_NAME_MAX
]
=
'\0'
;
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
sizestr
=
vol_args
->
name
;
devstr
=
strchr
(
sizestr
,
':'
);
if
(
devstr
)
{
...
...
@@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
printk
(
KERN_INFO
"btrfs: resizer unable to find device %llu
\n
"
,
(
unsigned
long
long
)
devid
);
ret
=
-
EINVAL
;
goto
out_
unlock
;
goto
out_
free
;
}
if
(
!
strcmp
(
sizestr
,
"max"
))
new_size
=
device
->
bdev
->
bd_inode
->
i_size
;
...
...
@@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
new_size
=
memparse
(
sizestr
,
NULL
);
if
(
new_size
==
0
)
{
ret
=
-
EINVAL
;
goto
out_
unlock
;
goto
out_
free
;
}
}
...
...
@@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if
(
mod
<
0
)
{
if
(
new_size
>
old_size
)
{
ret
=
-
EINVAL
;
goto
out_
unlock
;
goto
out_
free
;
}
new_size
=
old_size
-
new_size
;
}
else
if
(
mod
>
0
)
{
...
...
@@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if
(
new_size
<
256
*
1024
*
1024
)
{
ret
=
-
EINVAL
;
goto
out_
unlock
;
goto
out_
free
;
}
if
(
new_size
>
device
->
bdev
->
bd_inode
->
i_size
)
{
ret
=
-
EFBIG
;
goto
out_
unlock
;
goto
out_
free
;
}
do_div
(
new_size
,
root
->
sectorsize
);
...
...
@@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
trans
=
btrfs_start_transaction
(
root
,
0
);
if
(
IS_ERR
(
trans
))
{
ret
=
PTR_ERR
(
trans
);
goto
out_
unlock
;
goto
out_
free
;
}
ret
=
btrfs_grow_device
(
trans
,
device
,
new_size
);
btrfs_commit_transaction
(
trans
,
root
);
...
...
@@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
ret
=
btrfs_shrink_device
(
device
,
new_size
);
}
out_unlock:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
out_free:
kfree
(
vol_args
);
out:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
return
ret
;
}
...
...
@@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if
(
!
capable
(
CAP_SYS_ADMIN
))
return
-
EPERM
;
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
if
(
root
->
fs_info
->
balance_ctl
)
{
printk
(
KERN_INFO
"btrfs: balance in progress
\n
"
);
ret
=
-
EINVAL
;
goto
out
;
}
vol_args
=
memdup_user
(
arg
,
sizeof
(
*
vol_args
));
if
(
IS_ERR
(
vol_args
))
return
PTR_ERR
(
vol_args
);
if
(
IS_ERR
(
vol_args
))
{
ret
=
PTR_ERR
(
vol_args
);
goto
out
;
}
vol_args
->
name
[
BTRFS_PATH_NAME_MAX
]
=
'\0'
;
ret
=
btrfs_init_new_device
(
root
,
vol_args
->
name
);
kfree
(
vol_args
);
out:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
return
ret
;
}
...
...
@@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if
(
root
->
fs_info
->
sb
->
s_flags
&
MS_RDONLY
)
return
-
EROFS
;
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
if
(
root
->
fs_info
->
balance_ctl
)
{
printk
(
KERN_INFO
"btrfs: balance in progress
\n
"
);
ret
=
-
EINVAL
;
goto
out
;
}
vol_args
=
memdup_user
(
arg
,
sizeof
(
*
vol_args
));
if
(
IS_ERR
(
vol_args
))
return
PTR_ERR
(
vol_args
);
if
(
IS_ERR
(
vol_args
))
{
ret
=
PTR_ERR
(
vol_args
);
goto
out
;
}
vol_args
->
name
[
BTRFS_PATH_NAME_MAX
]
=
'\0'
;
ret
=
btrfs_rm_device
(
root
,
vol_args
->
name
);
kfree
(
vol_args
);
out:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
return
ret
;
}
...
...
@@ -3034,6 +3065,163 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
return
ret
;
}
void
update_ioctl_balance_args
(
struct
btrfs_fs_info
*
fs_info
,
int
lock
,
struct
btrfs_ioctl_balance_args
*
bargs
)
{
struct
btrfs_balance_control
*
bctl
=
fs_info
->
balance_ctl
;
bargs
->
flags
=
bctl
->
flags
;
if
(
atomic_read
(
&
fs_info
->
balance_running
))
bargs
->
state
|=
BTRFS_BALANCE_STATE_RUNNING
;
if
(
atomic_read
(
&
fs_info
->
balance_pause_req
))
bargs
->
state
|=
BTRFS_BALANCE_STATE_PAUSE_REQ
;
if
(
atomic_read
(
&
fs_info
->
balance_cancel_req
))
bargs
->
state
|=
BTRFS_BALANCE_STATE_CANCEL_REQ
;
memcpy
(
&
bargs
->
data
,
&
bctl
->
data
,
sizeof
(
bargs
->
data
));
memcpy
(
&
bargs
->
meta
,
&
bctl
->
meta
,
sizeof
(
bargs
->
meta
));
memcpy
(
&
bargs
->
sys
,
&
bctl
->
sys
,
sizeof
(
bargs
->
sys
));
if
(
lock
)
{
spin_lock
(
&
fs_info
->
balance_lock
);
memcpy
(
&
bargs
->
stat
,
&
bctl
->
stat
,
sizeof
(
bargs
->
stat
));
spin_unlock
(
&
fs_info
->
balance_lock
);
}
else
{
memcpy
(
&
bargs
->
stat
,
&
bctl
->
stat
,
sizeof
(
bargs
->
stat
));
}
}
/*
 * Start or resume a balance.
 *
 * @arg: userspace struct btrfs_ioctl_balance_args, or NULL to balance
 *       everything with no filters (bctl->flags gets
 *       BTRFS_BALANCE_TYPE_MASK).
 *
 * Runs with volume_mutex and balance_mutex held around the call to
 * btrfs_balance().  With BTRFS_BALANCE_RESUME set in the user flags the
 * existing fs_info->balance_ctl is reused; otherwise a new control
 * structure is allocated and filled from @arg.
 *
 * Returns the result of btrfs_balance(), or:
 *   -EPERM        caller lacks CAP_SYS_ADMIN
 *   -EROFS        filesystem is mounted read-only
 *   -ENOTCONN     resume requested but there is no balance control
 *   -EINPROGRESS  a balance control already exists (non-resume case)
 *   -ENOMEM       control structure allocation failed
 *   -EFAULT       copying the arguments back to @arg failed
 *   or the error from memdup_user().
 */
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs = NULL;
	struct btrfs_balance_control *bctl;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	mutex_lock(&fs_info->volume_mutex);
	mutex_lock(&fs_info->balance_mutex);

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			/* bargs is an error pointer: must not be kfree'd */
			ret = PTR_ERR(bargs);
			goto out;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			bctl = fs_info->balance_ctl;

			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	}

	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	bctl->fs_info = fs_info;
	if (!arg) {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	} else {
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	}

do_balance:
	ret = btrfs_balance(bctl, bargs);
	/*
	 * bctl is freed in __cancel_balance or in free_fs_info if
	 * restriper was paused all the way until unmount
	 */
	if (arg && copy_to_user(arg, bargs, sizeof(*bargs)))
		ret = -EFAULT;

out_bargs:
	kfree(bargs);
out:
	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);
	return ret;
}
/*
 * Balance control ioctl: dispatch @cmd to the pause or cancel handler.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EINVAL for an unknown @cmd,
 * otherwise whatever btrfs_pause_balance() / btrfs_cancel_balance()
 * returns.
 */
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (cmd == BTRFS_BALANCE_CTL_PAUSE)
		return btrfs_pause_balance(root->fs_info);

	if (cmd == BTRFS_BALANCE_CTL_CANCEL)
		return btrfs_cancel_balance(root->fs_info);

	return -EINVAL;
}
/*
 * Report the state of the current balance to userspace.
 *
 * Under balance_mutex, a zeroed struct btrfs_ioctl_balance_args is
 * filled by update_ioctl_balance_args() (with locking requested) and
 * copied out to @arg.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -ENOTCONN when
 * there is no balance control, -ENOMEM on allocation failure, -EFAULT
 * when the copy to @arg fails.
 */
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
					 void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);

	if (!fs_info->balance_ctl) {
		ret = -ENOTCONN;
		goto unlock;
	}

	bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
	if (!bargs) {
		ret = -ENOMEM;
		goto unlock;
	}

	update_ioctl_balance_args(fs_info, 1, bargs);

	ret = copy_to_user(arg, bargs, sizeof(*bargs)) ? -EFAULT : 0;

	kfree(bargs);
unlock:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}
long
btrfs_ioctl
(
struct
file
*
file
,
unsigned
int
cmd
,
unsigned
long
arg
)
{
...
...
@@ -3078,7 +3266,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case
BTRFS_IOC_DEV_INFO
:
return
btrfs_ioctl_dev_info
(
root
,
argp
);
case
BTRFS_IOC_BALANCE
:
return
btrfs_
balance
(
root
->
fs_info
->
dev_root
);
return
btrfs_
ioctl_balance
(
root
,
NULL
);
case
BTRFS_IOC_CLONE
:
return
btrfs_ioctl_clone
(
file
,
arg
,
0
,
0
,
0
);
case
BTRFS_IOC_CLONE_RANGE
:
...
...
@@ -3110,6 +3298,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return
btrfs_ioctl_scrub_cancel
(
root
,
argp
);
case
BTRFS_IOC_SCRUB_PROGRESS
:
return
btrfs_ioctl_scrub_progress
(
root
,
argp
);
case
BTRFS_IOC_BALANCE_V2
:
return
btrfs_ioctl_balance
(
root
,
argp
);
case
BTRFS_IOC_BALANCE_CTL
:
return
btrfs_ioctl_balance_ctl
(
root
,
arg
);
case
BTRFS_IOC_BALANCE_PROGRESS
:
return
btrfs_ioctl_balance_progress
(
root
,
argp
);
}
return
-
ENOTTY
;
...
...
fs/btrfs/ioctl.h
View file @
27263e28
...
...
@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
__u64
reserved
[
124
];
/* pad to 1k */
};
/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE 1
#define BTRFS_BALANCE_CTL_CANCEL 2
/*
 * this is packed, because it should be exactly the same as its disk
 * byte order counterpart (struct btrfs_disk_balance_args)
 *
 * Field meanings below are mirrored from the comments on that
 * counterpart.
 */
struct btrfs_balance_args {
	/*
	 * profiles to operate on, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__u64 profiles;

	/* usage filter */
	__u64 usage;

	/* devid filter */
	__u64 devid;

	/* devid subset filter [pstart..pend) */
	__u64 pstart;
	__u64 pend;

	/* btrfs virtual address space subset filter [vstart..vend) */
	__u64 vstart;
	__u64 vend;

	/*
	 * profile to convert to, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__u64 target;

	/* BTRFS_BALANCE_ARGS_* */
	__u64 flags;

	__u64 unused[8];
} __attribute__ ((__packed__));
/*
 * report balance progress to userspace
 *
 * Embedded as the 'stat' member of struct btrfs_ioctl_balance_args,
 * where update_ioctl_balance_args() fills it from the balance
 * control's own 'stat' member.
 */
struct btrfs_balance_progress {
	__u64 expected;		/* estimated # of chunks that will be
				 * relocated to fulfill the request */
	__u64 considered;	/* # of chunks we have considered so far */
	__u64 completed;	/* # of chunks relocated so far */
};
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
/*
 * Argument block for BTRFS_IOC_BALANCE_V2 and
 * BTRFS_IOC_BALANCE_PROGRESS.
 *
 * Size: 2 * 8 (flags, state) + 3 * 136 (data, meta, sys)
 *       + 24 (stat) + 72 * 8 (unused) = 1024 bytes.
 */
struct btrfs_ioctl_balance_args {
	__u64 flags;				/* in/out */
	__u64 state;				/* out */

	struct btrfs_balance_args data;		/* in/out */
	struct btrfs_balance_args meta;		/* in/out */
	struct btrfs_balance_args sys;		/* in/out */

	struct btrfs_balance_progress stat;	/* out */

	__u64 unused[72];			/* pad to 1k */
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct
btrfs_ioctl_ino_lookup_args
{
__u64
treeid
;
...
...
@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
...
...
fs/btrfs/super.c
View file @
27263e28
...
...
@@ -164,8 +164,9 @@ enum {
Opt_compress_type
,
Opt_compress_force
,
Opt_compress_force_type
,
Opt_notreelog
,
Opt_ratio
,
Opt_flushoncommit
,
Opt_discard
,
Opt_space_cache
,
Opt_clear_cache
,
Opt_user_subvol_rm_allowed
,
Opt_enospc_debug
,
Opt_subvolrootid
,
Opt_defrag
,
Opt_inode_cache
,
Opt_no_space_cache
,
Opt_recovery
,
Opt_err
,
Opt_enospc_debug
,
Opt_subvolrootid
,
Opt_defrag
,
Opt_inode_cache
,
Opt_no_space_cache
,
Opt_recovery
,
Opt_skip_balance
,
Opt_err
,
};
static
match_table_t
tokens
=
{
...
...
@@ -200,6 +201,7 @@ static match_table_t tokens = {
{
Opt_inode_cache
,
"inode_cache"
},
{
Opt_no_space_cache
,
"nospace_cache"
},
{
Opt_recovery
,
"recovery"
},
{
Opt_skip_balance
,
"skip_balance"
},
{
Opt_err
,
NULL
},
};
...
...
@@ -398,6 +400,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk
(
KERN_INFO
"btrfs: enabling auto recovery"
);
btrfs_set_opt
(
info
->
mount_opt
,
RECOVERY
);
break
;
case
Opt_skip_balance
:
btrfs_set_opt
(
info
->
mount_opt
,
SKIP_BALANCE
);
break
;
case
Opt_err
:
printk
(
KERN_INFO
"btrfs: unrecognized mount option "
"'%s'
\n
"
,
p
);
...
...
@@ -723,6 +728,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts
(
seq
,
",autodefrag"
);
if
(
btrfs_test_opt
(
root
,
INODE_MAP_CACHE
))
seq_puts
(
seq
,
",inode_cache"
);
if
(
btrfs_test_opt
(
root
,
SKIP_BALANCE
))
seq_puts
(
seq
,
",skip_balance"
);
return
0
;
}
...
...
fs/btrfs/volumes.c
View file @
27263e28
...
...
@@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
#include "ctree.h"
...
...
@@ -1282,7 +1283,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
bool
clear_super
=
false
;
mutex_lock
(
&
uuid_mutex
);
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
all_avail
=
root
->
fs_info
->
avail_data_alloc_bits
|
root
->
fs_info
->
avail_system_alloc_bits
|
...
...
@@ -1452,7 +1452,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if
(
bdev
)
blkdev_put
(
bdev
,
FMODE_READ
|
FMODE_EXCL
);
out:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
mutex_unlock
(
&
uuid_mutex
);
return
ret
;
error_undo:
...
...
@@ -1629,7 +1628,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
}
filemap_write_and_wait
(
bdev
->
bd_inode
->
i_mapping
);
mutex_lock
(
&
root
->
fs_info
->
volume_mutex
);
devices
=
&
root
->
fs_info
->
fs_devices
->
devices
;
/*
...
...
@@ -1757,8 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret
=
btrfs_relocate_sys_chunks
(
root
);
BUG_ON
(
ret
);
}
out:
mutex_unlock
(
&
root
->
fs_info
->
volume_mutex
);
return
ret
;
error:
blkdev_put
(
bdev
,
FMODE_EXCL
);
...
...
@@ -1766,7 +1763,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
mutex_unlock
(
&
uuid_mutex
);
up_write
(
&
sb
->
s_umount
);
}
goto
ou
t
;
return
re
t
;
}
static
noinline
int
btrfs_update_device
(
struct
btrfs_trans_handle
*
trans
,
...
...
@@ -2077,6 +2074,362 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
return
ret
;
}
/*
 * Store the parameters of a balance run as a balance item so that the
 * operation can be picked up again later (see btrfs_recover_balance()).
 *
 * @root: tree the item is inserted into
 * @bctl: in-memory balance parameters to serialise
 *
 * The item is keyed (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY, 0).
 * The transaction is committed on every path once it has been started,
 * including when the insertion itself failed.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, the error
 * from starting the transaction or inserting the item, or otherwise the
 * commit error.
 */
static int insert_balance_item(struct btrfs_root *root,
			       struct btrfs_balance_control *bctl)
{
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_balance_item *item;
	struct btrfs_trans_handle *trans;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	struct btrfs_key key;
	int commit_err;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*item));
	if (!ret) {
		eb = path->nodes[0];
		item = btrfs_item_ptr(eb, path->slots[0],
				      struct btrfs_balance_item);

		/* start from an all-zero item, then fill in each section */
		memset_extent_buffer(eb, 0, (unsigned long)item,
				     sizeof(*item));

		btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
		btrfs_set_balance_data(eb, item, &disk_bargs);

		btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
		btrfs_set_balance_meta(eb, item, &disk_bargs);

		btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
		btrfs_set_balance_sys(eb, item, &disk_bargs);

		btrfs_set_balance_flags(eb, item, bctl->flags);

		btrfs_mark_buffer_dirty(eb);
	}

	btrfs_free_path(path);

	/* the first error wins; a commit error is reported only otherwise */
	commit_err = btrfs_commit_transaction(trans, root);
	return ret ? ret : commit_err;
}
/*
 * Remove the balance item keyed
 * (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY, 0) from @root.
 *
 * The transaction is committed on every path once it has been started.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT
 * if the item does not exist, the error from starting the transaction,
 * searching or deleting, or otherwise the commit error.
 */
static int del_balance_item(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_key key;
	int commit_err;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;		/* no exact match for the key */
	else if (ret == 0)
		ret = btrfs_del_item(trans, root, path);

	btrfs_free_path(path);

	/* the first error wins; a commit error is reported only otherwise */
	commit_err = btrfs_commit_transaction(trans, root);
	return ret ? ret : commit_err;
}
/*
* This is a heuristic used to reduce the number of chunks balanced on
* resume after balance was interrupted.
*/
static
void
update_balance_args
(
struct
btrfs_balance_control
*
bctl
)
{
/*
* Turn on soft mode for chunk types that were being converted.
*/
if
(
bctl
->
data
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
bctl
->
data
.
flags
|=
BTRFS_BALANCE_ARGS_SOFT
;
if
(
bctl
->
sys
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
bctl
->
sys
.
flags
|=
BTRFS_BALANCE_ARGS_SOFT
;
if
(
bctl
->
meta
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
)
bctl
->
meta
.
flags
|=
BTRFS_BALANCE_ARGS_SOFT
;
/*
* Turn on usage filter if is not already used. The idea is
* that chunks that we have already balanced should be
* reasonably full. Don't do it for chunks that are being
* converted - that will keep us from relocating unconverted
* (albeit full) chunks.
*/
if
(
!
(
bctl
->
data
.
flags
&
BTRFS_BALANCE_ARGS_USAGE
)
&&
!
(
bctl
->
data
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
))
{
bctl
->
data
.
flags
|=
BTRFS_BALANCE_ARGS_USAGE
;
bctl
->
data
.
usage
=
90
;
}
if
(
!
(
bctl
->
sys
.
flags
&
BTRFS_BALANCE_ARGS_USAGE
)
&&
!
(
bctl
->
sys
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
))
{
bctl
->
sys
.
flags
|=
BTRFS_BALANCE_ARGS_USAGE
;
bctl
->
sys
.
usage
=
90
;
}
if
(
!
(
bctl
->
meta
.
flags
&
BTRFS_BALANCE_ARGS_USAGE
)
&&
!
(
bctl
->
meta
.
flags
&
BTRFS_BALANCE_ARGS_CONVERT
))
{
bctl
->
meta
.
flags
|=
BTRFS_BALANCE_ARGS_USAGE
;
bctl
->
meta
.
usage
=
90
;
}
}
/*
 * Publish @bctl as the active balance control of its filesystem.
 *
 * Should be called with both balance and volume mutexes held to
 * serialize other volume operations (add_dev/rm_dev/resize) with
 * restriper.  Same goes for unset_balance_control.
 *
 * It is a bug to call this while another balance control is installed.
 */
static void set_balance_control(struct btrfs_balance_control *bctl)
{
	struct btrfs_fs_info *info = bctl->fs_info;

	BUG_ON(info->balance_ctl);

	/* the pointer itself is updated under balance_lock */
	spin_lock(&info->balance_lock);
	info->balance_ctl = bctl;
	spin_unlock(&info->balance_lock);
}
/*
 * Detach the active balance control from @fs_info and free it.
 *
 * The pointer is cleared under balance_lock; the structure is freed
 * after the lock has been dropped.  It is a bug to call this when no
 * balance control is installed.
 */
static void unset_balance_control(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *old;

	BUG_ON(!fs_info->balance_ctl);
	old = fs_info->balance_ctl;

	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl = NULL;
	spin_unlock(&fs_info->balance_lock);

	kfree(old);
}
/*
 * Balance filters.  Return 1 if chunk should be filtered out
 * (should not be balanced).
 */

/*
 * Profiles filter: keep the chunk only if its profile is one of the
 * profiles selected in @bargs->profiles.  A chunk with no profile bits
 * set is matched against BTRFS_AVAIL_ALLOC_BIT_SINGLE.
 */
static int chunk_profiles_filter(u64 chunk_profile,
				 struct btrfs_balance_args *bargs)
{
	u64 profile = chunk_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	return (bargs->profiles & profile) ? 0 : 1;
}
/*
 * Return @factor percent of @num.
 *
 * A factor of zero or less yields 0 and a factor of 100 or more yields
 * @num unchanged; anything in between is num * factor / 100 using
 * 64-bit division.
 */
static u64 div_factor_fine(u64 num, int factor)
{
	u64 scaled;

	if (factor <= 0)
		return 0;
	if (factor >= 100)
		return num;

	scaled = num * factor;
	do_div(scaled, 100);
	return scaled;
}
/*
 * Usage filter: keep the chunk only if the bytes used in its block
 * group are below @bargs->usage percent of the block group's
 * key.offset (presumably the block group length -- confirm against the
 * block group key layout).
 *
 * NOTE(review): the result of btrfs_lookup_block_group() is used
 * without a NULL check; this relies on a block group existing for
 * every chunk offset passed in.
 *
 * Returns 0 to keep the chunk, 1 to filter it out.
 */
static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
			      struct btrfs_balance_args *bargs)
{
	struct btrfs_block_group_cache *bg;
	u64 used;
	u64 threshold;
	int filtered;

	bg = btrfs_lookup_block_group(fs_info, chunk_offset);

	used = btrfs_block_group_used(&bg->item);
	threshold = div_factor_fine(bg->key.offset, bargs->usage);
	filtered = (used < threshold) ? 0 : 1;

	/* drop the reference taken by the lookup */
	btrfs_put_block_group(bg);
	return filtered;
}
/*
 * Devid filter: keep the chunk only if at least one of its stripes is
 * located on the device with id @bargs->devid.
 *
 * Returns 0 to keep the chunk, 1 to filter it out.
 */
static int chunk_devid_filter(struct extent_buffer *leaf,
			      struct btrfs_chunk *chunk,
			      struct btrfs_balance_args *bargs)
{
	int nr_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	int idx;

	for (idx = 0; idx < nr_stripes; idx++) {
		struct btrfs_stripe *s = btrfs_stripe_nr(chunk, idx);

		if (btrfs_stripe_devid(leaf, s) == bargs->devid)
			return 0;
	}

	return 1;
}
/*
 * Device range filter, [pstart, pend).
 *
 * Keep the chunk only if one of its stripes on device @bargs->devid
 * overlaps the half-open range [@bargs->pstart, @bargs->pend).  The
 * per-device length of a stripe is the chunk length divided by
 * num_stripes / 2 for DUP, RAID1 and RAID10 chunks and by num_stripes
 * otherwise.
 *
 * Without the devid filter there is nothing to compare against and the
 * chunk is kept.  @chunk_offset is not used.
 *
 * Returns 0 to keep the chunk, 1 to filter it out.
 */
static int chunk_drange_filter(struct extent_buffer *leaf,
			       struct btrfs_chunk *chunk,
			       u64 chunk_offset,
			       struct btrfs_balance_args *bargs)
{
	int nr_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	struct btrfs_stripe *s;
	u64 dev_start;
	u64 dev_len;
	int copies;
	int divisor;
	int idx;

	if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
		return 0;

	copies = (btrfs_chunk_type(leaf, chunk) &
		  (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
		   BTRFS_BLOCK_GROUP_RAID10)) ? 2 : 1;
	divisor = nr_stripes / copies;

	for (idx = 0; idx < nr_stripes; idx++) {
		s = btrfs_stripe_nr(chunk, idx);
		if (btrfs_stripe_devid(leaf, s) == bargs->devid) {
			dev_start = btrfs_stripe_offset(leaf, s);
			dev_len = btrfs_chunk_length(leaf, chunk);
			do_div(dev_len, divisor);

			/* any overlap with [pstart, pend) is enough */
			if (dev_start < bargs->pend &&
			    dev_start + dev_len > bargs->pstart)
				return 0;
		}
	}

	return 1;
}
/*
 * Virtual (logical) range filter, [vstart, vend).
 *
 * Keep the chunk if at least part of
 * [@chunk_offset, @chunk_offset + chunk length) lies inside
 * [@bargs->vstart, @bargs->vend).
 *
 * Returns 0 to keep the chunk, 1 to filter it out.
 */
static int chunk_vrange_filter(struct extent_buffer *leaf,
			       struct btrfs_chunk *chunk,
			       u64 chunk_offset,
			       struct btrfs_balance_args *bargs)
{
	/* chunk starts at or after the end of the range */
	if (chunk_offset >= bargs->vend)
		return 1;

	/* chunk ends at or before the start of the range */
	if (chunk_offset + btrfs_chunk_length(leaf, chunk) <= bargs->vstart)
		return 1;

	return 0;
}
/*
 * Soft convert filter: when a conversion is requested, filter out
 * chunks whose profile already equals the conversion target
 * (@bargs->target).  A chunk with no profile bits set is matched
 * against BTRFS_AVAIL_ALLOC_BIT_SINGLE.
 *
 * Returns 1 to filter the chunk out, 0 to keep it (including when no
 * conversion is requested).
 */
static int chunk_soft_convert_filter(u64 chunk_profile,
				     struct btrfs_balance_args *bargs)
{
	u64 profile;

	if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
		return 0;

	profile = chunk_profile & BTRFS_BLOCK_GROUP_PROFILE_MASK;
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	return (bargs->target & profile) ? 1 : 0;
}
/*
 * Decide whether a chunk is to be relocated by the running balance.
 *
 * @root:         any root of the filesystem; used to reach the active
 *                balance control through root->fs_info
 * @leaf:         extent buffer holding the chunk item
 * @chunk:        the chunk item inside @leaf
 * @chunk_offset: logical start of the chunk
 *
 * The chunk type is first checked against the types selected in the
 * balance flags, then the arguments for that type (data, system or
 * metadata) are run through every filter enabled in their flags.
 *
 * Returns 1 if the chunk should be balanced, 0 if any filter rejects it.
 */
static int should_balance_chunk(struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct btrfs_chunk *chunk, u64 chunk_offset)
{
	struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
	u64 chunk_type = btrfs_chunk_type(leaf, chunk);
	struct btrfs_balance_args *bargs = NULL;
	u64 enabled;

	/* type filter */
	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
	      (bctl->flags & BTRFS_BALANCE_TYPE_MASK)))
		return 0;

	/*
	 * NOTE(review): bargs stays NULL if none of the three type bits
	 * below is set; this relies on the type filter above having
	 * matched one of them.
	 */
	if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
		bargs = &bctl->data;
	else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
		bargs = &bctl->sys;
	else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
		bargs = &bctl->meta;

	enabled = bargs->flags;

	/* profiles filter */
	if ((enabled & BTRFS_BALANCE_ARGS_PROFILES) &&
	    chunk_profiles_filter(chunk_type, bargs))
		return 0;

	/* usage filter */
	if ((enabled & BTRFS_BALANCE_ARGS_USAGE) &&
	    chunk_usage_filter(bctl->fs_info, chunk_offset, bargs))
		return 0;

	/* devid filter */
	if ((enabled & BTRFS_BALANCE_ARGS_DEVID) &&
	    chunk_devid_filter(leaf, chunk, bargs))
		return 0;

	/* drange filter, makes sense only with devid filter */
	if ((enabled & BTRFS_BALANCE_ARGS_DRANGE) &&
	    chunk_drange_filter(leaf, chunk, chunk_offset, bargs))
		return 0;

	/* vrange filter */
	if ((enabled & BTRFS_BALANCE_ARGS_VRANGE) &&
	    chunk_vrange_filter(leaf, chunk, chunk_offset, bargs))
		return 0;

	/* soft profile changing mode */
	if ((enabled & BTRFS_BALANCE_ARGS_SOFT) &&
	    chunk_soft_convert_filter(chunk_type, bargs))
		return 0;

	return 1;
}
static
u64
div_factor
(
u64
num
,
int
factor
)
{
if
(
factor
==
10
)
...
...
@@ -2086,29 +2439,28 @@ static u64 div_factor(u64 num, int factor)
return
num
;
}
int
btrfs_balance
(
struct
btrfs_root
*
dev_root
)
static
int
__btrfs_balance
(
struct
btrfs_fs_info
*
fs_info
)
{
int
ret
;
struct
list_head
*
devices
=
&
dev_root
->
fs_info
->
fs_devices
->
devices
;
struct
btrfs_balance_control
*
bctl
=
fs_info
->
balance_ctl
;
struct
btrfs_root
*
chunk_root
=
fs_info
->
chunk_root
;
struct
btrfs_root
*
dev_root
=
fs_info
->
dev_root
;
struct
list_head
*
devices
;
struct
btrfs_device
*
device
;
u64
old_size
;
u64
size_to_free
;
struct
btrfs_chunk
*
chunk
;
struct
btrfs_path
*
path
;
struct
btrfs_key
key
;
struct
btrfs_root
*
chunk_root
=
dev_root
->
fs_info
->
chunk_root
;
struct
btrfs_trans_handle
*
trans
;
struct
btrfs_key
found_key
;
if
(
dev_root
->
fs_info
->
sb
->
s_flags
&
MS_RDONLY
)
return
-
EROFS
;
if
(
!
capable
(
CAP_SYS_ADMIN
))
return
-
EPERM
;
mutex_lock
(
&
dev_root
->
fs_info
->
volume_mutex
);
dev_root
=
dev_root
->
fs_info
->
dev_root
;
struct
btrfs_trans_handle
*
trans
;
struct
extent_buffer
*
leaf
;
int
slot
;
int
ret
;
int
enospc_errors
=
0
;
bool
counting
=
true
;
/* step one make some room on all the devices */
devices
=
&
fs_info
->
fs_devices
->
devices
;
list_for_each_entry
(
device
,
devices
,
dev_list
)
{
old_size
=
device
->
total_bytes
;
size_to_free
=
div_factor
(
old_size
,
1
);
...
...
@@ -2137,11 +2489,23 @@ int btrfs_balance(struct btrfs_root *dev_root)
ret
=
-
ENOMEM
;
goto
error
;
}
/* zero out stat counters */
spin_lock
(
&
fs_info
->
balance_lock
);
memset
(
&
bctl
->
stat
,
0
,
sizeof
(
bctl
->
stat
));
spin_unlock
(
&
fs_info
->
balance_lock
);
again:
key
.
objectid
=
BTRFS_FIRST_CHUNK_TREE_OBJECTID
;
key
.
offset
=
(
u64
)
-
1
;
key
.
type
=
BTRFS_CHUNK_ITEM_KEY
;
while
(
1
)
{
if
((
!
counting
&&
atomic_read
(
&
fs_info
->
balance_pause_req
))
||
atomic_read
(
&
fs_info
->
balance_cancel_req
))
{
ret
=
-
ECANCELED
;
goto
error
;
}
ret
=
btrfs_search_slot
(
NULL
,
chunk_root
,
&
key
,
path
,
0
,
0
);
if
(
ret
<
0
)
goto
error
;
...
...
@@ -2151,15 +2515,19 @@ int btrfs_balance(struct btrfs_root *dev_root)
* failed
*/
if
(
ret
==
0
)
break
;
BUG
();
/* FIXME break ? */
ret
=
btrfs_previous_item
(
chunk_root
,
path
,
0
,
BTRFS_CHUNK_ITEM_KEY
);
if
(
ret
)
if
(
ret
)
{
ret
=
0
;
break
;
}
leaf
=
path
->
nodes
[
0
];
slot
=
path
->
slots
[
0
];
btrfs_item_key_to_cpu
(
leaf
,
&
found_key
,
slot
);
btrfs_item_key_to_cpu
(
path
->
nodes
[
0
],
&
found_key
,
path
->
slots
[
0
]);
if
(
found_key
.
objectid
!=
key
.
objectid
)
break
;
...
...
@@ -2167,22 +2535,375 @@ int btrfs_balance(struct btrfs_root *dev_root)
if
(
found_key
.
offset
==
0
)
break
;
chunk
=
btrfs_item_ptr
(
leaf
,
slot
,
struct
btrfs_chunk
);
if
(
!
counting
)
{
spin_lock
(
&
fs_info
->
balance_lock
);
bctl
->
stat
.
considered
++
;
spin_unlock
(
&
fs_info
->
balance_lock
);
}
ret
=
should_balance_chunk
(
chunk_root
,
leaf
,
chunk
,
found_key
.
offset
);
btrfs_release_path
(
path
);
if
(
!
ret
)
goto
loop
;
if
(
counting
)
{
spin_lock
(
&
fs_info
->
balance_lock
);
bctl
->
stat
.
expected
++
;
spin_unlock
(
&
fs_info
->
balance_lock
);
goto
loop
;
}
ret
=
btrfs_relocate_chunk
(
chunk_root
,
chunk_root
->
root_key
.
objectid
,
found_key
.
objectid
,
found_key
.
offset
);
if
(
ret
&&
ret
!=
-
ENOSPC
)
goto
error
;
if
(
ret
==
-
ENOSPC
)
{
enospc_errors
++
;
}
else
{
spin_lock
(
&
fs_info
->
balance_lock
);
bctl
->
stat
.
completed
++
;
spin_unlock
(
&
fs_info
->
balance_lock
);
}
loop:
key
.
offset
=
found_key
.
offset
-
1
;
}
ret
=
0
;
if
(
counting
)
{
btrfs_release_path
(
path
);
counting
=
false
;
goto
again
;
}
error:
btrfs_free_path
(
path
);
mutex_unlock
(
&
dev_root
->
fs_info
->
volume_mutex
);
if
(
enospc_errors
)
{
printk
(
KERN_INFO
"btrfs: %d enospc errors during balance
\n
"
,
enospc_errors
);
if
(
!
ret
)
ret
=
-
ENOSPC
;
}
return
ret
;
}
/*
 * Tell whether the balance has to be torn down after __btrfs_balance()
 * returned: true (1) if a cancel was requested, or if neither a pause
 * nor a cancel is pending (the normal exit path).  A pending pause
 * without a cancel yields 0.
 */
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
{
	/* cancel requested */
	if (atomic_read(&fs_info->balance_cancel_req))
		return 1;

	/* paused: keep the balance state around */
	if (atomic_read(&fs_info->balance_pause_req) != 0)
		return 0;

	/* normal exit path */
	return atomic_read(&fs_info->balance_cancel_req) == 0;
}
/*
 * Tear down the current balance: drop and free the in-memory balance
 * control, then delete the balance item from the tree root.  Failure to
 * delete the item is treated as a bug.
 */
static void __cancel_balance(struct btrfs_fs_info *fs_info)
{
	int err;

	unset_balance_control(fs_info);

	err = del_balance_item(fs_info->tree_root);
	BUG_ON(err);
}
void
update_ioctl_balance_args
(
struct
btrfs_fs_info
*
fs_info
,
int
lock
,
struct
btrfs_ioctl_balance_args
*
bargs
);
/*
 * Validate a balance request, record it in the tree root and run it.
 *
 * @bctl:  balance parameters.  Ownership passes to this function: on a
 *         rejected fresh request @bctl is freed here, otherwise it is
 *         installed as the active balance control and released through
 *         __cancel_balance() when the balance is closed.
 * @bargs: optional; when non-NULL it is zeroed and filled in through
 *         update_ioctl_balance_args() after the balance loop returns.
 *
 * Should be called with both balance and volume mutexes held.  The
 * balance mutex is dropped while __btrfs_balance() runs and is taken
 * again before returning.
 *
 * Target profiles are sanity-checked only for the chunk types that are
 * actually being converted; the target of a type without
 * BTRFS_BALANCE_ARGS_CONVERT is never used and is therefore ignored.
 *
 * Returns 0 on success, -EINVAL if the request is rejected (filesystem
 * closing, pause/cancel pending, inconsistent or disallowed options),
 * or the error from inserting the balance item or from the balance
 * loop itself.
 */
int btrfs_balance(struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_fs_info *fs_info = bctl->fs_info;
	u64 allowed;
	int ret;

	if (btrfs_fs_closing(fs_info) ||
	    atomic_read(&fs_info->balance_pause_req) ||
	    atomic_read(&fs_info->balance_cancel_req)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * In case of mixed groups both data and meta should be picked,
	 * and identical options should be given for both of them.
	 */
	allowed = btrfs_super_incompat_flags(fs_info->super_copy);
	if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
	    (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
		if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
			printk(KERN_ERR "btrfs: with mixed groups data and "
			       "metadata balance options must be the same\n");
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * Profile changing sanity checks.  Skip them if a simple
	 * balance is requested.
	 */
	if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
	      BTRFS_BALANCE_ARGS_CONVERT))
		goto do_balance;

	/* which target profiles the current number of devices permits */
	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	if (fs_info->fs_devices->num_devices == 1)
		allowed |= BTRFS_BLOCK_GROUP_DUP;
	else if (fs_info->fs_devices->num_devices < 4)
		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
	else
		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			    BTRFS_BLOCK_GROUP_RAID10);

	/*
	 * Validate a target only if that chunk type is being converted:
	 * an unused target must not be able to fail the whole request.
	 */
	if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	    (!profile_is_valid(bctl->data.target, 1) ||
	     (bctl->data.target & ~allowed))) {
		printk(KERN_ERR "btrfs: unable to start balance with target "
		       "data profile %llu\n",
		       (unsigned long long)bctl->data.target);
		ret = -EINVAL;
		goto out;
	}
	if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	    (!profile_is_valid(bctl->meta.target, 1) ||
	     (bctl->meta.target & ~allowed))) {
		printk(KERN_ERR "btrfs: unable to start balance with target "
		       "metadata profile %llu\n",
		       (unsigned long long)bctl->meta.target);
		ret = -EINVAL;
		goto out;
	}
	if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	    (!profile_is_valid(bctl->sys.target, 1) ||
	     (bctl->sys.target & ~allowed))) {
		printk(KERN_ERR "btrfs: unable to start balance with target "
		       "system profile %llu\n",
		       (unsigned long long)bctl->sys.target);
		ret = -EINVAL;
		goto out;
	}

	if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	    (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
		printk(KERN_ERR "btrfs: dup for data is not allowed\n");
		ret = -EINVAL;
		goto out;
	}

	/* allow to reduce meta or sys integrity only if force set */
	allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
		  BTRFS_BLOCK_GROUP_RAID10;
	if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	     (fs_info->avail_system_alloc_bits & allowed) &&
	     !(bctl->sys.target & allowed)) ||
	    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
	     (fs_info->avail_metadata_alloc_bits & allowed) &&
	     !(bctl->meta.target & allowed))) {
		if (bctl->flags & BTRFS_BALANCE_FORCE) {
			printk(KERN_INFO "btrfs: force reducing metadata "
			       "integrity\n");
		} else {
			printk(KERN_ERR "btrfs: balance will reduce metadata "
			       "integrity, use force if you want this\n");
			ret = -EINVAL;
			goto out;
		}
	}

do_balance:
	ret = insert_balance_item(fs_info->tree_root, bctl);
	if (ret && ret != -EEXIST)
		goto out;

	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
		/* a fresh balance must not find an item already in place */
		BUG_ON(ret == -EEXIST);
		set_balance_control(bctl);
	} else {
		/* a resumed balance must find the item it is resuming */
		BUG_ON(ret != -EEXIST);
		spin_lock(&fs_info->balance_lock);
		update_balance_args(bctl);
		spin_unlock(&fs_info->balance_lock);
	}

	atomic_inc(&fs_info->balance_running);
	mutex_unlock(&fs_info->balance_mutex);

	ret = __btrfs_balance(fs_info);

	mutex_lock(&fs_info->balance_mutex);
	atomic_dec(&fs_info->balance_running);

	if (bargs) {
		memset(bargs, 0, sizeof(*bargs));
		update_ioctl_balance_args(fs_info, 0, bargs);
	}

	/*
	 * Keep the balance state only when the loop was interrupted
	 * (-ECANCELED) or ran out of space (-ENOSPC) and no close is
	 * due; any other error or a due close tears it down.
	 */
	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
	    balance_need_close(fs_info)) {
		__cancel_balance(fs_info);
	}

	/* let pause/cancel waiters see balance_running drop */
	wake_up(&fs_info->balance_wait_q);

	return ret;
out:
	if (bctl->flags & BTRFS_BALANCE_RESUME)
		__cancel_balance(fs_info);
	else
		kfree(bctl);
	return ret;
}
static
int
balance_kthread
(
void
*
data
)
{
struct
btrfs_balance_control
*
bctl
=
(
struct
btrfs_balance_control
*
)
data
;
struct
btrfs_fs_info
*
fs_info
=
bctl
->
fs_info
;
int
ret
=
0
;
mutex_lock
(
&
fs_info
->
volume_mutex
);
mutex_lock
(
&
fs_info
->
balance_mutex
);
set_balance_control
(
bctl
);
if
(
btrfs_test_opt
(
fs_info
->
tree_root
,
SKIP_BALANCE
))
{
printk
(
KERN_INFO
"btrfs: force skipping balance
\n
"
);
}
else
{
printk
(
KERN_INFO
"btrfs: continuing balance
\n
"
);
ret
=
btrfs_balance
(
bctl
,
NULL
);
}
mutex_unlock
(
&
fs_info
->
balance_mutex
);
mutex_unlock
(
&
fs_info
->
volume_mutex
);
return
ret
;
}
/*
 * Look for a balance item in @tree_root and, if there is one, rebuild
 * the balance control from it and start the "btrfs-balance" thread to
 * deal with it.
 *
 * The rebuilt control always has BTRFS_BALANCE_RESUME set.  Once the
 * thread has been started it owns the control; on every other path the
 * control is freed here.
 *
 * Returns 0 if no balance item exists or the thread was started,
 * -ENOMEM on allocation failure, or the error from the tree search or
 * from starting the thread.
 */
int btrfs_recover_balance(struct btrfs_root *tree_root)
{
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_balance_control *bctl;
	struct btrfs_balance_item *item;
	struct extent_buffer *eb;
	struct task_struct *tsk;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto free_path;
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
	if (ret) {
		/* a missing item is not an error: nothing to resume */
		if (ret > 0)
			ret = 0;
		goto free_bctl;
	}

	eb = path->nodes[0];
	item = btrfs_item_ptr(eb, path->slots[0], struct btrfs_balance_item);

	bctl->fs_info = tree_root->fs_info;
	bctl->flags = btrfs_balance_flags(eb, item) | BTRFS_BALANCE_RESUME;

	btrfs_balance_data(eb, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);

	btrfs_balance_meta(eb, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);

	btrfs_balance_sys(eb, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

	tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
	if (!IS_ERR(tsk))
		goto free_path;		/* thread owns bctl now, ret == 0 */

	ret = PTR_ERR(tsk);

free_bctl:
	kfree(bctl);
free_path:
	btrfs_free_path(path);
	return ret;
}
/*
 * Ask a running balance to pause and wait until it has stopped.
 *
 * The pause request is raised under the balance mutex, the mutex is
 * dropped while waiting on balance_wait_q for balance_running to reach
 * zero, and the request is withdrawn again afterwards.
 *
 * Returns 0 once the balance has stopped, or -ENOTCONN if there is no
 * balance control or the balance is not currently running.
 */
int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
{
	int ret = -ENOTCONN;

	mutex_lock(&fs_info->balance_mutex);

	if (!fs_info->balance_ctl ||
	    !atomic_read(&fs_info->balance_running))
		goto unlock;

	atomic_inc(&fs_info->balance_pause_req);
	mutex_unlock(&fs_info->balance_mutex);

	wait_event(fs_info->balance_wait_q,
		   atomic_read(&fs_info->balance_running) == 0);

	mutex_lock(&fs_info->balance_mutex);
	/* we are good with balance_ctl ripped off from under us */
	BUG_ON(atomic_read(&fs_info->balance_running));
	atomic_dec(&fs_info->balance_pause_req);
	ret = 0;

unlock:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}
/*
 * Cancel the current balance, whether it is running or paused.
 *
 * A cancel request is raised first.  If the balance is running we only
 * wait for it to stop; btrfs_balance() deletes the balance item in that
 * case.  If it is not running, the balance is torn down here, which
 * needs the volume mutex taken ahead of the balance mutex.
 *
 * Returns 0 on success or -ENOTCONN if there is no balance control.
 */
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->balance_cancel_req);

	if (!atomic_read(&fs_info->balance_running)) {
		/*
		 * __cancel_balance needs volume_mutex: drop the balance
		 * mutex and take both again in the right order
		 */
		mutex_unlock(&fs_info->balance_mutex);
		mutex_lock(&fs_info->volume_mutex);
		mutex_lock(&fs_info->balance_mutex);

		/* the control may have gone away while we were unlocked */
		if (fs_info->balance_ctl)
			__cancel_balance(fs_info);

		mutex_unlock(&fs_info->volume_mutex);
	} else {
		/* running: just wait for the balance loop to stop */
		mutex_unlock(&fs_info->balance_mutex);
		wait_event(fs_info->balance_wait_q,
			   atomic_read(&fs_info->balance_running) == 0);
		mutex_lock(&fs_info->balance_mutex);
	}

	BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
	atomic_dec(&fs_info->balance_cancel_req);
	mutex_unlock(&fs_info->balance_mutex);
	return 0;
}
/*
* shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks.
...
...
@@ -2756,8 +3477,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
return
ret
;
alloc_profile
=
BTRFS_BLOCK_GROUP_METADATA
|
(
fs_info
->
metadata_alloc_profile
&
fs_info
->
avail_metadata_alloc_bits
);
fs_info
->
avail_metadata_alloc_bits
;
alloc_profile
=
btrfs_reduce_alloc_profile
(
root
,
alloc_profile
);
ret
=
__btrfs_alloc_chunk
(
trans
,
extent_root
,
&
map
,
&
chunk_size
,
...
...
@@ -2767,8 +3487,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
sys_chunk_offset
=
chunk_offset
+
chunk_size
;
alloc_profile
=
BTRFS_BLOCK_GROUP_SYSTEM
|
(
fs_info
->
system_alloc_profile
&
fs_info
->
avail_system_alloc_bits
);
fs_info
->
avail_system_alloc_bits
;
alloc_profile
=
btrfs_reduce_alloc_profile
(
root
,
alloc_profile
);
ret
=
__btrfs_alloc_chunk
(
trans
,
extent_root
,
&
sys_map
,
...
...
@@ -2955,13 +3674,9 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
}
}
if
(
rw
&
REQ_DISCARD
)
{
if
(
map
->
type
&
(
BTRFS_BLOCK_GROUP_RAID0
|
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_DUP
|
BTRFS_BLOCK_GROUP_RAID10
))
{
if
(
map
->
type
&
BTRFS_BLOCK_GROUP_PROFILE_MASK
)
stripes_required
=
map
->
num_stripes
;
}
}
if
(
bbio_ret
&&
(
rw
&
(
REQ_WRITE
|
REQ_DISCARD
))
&&
stripes_allocated
<
stripes_required
)
{
stripes_allocated
=
map
->
num_stripes
;
...
...
@@ -2984,10 +3699,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
if
(
rw
&
REQ_DISCARD
)
*
length
=
min_t
(
u64
,
em
->
len
-
offset
,
*
length
);
else
if
(
map
->
type
&
(
BTRFS_BLOCK_GROUP_RAID0
|
BTRFS_BLOCK_GROUP_RAID1
|
BTRFS_BLOCK_GROUP_RAID10
|
BTRFS_BLOCK_GROUP_DUP
))
{
else
if
(
map
->
type
&
BTRFS_BLOCK_GROUP_PROFILE_MASK
)
{
/* we limit the length of each bio to what fits in a stripe */
*
length
=
min_t
(
u64
,
em
->
len
-
offset
,
map
->
stripe_len
-
stripe_offset
);
...
...
fs/btrfs/volumes.h
View file @
27263e28
...
...
@@ -186,6 +186,51 @@ struct map_lookup {
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
/*
* Restriper's general type filter
*/
#define BTRFS_BALANCE_DATA (1ULL << 0)
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
#define BTRFS_BALANCE_METADATA (1ULL << 2)
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
BTRFS_BALANCE_SYSTEM | \
BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/*
* Balance filters
*/
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
* half-filled).
*/
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
struct
btrfs_balance_args
;
struct
btrfs_balance_progress
;
/*
 * In-memory state of one balance (restriper) operation: the filesystem
 * it belongs to, the per-chunk-type arguments, the general flags and
 * the progress counters.
 */
struct
btrfs_balance_control
{
/* filesystem this balance operates on */
struct
btrfs_fs_info
*
fs_info
;
/* arguments applied to chunks of type BTRFS_BLOCK_GROUP_DATA */
struct
btrfs_balance_args
data
;
/* arguments applied to chunks of type BTRFS_BLOCK_GROUP_METADATA */
struct
btrfs_balance_args
meta
;
/* arguments applied to chunks of type BTRFS_BLOCK_GROUP_SYSTEM */
struct
btrfs_balance_args
sys
;
/* BTRFS_BALANCE_* bits: type mask, FORCE and RESUME */
u64
flags
;
/* progress counters, updated under fs_info->balance_lock */
struct
btrfs_balance_progress
stat
;
};
int
btrfs_account_dev_extents_size
(
struct
btrfs_device
*
device
,
u64
start
,
u64
end
,
u64
*
length
);
...
...
@@ -228,7 +273,11 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8
*
uuid
,
u8
*
fsid
);
int
btrfs_shrink_device
(
struct
btrfs_device
*
device
,
u64
new_size
);
int
btrfs_init_new_device
(
struct
btrfs_root
*
root
,
char
*
path
);
int
btrfs_balance
(
struct
btrfs_root
*
dev_root
);
int
btrfs_balance
(
struct
btrfs_balance_control
*
bctl
,
struct
btrfs_ioctl_balance_args
*
bargs
);
int
btrfs_recover_balance
(
struct
btrfs_root
*
tree_root
);
int
btrfs_pause_balance
(
struct
btrfs_fs_info
*
fs_info
);
int
btrfs_cancel_balance
(
struct
btrfs_fs_info
*
fs_info
);
int
btrfs_chunk_readonly
(
struct
btrfs_root
*
root
,
u64
chunk_offset
);
int
find_free_dev_extent
(
struct
btrfs_trans_handle
*
trans
,
struct
btrfs_device
*
device
,
u64
num_bytes
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment