Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
ff9cb1c4
Commit
ff9cb1c4
authored
Jan 10, 2012
by
Theodore Ts'o
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'for_linus' into for_linus_merged
Conflicts: fs/ext4/ioctl.c
parents
e4e11180
d50f2ab6
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
1079 additions
and
463 deletions
+1079
-463
Documentation/filesystems/ext4.txt
Documentation/filesystems/ext4.txt
+7
-0
fs/ext4/balloc.c
fs/ext4/balloc.c
+3
-1
fs/ext4/ext4.h
fs/ext4/ext4.h
+17
-12
fs/ext4/extents.c
fs/ext4/extents.c
+6
-4
fs/ext4/ialloc.c
fs/ext4/ialloc.c
+11
-7
fs/ext4/inode.c
fs/ext4/inode.c
+19
-124
fs/ext4/ioctl.c
fs/ext4/ioctl.c
+74
-12
fs/ext4/mballoc.c
fs/ext4/mballoc.c
+1
-1
fs/ext4/resize.c
fs/ext4/resize.c
+886
-289
fs/ext4/super.c
fs/ext4/super.c
+5
-6
fs/ext4/xattr_security.c
fs/ext4/xattr_security.c
+3
-2
fs/jbd2/commit.c
fs/jbd2/commit.c
+6
-0
fs/jbd2/revoke.c
fs/jbd2/revoke.c
+34
-0
fs/jbd2/transaction.c
fs/jbd2/transaction.c
+3
-2
include/linux/jbd2.h
include/linux/jbd2.h
+1
-0
include/trace/events/ext4.h
include/trace/events/ext4.h
+3
-3
No files found.
Documentation/filesystems/ext4.txt
View file @
ff9cb1c4
...
...
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
behaviour may change in the future as it is
not necessary and has been done this way only
for sake of simplicity.
EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number
of blocks of the resized filesystem is passed in via
a 64-bit integer argument. The kernel allocates the
bitmaps and inode tables, so the userspace tool
just passes the new number of blocks.
..............................................................................
References
...
...
fs/ext4/balloc.c
View file @
ff9cb1c4
...
...
@@ -23,6 +23,8 @@
#include <trace/events/ext4.h>
static
unsigned
ext4_num_base_meta_clusters
(
struct
super_block
*
sb
,
ext4_group_t
block_group
);
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
...
...
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
* This function returns the number of file system metadata clusters at
* the beginning of a block group, including the reserved gdt blocks.
*/
unsigned
ext4_num_base_meta_clusters
(
struct
super_block
*
sb
,
static
unsigned
ext4_num_base_meta_clusters
(
struct
super_block
*
sb
,
ext4_group_t
block_group
)
{
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
...
...
fs/ext4/ext4.h
View file @
ff9cb1c4
...
...
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
__u32
free_blocks_count
;
};
/*
 * Indexes used to index group tables in ext4_new_group_data.
 * GROUP_TABLE_COUNT comes last, so it equals the number of table kinds.
 */
enum {
	BLOCK_BITMAP = 0,	/* block bitmap */
	INODE_BITMAP,		/* inode bitmap */
	INODE_TABLE,		/* inode tables */
	GROUP_TABLE_COUNT,
};
/*
* Flags used by ext4_map_blocks()
*/
...
...
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
...
...
@@ -957,12 +966,13 @@ struct ext4_inode_info {
#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
EXT4_MOUNT2_##opt)
#define ext4_set_bit __test_and_set_bit_le
#define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_clear_bit __test_and_clear_bit_le
#define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
#define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
#define ext4_find_first_zero_bit find_first_zero_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
...
...
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
...
...
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400
/* EA in inode */
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000
/* data in dirent */
#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000
/* data in inode */
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000
/* >2GB or 3-lvl htree */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
...
...
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
extern
unsigned
ext4_free_clusters_after_init
(
struct
super_block
*
sb
,
ext4_group_t
block_group
,
struct
ext4_group_desc
*
gdp
);
extern
unsigned
ext4_num_base_meta_clusters
(
struct
super_block
*
sb
,
ext4_group_t
block_group
);
extern
unsigned
ext4_num_overhead_clusters
(
struct
super_block
*
sb
,
ext4_group_t
block_group
,
struct
ext4_group_desc
*
gdp
);
...
...
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
extern
void
ext4_set_aops
(
struct
inode
*
inode
);
extern
int
ext4_writepage_trans_blocks
(
struct
inode
*
);
extern
int
ext4_chunk_trans_blocks
(
struct
inode
*
,
int
nrblocks
);
extern
int
ext4_block_truncate_page
(
handle_t
*
handle
,
struct
address_space
*
mapping
,
loff_t
from
);
extern
int
ext4_block_zero_page_range
(
handle_t
*
handle
,
struct
address_space
*
mapping
,
loff_t
from
,
loff_t
length
);
extern
int
ext4_discard_partial_page_buffers
(
handle_t
*
handle
,
struct
address_space
*
mapping
,
loff_t
from
,
loff_t
length
,
int
flags
);
extern
int
ext4_discard_partial_page_buffers_no_lock
(
handle_t
*
handle
,
struct
inode
*
inode
,
struct
page
*
page
,
loff_t
from
,
loff_t
length
,
int
flags
);
extern
int
ext4_page_mkwrite
(
struct
vm_area_struct
*
vma
,
struct
vm_fault
*
vmf
);
extern
qsize_t
*
ext4_get_reserved_space
(
struct
inode
*
inode
);
extern
void
ext4_da_update_reserve_space
(
struct
inode
*
inode
,
...
...
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
extern
int
ext4_group_extend
(
struct
super_block
*
sb
,
struct
ext4_super_block
*
es
,
ext4_fsblk_t
n_blocks_count
);
extern
int
ext4_resize_fs
(
struct
super_block
*
sb
,
ext4_fsblk_t
n_blocks_count
);
/* super.c */
extern
void
*
ext4_kvmalloc
(
size_t
size
,
gfp_t
flags
);
...
...
fs/ext4/extents.c
View file @
ff9cb1c4
...
...
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t
i
,
pg_lblk
;
pgoff_t
index
;
if
(
!
test_opt
(
inode
->
i_sb
,
DELALLOC
))
return
0
;
/* reverse search won't work if fs block size is less than page size */
if
(
inode
->
i_blkbits
<
PAGE_CACHE_SHIFT
)
search_hint_reverse
=
0
;
...
...
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
int
err
=
0
;
ext4_io_end_t
*
io
=
EXT4_I
(
inode
)
->
cur_aio_dio
;
ext_debug
(
"ext4_ext_handle_uninitialized_extents: inode %lu, logical"
"block %llu, max_blocks %u, flags %
d, allocated %u
"
,
ext_debug
(
"ext4_ext_handle_uninitialized_extents: inode %lu, logical
"
"block %llu, max_blocks %u, flags %
x, allocated %u
\n
"
,
inode
->
i_ino
,
(
unsigned
long
long
)
map
->
m_lblk
,
map
->
m_len
,
flags
,
allocated
);
ext4_ext_show_leaf
(
inode
,
path
);
...
...
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
ext4_lblk_t
c_offset
=
map
->
m_lblk
&
(
sbi
->
s_cluster_ratio
-
1
);
ext4_lblk_t
ex_cluster_start
,
ex_cluster_end
;
ext4_lblk_t
rr_cluster_start
,
rr_cluster_end
;
ext4_lblk_t
rr_cluster_start
;
ext4_lblk_t
ee_block
=
le32_to_cpu
(
ex
->
ee_block
);
ext4_fsblk_t
ee_start
=
ext4_ext_pblock
(
ex
);
unsigned
short
ee_len
=
ext4_ext_get_actual_len
(
ex
);
...
...
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
/* The requested region passed into ext4_map_blocks() */
rr_cluster_start
=
EXT4_B2C
(
sbi
,
map
->
m_lblk
);
rr_cluster_end
=
EXT4_B2C
(
sbi
,
map
->
m_lblk
+
map
->
m_len
-
1
);
if
((
rr_cluster_start
==
ex_cluster_end
)
||
(
rr_cluster_start
==
ex_cluster_start
))
{
...
...
fs/ext4/ialloc.c
View file @
ff9cb1c4
...
...
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
fatal
=
ext4_journal_get_write_access
(
handle
,
bh2
);
}
ext4_lock_group
(
sb
,
block_group
);
cleared
=
ext4_clear_bit
(
bit
,
bitmap_bh
->
b_data
);
cleared
=
ext4_
test_and_
clear_bit
(
bit
,
bitmap_bh
->
b_data
);
if
(
fatal
||
!
cleared
)
{
ext4_unlock_group
(
sb
,
block_group
);
goto
out
;
...
...
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
ext4_group_t
real_ngroups
=
ext4_get_groups_count
(
sb
);
int
inodes_per_group
=
EXT4_INODES_PER_GROUP
(
sb
);
unsigned
int
freei
,
avefreei
;
unsigned
int
freei
,
avefreei
,
grp_free
;
ext4_fsblk_t
freeb
,
avefreec
;
unsigned
int
ndirs
;
int
max_dirs
,
min_inodes
;
...
...
@@ -477,8 +477,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for
(
i
=
0
;
i
<
ngroups
;
i
++
)
{
grp
=
(
parent_group
+
i
)
%
ngroups
;
desc
=
ext4_get_group_desc
(
sb
,
grp
,
NULL
);
if
(
desc
&&
ext4_free_inodes_count
(
sb
,
desc
)
&&
ext4_free_inodes_count
(
sb
,
desc
)
>=
avefreei
)
{
grp_free
=
ext4_free_inodes_count
(
sb
,
desc
);
if
(
desc
&&
grp_free
&&
grp_free
>=
avefreei
)
{
*
group
=
grp
;
return
0
;
}
...
...
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
*/
down_read
(
&
grp
->
alloc_sem
);
ext4_lock_group
(
sb
,
group
);
if
(
ext4_set_bit
(
ino
,
inode_bitmap_bh
->
b_data
))
{
if
(
ext4_
test_and_
set_bit
(
ino
,
inode_bitmap_bh
->
b_data
))
{
/* not a free inode */
retval
=
1
;
goto
err_ret
;
...
...
@@ -885,8 +885,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
if
(
IS_DIRSYNC
(
inode
))
ext4_handle_sync
(
handle
);
if
(
insert_inode_locked
(
inode
)
<
0
)
{
err
=
-
EINVAL
;
goto
fail_drop
;
/*
* Likely a bitmap corruption causing inode to be allocated
* twice.
*/
err
=
-
EIO
;
goto
fail
;
}
spin_lock
(
&
sbi
->
s_next_gen_lock
);
inode
->
i_generation
=
sbi
->
s_next_generation
++
;
...
...
fs/ext4/inode.c
View file @
ff9cb1c4
...
...
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static
void
ext4_end_io_buffer_write
(
struct
buffer_head
*
bh
,
int
uptodate
);
static
int
__ext4_journalled_writepage
(
struct
page
*
page
,
unsigned
int
len
);
static
int
ext4_bh_delay_or_unwritten
(
handle_t
*
handle
,
struct
buffer_head
*
bh
);
static
int
ext4_discard_partial_page_buffers_no_lock
(
handle_t
*
handle
,
struct
inode
*
inode
,
struct
page
*
page
,
loff_t
from
,
loff_t
length
,
int
flags
);
/*
* Test whether an inode is a fast symlink.
...
...
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if
(
!
io_end
||
!
size
)
goto
out
;
ext_debug
(
"ext4_end_io_dio(): io_end 0x%p"
ext_debug
(
"ext4_end_io_dio(): io_end 0x%p
"
"for inode %lu, iocb 0x%p, offset %llu, size %llu
\n
"
,
iocb
->
private
,
io_end
->
inode
->
i_ino
,
iocb
,
offset
,
size
);
...
...
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
*
* Returns zero on success or negative on failure.
*/
int
ext4_discard_partial_page_buffers_no_lock
(
handle_t
*
handle
,
static
int
ext4_discard_partial_page_buffers_no_lock
(
handle_t
*
handle
,
struct
inode
*
inode
,
struct
page
*
page
,
loff_t
from
,
loff_t
length
,
int
flags
)
{
...
...
@@ -3300,126 +3303,6 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
return
err
;
}
/*
* ext4_block_truncate_page() zeroes out a mapping from file offset `from'
* up to the end of the block which corresponds to `from'.
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
int
ext4_block_truncate_page
(
handle_t
*
handle
,
struct
address_space
*
mapping
,
loff_t
from
)
{
unsigned
offset
=
from
&
(
PAGE_CACHE_SIZE
-
1
);
unsigned
length
;
unsigned
blocksize
;
struct
inode
*
inode
=
mapping
->
host
;
blocksize
=
inode
->
i_sb
->
s_blocksize
;
length
=
blocksize
-
(
offset
&
(
blocksize
-
1
));
return
ext4_block_zero_page_range
(
handle
,
mapping
,
from
,
length
);
}
/*
* ext4_block_zero_page_range() zeros out a mapping of length 'length'
* starting from file offset 'from'. The range to be zero'd must
* be contained with in one block. If the specified range exceeds
* the end of the block it will be shortened to end of the block
* that cooresponds to 'from'
*/
int
ext4_block_zero_page_range
(
handle_t
*
handle
,
struct
address_space
*
mapping
,
loff_t
from
,
loff_t
length
)
{
ext4_fsblk_t
index
=
from
>>
PAGE_CACHE_SHIFT
;
unsigned
offset
=
from
&
(
PAGE_CACHE_SIZE
-
1
);
unsigned
blocksize
,
max
,
pos
;
ext4_lblk_t
iblock
;
struct
inode
*
inode
=
mapping
->
host
;
struct
buffer_head
*
bh
;
struct
page
*
page
;
int
err
=
0
;
page
=
find_or_create_page
(
mapping
,
from
>>
PAGE_CACHE_SHIFT
,
mapping_gfp_mask
(
mapping
)
&
~
__GFP_FS
);
if
(
!
page
)
return
-
ENOMEM
;
blocksize
=
inode
->
i_sb
->
s_blocksize
;
max
=
blocksize
-
(
offset
&
(
blocksize
-
1
));
/*
* correct length if it does not fall between
* 'from' and the end of the block
*/
if
(
length
>
max
||
length
<
0
)
length
=
max
;
iblock
=
index
<<
(
PAGE_CACHE_SHIFT
-
inode
->
i_sb
->
s_blocksize_bits
);
if
(
!
page_has_buffers
(
page
))
create_empty_buffers
(
page
,
blocksize
,
0
);
/* Find the buffer that contains "offset" */
bh
=
page_buffers
(
page
);
pos
=
blocksize
;
while
(
offset
>=
pos
)
{
bh
=
bh
->
b_this_page
;
iblock
++
;
pos
+=
blocksize
;
}
err
=
0
;
if
(
buffer_freed
(
bh
))
{
BUFFER_TRACE
(
bh
,
"freed: skip"
);
goto
unlock
;
}
if
(
!
buffer_mapped
(
bh
))
{
BUFFER_TRACE
(
bh
,
"unmapped"
);
ext4_get_block
(
inode
,
iblock
,
bh
,
0
);
/* unmapped? It's a hole - nothing to do */
if
(
!
buffer_mapped
(
bh
))
{
BUFFER_TRACE
(
bh
,
"still unmapped"
);
goto
unlock
;
}
}
/* Ok, it's mapped. Make sure it's up-to-date */
if
(
PageUptodate
(
page
))
set_buffer_uptodate
(
bh
);
if
(
!
buffer_uptodate
(
bh
))
{
err
=
-
EIO
;
ll_rw_block
(
READ
,
1
,
&
bh
);
wait_on_buffer
(
bh
);
/* Uhhuh. Read error. Complain and punt. */
if
(
!
buffer_uptodate
(
bh
))
goto
unlock
;
}
if
(
ext4_should_journal_data
(
inode
))
{
BUFFER_TRACE
(
bh
,
"get write access"
);
err
=
ext4_journal_get_write_access
(
handle
,
bh
);
if
(
err
)
goto
unlock
;
}
zero_user
(
page
,
offset
,
length
);
BUFFER_TRACE
(
bh
,
"zeroed end of block"
);
err
=
0
;
if
(
ext4_should_journal_data
(
inode
))
{
err
=
ext4_handle_dirty_metadata
(
handle
,
inode
,
bh
);
}
else
mark_buffer_dirty
(
bh
);
unlock:
unlock_page
(
page
);
page_cache_release
(
page
);
return
err
;
}
int
ext4_can_truncate
(
struct
inode
*
inode
)
{
if
(
S_ISREG
(
inode
->
i_mode
))
...
...
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return
0
;
if
(
is_journal_aborted
(
journal
))
return
-
EROFS
;
/* We have to allocate physical blocks for delalloc blocks
* before flushing journal. otherwise delalloc blocks can not
* be allocated any more. even more truncate on delalloc blocks
* could trigger BUG by flushing delalloc blocks in journal.
* There is no delalloc block in non-journal data mode.
*/
if
(
val
&&
test_opt
(
inode
->
i_sb
,
DELALLOC
))
{
err
=
ext4_alloc_da_blocks
(
inode
);
if
(
err
<
0
)
return
err
;
}
jbd2_journal_lock_updates
(
journal
);
jbd2_journal_flush
(
journal
);
/*
* OK, there are no updates running now, and all cached data is
...
...
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if
(
val
)
ext4_set_inode_flag
(
inode
,
EXT4_INODE_JOURNAL_DATA
);
else
else
{
jbd2_journal_flush
(
journal
);
ext4_clear_inode_flag
(
inode
,
EXT4_INODE_JOURNAL_DATA
);
}
ext4_set_aops
(
inode
);
jbd2_journal_unlock_updates
(
journal
);
...
...
fs/ext4/ioctl.c
View file @
ff9cb1c4
...
...
@@ -18,6 +18,8 @@
#include "ext4_jbd2.h"
#include "ext4.h"
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
long
ext4_ioctl
(
struct
file
*
filp
,
unsigned
int
cmd
,
unsigned
long
arg
)
{
struct
inode
*
inode
=
filp
->
f_dentry
->
d_inode
;
...
...
@@ -186,19 +188,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if
(
err
)
return
err
;
if
(
get_user
(
n_blocks_count
,
(
__u32
__user
*
)
arg
))
return
-
EFAULT
;
if
(
get_user
(
n_blocks_count
,
(
__u32
__user
*
)
arg
))
{
err
=
-
EFAULT
;
goto
group_extend_out
;
}
if
(
EXT4_HAS_RO_COMPAT_FEATURE
(
sb
,
EXT4_FEATURE_RO_COMPAT_BIGALLOC
))
{
ext4_msg
(
sb
,
KERN_ERR
,
"Online resizing not supported with bigalloc"
);
return
-
EOPNOTSUPP
;
err
=
-
EOPNOTSUPP
;
goto
group_extend_out
;
}
err
=
mnt_want_write_file
(
filp
);
if
(
err
)
return
err
;
goto
group_extend_out
;
err
=
ext4_group_extend
(
sb
,
EXT4_SB
(
sb
)
->
s_es
,
n_blocks_count
);
if
(
EXT4_SB
(
sb
)
->
s_journal
)
{
...
...
@@ -209,8 +214,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if
(
err
==
0
)
err
=
err2
;
mnt_drop_write_file
(
filp
);
group_extend_out:
ext4_resize_end
(
sb
);
return
err
;
}
...
...
@@ -251,8 +256,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err
=
ext4_move_extents
(
filp
,
donor_filp
,
me
.
orig_start
,
me
.
donor_start
,
me
.
len
,
&
me
.
moved_len
);
mnt_drop_write_file
(
filp
);
if
(
me
.
moved_len
>
0
)
file_remove_suid
(
donor_filp
);
mnt_drop_write
(
filp
->
f_path
.
mnt
);
if
(
copy_to_user
((
struct
move_extent
__user
*
)
arg
,
&
me
,
sizeof
(
me
)))
...
...
@@ -271,19 +275,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return
err
;
if
(
copy_from_user
(
&
input
,
(
struct
ext4_new_group_input
__user
*
)
arg
,
sizeof
(
input
)))
return
-
EFAULT
;
sizeof
(
input
)))
{
err
=
-
EFAULT
;
goto
group_add_out
;
}
if
(
EXT4_HAS_RO_COMPAT_FEATURE
(
sb
,
EXT4_FEATURE_RO_COMPAT_BIGALLOC
))
{
ext4_msg
(
sb
,
KERN_ERR
,
"Online resizing not supported with bigalloc"
);
return
-
EOPNOTSUPP
;
err
=
-
EOPNOTSUPP
;
goto
group_add_out
;
}
err
=
mnt_want_write_file
(
filp
);
if
(
err
)
return
err
;
goto
group_add_out
;
err
=
ext4_group_add
(
sb
,
&
input
);
if
(
EXT4_SB
(
sb
)
->
s_journal
)
{
...
...
@@ -294,8 +301,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if
(
err
==
0
)
err
=
err2
;
mnt_drop_write_file
(
filp
);
group_add_out:
ext4_resize_end
(
sb
);
return
err
;
}
...
...
@@ -335,6 +342,60 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return
err
;
}
case
EXT4_IOC_RESIZE_FS
:
{
ext4_fsblk_t
n_blocks_count
;
struct
super_block
*
sb
=
inode
->
i_sb
;
int
err
=
0
,
err2
=
0
;
if
(
EXT4_HAS_RO_COMPAT_FEATURE
(
sb
,
EXT4_FEATURE_RO_COMPAT_BIGALLOC
))
{
ext4_msg
(
sb
,
KERN_ERR
,
"Online resizing not (yet) supported with bigalloc"
);
return
-
EOPNOTSUPP
;
}
if
(
EXT4_HAS_INCOMPAT_FEATURE
(
sb
,
EXT4_FEATURE_INCOMPAT_META_BG
))
{
ext4_msg
(
sb
,
KERN_ERR
,
"Online resizing not (yet) supported with meta_bg"
);
return
-
EOPNOTSUPP
;
}
if
(
copy_from_user
(
&
n_blocks_count
,
(
__u64
__user
*
)
arg
,
sizeof
(
__u64
)))
{
return
-
EFAULT
;
}
if
(
n_blocks_count
>
MAX_32_NUM
&&
!
EXT4_HAS_INCOMPAT_FEATURE
(
sb
,
EXT4_FEATURE_INCOMPAT_64BIT
))
{
ext4_msg
(
sb
,
KERN_ERR
,
"File system only supports 32-bit block numbers"
);
return
-
EOPNOTSUPP
;
}
err
=
ext4_resize_begin
(
sb
);
if
(
err
)
return
err
;
err
=
mnt_want_write
(
filp
->
f_path
.
mnt
);
if
(
err
)
goto
resizefs_out
;
err
=
ext4_resize_fs
(
sb
,
n_blocks_count
);
if
(
EXT4_SB
(
sb
)
->
s_journal
)
{
jbd2_journal_lock_updates
(
EXT4_SB
(
sb
)
->
s_journal
);
err2
=
jbd2_journal_flush
(
EXT4_SB
(
sb
)
->
s_journal
);
jbd2_journal_unlock_updates
(
EXT4_SB
(
sb
)
->
s_journal
);
}
if
(
err
==
0
)
err
=
err2
;
mnt_drop_write
(
filp
->
f_path
.
mnt
);
resizefs_out:
ext4_resize_end
(
sb
);
return
err
;
}
case
FITRIM
:
{
struct
request_queue
*
q
=
bdev_get_queue
(
sb
->
s_bdev
);
...
...
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
case
EXT4_IOC_MOVE_EXT
:
case
FITRIM
:
case
EXT4_IOC_RESIZE_FS
:
break
;
default:
return
-
ENOIOCTLCMD
;
...
...
fs/ext4/mballoc.c
View file @
ff9cb1c4
...
...
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t
group
;
ext4_grpblk_t
bit
;
trace_ext4_mb_release_group_pa
(
pa
);
trace_ext4_mb_release_group_pa
(
sb
,
pa
);
BUG_ON
(
pa
->
pa_deleted
==
0
);
ext4_get_group_no_and_offset
(
sb
,
pa
->
pa_pstart
,
&
group
,
&
bit
);
BUG_ON
(
group
!=
e4b
->
bd_group
&&
pa
->
pa_len
!=
0
);
...
...
fs/ext4/resize.c
View file @
ff9cb1c4
...
...
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
return
err
;
}
/*
 * ext4_new_flex_group_data is used by the 64bit-resize interface to add one
 * flex group at a time.
 */
struct ext4_new_flex_group_data {
	/* new_group_data for the groups in the flex group */
	struct ext4_new_group_data *groups;
	/* block group flags of the groups in @groups */
	__u16 *bg_flags;
	/* number of groups in @groups */
	ext4_group_t count;
};
/*
 * alloc_flex_gd() allocates an ext4_new_flex_group_data with room for
 * @flexbg_size groups.
 *
 * @flexbg_size: number of entries to allocate in the groups and bg_flags
 *               arrays; also stored in the count field
 *
 * The arrays come from plain kmalloc() and are therefore not zeroed; the
 * caller is expected to fill in every entry it uses.
 *
 * Returns NULL on failure (an allocation failed, or @flexbg_size is too
 * large to size the arrays safely), otherwise the address of the allocated
 * structure.  Release it with free_flex_gd().
 */
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
{
	struct ext4_new_flex_group_data *flex_gd;

	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
	if (flex_gd == NULL)
		goto out3;

	/*
	 * Reject sizes whose array byte count would not fit in 32 bits.  This
	 * keeps both multiplications below from wrapping around (which would
	 * yield an undersized buffer) and guarantees that @flexbg_size is not
	 * truncated when stored in the narrower count field.
	 */
	if (flexbg_size > ~0U / sizeof(struct ext4_new_group_data))
		goto out2;
	flex_gd->count = flexbg_size;

	flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
				  flexbg_size, GFP_NOFS);
	if (flex_gd->groups == NULL)
		goto out2;

	flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
	if (flex_gd->bg_flags == NULL)
		goto out1;

	return flex_gd;

out1:
	kfree(flex_gd->groups);
out2:
	kfree(flex_gd);
out3:
	return NULL;
}
/*
 * free_flex_gd() releases the bg_flags array, the groups array and finally
 * the ext4_new_flex_group_data structure itself.
 *
 * @flex_gd: structure to free; it is dereferenced, so it must not be NULL
 */
static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
{
	kfree(flex_gd->bg_flags);
	kfree(flex_gd->groups);
	kfree(flex_gd);
}
/*
* ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
* and inode tables for a flex group.
*
* This function is used by 64bit-resize. Note that this function allocates
* group tables from the 1st group of groups contained by @flexgd, which may
* be a partial of a flex group.
*
* @sb: super block of fs to which the groups belongs
*/
static
void
ext4_alloc_group_tables
(
struct
super_block
*
sb
,
struct
ext4_new_flex_group_data
*
flex_gd
,
int
flexbg_size
)
{
struct
ext4_new_group_data
*
group_data
=
flex_gd
->
groups
;
struct
ext4_super_block
*
es
=
EXT4_SB
(
sb
)
->
s_es
;
ext4_fsblk_t
start_blk
;
ext4_fsblk_t
last_blk
;
ext4_group_t
src_group
;
ext4_group_t
bb_index
=
0
;
ext4_group_t
ib_index
=
0
;
ext4_group_t
it_index
=
0
;
ext4_group_t
group
;
ext4_group_t
last_group
;
unsigned
overhead
;
BUG_ON
(
flex_gd
->
count
==
0
||
group_data
==
NULL
);
src_group
=
group_data
[
0
].
group
;
last_group
=
src_group
+
flex_gd
->
count
-
1
;
BUG_ON
((
flexbg_size
>
1
)
&&
((
src_group
&
~
(
flexbg_size
-
1
))
!=
(
last_group
&
~
(
flexbg_size
-
1
))));
next_group:
group
=
group_data
[
0
].
group
;
start_blk
=
ext4_group_first_block_no
(
sb
,
src_group
);
last_blk
=
start_blk
+
group_data
[
src_group
-
group
].
blocks_count
;
overhead
=
ext4_bg_has_super
(
sb
,
src_group
)
?
(
1
+
ext4_bg_num_gdb
(
sb
,
src_group
)
+
le16_to_cpu
(
es
->
s_reserved_gdt_blocks
))
:
0
;
start_blk
+=
overhead
;
BUG_ON
(
src_group
>=
group_data
[
0
].
group
+
flex_gd
->
count
);
/* We collect contiguous blocks as much as possible. */
src_group
++
;
for
(;
src_group
<=
last_group
;
src_group
++
)
if
(
!
ext4_bg_has_super
(
sb
,
src_group
))
last_blk
+=
group_data
[
src_group
-
group
].
blocks_count
;
else
break
;
/* Allocate block bitmaps */
for
(;
bb_index
<
flex_gd
->
count
;
bb_index
++
)
{
if
(
start_blk
>=
last_blk
)
goto
next_group
;
group_data
[
bb_index
].
block_bitmap
=
start_blk
++
;
ext4_get_group_no_and_offset
(
sb
,
start_blk
-
1
,
&
group
,
NULL
);
group
-=
group_data
[
0
].
group
;
group_data
[
group
].
free_blocks_count
--
;
if
(
flexbg_size
>
1
)
flex_gd
->
bg_flags
[
group
]
&=
~
EXT4_BG_BLOCK_UNINIT
;
}
/* Allocate inode bitmaps */
for
(;
ib_index
<
flex_gd
->
count
;
ib_index
++
)
{
if
(
start_blk
>=
last_blk
)
goto
next_group
;
group_data
[
ib_index
].
inode_bitmap
=
start_blk
++
;
ext4_get_group_no_and_offset
(
sb
,
start_blk
-
1
,
&
group
,
NULL
);
group
-=
group_data
[
0
].
group
;
group_data
[
group
].
free_blocks_count
--
;
if
(
flexbg_size
>
1
)
flex_gd
->
bg_flags
[
group
]
&=
~
EXT4_BG_BLOCK_UNINIT
;
}
/* Allocate inode tables */
for
(;
it_index
<
flex_gd
->
count
;
it_index
++
)
{
if
(
start_blk
+
EXT4_SB
(
sb
)
->
s_itb_per_group
>
last_blk
)
goto
next_group
;
group_data
[
it_index
].
inode_table
=
start_blk
;
ext4_get_group_no_and_offset
(
sb
,
start_blk
,
&
group
,
NULL
);
group
-=
group_data
[
0
].
group
;
group_data
[
group
].
free_blocks_count
-=
EXT4_SB
(
sb
)
->
s_itb_per_group
;
if
(
flexbg_size
>
1
)
flex_gd
->
bg_flags
[
group
]
&=
~
EXT4_BG_BLOCK_UNINIT
;
start_blk
+=
EXT4_SB
(
sb
)
->
s_itb_per_group
;
}
if
(
test_opt
(
sb
,
DEBUG
))
{
int
i
;
group
=
group_data
[
0
].
group
;
printk
(
KERN_DEBUG
"EXT4-fs: adding a flex group with "
"%d groups, flexbg size is %d:
\n
"
,
flex_gd
->
count
,
flexbg_size
);
for
(
i
=
0
;
i
<
flex_gd
->
count
;
i
++
)
{
printk
(
KERN_DEBUG
"adding %s group %u: %u "
"blocks (%d free)
\n
"
,
ext4_bg_has_super
(
sb
,
group
+
i
)
?
"normal"
:
"no-super"
,
group
+
i
,
group_data
[
i
].
blocks_count
,
group_data
[
i
].
free_blocks_count
);
}
}
}
static
struct
buffer_head
*
bclean
(
handle_t
*
handle
,
struct
super_block
*
sb
,
ext4_fsblk_t
blk
)
{
...
...
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
}
/*
* Set up the block and inode bitmaps, and the inode table for the new group.
* set_flexbg_block_bitmap() marks @count blocks starting from @block as used.
*
* Helper function for setup_new_flex_group_blocks(), which uses it to set
* the bits of the group tables in the block bitmaps.
*
* @sb: super block
* @handle: journal handle
* @flex_gd: flex group data
*/
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
			struct ext4_new_flex_group_data *flex_gd,
			ext4_fsblk_t block, ext4_group_t count)
{
	ext4_group_t count2;

	/*
	 * Returns 0 on success, -EIO if a bitmap buffer cannot be obtained,
	 * or the error reported by the journal helpers.  The bitmap buffer
	 * taken for a group is released on every path out of the loop body.
	 */
	ext4_debug("mark blocks [%llu/%u] used\n", block, count);

	/* Each pass handles the part of [block, block + count) in one group. */
	for (count2 = count; count > 0; count -= count2, block += count2) {
		ext4_fsblk_t start;
		struct buffer_head *bh;
		ext4_group_t group;
		int err;

		ext4_get_group_no_and_offset(sb, block, &group, NULL);
		start = ext4_group_first_block_no(sb, group);
		/* Turn the absolute group number into an index into flex_gd. */
		group -= flex_gd->groups[0].group;

		/*
		 * Bits left in this group's one-block bitmap from @block on,
		 * clamped to what is still to be marked.
		 */
		count2 = sb->s_blocksize * 8 - (block - start);
		if (count2 > count)
			count2 = count;

		/*
		 * Nothing is written for a group whose block bitmap is still
		 * flagged uninitialised; the BUG_ON asserts this only happens
		 * when a single group is being added.
		 */
		if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
			BUG_ON(flex_gd->count > 1);
			continue;
		}

		err = extend_or_restart_transaction(handle, 1);
		if (err)
			return err;

		bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
		if (!bh)
			return -EIO;

		err = ext4_journal_get_write_access(handle, bh);
		if (err) {
			/* Drop the reference taken by sb_getblk(). */
			brelse(bh);
			return err;
		}
		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
			   block - start, count2);
		ext4_set_bits(bh->b_data, block - start, count2);

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		/* Release the buffer whether or not dirtying succeeded. */
		brelse(bh);
		if (unlikely(err))
			return err;
	}

	return 0;
}
/*
* Set up the block and inode bitmaps, and the inode table for the new groups.
* This doesn't need to be part of the main transaction, since we are only
* changing blocks outside the actual filesystem. We still do journaling to
* ensure the recovery is correct in case of a failure just after resize.
* If any part of this fails, we simply abort the resize.
*
* setup_new_flex_group_blocks handles a flex group as follows:
* 1. copy super block and GDT, and initialize group tables if necessary.
* In this step, we only set bits in the block bitmaps for blocks taken by
* super block and GDT.
* 2. allocate group tables in block bitmaps, that is, set bits in block
* bitmap for blocks taken by group tables.
*/
static
int
setup_new_group_blocks
(
struct
super_block
*
sb
,
struct
ext4_new_group_data
*
input
)
static
int
setup_new_
flex_
group_blocks
(
struct
super_block
*
sb
,
struct
ext4_new_flex_group_data
*
flex_gd
)
{
int
group_table_count
[]
=
{
1
,
1
,
EXT4_SB
(
sb
)
->
s_itb_per_group
};
ext4_fsblk_t
start
;
ext4_fsblk_t
block
;
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
ext4_fsblk_t
start
=
ext4_group_first_block_no
(
sb
,
input
->
group
);
int
reserved_gdb
=
ext4_bg_has_super
(
sb
,
input
->
group
)
?
le16_to_cpu
(
sbi
->
s_es
->
s_reserved_gdt_blocks
)
:
0
;
unsigned
long
gdblocks
=
ext4_bg_num_gdb
(
sb
,
input
->
group
);
struct
buffer_head
*
bh
;
struct
ext4_super_block
*
es
=
sbi
->
s_es
;
struct
ext4_new_group_data
*
group_data
=
flex_gd
->
groups
;
__u16
*
bg_flags
=
flex_gd
->
bg_flags
;
handle_t
*
handle
;
ext4_fsblk_t
block
;
ext4_grpblk_t
bit
;
int
i
;
int
err
=
0
,
err2
;
ext4_group_t
group
,
count
;
struct
buffer_head
*
bh
=
NULL
;
int
reserved_gdb
,
i
,
j
,
err
=
0
,
err2
;
BUG_ON
(
!
flex_gd
->
count
||
!
group_data
||
group_data
[
0
].
group
!=
sbi
->
s_groups_count
);
reserved_gdb
=
le16_to_cpu
(
es
->
s_reserved_gdt_blocks
);
/* This transaction may be extended/restarted along the way */
handle
=
ext4_journal_start_sb
(
sb
,
EXT4_MAX_TRANS_DATA
);
if
(
IS_ERR
(
handle
))
return
PTR_ERR
(
handle
);
BUG_ON
(
input
->
group
!=
sbi
->
s_groups_count
);
group
=
group_data
[
0
].
group
;
for
(
i
=
0
;
i
<
flex_gd
->
count
;
i
++
,
group
++
)
{
unsigned
long
gdblocks
;
/* Copy all of the GDT blocks into the backup in this group */
for
(
i
=
0
,
bit
=
1
,
block
=
start
+
1
;
i
<
gdblocks
;
i
++
,
block
++
,
bit
++
)
{
struct
buffer_head
*
gdb
;
gdblocks
=
ext4_bg_num_gdb
(
sb
,
group
);
start
=
ext4_group_first_block_no
(
sb
,
group
);
ext4_debug
(
"update backup group %#04llx (+%d)
\n
"
,
block
,
bit
);
err
=
extend_or_restart_transaction
(
handle
,
1
);
if
(
err
)
goto
exit_journal
;
/* Copy all of the GDT blocks into the backup in this group */
for
(
j
=
0
,
block
=
start
+
1
;
j
<
gdblocks
;
j
++
,
block
++
)
{
struct
buffer_head
*
gdb
;
gdb
=
sb_getblk
(
sb
,
block
);
if
(
!
gdb
)
{
err
=
-
EIO
;
goto
exit_journal
;
}
if
((
err
=
ext4_journal_get_write_access
(
handle
,
gdb
)))
{
ext4_debug
(
"update backup group %#04llx
\n
"
,
block
);
err
=
extend_or_restart_transaction
(
handle
,
1
);
if
(
err
)
goto
out
;
gdb
=
sb_getblk
(
sb
,
block
);
if
(
!
gdb
)
{
err
=
-
EIO
;
goto
out
;
}
err
=
ext4_journal_get_write_access
(
handle
,
gdb
);
if
(
err
)
{
brelse
(
gdb
);
goto
out
;
}
memcpy
(
gdb
->
b_data
,
sbi
->
s_group_desc
[
j
]
->
b_data
,
gdb
->
b_size
);
set_buffer_uptodate
(
gdb
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
gdb
);
if
(
unlikely
(
err
))
{
brelse
(
gdb
);
goto
out
;
}
brelse
(
gdb
);
goto
exit_journal
;
}
memcpy
(
gdb
->
b_data
,
sbi
->
s_group_desc
[
i
]
->
b_data
,
gdb
->
b_size
);
set_buffer_uptodate
(
gdb
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
gdb
);
if
(
unlikely
(
err
))
{
brelse
(
gdb
);
goto
exit_journal
;
/* Zero out all of the reserved backup group descriptor
* table blocks
*/
if
(
ext4_bg_has_super
(
sb
,
group
))
{
err
=
sb_issue_zeroout
(
sb
,
gdblocks
+
start
+
1
,
reserved_gdb
,
GFP_NOFS
);
if
(
err
)
goto
out
;
}
brelse
(
gdb
);
}
/* Zero out all of the reserved backup group descriptor table blocks */
ext4_debug
(
"clear inode table blocks %#04llx -> %#04lx
\n
"
,
block
,
sbi
->
s_itb_per_group
);
err
=
sb_issue_zeroout
(
sb
,
gdblocks
+
start
+
1
,
reserved_gdb
,
GFP_NOFS
);
if
(
err
)
goto
exit_journal
;
/* Initialize group tables of the group @group */
if
(
!
(
bg_flags
[
i
]
&
EXT4_BG_INODE_ZEROED
))
goto
handle_bb
;
err
=
extend_or_restart_transaction
(
handle
,
2
);
if
(
err
)
goto
exit_journal
;
/* Zero out all of the inode table blocks */
block
=
group_data
[
i
].
inode_table
;
ext4_debug
(
"clear inode table blocks %#04llx -> %#04lx
\n
"
,
block
,
sbi
->
s_itb_per_group
);
err
=
sb_issue_zeroout
(
sb
,
block
,
sbi
->
s_itb_per_group
,
GFP_NOFS
);
if
(
err
)
goto
out
;
bh
=
bclean
(
handle
,
sb
,
input
->
block_bitmap
);
if
(
IS_ERR
(
bh
))
{
err
=
PTR_ERR
(
bh
);
goto
exit_journal
;
}
handle_bb:
if
(
bg_flags
[
i
]
&
EXT4_BG_BLOCK_UNINIT
)
goto
handle_ib
;
if
(
ext4_bg_has_super
(
sb
,
input
->
group
))
{
ext4_debug
(
"mark backup group tables %#04llx (+0)
\n
"
,
start
);
ext4_set_bits
(
bh
->
b_data
,
0
,
gdblocks
+
reserved_gdb
+
1
);
}
/* Initialize block bitmap of the @group */
block
=
group_data
[
i
].
block_bitmap
;
err
=
extend_or_restart_transaction
(
handle
,
1
);
if
(
err
)
goto
out
;
ext4_debug
(
"mark block bitmap %#04llx (+%llu)
\n
"
,
input
->
block_bitmap
,
input
->
block_bitmap
-
start
);
ext4_set_bit
(
input
->
block_bitmap
-
start
,
bh
->
b_data
);
ext4_debug
(
"mark inode bitmap %#04llx (+%llu)
\n
"
,
input
->
inode_bitmap
,
input
->
inode_bitmap
-
start
);
ext4_set_bit
(
input
->
inode_bitmap
-
start
,
bh
->
b_data
);
/* Zero out all of the inode table blocks */
block
=
input
->
inode_table
;
ext4_debug
(
"clear inode table blocks %#04llx -> %#04lx
\n
"
,
block
,
sbi
->
s_itb_per_group
);
err
=
sb_issue_zeroout
(
sb
,
block
,
sbi
->
s_itb_per_group
,
GFP_NOFS
);
if
(
err
)
goto
exit_bh
;
ext4_set_bits
(
bh
->
b_data
,
input
->
inode_table
-
start
,
sbi
->
s_itb_per_group
);
bh
=
bclean
(
handle
,
sb
,
block
);
if
(
IS_ERR
(
bh
))
{
err
=
PTR_ERR
(
bh
);
goto
out
;
}
if
(
ext4_bg_has_super
(
sb
,
group
))
{
ext4_debug
(
"mark backup superblock %#04llx (+0)
\n
"
,
start
);
ext4_set_bits
(
bh
->
b_data
,
0
,
gdblocks
+
reserved_gdb
+
1
);
}
ext4_mark_bitmap_end
(
group_data
[
i
].
blocks_count
,
sb
->
s_blocksize
*
8
,
bh
->
b_data
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
bh
);
if
(
err
)
goto
out
;
brelse
(
bh
);
handle_ib:
if
(
bg_flags
[
i
]
&
EXT4_BG_INODE_UNINIT
)
continue
;
ext4_mark_bitmap_end
(
input
->
blocks_count
,
sb
->
s_blocksize
*
8
,
bh
->
b_data
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
bh
);
if
(
unlikely
(
err
))
{
ext4_std_error
(
sb
,
err
);
goto
exit_bh
;
/* Initialize inode bitmap of the @group */
block
=
group_data
[
i
].
inode_bitmap
;
err
=
extend_or_restart_transaction
(
handle
,
1
);
if
(
err
)
goto
out
;
/* Mark unused entries in inode bitmap used */
bh
=
bclean
(
handle
,
sb
,
block
);
if
(
IS_ERR
(
bh
))
{
err
=
PTR_ERR
(
bh
);
goto
out
;
}
ext4_mark_bitmap_end
(
EXT4_INODES_PER_GROUP
(
sb
),
sb
->
s_blocksize
*
8
,
bh
->
b_data
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
bh
);
if
(
err
)
goto
out
;
brelse
(
bh
);
}
brelse
(
bh
);
/* Mark unused entries in inode bitmap used */
ext4_debug
(
"clear inode bitmap %#04llx (+%llu)
\n
"
,
input
->
inode_bitmap
,
input
->
inode_bitmap
-
start
);
if
(
IS_ERR
(
bh
=
bclean
(
handle
,
sb
,
input
->
inode_bitmap
)))
{
err
=
PTR_ERR
(
bh
);
goto
exit_journal
;
bh
=
NULL
;
/* Mark group tables in block bitmap */
for
(
j
=
0
;
j
<
GROUP_TABLE_COUNT
;
j
++
)
{
count
=
group_table_count
[
j
];
start
=
(
&
group_data
[
0
].
block_bitmap
)[
j
];
block
=
start
;
for
(
i
=
1
;
i
<
flex_gd
->
count
;
i
++
)
{
block
+=
group_table_count
[
j
];
if
(
block
==
(
&
group_data
[
i
].
block_bitmap
)[
j
])
{
count
+=
group_table_count
[
j
];
continue
;
}
err
=
set_flexbg_block_bitmap
(
sb
,
handle
,
flex_gd
,
start
,
count
);
if
(
err
)
goto
out
;
count
=
group_table_count
[
j
];
start
=
group_data
[
i
].
block_bitmap
;
block
=
start
;
}
if
(
count
)
{
err
=
set_flexbg_block_bitmap
(
sb
,
handle
,
flex_gd
,
start
,
count
);
if
(
err
)
goto
out
;
}
}
ext4_mark_bitmap_end
(
EXT4_INODES_PER_GROUP
(
sb
),
sb
->
s_blocksize
*
8
,
bh
->
b_data
);
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
bh
);
if
(
unlikely
(
err
))
ext4_std_error
(
sb
,
err
);
exit_bh:
out:
brelse
(
bh
);
exit_journal:
if
((
err2
=
ext4_journal_stop
(
handle
))
&&
!
err
)
err2
=
ext4_journal_stop
(
handle
);
if
(
err2
&&
!
err
)
err
=
err2
;
return
err
;
...
...
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
* groups in current filesystem that have BACKUPS, or -ve error code.
*/
static
int
verify_reserved_gdb
(
struct
super_block
*
sb
,
ext4_group_t
end
,
struct
buffer_head
*
primary
)
{
const
ext4_fsblk_t
blk
=
primary
->
b_blocknr
;
const
ext4_group_t
end
=
EXT4_SB
(
sb
)
->
s_groups_count
;
unsigned
three
=
1
;
unsigned
five
=
5
;
unsigned
seven
=
7
;
...
...
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
if
(
!
gdb_bh
)
return
-
EIO
;
gdbackups
=
verify_reserved_gdb
(
sb
,
gdb_bh
);
gdbackups
=
verify_reserved_gdb
(
sb
,
g
roup
,
g
db_bh
);
if
(
gdbackups
<
0
)
{
err
=
gdbackups
;
goto
exit_bh
;
...
...
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
err
=
-
EIO
;
goto
exit_bh
;
}
if
((
gdbackups
=
verify_reserved_gdb
(
sb
,
primary
[
res
]))
<
0
)
{
gdbackups
=
verify_reserved_gdb
(
sb
,
group
,
primary
[
res
]);
if
(
gdbackups
<
0
)
{
brelse
(
primary
[
res
]);
err
=
gdbackups
;
goto
exit_bh
;
...
...
@@ -735,6 +1021,348 @@ static void update_backups(struct super_block *sb,
}
}
/*
 * ext4_add_new_descs() prepares the descriptor blocks for @count group
 * descriptors, starting at group @group.
 *
 * @handle: journal handle
 * @sb: super block
 * @group: the group no. of the first group desc to be added
 * @resize_inode: the resize inode
 * @count: number of group descriptors to be added
 *
 * Returns 0 when every group was handled, otherwise the error of the first
 * failing step; groups after the failing one are not touched.
 */
static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
			      ext4_group_t group, struct inode *resize_inode,
			      ext4_group_t count)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int n, ret = 0;

	for (n = 0; n < count; n++, group++) {
		int rsv_gdb = 0;
		int slot, blk_idx;

		/* Reserved GDT blocks only count for groups with a superblock. */
		if (ext4_bg_has_super(sb, group))
			rsv_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);

		slot = group % EXT4_DESC_PER_BLOCK(sb);
		blk_idx = group / EXT4_DESC_PER_BLOCK(sb);

		/*
		 * Exactly one of two things happens per group: a group that
		 * is the first slot of its descriptor block goes through
		 * add_new_gdb(); any other group gets write access to the
		 * descriptor block already in s_group_desc[] and, when it has
		 * reserved GDT blocks, goes through reserve_backup_gdb().
		 */
		if (!slot) {
			ret = add_new_gdb(handle, resize_inode, group);
		} else {
			struct buffer_head *desc_bh = sbi->s_group_desc[blk_idx];

			ret = ext4_journal_get_write_access(handle, desc_bh);
			if (!ret && rsv_gdb && ext4_bg_num_gdb(sb, group))
				ret = reserve_backup_gdb(handle, resize_inode,
							 group);
		}

		if (ret)
			return ret;
	}

	return ret;
}
/*
 * ext4_setup_new_descs() fills in the group descriptor of every group in
 * @flex_gd and passes each new descriptor to ext4_mb_add_groupinfo().
 *
 * @handle: journal handle
 * @sb: super block
 * @flex_gd: the groups being added, together with their bg_flags
 *
 * Returns 0 on success, or the first error from
 * ext4_handle_dirty_metadata() or ext4_mb_add_groupinfo(); the remaining
 * groups are left untouched after a failure.
 */
static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_new_group_data *groups = flex_gd->groups;
	__u16 *flags = flex_gd->bg_flags;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int n, ret = 0;

	for (n = 0; n < flex_gd->count; n++) {
		struct ext4_new_group_data *gd = &groups[n];
		ext4_group_t grp = gd->group;
		int slot = grp % EXT4_DESC_PER_BLOCK(sb);
		int blk_idx = grp / EXT4_DESC_PER_BLOCK(sb);
		struct buffer_head *desc_bh;
		struct ext4_group_desc *desc;

		/*
		 * No get_write_access() here: ext4_flex_group_add() runs
		 * ext4_add_new_descs() on these descriptor blocks first.
		 */
		desc_bh = sbi->s_group_desc[blk_idx];

		/* Locate this group's slot inside the descriptor block. */
		desc = (struct ext4_group_desc *)
			((char *)desc_bh->b_data + slot * EXT4_DESC_SIZE(sb));

		/* Start from an all-zero descriptor, then fill every field. */
		memset(desc, 0, EXT4_DESC_SIZE(sb));
		ext4_block_bitmap_set(sb, desc, gd->block_bitmap);
		ext4_inode_bitmap_set(sb, desc, gd->inode_bitmap);
		ext4_inode_table_set(sb, desc, gd->inode_table);
		ext4_free_group_clusters_set(sb, desc,
				EXT4_B2C(sbi, gd->free_blocks_count));
		ext4_free_inodes_set(sb, desc, EXT4_INODES_PER_GROUP(sb));
		desc->bg_flags = cpu_to_le16(flags[n]);
		/* The checksum is computed last, over the finished fields. */
		desc->bg_checksum = ext4_group_desc_csum(sbi, grp, desc);

		ret = ext4_handle_dirty_metadata(handle, NULL, desc_bh);
		if (unlikely(ret)) {
			ext4_std_error(sb, ret);
			return ret;
		}

		/*
		 * The descriptor is complete, so mb_alloc can now build its
		 * per-group information from it.
		 */
		ret = ext4_mb_add_groupinfo(sb, grp, desc);
		if (ret)
			return ret;
	}

	return ret;
}
/*
 * ext4_update_super() updates the super block and the in-memory counters
 * so that the newly added groups become visible to the filesystem.
 *
 * @sb: super block
 * @flex_gd: new added groups
 *
 * The order of the stores below matters: everything a reader of
 * s_groups_count depends on is written before the smp_wmb(), and the
 * group count itself only after it.
 */
static void ext4_update_super(struct super_block *sb,
			      struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_new_group_data *gd = flex_gd->groups;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t added_blocks = 0;
	ext4_fsblk_t added_free = 0;
	ext4_fsblk_t added_reserved;
	int n;

	BUG_ON(flex_gd->count == 0 || gd == NULL);

	/* Sum up what the new groups contribute. */
	for (n = 0; n < flex_gd->count; n++) {
		added_blocks += gd[n].blocks_count;
		added_free += gd[n].free_blocks_count;
	}

	/*
	 * Reserve the same (integer) percentage of the new blocks as is
	 * currently reserved of the whole filesystem.
	 *
	 * NOTE(review): do_div() is documented as taking a 32-bit divisor,
	 * while ext4_blocks_count() yields a 64-bit value -- confirm this
	 * is safe for filesystems with 2^32 or more blocks.
	 */
	added_reserved = ext4_r_blocks_count(es) * 100;
	do_div(added_reserved, ext4_blocks_count(es));
	added_reserved *= added_blocks;
	do_div(added_reserved, 100);

	/*
	 * Make the new blocks and inodes valid before the group count is
	 * raised, so that a group is fully described by the time it is
	 * enabled.  Allocation proceeds group by group, so doing this
	 * early does not yet make the new space allocatable.
	 */
	ext4_blocks_count_set(es, ext4_blocks_count(es) + added_blocks);
	le32_add_cpu(&es->s_inodes_count,
		     EXT4_INODES_PER_GROUP(sb) * flex_gd->count);

	/*
	 * s_groups_count must not be seen by other CPUs ahead of the data
	 * it guards:
	 *
	 * * Writers issue smp_wmb() after updating all dependent data and
	 *   before modifying the groups count.
	 *
	 * * Readers issue smp_rmb() after reading the groups count and
	 *   before reading any dependent data.
	 *
	 * NB. The rules can be relaxed when the group count is checked
	 * while freeing data: allocation from a group is only possible
	 * after serialising against the group count, and freeing only
	 * after serialising against that allocation.
	 */
	smp_wmb();

	/* Enable the new groups. */
	sbi->s_groups_count += flex_gd->count;

	/* The reserved count is raised only once the groups are active. */
	ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + added_reserved);

	/* Finally publish the new free space. */
	percpu_counter_add(&sbi->s_freeclusters_counter,
			   EXT4_B2C(sbi, added_free));
	percpu_counter_add(&sbi->s_freeinodes_counter,
			   EXT4_INODES_PER_GROUP(sb) * flex_gd->count);

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
	    sbi->s_log_groups_per_flex) {
		/* All totals are credited to the flex group of the first new group. */
		ext4_group_t fg = ext4_flex_group(sbi, gd[0].group);

		atomic_add(EXT4_B2C(sbi, added_free),
			   &sbi->s_flex_groups[fg].free_clusters);
		atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
			   &sbi->s_flex_groups[fg].free_inodes);
	}

	if (test_opt(sb, DEBUG))
		printk(KERN_DEBUG "EXT4-fs: added group %u:"
		       "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
		       added_blocks, added_free, added_reserved);
}
/* Add a flex group to an fs.  Ensure we handle all possible error conditions
 * _before_ we start modifying the filesystem, because we cannot abort the
 * transaction and not have it write the data to disk.
 *
 * @sb: super block
 * @resize_inode: the resize inode, passed on to ext4_add_new_descs()
 * @flex_gd: the groups to add; groups[0].group must equal the current
 *           s_groups_count
 *
 * Returns 0 on success or a negative error code.  On success the backups
 * of the superblock and of every group descriptor block that holds one of
 * the new descriptors are refreshed.
 */
static int ext4_flex_group_add(struct super_block *sb,
			       struct inode *resize_inode,
			       struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t o_blocks_count;
	ext4_grpblk_t last;
	ext4_group_t group;
	handle_t *handle;
	unsigned reserved_gdb;
	int err = 0, err2 = 0, credit;

	BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);

	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
	o_blocks_count = ext4_blocks_count(es);
	/* The current last group must be full before groups are appended. */
	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
	BUG_ON(last);

	err = setup_new_flex_group_blocks(sb, flex_gd);
	if (err)
		goto exit;
	/*
	 * We will always be modifying at least the superblock and GDT
	 * block.  If we are adding a group past the last current GDT block,
	 * we will also modify the inode and the dindirect block.  If we
	 * are adding a group with superblock/GDT backups we will also
	 * modify each of the reserved GDT dindirect blocks.
	 */
	credit = flex_gd->count * 4 + reserved_gdb;
	handle = ext4_journal_start_sb(sb, credit);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto exit;
	}

	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
	if (err)
		goto exit_journal;

	group = flex_gd->groups[0].group;
	BUG_ON(group != EXT4_SB(sb)->s_groups_count);
	err = ext4_add_new_descs(handle, sb, group,
				resize_inode, flex_gd->count);
	if (err)
		goto exit_journal;

	err = ext4_setup_new_descs(handle, sb, flex_gd);
	if (err)
		goto exit_journal;

	ext4_update_super(sb, flex_gd);

	err = ext4_handle_dirty_super(handle, sb);

exit_journal:
	/* Keep the first error; a journal-stop failure counts only if none. */
	err2 = ext4_journal_stop(handle);
	if (!err)
		err = err2;

	if (!err) {
		/*
		 * s_group_desc[] is indexed by descriptor block, i.e. by
		 * group / EXT4_DESC_PER_BLOCK(sb), not by blocks per group.
		 * Back up every descriptor block that holds one of the new
		 * descriptors exactly once.  @group still names the first
		 * new group here: err == 0 implies it was set above.
		 */
		int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
		int gdb_num_end = (group + flex_gd->count - 1) /
				  EXT4_DESC_PER_BLOCK(sb);

		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
			       sizeof(struct ext4_super_block));
		for (; gdb_num <= gdb_num_end; gdb_num++) {
			struct buffer_head *gdb_bh;

			gdb_bh = sbi->s_group_desc[gdb_num];
			update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
				       gdb_bh->b_size);
		}
	}
exit:
	return err;
}
/*
 * ext4_setup_next_flex_gd() fills @flex_gd with the next batch of groups
 * to add while growing the filesystem towards @n_blocks_count.
 *
 * @sb: super block
 * @flex_gd: receives the group data and bg_flags of the batch
 * @n_blocks_count: target number of blocks of the filesystem
 * @flexbg_size: number of groups per flex group; used as a bit mask, so it
 *               is presumably a power of two -- TODO confirm with callers
 *
 * Returns 0 when the filesystem already has @n_blocks_count blocks (and
 * @flex_gd is left untouched), 1 when a batch has been set up.
 */
static int ext4_setup_next_flex_gd(struct super_block *sb,
				    struct ext4_new_flex_group_data *flex_gd,
				    ext4_fsblk_t n_blocks_count,
				    unsigned long flexbg_size)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct ext4_new_group_data *gd = flex_gd->groups;
	ext4_grpblk_t per_group = EXT4_BLOCKS_PER_GROUP(sb);
	ext4_fsblk_t cur_blocks = ext4_blocks_count(es);
	ext4_group_t first, target, final;
	ext4_grpblk_t off;
	unsigned long idx, tail;

	if (cur_blocks == n_blocks_count)
		return 0;

	/* The filesystem must currently end on a group boundary. */
	ext4_get_group_no_and_offset(sb, cur_blocks, &first, &off);
	BUG_ON(off);
	/* From here on @off is the offset of the last block of the target. */
	ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &target, &off);

	/* Stop at the end of the current flex group or at the target group. */
	final = first | (flexbg_size - 1);
	if (final > target)
		final = target;

	flex_gd->count = final - first + 1;

	for (idx = 0; idx < flex_gd->count; idx++) {
		int overhead = 0;

		gd[idx].group = first + idx;
		gd[idx].blocks_count = per_group;

		/* Superblock backup, GDT blocks and reserved GDT blocks. */
		if (ext4_bg_has_super(sb, first + idx))
			overhead = 1 + ext4_bg_num_gdb(sb, first + idx) +
				   le16_to_cpu(es->s_reserved_gdt_blocks);
		gd[idx].free_blocks_count = per_group - overhead;

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
			flex_gd->bg_flags[idx] = EXT4_BG_BLOCK_UNINIT |
						 EXT4_BG_INODE_UNINIT;
		else
			flex_gd->bg_flags[idx] = EXT4_BG_INODE_ZEROED;
	}

	if (final == target) {
		tail = idx - 1;

		/* The block bitmap of the very last group is initialised. */
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
			flex_gd->bg_flags[tail] &= ~EXT4_BG_BLOCK_UNINIT;

		/* A partial last group holds fewer blocks than a full one. */
		if (off != per_group - 1) {
			gd[tail].blocks_count = off + 1;
			gd[tail].free_blocks_count -= per_group - off - 1;
		}
	}

	return 1;
}
/* Add group descriptor data to an existing or new group descriptor block.
* Ensure we handle all possible error conditions _before_ we start modifying
* the filesystem, because we cannot abort the transaction and not have it
...
...
@@ -750,16 +1378,15 @@ static void update_backups(struct super_block *sb,
*/
int
ext4_group_add
(
struct
super_block
*
sb
,
struct
ext4_new_group_data
*
input
)
{
struct
ext4_new_flex_group_data
flex_gd
;
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
struct
ext4_super_block
*
es
=
sbi
->
s_es
;
int
reserved_gdb
=
ext4_bg_has_super
(
sb
,
input
->
group
)
?
le16_to_cpu
(
es
->
s_reserved_gdt_blocks
)
:
0
;
struct
buffer_head
*
primary
=
NULL
;
struct
ext4_group_desc
*
gdp
;
struct
inode
*
inode
=
NULL
;
handle_t
*
handle
;
int
gdb_off
,
gdb_num
;
int
err
,
err2
;
int
err
;
__u16
bg_flags
=
0
;
gdb_num
=
input
->
group
/
EXT4_DESC_PER_BLOCK
(
sb
);
gdb_off
=
input
->
group
%
EXT4_DESC_PER_BLOCK
(
sb
);
...
...
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
if
((
err
=
verify_group_input
(
sb
,
input
)))
goto
exit_put
;
err
=
verify_group_input
(
sb
,
input
);
if
(
err
)
goto
out
;
if
((
err
=
setup_new_group_blocks
(
sb
,
input
)))
goto
exit_put
;
flex_gd
.
count
=
1
;
flex_gd
.
groups
=
input
;
flex_gd
.
bg_flags
=
&
bg_flags
;
err
=
ext4_flex_group_add
(
sb
,
inode
,
&
flex_gd
);
out:
iput
(
inode
);
return
err
;
}
/* ext4_group_add */
/*
* We will always be modifying at least the superblock and a GDT
* block. If we are adding a group past the last current GDT block,
* we will also modify the inode and the dindirect block. If we
* are adding a group with superblock/GDT backups we will also
* modify each of the reserved GDT dindirect blocks.
/*
* Extend a group without any checking; the caller must already have validated the request.
*/
static
int
ext4_group_extend_no_check
(
struct
super_block
*
sb
,
ext4_fsblk_t
o_blocks_count
,
ext4_grpblk_t
add
)
{
struct
ext4_super_block
*
es
=
EXT4_SB
(
sb
)
->
s_es
;
handle_t
*
handle
;
int
err
=
0
,
err2
;
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext4_group_add_blocks().
*/
handle
=
ext4_journal_start_sb
(
sb
,
ext4_bg_has_super
(
sb
,
input
->
group
)
?
3
+
reserved_gdb
:
4
);
handle
=
ext4_journal_start_sb
(
sb
,
3
);
if
(
IS_ERR
(
handle
))
{
err
=
PTR_ERR
(
handle
);
goto
exit_put
;
ext4_warning
(
sb
,
"error %d on journal start"
,
err
);
return
err
;
}
if
((
err
=
ext4_journal_get_write_access
(
handle
,
sbi
->
s_sbh
)))
goto
exit_journal
;
/*
* We will only either add reserved group blocks to a backup group
* or remove reserved blocks for the first group in a new group block.
* Doing both would be mean more complex code, and sane people don't
* use non-sparse filesystems anymore. This is already checked above.
*/
if
(
gdb_off
)
{
primary
=
sbi
->
s_group_desc
[
gdb_num
];
if
((
err
=
ext4_journal_get_write_access
(
handle
,
primary
)))
goto
exit_journal
;
if
(
reserved_gdb
&&
ext4_bg_num_gdb
(
sb
,
input
->
group
))
{
err
=
reserve_backup_gdb
(
handle
,
inode
,
input
->
group
);
if
(
err
)
goto
exit_journal
;
}
}
else
{
/*
* Note that we can access new group descriptor block safely
* only if add_new_gdb() succeeds.
*/
err
=
add_new_gdb
(
handle
,
inode
,
input
->
group
);
if
(
err
)
goto
exit_journal
;
primary
=
sbi
->
s_group_desc
[
gdb_num
];
err
=
ext4_journal_get_write_access
(
handle
,
EXT4_SB
(
sb
)
->
s_sbh
);
if
(
err
)
{
ext4_warning
(
sb
,
"error %d on journal write access"
,
err
);
goto
errout
;
}
/*
* OK, now we've set up the new group. Time to make it active.
*
* so we have to be safe wrt. concurrent accesses the group
* data. So we need to be careful to set all of the relevant
* group descriptor data etc. *before* we enable the group.
*
* The key field here is sbi->s_groups_count: as long as
* that retains its old value, nobody is going to access the new
* group.
*
* So first we update all the descriptor metadata for the new
* group; then we update the total disk blocks count; then we
* update the groups count to enable the group; then finally we
* update the free space counts so that the system can start
* using the new disk blocks.
*/
/* Update group descriptor block for new group */
gdp
=
(
struct
ext4_group_desc
*
)((
char
*
)
primary
->
b_data
+
gdb_off
*
EXT4_DESC_SIZE
(
sb
));
memset
(
gdp
,
0
,
EXT4_DESC_SIZE
(
sb
));
ext4_block_bitmap_set
(
sb
,
gdp
,
input
->
block_bitmap
);
/* LV FIXME */
ext4_inode_bitmap_set
(
sb
,
gdp
,
input
->
inode_bitmap
);
/* LV FIXME */
ext4_inode_table_set
(
sb
,
gdp
,
input
->
inode_table
);
/* LV FIXME */
ext4_free_group_clusters_set
(
sb
,
gdp
,
input
->
free_blocks_count
);
ext4_free_inodes_set
(
sb
,
gdp
,
EXT4_INODES_PER_GROUP
(
sb
));
gdp
->
bg_flags
=
cpu_to_le16
(
EXT4_BG_INODE_ZEROED
);
gdp
->
bg_checksum
=
ext4_group_desc_csum
(
sbi
,
input
->
group
,
gdp
);
/*
* We can allocate memory for mb_alloc based on the new group
* descriptor
*/
err
=
ext4_mb_add_groupinfo
(
sb
,
input
->
group
,
gdp
);
ext4_blocks_count_set
(
es
,
o_blocks_count
+
add
);
ext4_debug
(
"freeing blocks %llu through %llu
\n
"
,
o_blocks_count
,
o_blocks_count
+
add
);
/* We add the blocks to the bitmap and set the group need init bit */
err
=
ext4_group_add_blocks
(
handle
,
sb
,
o_blocks_count
,
add
);
if
(
err
)
goto
exit_journal
;
/*
* Make the new blocks and inodes valid next. We do this before
* increasing the group count so that once the group is enabled,
* all of its blocks and inodes are already valid.
*
* We always allocate group-by-group, then block-by-block or
* inode-by-inode within a group, so enabling these
* blocks/inodes before the group is live won't actually let us
* allocate the new space yet.
*/
ext4_blocks_count_set
(
es
,
ext4_blocks_count
(
es
)
+
input
->
blocks_count
);
le32_add_cpu
(
&
es
->
s_inodes_count
,
EXT4_INODES_PER_GROUP
(
sb
));
/*
* We need to protect s_groups_count against other CPUs seeing
* inconsistent state in the superblock.
*
* The precise rules we use are:
*
* * Writers must perform a smp_wmb() after updating all dependent
* data and before modifying the groups count
*
* * Readers must perform an smp_rmb() after reading the groups count
* and before reading any dependent data.
*
* NB. These rules can be relaxed when checking the group count
* while freeing data, as we can only allocate from a block
* group after serialising against the group count, and we can
* only then free after serialising in turn against that
* allocation.
*/
smp_wmb
();
/* Update the global fs size fields */
sbi
->
s_groups_count
++
;
err
=
ext4_handle_dirty_metadata
(
handle
,
NULL
,
primary
);
if
(
unlikely
(
err
))
{
ext4_std_error
(
sb
,
err
);
goto
exit_journal
;
}
/* Update the reserved block counts only once the new group is
* active. */
ext4_r_blocks_count_set
(
es
,
ext4_r_blocks_count
(
es
)
+
input
->
reserved_blocks
);
/* Update the free space counts */
percpu_counter_add
(
&
sbi
->
s_freeclusters_counter
,
EXT4_B2C
(
sbi
,
input
->
free_blocks_count
));
percpu_counter_add
(
&
sbi
->
s_freeinodes_counter
,
EXT4_INODES_PER_GROUP
(
sb
));
if
(
EXT4_HAS_INCOMPAT_FEATURE
(
sb
,
EXT4_FEATURE_INCOMPAT_FLEX_BG
)
&&
sbi
->
s_log_groups_per_flex
)
{
ext4_group_t
flex_group
;
flex_group
=
ext4_flex_group
(
sbi
,
input
->
group
);
atomic_add
(
EXT4_B2C
(
sbi
,
input
->
free_blocks_count
),
&
sbi
->
s_flex_groups
[
flex_group
].
free_clusters
);
atomic_add
(
EXT4_INODES_PER_GROUP
(
sb
),
&
sbi
->
s_flex_groups
[
flex_group
].
free_inodes
);
}
goto
errout
;
ext4_handle_dirty_super
(
handle
,
sb
);
exit_journal:
if
((
err2
=
ext4_journal_stop
(
handle
))
&&
!
err
)
ext4_debug
(
"freed blocks %llu through %llu
\n
"
,
o_blocks_count
,
o_blocks_count
+
add
);
errout:
err2
=
ext4_journal_stop
(
handle
);
if
(
err2
&&
!
err
)
err
=
err2
;
if
(
!
err
&&
primary
)
{
update_backups
(
sb
,
sbi
->
s_sbh
->
b_blocknr
,
(
char
*
)
es
,
if
(
!
err
)
{
if
(
test_opt
(
sb
,
DEBUG
))
printk
(
KERN_DEBUG
"EXT4-fs: extended group to %llu "
"blocks
\n
"
,
ext4_blocks_count
(
es
));
update_backups
(
sb
,
EXT4_SB
(
sb
)
->
s_sbh
->
b_blocknr
,
(
char
*
)
es
,
sizeof
(
struct
ext4_super_block
));
update_backups
(
sb
,
primary
->
b_blocknr
,
primary
->
b_data
,
primary
->
b_size
);
}
exit_put:
iput
(
inode
);
return
err
;
}
/* ext4_group_add */
}
/*
* Extend the filesystem to the new number of blocks specified. This entry
...
...
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_grpblk_t
last
;
ext4_grpblk_t
add
;
struct
buffer_head
*
bh
;
handle_t
*
handle
;
int
err
,
err2
;
int
err
;
ext4_group_t
group
;
o_blocks_count
=
ext4_blocks_count
(
es
);
...
...
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
}
brelse
(
bh
);
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext4_free_blocks().
*/
handle
=
ext4_journal_start_sb
(
sb
,
3
);
if
(
IS_ERR
(
handle
))
{
err
=
PTR_ERR
(
handle
);
ext4_warning
(
sb
,
"error %d on journal start"
,
err
);
goto
exit_put
;
err
=
ext4_group_extend_no_check
(
sb
,
o_blocks_count
,
add
);
return
err
;
}
/* ext4_group_extend */
/*
* ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
*
* @sb: super block of the fs to be resized
* @n_blocks_count: the number of blocks in the resized fs
*/
int
ext4_resize_fs
(
struct
super_block
*
sb
,
ext4_fsblk_t
n_blocks_count
)
{
struct
ext4_new_flex_group_data
*
flex_gd
=
NULL
;
struct
ext4_sb_info
*
sbi
=
EXT4_SB
(
sb
);
struct
ext4_super_block
*
es
=
sbi
->
s_es
;
struct
buffer_head
*
bh
;
struct
inode
*
resize_inode
;
ext4_fsblk_t
o_blocks_count
;
ext4_group_t
o_group
;
ext4_group_t
n_group
;
ext4_grpblk_t
offset
;
unsigned
long
n_desc_blocks
;
unsigned
long
o_desc_blocks
;
unsigned
long
desc_blocks
;
int
err
=
0
,
flexbg_size
=
1
;
o_blocks_count
=
ext4_blocks_count
(
es
);
if
(
test_opt
(
sb
,
DEBUG
))
printk
(
KERN_DEBUG
"EXT4-fs: resizing filesystem from %llu "
"upto %llu blocks
\n
"
,
o_blocks_count
,
n_blocks_count
);
if
(
n_blocks_count
<
o_blocks_count
)
{
/* On-line shrinking not supported */
ext4_warning
(
sb
,
"can't shrink FS - resize aborted"
);
return
-
EINVAL
;
}
if
((
err
=
ext4_journal_get_write_access
(
handle
,
EXT4_SB
(
sb
)
->
s_sbh
)))
{
ext4_warning
(
sb
,
"error %d on journal write access"
,
err
);
ext4_journal_stop
(
handle
);
goto
exit_put
;
if
(
n_blocks_count
==
o_blocks_count
)
/* Nothing to do */
return
0
;
ext4_get_group_no_and_offset
(
sb
,
n_blocks_count
-
1
,
&
n_group
,
&
offset
);
ext4_get_group_no_and_offset
(
sb
,
o_blocks_count
,
&
o_group
,
&
offset
);
n_desc_blocks
=
(
n_group
+
EXT4_DESC_PER_BLOCK
(
sb
))
/
EXT4_DESC_PER_BLOCK
(
sb
);
o_desc_blocks
=
(
sbi
->
s_groups_count
+
EXT4_DESC_PER_BLOCK
(
sb
)
-
1
)
/
EXT4_DESC_PER_BLOCK
(
sb
);
desc_blocks
=
n_desc_blocks
-
o_desc_blocks
;
if
(
desc_blocks
&&
(
!
EXT4_HAS_COMPAT_FEATURE
(
sb
,
EXT4_FEATURE_COMPAT_RESIZE_INODE
)
||
le16_to_cpu
(
es
->
s_reserved_gdt_blocks
)
<
desc_blocks
))
{
ext4_warning
(
sb
,
"No reserved GDT blocks, can't resize"
);
return
-
EPERM
;
}
ext4_blocks_count_set
(
es
,
o_blocks_count
+
add
);
ext4_debug
(
"freeing blocks %llu through %llu
\n
"
,
o_blocks_count
,
o_blocks_count
+
add
);
/* We add the blocks to the bitmap and set the group need init bit */
err
=
ext4_group_add_blocks
(
handle
,
sb
,
o_blocks_count
,
add
);
ext4_handle_dirty_super
(
handle
,
sb
);
ext4_debug
(
"freed blocks %llu through %llu
\n
"
,
o_blocks_count
,
o_blocks_count
+
add
);
err2
=
ext4_journal_stop
(
handle
);
if
(
!
err
&&
err2
)
err
=
err2
;
if
(
err
)
goto
exit_put
;
resize_inode
=
ext4_iget
(
sb
,
EXT4_RESIZE_INO
);
if
(
IS_ERR
(
resize_inode
))
{
ext4_warning
(
sb
,
"Error opening resize inode"
);
return
PTR_ERR
(
resize_inode
);
}
/* See if the device is actually as big as what was requested */
bh
=
sb_bread
(
sb
,
n_blocks_count
-
1
);
if
(
!
bh
)
{
ext4_warning
(
sb
,
"can't read last block, resize aborted"
);
return
-
ENOSPC
;
}
brelse
(
bh
);
if
(
offset
!=
0
)
{
/* extend the last group */
ext4_grpblk_t
add
;
add
=
EXT4_BLOCKS_PER_GROUP
(
sb
)
-
offset
;
err
=
ext4_group_extend_no_check
(
sb
,
o_blocks_count
,
add
);
if
(
err
)
goto
out
;
}
if
(
EXT4_HAS_INCOMPAT_FEATURE
(
sb
,
EXT4_FEATURE_INCOMPAT_FLEX_BG
)
&&
es
->
s_log_groups_per_flex
)
flexbg_size
=
1
<<
es
->
s_log_groups_per_flex
;
o_blocks_count
=
ext4_blocks_count
(
es
);
if
(
o_blocks_count
==
n_blocks_count
)
goto
out
;
flex_gd
=
alloc_flex_gd
(
flexbg_size
);
if
(
flex_gd
==
NULL
)
{
err
=
-
ENOMEM
;
goto
out
;
}
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
while
(
ext4_setup_next_flex_gd
(
sb
,
flex_gd
,
n_blocks_count
,
flexbg_size
))
{
ext4_alloc_group_tables
(
sb
,
flex_gd
,
flexbg_size
);
err
=
ext4_flex_group_add
(
sb
,
resize_inode
,
flex_gd
);
if
(
unlikely
(
err
))
break
;
}
out:
if
(
flex_gd
)
free_flex_gd
(
flex_gd
);
iput
(
resize_inode
);
if
(
test_opt
(
sb
,
DEBUG
))
printk
(
KERN_DEBUG
"EXT4-fs: extended group to %llu blocks
\n
"
,
ext4_blocks_count
(
es
));
update_backups
(
sb
,
EXT4_SB
(
sb
)
->
s_sbh
->
b_blocknr
,
(
char
*
)
es
,
sizeof
(
struct
ext4_super_block
));
exit_put:
printk
(
KERN_DEBUG
"EXT4-fs: resized filesystem from %llu "
"upto %llu blocks
\n
"
,
o_blocks_count
,
n_blocks_count
);
return
err
;
}
/* ext4_group_extend */
}
fs/ext4/super.c
View file @
ff9cb1c4
...
...
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
}
if
(
sbi
->
s_max_batch_time
!=
EXT4_DEF_MAX_BATCH_TIME
)
{
seq_printf
(
seq
,
",max_batch_time=%u"
,
(
unsigned
)
sbi
->
s_m
in
_batch_time
);
(
unsigned
)
sbi
->
s_m
ax
_batch_time
);
}
/*
...
...
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
struct
ext4_group_desc
*
gdp
=
NULL
;
ext4_group_t
flex_group_count
;
ext4_group_t
flex_group
;
int
groups_per_flex
=
0
;
unsigned
int
groups_per_flex
=
0
;
size_t
size
;
int
i
;
sbi
->
s_log_groups_per_flex
=
sbi
->
s_es
->
s_log_groups_per_flex
;
groups_per_flex
=
1
<<
sbi
->
s_log_groups_per_flex
;
if
(
groups_per_flex
<
2
)
{
if
(
sbi
->
s_log_groups_per_flex
<
1
||
sbi
->
s_log_groups_per_flex
>
31
)
{
sbi
->
s_log_groups_per_flex
=
0
;
return
1
;
}
groups_per_flex
=
1
<<
sbi
->
s_log_groups_per_flex
;
/* We allocate both existing and potentially added groups */
flex_group_count
=
((
sbi
->
s_groups_count
+
groups_per_flex
-
1
)
+
...
...
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* of the filesystem.
*/
if
(
le32_to_cpu
(
es
->
s_first_data_block
)
>=
ext4_blocks_count
(
es
))
{
ext4_msg
(
sb
,
KERN_WARNING
,
"bad geometry: first data
"
ext4_msg
(
sb
,
KERN_WARNING
,
"bad geometry: first data
"
"block %u is beyond end of filesystem (%llu)"
,
le32_to_cpu
(
es
->
s_first_data_block
),
ext4_blocks_count
(
es
));
...
...
fs/ext4/xattr_security.c
View file @
ff9cb1c4
...
...
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
name
,
value
,
size
,
flags
);
}
int
ext4_initxattrs
(
struct
inode
*
inode
,
const
struct
xattr
*
xattr_array
,
void
*
fs_info
)
static
int
ext4_initxattrs
(
struct
inode
*
inode
,
const
struct
xattr
*
xattr_array
,
void
*
fs_info
)
{
const
struct
xattr
*
xattr
;
handle_t
*
handle
=
fs_info
;
...
...
fs/jbd2/commit.c
View file @
ff9cb1c4
...
...
@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug
(
3
,
"JBD2: commit phase 1
\n
"
);
/*
* Clear the revoked flag to reflect that there are no revoked buffers
* in the next transaction, which is about to be started.
*/
jbd2_clear_buffer_revoked_flags
(
journal
);
/*
* Switch to a new revoke table.
*/
...
...
fs/jbd2/revoke.c
View file @
ff9cb1c4
...
...
@@ -47,6 +47,10 @@
* overwriting the new data. We don't even need to clear the revoke
* bit here.
*
* We cache the revoke status of a buffer in the current transaction in b_state
* bits. As the name says, the RevokeValid flag indicates that the cached revoke
* status of a buffer is valid and we can rely on the cached status.
*
* Revoke information on buffers is a tri-state value:
*
* RevokeValid clear: no cached revoke status, need to look it up
...
...
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
return
did_revoke
;
}
/*
 * jbd2_clear_buffer_revoked_flags() walks every hash chain of the
 * journal's current revoke table (journal->j_revoke) and clears the
 * revoked flag on the buffer of each recorded block, so that no buffer
 * is still marked revoked when the next transaction is started.
 *
 * @journal: journal whose revoke table is scanned
 *
 * Recorded blocks for which __find_get_block() returns no buffer are
 * skipped.
 */
void jbd2_clear_buffer_revoked_flags(journal_t *journal)
{
	struct jbd2_revoke_table_s *table = journal->j_revoke;
	int bucket;

	for (bucket = 0; bucket < table->hash_size; bucket++) {
		struct list_head *head = &table->hash_table[bucket];
		struct list_head *pos;

		list_for_each(pos, head) {
			/*
			 * NOTE(review): the cast presumes the list node is the
			 * first member of the revoke record -- confirm against
			 * the struct definition.
			 */
			struct jbd2_revoke_record_s *rec =
				(struct jbd2_revoke_record_s *)pos;
			struct buffer_head *bh =
				__find_get_block(journal->j_fs_dev, rec->blocknr,
						 journal->j_blocksize);

			if (!bh)
				continue;

			clear_buffer_revoked(bh);
			/* Release the buffer returned by the lookup above. */
			__brelse(bh);
		}
	}
}
/* journal_switch_revoke table select j_revoke for next transaction
* we do not want to suspend any processing until all revokes are
* written -bzzz
...
...
fs/jbd2/transaction.c
View file @
ff9cb1c4
...
...
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
break
;
spin_lock
(
&
transaction
->
t_handle_lock
);
prepare_to_wait
(
&
journal
->
j_wait_updates
,
&
wait
,
TASK_UNINTERRUPTIBLE
);
if
(
!
atomic_read
(
&
transaction
->
t_updates
))
{
spin_unlock
(
&
transaction
->
t_handle_lock
);
finish_wait
(
&
journal
->
j_wait_updates
,
&
wait
);
break
;
}
prepare_to_wait
(
&
journal
->
j_wait_updates
,
&
wait
,
TASK_UNINTERRUPTIBLE
);
spin_unlock
(
&
transaction
->
t_handle_lock
);
write_unlock
(
&
journal
->
j_state_lock
);
schedule
();
...
...
include/linux/jbd2.h
View file @
ff9cb1c4
...
...
@@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern
int
jbd2_journal_test_revoke
(
journal_t
*
,
unsigned
long
long
,
tid_t
);
extern
void
jbd2_journal_clear_revoke
(
journal_t
*
);
extern
void
jbd2_journal_switch_revoke_table
(
journal_t
*
journal
);
extern
void
jbd2_clear_buffer_revoked_flags
(
journal_t
*
journal
);
/*
* The log thread user interface:
...
...
include/trace/events/ext4.h
View file @
ff9cb1c4
...
...
@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
);
TRACE_EVENT
(
ext4_mb_release_group_pa
,
TP_PROTO
(
struct
ext4_prealloc_space
*
pa
),
TP_PROTO
(
struct
super_block
*
sb
,
struct
ext4_prealloc_space
*
pa
),
TP_ARGS
(
pa
),
TP_ARGS
(
sb
,
pa
),
TP_STRUCT__entry
(
__field
(
dev_t
,
dev
)
...
...
@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
),
TP_fast_assign
(
__entry
->
dev
=
pa
->
pa_inode
->
i_
sb
->
s_dev
;
__entry
->
dev
=
sb
->
s_dev
;
__entry
->
pa_pstart
=
pa
->
pa_pstart
;
__entry
->
pa_len
=
pa
->
pa_len
;
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment