Commit c86d8db3 authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o

ext4: implement allocation of pre-zeroed blocks

DAX page fault path needs to get blocks that are pre-zeroed to avoid
races when two concurrent page faults happen in the same block of a
file. Implement support for this in ext4_map_blocks().
Signed-off-by: default avatarJan Kara <jack@suse.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 53085fac
...@@ -557,6 +557,10 @@ enum { ...@@ -557,6 +557,10 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Convert written extents to unwritten */ /* Convert written extents to unwritten */
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
/* Write zeros to newly created written extents */
#define EXT4_GET_BLOCKS_ZERO 0x0200
#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
EXT4_GET_BLOCKS_ZERO)
/* /*
* The bit position of these flags must not overlap with any of the * The bit position of these flags must not overlap with any of the
......
...@@ -4044,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, ...@@ -4044,6 +4044,14 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
} }
/* IO end_io complete, convert the filled extent to written */ /* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) { if (flags & EXT4_GET_BLOCKS_CONVERT) {
if (flags & EXT4_GET_BLOCKS_ZERO) {
if (allocated > map->m_len)
allocated = map->m_len;
err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
allocated);
if (err < 0)
goto out2;
}
ret = ext4_convert_unwritten_extents_endio(handle, inode, map, ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
ppath); ppath);
if (ret >= 0) { if (ret >= 0) {
......
...@@ -636,6 +636,22 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -636,6 +636,22 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
WARN_ON(1); WARN_ON(1);
} }
/*
* We have to zeroout blocks before inserting them into extent
* status tree. Otherwise someone could look them up there and
* use them before they are really zeroed.
*/
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
retval = ret;
goto out_sem;
}
}
/* /*
* If the extent has been zeroed out, we don't need to update * If the extent has been zeroed out, we don't need to update
* extent status tree. * extent status tree.
...@@ -643,7 +659,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -643,7 +659,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if ((flags & EXT4_GET_BLOCKS_PRE_IO) && if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
ext4_es_lookup_extent(inode, map->m_lblk, &es)) { ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
if (ext4_es_is_written(&es)) if (ext4_es_is_written(&es))
goto has_zeroout; goto out_sem;
} }
status = map->m_flags & EXT4_MAP_UNWRITTEN ? status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
...@@ -654,11 +670,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -654,11 +670,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
status |= EXTENT_STATUS_DELAYED; status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status); map->m_pblk, status);
if (ret < 0) if (ret < 0) {
retval = ret; retval = ret;
goto out_sem;
}
} }
has_zeroout: out_sem:
up_write((&EXT4_I(inode)->i_data_sem)); up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map); ret = check_block_validity(inode, map);
...@@ -3083,6 +3101,7 @@ int ext4_get_block_dax(struct inode *inode, sector_t iblock, ...@@ -3083,6 +3101,7 @@ int ext4_get_block_dax(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT; int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
if (create) if (create)
flags |= EXT4_GET_BLOCKS_CREATE; flags |= EXT4_GET_BLOCKS_CREATE;
ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n", ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
......
...@@ -42,7 +42,8 @@ struct extent_status; ...@@ -42,7 +42,8 @@ struct extent_status;
{ EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \ { EXT4_GET_BLOCKS_CONVERT, "CONVERT" }, \
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }) { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
{ EXT4_GET_BLOCKS_ZERO, "ZERO" })
#define show_mflags(flags) __print_flags(flags, "", \ #define show_mflags(flags) __print_flags(flags, "", \
{ EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_NEW, "N" }, \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment