Commit 331573fe authored by Namjae Jeon, committed by Theodore Ts'o

ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate

This patch implements fallocate's FALLOC_FL_INSERT_RANGE for Ext4.

1) Make sure that both offset and len are block size aligned.
2) Update the i_size of inode by len bytes.
3) Compute the file's logical block number against offset. If the computed
   block number is not the starting block of the extent, split the extent
   such that the block number is the starting block of the extent.
4) Shift all the extents which are lying between [offset, last allocated extent]
   towards right by len bytes. This step will make a hole of len bytes
   at offset.
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
parent de92c8ca
...@@ -90,6 +90,11 @@ typedef __u32 ext4_lblk_t; ...@@ -90,6 +90,11 @@ typedef __u32 ext4_lblk_t;
/* data type for block group number */ /* data type for block group number */
typedef unsigned int ext4_group_t; typedef unsigned int ext4_group_t;
/*
 * Direction in which extents are moved by the extent-shifting helpers:
 * SHIFT_LEFT lowers the logical start block of each extent, SHIFT_RIGHT
 * raises it.
 */
enum SHIFT_DIRECTION {
	SHIFT_LEFT  = 0,
	SHIFT_RIGHT = 1,
};
/* /*
* Flags used in mballoc's allocation_context flags field. * Flags used in mballoc's allocation_context flags field.
* *
...@@ -2947,6 +2952,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -2947,6 +2952,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len); __u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode); extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1, struct inode *inode2, ext4_lblk_t lblk1,
ext4_lblk_t lblk2, ext4_lblk_t count, ext4_lblk_t lblk2, ext4_lblk_t count,
......
...@@ -4912,12 +4912,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4912,12 +4912,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
* bug we should fix.... * bug we should fix....
*/ */
if (ext4_encrypted_inode(inode) && if (ext4_encrypted_inode(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))) (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
FALLOC_FL_ZERO_RANGE)))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Return error if mode is not supported */ /* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE) if (mode & FALLOC_FL_PUNCH_HOLE)
...@@ -4930,6 +4932,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4930,6 +4932,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_COLLAPSE_RANGE) if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len); return ext4_collapse_range(inode, offset, len);
if (mode & FALLOC_FL_INSERT_RANGE)
return ext4_insert_range(inode, offset, len);
if (mode & FALLOC_FL_ZERO_RANGE) if (mode & FALLOC_FL_ZERO_RANGE)
return ext4_zero_range(file, offset, len, mode); return ext4_zero_range(file, offset, len, mode);
...@@ -5224,13 +5229,13 @@ ext4_access_path(handle_t *handle, struct inode *inode, ...@@ -5224,13 +5229,13 @@ ext4_access_path(handle_t *handle, struct inode *inode,
/* /*
* ext4_ext_shift_path_extents: * ext4_ext_shift_path_extents:
* Shift the extents of a path structure lying between path[depth].p_ext * Shift the extents of a path structure lying between path[depth].p_ext
* and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
* from starting block for each extent. * if it is right shift or left shift operation.
*/ */
static int static int
ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
struct inode *inode, handle_t *handle, struct inode *inode, handle_t *handle,
ext4_lblk_t *start) enum SHIFT_DIRECTION SHIFT)
{ {
int depth, err = 0; int depth, err = 0;
struct ext4_extent *ex_start, *ex_last; struct ext4_extent *ex_start, *ex_last;
...@@ -5252,19 +5257,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ...@@ -5252,19 +5257,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
update = 1; update = 1;
*start = le32_to_cpu(ex_last->ee_block) +
ext4_ext_get_actual_len(ex_last);
while (ex_start <= ex_last) { while (ex_start <= ex_last) {
le32_add_cpu(&ex_start->ee_block, -shift); if (SHIFT == SHIFT_LEFT) {
/* Try to merge to the left. */ le32_add_cpu(&ex_start->ee_block,
if ((ex_start > -shift);
EXT_FIRST_EXTENT(path[depth].p_hdr)) && /* Try to merge to the left. */
ext4_ext_try_to_merge_right(inode, if ((ex_start >
path, ex_start - 1)) EXT_FIRST_EXTENT(path[depth].p_hdr))
&&
ext4_ext_try_to_merge_right(inode,
path, ex_start - 1))
ex_last--;
else
ex_start++;
} else {
le32_add_cpu(&ex_last->ee_block, shift);
ext4_ext_try_to_merge_right(inode, path,
ex_last);
ex_last--; ex_last--;
else }
ex_start++;
} }
err = ext4_ext_dirty(handle, inode, path + depth); err = ext4_ext_dirty(handle, inode, path + depth);
if (err) if (err)
...@@ -5279,7 +5290,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ...@@ -5279,7 +5290,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (err) if (err)
goto out; goto out;
le32_add_cpu(&path[depth].p_idx->ei_block, -shift); if (SHIFT == SHIFT_LEFT)
le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
else
le32_add_cpu(&path[depth].p_idx->ei_block, shift);
err = ext4_ext_dirty(handle, inode, path + depth); err = ext4_ext_dirty(handle, inode, path + depth);
if (err) if (err)
goto out; goto out;
...@@ -5297,19 +5311,20 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ...@@ -5297,19 +5311,20 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
/* /*
* ext4_ext_shift_extents: * ext4_ext_shift_extents:
* All the extents which lies in the range from start to the last allocated * All the extents which lies in the range from @start to the last allocated
* block for the file are shifted downwards by shift blocks. * block for the @inode are shifted either towards left or right (depending
* upon @SHIFT) by @shift blocks.
* On success, 0 is returned, error otherwise. * On success, 0 is returned, error otherwise.
*/ */
static int static int
ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t start, ext4_lblk_t shift) ext4_lblk_t start, ext4_lblk_t shift,
enum SHIFT_DIRECTION SHIFT)
{ {
struct ext4_ext_path *path; struct ext4_ext_path *path;
int ret = 0, depth; int ret = 0, depth;
struct ext4_extent *extent; struct ext4_extent *extent;
ext4_lblk_t stop_block; ext4_lblk_t stop, *iterator, ex_start, ex_end;
ext4_lblk_t ex_start, ex_end;
/* Let path point to the last extent */ /* Let path point to the last extent */
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
...@@ -5321,58 +5336,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ...@@ -5321,58 +5336,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent) if (!extent)
goto out; goto out;
stop_block = le32_to_cpu(extent->ee_block) + stop = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent); ext4_ext_get_actual_len(extent);
/* Nothing to shift, if hole is at the end of file */ /*
if (start >= stop_block) * In case of left shift, Don't start shifting extents until we make
goto out; * sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
path = ext4_find_extent(inode, start - 1, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
if (extent) {
ex_start = le32_to_cpu(extent->ee_block);
ex_end = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
ex_start = 0;
ex_end = 0;
}
/* if ((start == ex_start && shift > ex_start) ||
* Don't start shifting extents until we make sure the hole is big (shift > start - ex_end)) {
* enough to accomodate the shift. ext4_ext_drop_refs(path);
*/ kfree(path);
path = ext4_find_extent(inode, start - 1, &path, 0); return -EINVAL;
if (IS_ERR(path)) }
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
if (extent) {
ex_start = le32_to_cpu(extent->ee_block);
ex_end = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
ex_start = 0;
ex_end = 0;
} }
if ((start == ex_start && shift > ex_start) || /*
(shift > start - ex_end)) * In case of left shift, iterator points to start and it is increased
return -EINVAL; * till we reach stop. In case of right shift, iterator points to stop
* and it is decreased till we reach start.
*/
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
iterator = &stop;
/* Its safe to start updating extents */ /* Its safe to start updating extents */
while (start < stop_block) { while (start < stop) {
path = ext4_find_extent(inode, start, &path, 0); path = ext4_find_extent(inode, *iterator, &path, 0);
if (IS_ERR(path)) if (IS_ERR(path))
return PTR_ERR(path); return PTR_ERR(path);
depth = path->p_depth; depth = path->p_depth;
extent = path[depth].p_ext; extent = path[depth].p_ext;
if (!extent) { if (!extent) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu", EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) start); (unsigned long) *iterator);
return -EIO; return -EIO;
} }
if (start > le32_to_cpu(extent->ee_block)) { if (SHIFT == SHIFT_LEFT && *iterator >
le32_to_cpu(extent->ee_block)) {
/* Hole, move to the next extent */ /* Hole, move to the next extent */
if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
path[depth].p_ext++; path[depth].p_ext++;
} else { } else {
start = ext4_ext_next_allocated_block(path); *iterator = ext4_ext_next_allocated_block(path);
continue; continue;
} }
} }
if (SHIFT == SHIFT_LEFT) {
extent = EXT_LAST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) > 0 ?
le32_to_cpu(extent->ee_block) - 1 : 0;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode, ret = ext4_ext_shift_path_extents(path, shift, inode,
handle, &start); handle, SHIFT);
if (ret) if (ret)
break; break;
} }
...@@ -5485,7 +5526,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5485,7 +5526,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
ret = ext4_ext_shift_extents(inode, handle, punch_stop, ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start); punch_stop - punch_start, SHIFT_LEFT);
if (ret) { if (ret) {
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop; goto out_stop;
...@@ -5510,6 +5551,174 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5510,6 +5551,174 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret; return ret;
} }
/*
 * ext4_insert_range:
 * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
 * The data blocks starting from @offset to the EOF are shifted by @len
 * towards right to create a hole in the @inode. Inode size is increased
 * by len bytes.
 *
 * @inode:  file to operate on; must be a regular, extent-mapped file
 * @offset: byte offset at which the hole is inserted; must be cluster
 *          size aligned and strictly below the current i_size
 * @len:    length of the hole in bytes; must be cluster size aligned
 *
 * Returns 0 on success, error otherwise:
 *   -EOPNOTSUPP  the inode is not extent based or not a regular file
 *   -EINVAL      @offset or @len is misaligned, or @offset >= i_size
 *   -EFBIG       the resulting size would exceed s_maxbytes
 *   any error propagated from the journal, extent lookup, extent split,
 *   extent-status removal or extent shifting steps.
 */
int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct super_block *sb = inode->i_sb;
	handle_t *handle;
	struct ext4_ext_path *path;
	struct ext4_extent *extent;
	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
	unsigned int credits, ee_len;
	int ret = 0, depth, split_flag = 0;
	loff_t ioffset;

	/*
	 * We need to test this early because xfstests assumes that an
	 * insert range of (0, 1) will return EOPNOTSUPP if the file
	 * system does not support insert range.
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return -EOPNOTSUPP;

	/* Insert range works only on fs cluster size aligned offsets. */
	if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
			len & (EXT4_CLUSTER_SIZE(sb) - 1))
		return -EINVAL;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	trace_ext4_insert_range(inode, offset, len);

	/* Convert the byte range into logical block units. */
	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Call ext4_force_commit to flush all data in case of data=journal */
	if (ext4_should_journal_data(inode)) {
		ret = ext4_force_commit(inode->i_sb);
		if (ret)
			return ret;
	}

	/*
	 * Need to round down to align start offset to page size boundary
	 * for page size > block size.
	 */
	ioffset = round_down(offset, PAGE_SIZE);

	/* Write out all dirty pages */
	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
			LLONG_MAX);
	if (ret)
		return ret;

	/* Take mutex lock */
	mutex_lock(&inode->i_mutex);

	/* Currently just for extent based files */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ret = -EOPNOTSUPP;
		goto out_mutex;
	}

	/*
	 * Check whether the maximum file size would be exceeded. The test is
	 * written as a subtraction so that the signed loff_t sum
	 * i_size + len can never overflow.
	 */
	if (len > inode->i_sb->s_maxbytes - inode->i_size) {
		ret = -EFBIG;
		goto out_mutex;
	}

	/* Offset should be less than i_size */
	if (offset >= i_size_read(inode)) {
		ret = -EINVAL;
		goto out_mutex;
	}

	truncate_pagecache(inode, ioffset);

	/* Wait for existing dio to complete */
	ext4_inode_block_unlocked_dio(inode);
	inode_dio_wait(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_dio;
	}

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
	EXT4_I(inode)->i_disksize += len;
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ret = ext4_mark_inode_dirty(handle, inode);
	if (ret)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
	if (IS_ERR(path)) {
		/* Propagate the lookup failure instead of returning 0. */
		ret = PTR_ERR(path);
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	depth = ext_depth(inode);
	extent = path[depth].p_ext;
	if (extent) {
		ee_start_lblk = le32_to_cpu(extent->ee_block);
		ee_len = ext4_ext_get_actual_len(extent);

		/*
		 * If offset_lblk is not the starting block of extent, split
		 * the extent @offset_lblk
		 */
		if ((offset_lblk > ee_start_lblk) &&
				(offset_lblk < (ee_start_lblk + ee_len))) {
			if (ext4_ext_is_unwritten(extent))
				split_flag = EXT4_EXT_MARK_UNWRIT1 |
					EXT4_EXT_MARK_UNWRIT2;
			ret = ext4_split_extent_at(handle, inode, &path,
					offset_lblk, split_flag,
					EXT4_EX_NOCACHE |
					EXT4_GET_BLOCKS_PRE_IO |
					EXT4_GET_BLOCKS_METADATA_NOFAIL);
		}
	}

	/*
	 * The lookup path is no longer needed; release it whether or not an
	 * extent was found so it is not leaked when the tree is empty.
	 */
	ext4_ext_drop_refs(path);
	kfree(path);
	if (ret < 0) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	ret = ext4_es_remove_extent(inode, offset_lblk,
			EXT_MAX_BLOCKS - offset_lblk);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	/*
	 * if offset_lblk lies in a hole which is at start of file, use
	 * ee_start_lblk to shift extents
	 */
	ret = ext4_ext_shift_extents(inode, handle,
		ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
		len_lblk, SHIFT_RIGHT);

	up_write(&EXT4_I(inode)->i_data_sem);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

out_stop:
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
/** /**
* ext4_swap_extents - Swap extents between two inodes * ext4_swap_extents - Swap extents between two inodes
* *
......
...@@ -2478,6 +2478,31 @@ TRACE_EVENT(ext4_collapse_range, ...@@ -2478,6 +2478,31 @@ TRACE_EVENT(ext4_collapse_range,
__entry->offset, __entry->len) __entry->offset, __entry->len)
); );
/*
 * Tracepoint for ext4_insert_range(): records the device and inode number
 * of @inode together with the @offset and @len arguments, and prints them
 * as "dev major,minor ino N offset O len L".
 */
TRACE_EVENT(ext4_insert_range,
TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
TP_ARGS(inode, offset, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(loff_t, offset)
__field(loff_t, len)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->offset = offset;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu offset %lld len %lld",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->offset, __entry->len)
);
TRACE_EVENT(ext4_es_shrink, TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int nr_skipped, int retried), int nr_skipped, int retried),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment