Commit 9ac03675 authored by Linus Torvalds

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "These are regression and bug fixes for ext4.

  We had a number of new features in ext4 during this merge window
  (ZERO_RANGE and COLLAPSE_RANGE fallocate modes, renameat, etc.) so
  there were many more regression and bug fixes this time around.  It
  didn't help that xfstests hadn't been fully updated to fully stress
  test COLLAPSE_RANGE until after -rc1"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (31 commits)
  ext4: disable COLLAPSE_RANGE for bigalloc
  ext4: fix COLLAPSE_RANGE failure with 1KB block size
  ext4: use EINVAL if not a regular file in ext4_collapse_range()
  ext4: enforce we are operating on a regular file in ext4_zero_range()
  ext4: fix extent merging in ext4_ext_shift_path_extents()
  ext4: discard preallocations after removing space
  ext4: no need to truncate pagecache twice in collapse range
  ext4: fix removing status extents in ext4_collapse_range()
  ext4: use filemap_write_and_wait_range() correctly in collapse range
  ext4: use truncate_pagecache() in collapse range
  ext4: remove temporary shim used to merge COLLAPSE_RANGE and ZERO_RANGE
  ext4: fix ext4_count_free_clusters() with EXT4FS_DEBUG and bigalloc enabled
  ext4: always check ext4_ext_find_extent result
  ext4: fix error handling in ext4_ext_shift_extents
  ext4: silence sparse check warning for function ext4_trim_extent
  ext4: COLLAPSE_RANGE only works on extent-based files
  ext4: fix byte order problems introduced by the COLLAPSE_RANGE patches
  ext4: use i_size_read in ext4_unaligned_aio()
  fs: disallow all fallocate operation on active swapfile
  fs: move falloc collapse range check into the filesystem methods
  ...
parents a798c10f 0a04b248
...@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode, ...@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (IS_SWAPFILE(inode))
return -ETXTBSY;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
if (ceph_snap(inode) != CEPH_NOSNAP) { if (ceph_snap(inode) != CEPH_NOSNAP) {
......
...@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) ...@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
continue; continue;
x = ext4_count_free(bitmap_bh->b_data, x = ext4_count_free(bitmap_bh->b_data,
EXT4_BLOCKS_PER_GROUP(sb) / 8); EXT4_CLUSTERS_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
i, ext4_free_group_clusters(sb, gdp), x); i, ext4_free_group_clusters(sb, gdp), x);
bitmap_count += x; bitmap_count += x;
......
...@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) ...@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
} }
/*
* Update i_disksize after writeback has been started. Races with truncate
* are avoided by checking i_size under i_data_sem.
*/
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
{
loff_t i_size;
down_write(&EXT4_I(inode)->i_data_sem);
i_size = i_size_read(inode);
if (newsize > i_size)
newsize = i_size;
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
up_write(&EXT4_I(inode)->i_data_sem);
}
struct ext4_group_info { struct ext4_group_info {
unsigned long bb_state; unsigned long bb_state;
struct rb_root bb_free_root; struct rb_root bb_free_root;
......
...@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle, ...@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path); return PTR_ERR(path);
depth = ext_depth(inode); depth = ext_depth(inode);
ex = path[depth].p_ext; ex = path[depth].p_ext;
if (!ex) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) map->m_lblk);
return -EIO;
}
uninitialized = ext4_ext_is_uninitialized(ex); uninitialized = ext4_ext_is_uninitialized(ex);
split_flag1 = 0; split_flag1 = 0;
...@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle, ...@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
} }
depth = ext_depth(inode); depth = ext_depth(inode);
ex = path[depth].p_ext; ex = path[depth].p_ext;
if (!ex) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) map->m_lblk);
err = -EIO;
goto out;
}
} }
err = ext4_ext_get_access(handle, inode, path + depth); err = ext4_ext_get_access(handle, inode, path + depth);
...@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
trace_ext4_zero_range(inode, offset, len, mode); trace_ext4_zero_range(inode, offset, len, mode);
if (!S_ISREG(inode->i_mode))
return -EINVAL;
/* /*
* Write out all dirty pages to avoid race conditions * Write out all dirty pages to avoid race conditions
* Then release them. * Then release them.
...@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_PUNCH_HOLE) if (mode & FALLOC_FL_PUNCH_HOLE)
return ext4_punch_hole(inode, offset, len); return ext4_punch_hole(inode, offset, len);
if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);
ret = ext4_convert_inline_data(inode); ret = ext4_convert_inline_data(inode);
if (ret) if (ret)
return ret; return ret;
...@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);
if (mode & FALLOC_FL_ZERO_RANGE) if (mode & FALLOC_FL_ZERO_RANGE)
return ext4_zero_range(file, offset, len, mode); return ext4_zero_range(file, offset, len, mode);
...@@ -5229,17 +5243,18 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ...@@ -5229,17 +5243,18 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
update = 1; update = 1;
*start = ex_last->ee_block + *start = le32_to_cpu(ex_last->ee_block) +
ext4_ext_get_actual_len(ex_last); ext4_ext_get_actual_len(ex_last);
while (ex_start <= ex_last) { while (ex_start <= ex_last) {
ex_start->ee_block -= shift; le32_add_cpu(&ex_start->ee_block, -shift);
if (ex_start > /* Try to merge to the left. */
EXT_FIRST_EXTENT(path[depth].p_hdr)) { if ((ex_start >
if (ext4_ext_try_to_merge_right(inode, EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
ext4_ext_try_to_merge_right(inode,
path, ex_start - 1)) path, ex_start - 1))
ex_last--; ex_last--;
} else
ex_start++; ex_start++;
} }
err = ext4_ext_dirty(handle, inode, path + depth); err = ext4_ext_dirty(handle, inode, path + depth);
...@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, ...@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
if (err) if (err)
goto out; goto out;
path[depth].p_idx->ei_block -= shift; le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
err = ext4_ext_dirty(handle, inode, path + depth); err = ext4_ext_dirty(handle, inode, path + depth);
if (err) if (err)
goto out; goto out;
...@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ...@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
return ret; return ret;
} }
stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); stop_block = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
kfree(path); kfree(path);
...@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ...@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* enough to accomodate the shift. * enough to accomodate the shift.
*/ */
path = ext4_ext_find_extent(inode, start - 1, NULL, 0); path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth; depth = path->p_depth;
extent = path[depth].p_ext; extent = path[depth].p_ext;
ex_start = extent->ee_block; if (extent) {
ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); ex_start = le32_to_cpu(extent->ee_block);
ex_end = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
} else {
ex_start = 0;
ex_end = 0;
}
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
kfree(path); kfree(path);
...@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ...@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
return PTR_ERR(path); return PTR_ERR(path);
depth = path->p_depth; depth = path->p_depth;
extent = path[depth].p_ext; extent = path[depth].p_ext;
current_block = extent->ee_block; if (!extent) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) start);
return -EIO;
}
current_block = le32_to_cpu(extent->ee_block);
if (start > current_block) { if (start > current_block) {
/* Hole, move to the next extent */ /* Hole, move to the next extent */
ret = mext_next_extent(inode, path, &extent); ret = mext_next_extent(inode, path, &extent);
...@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_lblk_t punch_start, punch_stop; ext4_lblk_t punch_start, punch_stop;
handle_t *handle; handle_t *handle;
unsigned int credits; unsigned int credits;
loff_t new_size; loff_t new_size, ioffset;
int ret; int ret;
BUG_ON(offset + len > i_size_read(inode));
/* Collapse range works only on fs block size aligned offsets. */ /* Collapse range works only on fs block size aligned offsets. */
if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
len & (EXT4_BLOCK_SIZE(sb) - 1)) len & (EXT4_BLOCK_SIZE(sb) - 1))
return -EINVAL; return -EINVAL;
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
return -EOPNOTSUPP; return -EOPNOTSUPP;
trace_ext4_collapse_range(inode, offset, len); trace_ext4_collapse_range(inode, offset, len);
...@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
/* Call ext4_force_commit to flush all data in case of data=journal. */
if (ext4_should_journal_data(inode)) {
ret = ext4_force_commit(inode->i_sb);
if (ret)
return ret;
}
/*
* Need to round down offset to be aligned with page size boundary
* for page size > block size.
*/
ioffset = round_down(offset, PAGE_SIZE);
/* Write out all dirty pages */ /* Write out all dirty pages */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
LLONG_MAX);
if (ret) if (ret)
return ret; return ret;
/* Take mutex lock */ /* Take mutex lock */
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
/* It's not possible punch hole on append only file */ /*
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { * There is no need to overlap collapse range with EOF, in which case
ret = -EPERM; * it is effectively a truncate operation
goto out_mutex; */
} if (offset + len >= i_size_read(inode)) {
ret = -EINVAL;
if (IS_SWAPFILE(inode)) {
ret = -ETXTBSY;
goto out_mutex; goto out_mutex;
} }
...@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex; goto out_mutex;
} }
truncate_pagecache_range(inode, offset, -1); truncate_pagecache(inode, ioffset);
/* Wait for existing dio to complete */ /* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode); ext4_inode_block_unlocked_dio(inode);
...@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
ret = ext4_es_remove_extent(inode, punch_start, ret = ext4_es_remove_extent(inode, punch_start,
EXT_MAX_BLOCKS - punch_start - 1); EXT_MAX_BLOCKS - punch_start);
if (ret) { if (ret) {
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop; goto out_stop;
...@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop; goto out_stop;
} }
ext4_discard_preallocations(inode);
ret = ext4_ext_shift_extents(inode, handle, punch_stop, ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start); punch_stop - punch_start);
...@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
} }
new_size = i_size_read(inode) - len; new_size = i_size_read(inode) - len;
truncate_setsize(inode, new_size); i_size_write(inode, new_size);
EXT4_I(inode)->i_disksize = new_size; EXT4_I(inode)->i_disksize = new_size;
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
......
...@@ -810,7 +810,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ...@@ -810,7 +810,7 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
newes.es_lblk = end + 1; newes.es_lblk = end + 1;
newes.es_len = len2; newes.es_len = len2;
block = 0x7FDEADBEEF; block = 0x7FDEADBEEFULL;
if (ext4_es_is_written(&orig_es) || if (ext4_es_is_written(&orig_es) ||
ext4_es_is_unwritten(&orig_es)) ext4_es_is_unwritten(&orig_es))
block = ext4_es_pblock(&orig_es) + block = ext4_es_pblock(&orig_es) +
......
...@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, ...@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
size_t count = iov_length(iov, nr_segs); size_t count = iov_length(iov, nr_segs);
loff_t final_size = pos + count; loff_t final_size = pos + count;
if (pos >= inode->i_size) if (pos >= i_size_read(inode))
return 0; return 0;
if ((pos & blockmask) || (final_size & blockmask)) if ((pos & blockmask) || (final_size & blockmask))
......
...@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (unlikely(map->m_len > INT_MAX)) if (unlikely(map->m_len > INT_MAX))
map->m_len = INT_MAX; map->m_len = INT_MAX;
/* We can handle the block number less than EXT_MAX_BLOCKS */
if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
return -EIO;
/* Lookup extent status tree firstly */ /* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
ext4_es_lru_add(inode); ext4_es_lru_add(inode);
...@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
return err; return err;
} while (map->m_len); } while (map->m_len);
/* Update on-disk size after IO is submitted */ /*
* Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem.
*/
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
if (disksize > EXT4_I(inode)->i_disksize) { if (disksize > EXT4_I(inode)->i_disksize) {
int err2; int err2;
loff_t i_size;
ext4_wb_update_i_disksize(inode, disksize); down_write(&EXT4_I(inode)->i_data_sem);
i_size = i_size_read(inode);
if (disksize > i_size)
disksize = i_size;
if (disksize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = disksize;
err2 = ext4_mark_inode_dirty(handle, inode); err2 = ext4_mark_inode_dirty(handle, inode);
up_write(&EXT4_I(inode)->i_data_sem);
if (err2) if (err2)
ext4_error(inode->i_sb, ext4_error(inode->i_sb,
"Failed to mark inode %lu dirty", "Failed to mark inode %lu dirty",
...@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ...@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
} }
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
/* It's not possible punch hole on append only file */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
ret = -EPERM;
goto out_mutex;
}
if (IS_SWAPFILE(inode)) {
ret = -ETXTBSY;
goto out_mutex;
}
/* No need to punch hole beyond i_size */ /* No need to punch hole beyond i_size */
if (offset >= inode->i_size) if (offset >= inode->i_size)
...@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) ...@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ret = ext4_free_hole_blocks(handle, inode, first_block, ret = ext4_free_hole_blocks(handle, inode, first_block,
stop_block); stop_block);
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
...@@ -4423,21 +4427,20 @@ static int ext4_do_update_inode(handle_t *handle, ...@@ -4423,21 +4427,20 @@ static int ext4_do_update_inode(handle_t *handle,
* *
* We are called from a few places: * We are called from a few places:
* *
* - Within generic_file_write() for O_SYNC files. * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running * Here, there will be no transaction running. We wait for any running
* transaction to commit. * transaction to commit.
* *
* - Within sys_sync(), kupdate and such. * - Within flush work (sys_sync(), kupdate and such).
* We wait on commit, if tol to. * We wait on commit, if told to.
* *
* - Within prune_icache() (PF_MEMALLOC == true) * - Within iput_final() -> write_inode_now()
* Here we simply return. We can't afford to block kswapd on the * We wait on commit, if told to.
* journal commit.
* *
* In all cases it is actually safe for us to return without doing anything, * In all cases it is actually safe for us to return without doing anything,
* because the inode has been copied into a raw inode buffer in * because the inode has been copied into a raw inode buffer in
* ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
* knfsd. * writeback.
* *
* Note that we are absolutely dependent upon all inode dirtiers doing the * Note that we are absolutely dependent upon all inode dirtiers doing the
* right thing: they *must* call mark_inode_dirty() after dirtying info in * right thing: they *must* call mark_inode_dirty() after dirtying info in
...@@ -4449,15 +4452,15 @@ static int ext4_do_update_inode(handle_t *handle, ...@@ -4449,15 +4452,15 @@ static int ext4_do_update_inode(handle_t *handle,
* stuff(); * stuff();
* inode->i_size = expr; * inode->i_size = expr;
* *
* is in error because a kswapd-driven write_inode() could occur while * is in error because write_inode() could occur while `stuff()' is running,
* `stuff()' is running, and the new i_size will be lost. Plus the inode * and the new i_size will be lost. Plus the inode will no longer be on the
* will no longer be on the superblock's dirty inode list. * superblock's dirty inode list.
*/ */
int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{ {
int err; int err;
if (current->flags & PF_MEMALLOC) if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0; return 0;
if (EXT4_SB(inode->i_sb)->s_journal) { if (EXT4_SB(inode->i_sb)->s_journal) {
......
...@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, ...@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
poff = block % blocks_per_page; poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (!page) if (!page)
return -EIO; return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping); BUG_ON(page->mapping != inode->i_mapping);
e4b->bd_bitmap_page = page; e4b->bd_bitmap_page = page;
e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
...@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, ...@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
pnum = block / blocks_per_page; pnum = block / blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (!page) if (!page)
return -EIO; return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping); BUG_ON(page->mapping != inode->i_mapping);
e4b->bd_buddy_page = page; e4b->bd_buddy_page = page;
return 0; return 0;
...@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ...@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
unlock_page(page); unlock_page(page);
} }
} }
if (page == NULL || !PageUptodate(page)) { if (page == NULL) {
ret = -ENOMEM;
goto err;
}
if (!PageUptodate(page)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
...@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ...@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
unlock_page(page); unlock_page(page);
} }
} }
if (page == NULL || !PageUptodate(page)) { if (page == NULL) {
ret = -ENOMEM;
goto err;
}
if (!PageUptodate(page)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
} }
...@@ -5008,6 +5016,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ...@@ -5008,6 +5016,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
*/ */
static int ext4_trim_extent(struct super_block *sb, int start, int count, static int ext4_trim_extent(struct super_block *sb, int start, int count,
ext4_group_t group, struct ext4_buddy *e4b) ext4_group_t group, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{ {
struct ext4_free_extent ex; struct ext4_free_extent ex;
int ret = 0; int ret = 0;
......
...@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error) { if (error) {
struct inode *inode = io_end->inode; struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error writing to inode %lu " ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)", "(offset %llu size %ld starting block %llu)",
inode->i_ino, error, inode->i_ino,
(unsigned long long) io_end->offset, (unsigned long long) io_end->offset,
(long) io_end->size, (long) io_end->size,
(unsigned long long) (unsigned long long)
bi_sector >> (inode->i_blkbits - 9)); bi_sector >> (inode->i_blkbits - 9));
mapping_set_error(inode->i_mapping, error);
} }
if (io_end->flag & EXT4_IO_END_UNWRITTEN) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
......
...@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2; goto failed_mount2;
} }
} }
/*
* set up enough so that it can read an inode,
* and create new inode for buddy allocator
*/
sbi->s_gdb_count = db_count;
if (!test_opt(sb, NOLOAD) &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
sb->s_op = &ext4_sops;
else
sb->s_op = &ext4_nojournal_sops;
ext4_ext_init(sb);
err = ext4_mb_init(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
goto failed_mount2;
}
if (!ext4_check_descriptors(sb, &first_not_zeroed)) { if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2; goto failed_mount2a;
} }
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
if (!ext4_fill_flex_info(sb)) { if (!ext4_fill_flex_info(sb)) {
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
"unable to initialize " "unable to initialize "
"flex_bg meta info!"); "flex_bg meta info!");
goto failed_mount2; goto failed_mount2a;
} }
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32)); get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock); spin_lock_init(&sbi->s_next_gen_lock);
...@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_stripe = ext4_get_stripe_size(sbi); sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_extent_max_zeroout_kb = 32; sbi->s_extent_max_zeroout_kb = 32;
/*
* set up enough so that it can read an inode
*/
if (!test_opt(sb, NOLOAD) &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
sb->s_op = &ext4_sops;
else
sb->s_op = &ext4_nojournal_sops;
sb->s_export_op = &ext4_export_ops; sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers; sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
...@@ -4113,21 +4124,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4113,21 +4124,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (err) { if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
"reserved pool", ext4_calculate_resv_clusters(sb)); "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a; goto failed_mount5;
} }
err = ext4_setup_system_zone(sb); err = ext4_setup_system_zone(sb);
if (err) { if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system " ext4_msg(sb, KERN_ERR, "failed to initialize system "
"zone (%d)", err); "zone (%d)", err);
goto failed_mount4a;
}
ext4_ext_init(sb);
err = ext4_mb_init(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
goto failed_mount5; goto failed_mount5;
} }
...@@ -4204,11 +4207,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4204,11 +4207,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
failed_mount7: failed_mount7:
ext4_unregister_li_request(sb); ext4_unregister_li_request(sb);
failed_mount6: failed_mount6:
ext4_mb_release(sb);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb); ext4_release_system_zone(sb);
failed_mount4a: failed_mount5:
dput(sb->s_root); dput(sb->s_root);
sb->s_root = NULL; sb->s_root = NULL;
failed_mount4: failed_mount4:
...@@ -4232,11 +4232,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -4232,11 +4232,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
percpu_counter_destroy(&sbi->s_extent_cache_cnt); percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk) if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk); kthread_stop(sbi->s_mmp_tsk);
failed_mount2a:
ext4_mb_release(sb);
failed_mount2: failed_mount2:
for (i = 0; i < db_count; i++) for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]); brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc); ext4_kvfree(sbi->s_group_desc);
failed_mount: failed_mount:
ext4_ext_release(sb);
if (sbi->s_chksum_driver) if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver); crypto_free_shash(sbi->s_chksum_driver);
if (sbi->s_proc) { if (sbi->s_proc) {
......
...@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle, ...@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
} }
/* /*
* Release the xattr block BH: If the reference count is > 1, decrement * Release the xattr block BH: If the reference count is > 1, decrement it;
* it; otherwise free the block. * otherwise free the block.
*/ */
static void static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode, ext4_xattr_release_block(handle_t *handle, struct inode *inode,
...@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ...@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ce) if (ce)
mb_cache_entry_free(ce); mb_cache_entry_free(ce);
get_bh(bh); get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1, ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET); EXT4_FREE_BLOCKS_FORGET);
unlock_buffer(bh);
} else { } else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1); le32_add_cpu(&BHDR(bh)->h_refcount, -1);
if (ce) if (ce)
mb_cache_entry_release(ce); mb_cache_entry_release(ce);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
* from a race where someone else frees the block (and releases
* its journal_head) before we are done dirtying the buffer. In
* nojournal mode this race is harmless and we actually cannot
* call ext4_handle_dirty_xattr_block() with locked buffer as
* that function can call sync_dirty_buffer() so for that case
* we handle the dirtying after unlocking the buffer.
*/
if (ext4_handle_valid(handle))
error = ext4_handle_dirty_xattr_block(handle, inode,
bh);
unlock_buffer(bh); unlock_buffer(bh);
error = ext4_handle_dirty_xattr_block(handle, inode, bh); if (!ext4_handle_valid(handle))
error = ext4_handle_dirty_xattr_block(handle, inode,
bh);
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
......
...@@ -254,16 +254,21 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -254,16 +254,21 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EBADF; return -EBADF;
/* /*
* It's not possible to punch hole or perform collapse range * We can only allow pure fallocate on append only files
* on append only file
*/ */
if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
&& IS_APPEND(inode))
return -EPERM; return -EPERM;
if (IS_IMMUTABLE(inode)) if (IS_IMMUTABLE(inode))
return -EPERM; return -EPERM;
/*
* We can not allow to do any fallocate operation on an active
* swapfile
*/
if (IS_SWAPFILE(inode))
ret = -ETXTBSY;
/* /*
* Revalidate the write permissions, in case security policy has * Revalidate the write permissions, in case security policy has
* changed since the files were opened. * changed since the files were opened.
...@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
return -EFBIG; return -EFBIG;
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
*/
if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
(offset + len >= i_size_read(inode)))
return -EINVAL;
if (!file->f_op->fallocate) if (!file->f_op->fallocate)
return -EOPNOTSUPP; return -EOPNOTSUPP;
......
...@@ -841,7 +841,15 @@ xfs_file_fallocate( ...@@ -841,7 +841,15 @@ xfs_file_fallocate(
goto out_unlock; goto out_unlock;
} }
ASSERT(offset + len < i_size_read(inode)); /*
* There is no need to overlap collapse range with EOF,
* in which case it is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode)) {
error = -EINVAL;
goto out_unlock;
}
new_size = i_size_read(inode) - len; new_size = i_size_read(inode) - len;
error = xfs_collapse_file_space(ip, offset, len); error = xfs_collapse_file_space(ip, offset, len);
......
...@@ -16,15 +16,6 @@ struct mpage_da_data; ...@@ -16,15 +16,6 @@ struct mpage_da_data;
struct ext4_map_blocks; struct ext4_map_blocks;
struct extent_status; struct extent_status;
/* shim until we merge in the xfs_collapse_range branch */
#ifndef FALLOC_FL_COLLAPSE_RANGE
#define FALLOC_FL_COLLAPSE_RANGE 0x08
#endif
#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE 0x10
#endif
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
#define show_mballoc_flags(flags) __print_flags(flags, "|", \ #define show_mballoc_flags(flags) __print_flags(flags, "|", \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment