Commit 2e756758 authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Many bug fixes and cleanups, and an optimization for case-insensitive
  lookups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix coverity warning on error path of filename setup
  ext4: replace ktype default_attrs with default_groups
  ext4: rename htree_inline_dir_to_tree() to ext4_inlinedir_to_tree()
  ext4: refactor initialize_dirent_tail()
  ext4: rename "dirent_csum" functions to use "dirblock"
  ext4: allow directory holes
  jbd2: drop declaration of journal_sync_buffer()
  ext4: use jbd2_inode dirty range scoping
  jbd2: introduce jbd2_inode dirty range scoping
  mm: add filemap_fdatawait_range_keep_errors()
  ext4: remove redundant assignment to node
  ext4: optimize case-insensitive lookups
  ext4: make __ext4_get_inode_loc plug
  ext4: clean up kerneldoc warnigns when building with W=1
  ext4: only set project inherit bit for directory
  ext4: enforce the immutable flag on open files
  ext4: don't allow any modifications to an immutable file
  jbd2: fix typo in comment of journal_submit_inode_data_buffers
  jbd2: fix some print format mistakes
  ext4: gracefully handle ext4_break_layouts() failure during truncate
parents 8dda9957 96fcaf86
......@@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
}
/**
* ext4_should_retry_alloc()
* ext4_should_retry_alloc() - check if a block allocation should be retried
* @sb: super block
* @retries number of attemps has been made
* @retries: number of attemps has been made
*
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
......
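For context, the helper documented above is typically used in an ENOSPC retry loop; a minimal sketch of the usual caller pattern (not part of this patch, and the allocation call is a stand-in):

	int retries = 0;
	int err;

retry:
	err = ext4_do_allocation(inode);	/* hypothetical callee that may return -ENOSPC */
	if (err == -ENOSPC &&
	    ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;			/* may sleep waiting for a journal commit */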
......@@ -33,6 +33,9 @@
static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
* is_dx_dir() - check if a directory is using htree indexing
* @inode: directory inode
*
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which could potentially get converted to use htree
* indexing).
......@@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
int dir_has_error = 0;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
if (IS_ENCRYPTED(inode)) {
......@@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
offset = ctx->pos & (sb->s_blocksize - 1);
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
......@@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
goto errout;
}
cond_resched();
offset = ctx->pos & (sb->s_blocksize - 1);
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
err = ext4_map_blocks(NULL, inode, &map, 0);
if (err == 0) {
/* m_len should never be zero but let's avoid
* an infinite loop if it somehow is */
if (map.m_len == 0)
map.m_len = 1;
ctx->pos += map.m_len * sb->s_blocksize;
continue;
}
if (err > 0) {
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
......@@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
}
if (!bh) {
if (!dir_has_error) {
EXT4_ERROR_FILE(file, 0,
"directory contains a "
"hole at offset %llu",
(unsigned long long) ctx->pos);
dir_has_error = 1;
}
/* corrupt size? Maybe no more blocks to read */
if (ctx->pos > inode->i_blocks << 9)
break;
......@@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Check the checksum */
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(inode,
(struct ext4_dir_entry *)bh->b_data)) {
!ext4_dirblock_csum_verify(inode, bh)) {
EXT4_ERROR_FILE(file, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)ctx->pos);
......@@ -674,7 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
}
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
......
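The readdir hunks above come from "ext4: allow directory holes": an unmapped directory block is no longer reported as corruption, and the loop simply steps the read position past it. A rough worked example, assuming a 4096-byte block size and a single-block hole (m_len == 1):

	/*
	 * Sketch: ctx->pos = 20480 lies in logical block 5 (20480 >> 12).
	 * If ext4_map_blocks() finds no mapping for that block, the loop
	 * advances ctx->pos by map.m_len * sb->s_blocksize, i.e.
	 * 20480 + 4096 = 24576, and continues with logical block 6 instead
	 * of calling EXT4_ERROR_FILE() to flag the hole as corruption.
	 */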
......@@ -421,7 +421,8 @@ struct flex_groups {
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
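With EXT4_PROJINHERIT_FL added to EXT4_REG_FLMASK, the project-inherit bit is now masked off for regular files. For reference, the mask is consumed by ext4's existing ext4_mask_flags() helper, reproduced here as a sketch (not part of this hunk):

	static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
	{
		if (S_ISDIR(mode))
			return flags;			/* directories may keep PROJINHERIT */
		else if (S_ISREG(mode))
			return flags & EXT4_REG_FLMASK;	/* now strips PROJINHERIT too */
		else
			return flags & EXT4_OTHER_FLMASK;
	}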
......@@ -2077,6 +2078,9 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
#ifdef CONFIG_UNICODE
struct fscrypt_str cf_name;
#endif
};
#define fname_name(p) ((p)->disk_name.name)
......@@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
#ifdef CONFIG_UNICODE
extern void ext4_fname_setup_ci_filename(struct inode *dir,
const struct qstr *iname,
struct fscrypt_str *fname);
#endif
#ifdef CONFIG_FS_ENCRYPTION
static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
const struct fscrypt_name *src)
......@@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
#endif
return 0;
}
......@@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
#endif
return 0;
}
......@@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
#ifdef CONFIG_UNICODE
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
}
#else /* !CONFIG_FS_ENCRYPTION */
static inline int ext4_fname_setup_filename(struct inode *dir,
......@@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
#ifdef CONFIG_UNICODE
ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
#endif
return 0;
}
......@@ -2376,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
}
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
static inline void ext4_fname_free_filename(struct ext4_filename *fname)
{
#ifdef CONFIG_UNICODE
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
}
#endif /* !CONFIG_FS_ENCRYPTION */
/* dir.c */
......@@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);
/* namei.c */
extern int ext4_dirent_csum_verify(struct inode *inode,
struct ext4_dir_entry *dirent);
extern int ext4_dirblock_csum_verify(struct inode *inode,
struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
......@@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
extern int ext4_read_inline_dir(struct file *filp,
struct dir_context *ctx,
int *has_inline_data);
extern int htree_inlinedir_to_tree(struct file *dir_file,
struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash,
int *has_inline_data);
extern int ext4_inlinedir_to_tree(struct file *dir_file,
struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash,
int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir,
......@@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
unsigned int parent_ino, int dotdot_real_len);
extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
unsigned int blocksize);
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
struct inode *inode,
struct buffer_head *bh);
extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
unsigned int blocksize);
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *name,
const struct qstr *entry);
const struct qstr *fname,
const struct qstr *entry, bool quick);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
......
......@@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal)
}
static inline int ext4_jbd2_inode_add_write(handle_t *handle,
struct inode *inode)
struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
return jbd2_journal_inode_add_write(handle,
EXT4_I(inode)->jinode);
return jbd2_journal_inode_ranged_write(handle,
EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
struct inode *inode)
struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
return jbd2_journal_inode_add_wait(handle,
EXT4_I(inode)->jinode);
return jbd2_journal_inode_ranged_wait(handle,
EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
......
......@@ -5676,8 +5676,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
}
/**
* ext4_swap_extents - Swap extents between two inodes
*
* ext4_swap_extents() - Swap extents between two inodes
* @handle: handle for this transaction
* @inode1: First inode
* @inode2: Second inode
* @lblk1: Start block for first inode
......
......@@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
if (!es)
goto out_wrap;
node = &es->rb_node;
while (*nr_to_scan > 0) {
if (es->es_lblk > end) {
ei->i_es_shrink_lblk = end + 1;
......
......@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = generic_write_checks(iocb, from);
if (ret <= 0)
return ret;
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
......
......@@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
* ext4_alloc_branch - allocate and set up a chain of blocks.
* @handle: handle for this transaction
* @inode: owner
* @indirect_blks: number of allocated indirect blocks
* @blks: number of allocated direct blocks
* @goal: preferred place for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
* ext4_alloc_branch() - allocate and set up a chain of blocks
* @handle: handle for this transaction
* @ar: structure describing the allocation request
* @indirect_blks: number of allocated indirect blocks
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
......@@ -396,15 +394,11 @@ static int ext4_alloc_branch(handle_t *handle,
}
/**
* ext4_splice_branch - splice the allocated branch onto inode.
* ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
* @inode: owner
* @block: (logical) number of block we are adding
* @chain: chain of indirect blocks (with a missing link - see
* ext4_alloc_branch)
* @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
* @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
......
......@@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
{
int err, csum_size = 0, header_size = 0;
struct ext4_dir_entry_2 *de;
struct ext4_dir_entry_tail *t;
void *target = dir_block->b_data;
/*
......@@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
inode->i_sb->s_blocksize - csum_size);
if (csum_size) {
t = EXT4_DIRENT_TAIL(dir_block->b_data,
inode->i_sb->s_blocksize);
initialize_dirent_tail(t, inode->i_sb->s_blocksize);
}
if (csum_size)
ext4_initialize_dirent_tail(dir_block,
inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
set_buffer_verified(dir_block);
......@@ -1327,11 +1324,11 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
* inlined dir. It returns the number directory entries loaded
* into the tree. If there is an error it is returned in err.
*/
int htree_inlinedir_to_tree(struct file *dir_file,
struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash,
int *has_inline_data)
int ext4_inlinedir_to_tree(struct file *dir_file,
struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash,
int *has_inline_data)
{
int err = 0, count = 0;
unsigned int parent_ino;
......
......@@ -731,10 +731,16 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
loff_t start_byte =
(loff_t)map->m_lblk << inode->i_blkbits;
loff_t length = (loff_t)map->m_len << inode->i_blkbits;
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
ret = ext4_jbd2_inode_add_wait(handle, inode);
ret = ext4_jbd2_inode_add_wait(handle, inode,
start_byte, length);
else
ret = ext4_jbd2_inode_add_write(handle, inode);
ret = ext4_jbd2_inode_add_write(handle, inode,
start_byte, length);
if (ret)
return ret;
}
......@@ -4094,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
err = 0;
mark_buffer_dirty(bh);
if (ext4_should_order_data(inode))
err = ext4_jbd2_inode_add_write(handle, inode);
err = ext4_jbd2_inode_add_write(handle, inode, from,
length);
}
unlock:
......@@ -4579,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
......@@ -4667,6 +4675,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
......@@ -4697,6 +4706,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
......@@ -5529,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
if (unlikely(IS_APPEND(inode) &&
(ia_valid & (ATTR_MODE | ATTR_UID |
ATTR_GID | ATTR_TIMES_SET))))
return -EPERM;
error = setattr_prepare(dentry, attr);
if (error)
return error;
......@@ -5580,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
......@@ -5594,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
if (ext4_should_order_data(inode) &&
(attr->ia_size < inode->i_size)) {
error = ext4_begin_ordered_truncate(inode,
if (shrink) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
if (error)
goto err_out;
if (error)
goto err_out;
}
/*
* Blocks are going to be removed from the inode. Wait
* for dio in flight.
*/
inode_dio_wait(inode);
}
down_write(&EXT4_I(inode)->i_mmap_sem);
rc = ext4_break_layouts(inode);
if (rc) {
up_write(&EXT4_I(inode)->i_mmap_sem);
return rc;
}
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
......@@ -5633,42 +5666,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
if (error) {
if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
goto err_out;
if (error)
goto out_mmap_sem;
if (!shrink) {
pagecache_isize_extended(inode, oldsize,
inode->i_size);
} else if (ext4_should_journal_data(inode)) {
ext4_wait_for_tail_page_commit(inode);
}
}
if (!shrink) {
pagecache_isize_extended(inode, oldsize, inode->i_size);
} else {
/*
* Blocks are going to be removed from the inode. Wait
* for dio in flight.
*/
inode_dio_wait(inode);
}
if (orphan && ext4_should_journal_data(inode))
ext4_wait_for_tail_page_commit(inode);
down_write(&EXT4_I(inode)->i_mmap_sem);
rc = ext4_break_layouts(inode);
if (rc) {
up_write(&EXT4_I(inode)->i_mmap_sem);
error = rc;
goto err_out;
}
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
if (shrink) {
/*
* Call ext4_truncate() even if i_size didn't change to
* truncate possible preallocated blocks.
*/
if (attr->ia_size <= oldsize) {
rc = ext4_truncate(inode);
if (rc)
error = rc;
}
out_mmap_sem:
up_write(&EXT4_I(inode)->i_mmap_sem);
}
......@@ -6199,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
if (unlikely(IS_IMMUTABLE(inode)))
return VM_FAULT_SIGBUS;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
......
......@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16])
}
#endif
/*
* If immutable is set and we are not clearing it, we're not allowed to change
* anything else in the inode. Don't error out if we're only trying to set
* immutable on an immutable file.
*/
static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
unsigned int flags)
{
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int oldflags = ei->i_flags;
if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
return 0;
if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
return -EPERM;
if (ext4_has_feature_project(inode->i_sb) &&
__kprojid_val(ei->i_projid) != new_projid)
return -EPERM;
return 0;
}
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
......@@ -340,6 +363,20 @@ static int ext4_ioctl_setflags(struct inode *inode,
}
}
/*
* Wait for all pending directio and then flush all the dirty pages
* for this file. The flush marks all the pages readonly, so any
* subsequent attempt to write to the file (particularly mmap pages)
* will come through the filesystem and fail.
*/
if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
(flags & EXT4_IMMUTABLE_FL)) {
inode_dio_wait(inode);
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto flags_out;
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
......@@ -742,6 +779,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (S_ISREG(inode->i_mode))
flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
int err;
......@@ -769,7 +808,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err;
inode_lock(inode);
err = ext4_ioctl_setflags(inode, flags);
err = ext4_ioctl_check_immutable(inode,
from_kprojid(&init_user_ns, ei->i_projid),
flags);
if (!err)
err = ext4_ioctl_setflags(inode, flags);
inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
......@@ -1139,6 +1182,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
(flags & EXT4_FL_XFLAG_VISIBLE);
err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags);
if (err)
goto out;
err = ext4_ioctl_setflags(inode, flags);
if (err)
goto out;
......
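A short worked example of the ext4_ioctl_check_immutable() rules added above, for an inode that currently has EXT4_IMMUTABLE_FL | EXT4_APPEND_FL set (illustrative only):

	/*
	 *   requested flags                        result
	 *   EXT4_IMMUTABLE_FL | EXT4_APPEND_FL     0       (no-op, allowed)
	 *   EXT4_IMMUTABLE_FL only                 -EPERM  (would clear APPEND while
	 *                                                   keeping the file immutable)
	 *   flags without EXT4_IMMUTABLE_FL        0       (clearing immutable is allowed;
	 *                                                   the normal setflags checks
	 *                                                   still run afterwards)
	 *   same flags, different fsx_projid       -EPERM  (on a project-quota filesystem)
	 */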
......@@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
* @block: start physical block to free
* @count: number of blocks to count
* @bh: optional buffer of the block to be freed
* @block: starting physical block to be freed
* @count: number of blocks to be freed
* @flags: flags used by ext4_free_blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
......
......@@ -13,11 +13,10 @@
#include "ext4_extents.h"
/**
* get_ext_path - Find an extent path for designated logical block number.
*
* @inode: an inode which is searched
* get_ext_path() - Find an extent path for designated logical block number.
* @inode: inode to be searched
* @lblock: logical block number to find an extent path
* @path: pointer to an extent path pointer (for output)
* @ppath: pointer to an extent path pointer (for output)
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
......@@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
}
/**
* ext4_double_down_write_data_sem - Acquire two inodes' write lock
* of i_data_sem
* ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
* @first: inode to be locked
* @second: inode to be locked
*
* Acquire write lock of i_data_sem of the two inodes
*/
......@@ -390,7 +390,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
/* Even in case of data=writeback it is reasonable to pin
* inode to transaction, to prevent unexpected data loss */
*err = ext4_jbd2_inode_add_write(handle, orig_inode);
*err = ext4_jbd2_inode_add_write(handle, orig_inode,
(loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
unlock_pages:
unlock_page(pagep[0]);
......
(The diff for one file is collapsed and not shown in this view.)
......@@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(journal_task),
NULL,
};
ATTRIBUTE_GROUPS(ext4);
/* Features this copy of ext4 supports */
EXT4_ATTR_FEATURE(lazy_itable_init);
......@@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(metadata_csum_seed),
NULL,
};
ATTRIBUTE_GROUPS(ext4_feat);
static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
{
......@@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = {
};
static struct kobj_type ext4_sb_ktype = {
.default_attrs = ext4_attrs,
.default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
static struct kobj_type ext4_feat_ktype = {
.default_attrs = ext4_feat_attrs,
.default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = (void (*)(struct kobject *))kfree,
};
......
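The default_attrs to default_groups conversion relies on the ATTRIBUTE_GROUPS() macro from <linux/sysfs.h>, which expands to roughly the following (sketch):

	static const struct attribute_group ext4_group = {
		.attrs = ext4_attrs,
	};
	static const struct attribute_group *ext4_groups[] = {
		&ext4_group,
		NULL,
	};

The generated ext4_groups (and the matching ext4_feat_groups) is what the kobj_type's .default_groups member points at.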
......@@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal,
/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
* use writepages() because with dealyed allocation we may be doing
* use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
static int journal_submit_inode_data_buffers(struct address_space *mapping)
static int journal_submit_inode_data_buffers(struct address_space *mapping,
loff_t dirty_start, loff_t dirty_end)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
.range_start = 0,
.range_end = i_size_read(mapping->host),
.range_start = dirty_start,
.range_end = dirty_end,
};
ret = generic_writepages(mapping, &wbc);
......@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
loff_t dirty_start = jinode->i_dirty_start;
loff_t dirty_end = jinode->i_dirty_end;
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
mapping = jinode->i_vfs_inode->i_mapping;
......@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
err = journal_submit_inode_data_buffers(mapping);
err = journal_submit_inode_data_buffers(mapping, dirty_start,
dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
......@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
loff_t dirty_start = jinode->i_dirty_start;
loff_t dirty_end = jinode->i_dirty_end;
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait_keep_errors(
jinode->i_vfs_inode->i_mapping);
err = filemap_fdatawait_range_keep_errors(
jinode->i_vfs_inode->i_mapping, dirty_start,
dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
......@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
jinode->i_dirty_start = 0;
jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
......
......@@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
#if 0
EXPORT_SYMBOL(journal_sync_buffer);
#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);
......@@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_add_write);
EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
......@@ -203,7 +202,7 @@ static int kjournald2(void *arg)
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
......@@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal)
* IO is in progress. do_get_write_access() handles this.
*
* The function returns a pointer to the buffer_head to be used for IO.
*
*
*
* Return value:
* <0: Error
......@@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
jbd_debug(1, "JBD2: requesting commit %d/%d\n",
jbd_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
......@@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
target, journal->j_running_transaction ?
target, journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
return 0;
}
......@@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_ERR
"%s: error: j_commit_request=%d, tid=%d\n",
"%s: error: j_commit_request=%u, tid=%u\n",
__func__, journal->j_commit_request, tid);
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
......@@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
trace_jbd2_update_log_tail(journal, tid, block, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %lu), "
"Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
......@@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal)
*/
if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
"(start %ld, seq %d, errno %d)\n",
"(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
journal->j_flags |= JBD2_FLUSHED;
......@@ -1453,7 +1452,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
return;
}
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
......@@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
jinode->i_dirty_start = 0;
jinode->i_dirty_end = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
......
......@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
* File inode in the inode list of the handle's transaction
*/
static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
unsigned long flags)
unsigned long flags, loff_t start_byte, loff_t end_byte)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
......@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
/*
* First check whether inode isn't already on the transaction's
* lists without taking the lock. Note that this check is safe
* without the lock as we cannot race with somebody removing inode
* from the transaction. The reason is that we remove inode from the
* transaction only in journal_release_jbd_inode() and when we commit
* the transaction. We are guarded from the first case by holding
* a reference to the inode. We are safe against the second case
* because if jinode->i_transaction == transaction, commit code
* cannot touch the transaction because we hold reference to it,
* and if jinode->i_next_transaction == transaction, commit code
* will only file the inode where we want it.
*/
if ((jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction) &&
(jinode->i_flags & flags) == flags)
return 0;
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
if (jinode->i_dirty_end) {
jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
} else {
jinode->i_dirty_start = start_byte;
jinode->i_dirty_end = end_byte;
}
/* Is inode already attached where we need it? */
if (jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction)
......@@ -2631,12 +2622,28 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
{
return jbd2_journal_file_inode(handle, jinode,
JI_WRITE_DATA | JI_WAIT_DATA);
JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
}
int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
{
return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
LLONG_MAX);
}
int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
{
return jbd2_journal_file_inode(handle, jinode,
JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
start_byte + length - 1);
}
int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
loff_t start_byte, loff_t length)
{
return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
start_byte, start_byte + length - 1);
}
/*
......
......@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
}
EXPORT_SYMBOL(utf8_strncasecmp);
/* String cf is expected to be a valid UTF-8 casefolded
* string.
*/
int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1)
{
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1;
int c1, c2;
int i = 0;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
return -EINVAL;
do {
c1 = utf8byte(&cur1);
c2 = cf->name[i++];
if (c1 < 0)
return -EINVAL;
if (c1 != c2)
return 1;
} while (c1);
return 0;
}
EXPORT_SYMBOL(utf8_strncasecmp_folded);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
......
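utf8_strncasecmp_folded() is what enables the case-insensitive lookup optimization: the name being looked up is casefolded once (stashed in fname->cf_name) and every on-disk entry is then compared against that pre-folded string. A minimal usage sketch, with names outside the diff being illustrative:

	/* Compare one raw on-disk name against a pre-casefolded lookup name. */
	static int match_folded(const struct unicode_map *um,
				const struct qstr *cf,		/* from utf8_casefold() */
				const struct qstr *entry)	/* raw dirent name */
	{
		int ret = utf8_strncasecmp_folded(um, cf, entry);

		if (ret < 0)
			return ret;	/* invalid UTF-8 in the on-disk name */
		return ret == 0;	/* 1 on a case-insensitive match */
	}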
......@@ -2718,6 +2718,8 @@ extern int filemap_flush(struct address_space *);
extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
loff_t start_byte, loff_t end_byte);
static inline int filemap_fdatawait(struct address_space *mapping)
{
......
......@@ -451,6 +451,22 @@ struct jbd2_inode {
* @i_flags: Flags of inode [j_list_lock]
*/
unsigned long i_flags;
/**
* @i_dirty_start:
*
* Offset in bytes where the dirty range for this inode starts.
* [j_list_lock]
*/
loff_t i_dirty_start;
/**
* @i_dirty_end:
*
* Inclusive offset in bytes where the dirty range for this inode
* ends. [j_list_lock]
*/
loff_t i_dirty_end;
};
struct jbd2_revoke_table_s;
......@@ -1357,7 +1373,6 @@ void jbd2_journal_set_triggers(struct buffer_head *,
struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
extern void journal_sync_buffer (struct buffer_head *);
extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned int, unsigned int);
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
......@@ -1397,6 +1412,12 @@ extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *inode, loff_t start_byte,
loff_t length);
extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
struct jbd2_inode *inode, loff_t start_byte,
loff_t length);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
......
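A minimal sketch of how a data=ordered caller uses the new ranged API, mirroring the ext4_jbd2_inode_add_write() wrapper and the ext4_map_blocks() hunk earlier in this diff:

	loff_t start_byte = (loff_t)map->m_lblk << inode->i_blkbits;
	loff_t length = (loff_t)map->m_len << inode->i_blkbits;

	/* Track only the byte range this write dirtied, rather than 0..LLONG_MAX;
	 * jbd2 records it internally as [start_byte, start_byte + length - 1]. */
	err = jbd2_journal_inode_ranged_write(handle, EXT4_I(inode)->jinode,
					      start_byte, length);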
......@@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um,
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2);
int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen);
......
......@@ -549,6 +549,28 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
}
EXPORT_SYMBOL(filemap_fdatawait_range);
/**
* filemap_fdatawait_range_keep_errors - wait for writeback to complete
* @mapping: address space structure to wait for
* @start_byte: offset in bytes where the range starts
* @end_byte: offset in bytes where the range ends (inclusive)
*
* Walk the list of under-writeback pages of the given address space in the
* given range and wait for all of them. Unlike filemap_fdatawait_range(),
* this function does not clear error status of the address space.
*
* Use this function if callers don't handle errors themselves. Expected
* call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
* fsfreeze(8)
*/
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
__filemap_fdatawait_range(mapping, start_byte, end_byte);
return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
/**
* file_fdatawait_range - wait for writeback to complete
* @file: file pointing to address space structure to wait for
......
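The intended caller of filemap_fdatawait_range_keep_errors() in this series is jbd2's journal_finish_inode_data_buffers(), condensed here into a usage sketch:

	err = filemap_fdatawait_range_keep_errors(jinode->i_vfs_inode->i_mapping,
						  jinode->i_dirty_start,
						  jinode->i_dirty_end);
	/* Any writeback error stays latched in the mapping, so a later
	 * fsync() on the file still observes it. */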