Commit d857da7b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A very large number of cleanups and bug fixes --- in particular for
  the ext4 encryption patches, which is a new feature added in the last
  merge window.  Also fix a number of long-standing xfstest failures.
  (Quota writes failing due to ENOSPC, a race between truncate and
  writepage in data=journalled mode that was causing generic/068 to
  fail, and other corner cases.)

  Also add support for FALLOC_FL_INSERT_RANGE, and improve jbd2
  performance eliminating locking when a buffer is modified more than
  once during a transaction (which is very common for allocation
  bitmaps, for example), in which case the state of the journalled
  buffer head doesn't need to change"

[ I renamed "ext4_follow_link()" to "ext4_encrypted_follow_link()" in
  the merge resolution, to make it clear that that function is _only_
  used for encrypted symlinks.  The function doesn't actually work for
  non-encrypted symlinks at all, and they use the generic helpers
                                         - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (52 commits)
  ext4: set lazytime on remount if MS_LAZYTIME is set by mount
  ext4: only call ext4_truncate when size <= isize
  ext4: make online defrag error reporting consistent
  ext4: minor cleanup of ext4_da_reserve_space()
  ext4: don't retry file block mapping on bigalloc fs with non-extent file
  ext4: prevent ext4_quota_write() from failing due to ENOSPC
  ext4: call sync_blockdev() before invalidate_bdev() in put_super()
  jbd2: speedup jbd2_journal_dirty_metadata()
  jbd2: get rid of open coded allocation retry loop
  ext4: improve warning directory handling messages
  jbd2: fix ocfs2 corrupt when updating journal superblock fails
  ext4: mballoc: avoid 20-argument function call
  ext4: wait for existing dio workers in ext4_alloc_file_blocks()
  ext4: recalculate journal credits as inode depth changes
  jbd2: use GFP_NOFS in jbd2_cleanup_journal_tail()
  ext4: use swap() in mext_page_double_lock()
  ext4: use swap() in memswap()
  ext4: fix race between truncate and __ext4_journalled_writepage()
  ext4 crypto: fail the mount if blocksize != pagesize
  ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate
  ...
parents 77d43164 a2fd66d0
......@@ -72,6 +72,7 @@ config EXT4_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
select ENCRYPTED_KEYS
......
......@@ -369,7 +369,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (buffer_verified(bh))
if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
return;
ext4_lock_group(sb, block_group);
......@@ -446,7 +446,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
if (err)
ext4_error(sb, "Checksum bad for grp %u", block_group);
return bh;
goto verify;
}
ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) {
......
This diff is collapsed.
This diff is collapsed.
......@@ -84,14 +84,38 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
return res;
}
/**
* ext4_generate_encryption_key() - generates an encryption key
* @inode: The inode to generate the encryption key for.
*/
int ext4_generate_encryption_key(struct inode *inode)
void ext4_free_crypt_info(struct ext4_crypt_info *ci)
{
if (!ci)
return;
if (ci->ci_keyring_key)
key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
void ext4_free_encryption_info(struct inode *inode,
struct ext4_crypt_info *ci)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_crypt_info *prev;
if (ci == NULL)
ci = ACCESS_ONCE(ei->i_crypt_info);
if (ci == NULL)
return;
prev = cmpxchg(&ei->i_crypt_info, ci, NULL);
if (prev != ci)
return;
ext4_free_crypt_info(ci);
}
int _ext4_get_encryption_info(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
struct ext4_crypt_info *crypt_info;
char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
(EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
struct key *keyring_key = NULL;
......@@ -99,31 +123,76 @@ int ext4_generate_encryption_key(struct inode *inode)
struct ext4_encryption_context ctx;
struct user_key_payload *ukp;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
struct crypto_ablkcipher *ctfm;
const char *cipher_str;
char raw_key[EXT4_MAX_KEY_SIZE];
char mode;
int res;
if (res != sizeof(ctx)) {
if (res > 0)
res = -EINVAL;
goto out;
if (!ext4_read_workqueue) {
res = ext4_init_crypto();
if (res)
return res;
}
retry:
crypt_info = ACCESS_ONCE(ei->i_crypt_info);
if (crypt_info) {
if (!crypt_info->ci_keyring_key ||
key_validate(crypt_info->ci_keyring_key) == 0)
return 0;
ext4_free_encryption_info(inode, crypt_info);
goto retry;
}
res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
if (res < 0) {
if (!DUMMY_ENCRYPTION_ENABLED(sbi))
return res;
ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
} else if (res != sizeof(ctx))
return -EINVAL;
res = 0;
ei->i_crypt_policy_flags = ctx.flags;
crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
crypt_key->mode = ctx.contents_encryption_mode;
mode = crypt_info->ci_data_mode;
else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
crypt_key->mode = ctx.filenames_encryption_mode;
else {
printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
mode = crypt_info->ci_filename_mode;
else
BUG();
switch (mode) {
case EXT4_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
break;
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
default:
printk_once(KERN_WARNING
"ext4: unsupported key mode %d (ino %u)\n",
mode, (unsigned) inode->i_ino);
res = -ENOKEY;
goto out;
}
crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
BUG_ON(!crypt_key->size);
if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
goto out;
memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
goto got_key;
}
memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
EXT4_KEY_DESC_PREFIX_SIZE);
......@@ -138,6 +207,7 @@ int ext4_generate_encryption_key(struct inode *inode)
keyring_key = NULL;
goto out;
}
crypt_info->ci_keyring_key = keyring_key;
BUG_ON(keyring_key->type != &key_type_logon);
ukp = ((struct user_key_payload *)keyring_key->payload.data);
if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
......@@ -148,19 +218,43 @@ int ext4_generate_encryption_key(struct inode *inode)
BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
EXT4_KEY_DERIVATION_NONCE_SIZE);
BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
got_key:
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
"%s: error %d (inode %u) allocating crypto tfm\n",
__func__, res, (unsigned) inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_ablkcipher_clear_flags(ctfm, ~0);
crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
res = crypto_ablkcipher_setkey(ctfm, raw_key,
ext4_encryption_key_size(mode));
if (res)
goto out;
memzero_explicit(raw_key, sizeof(raw_key));
if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
ext4_free_crypt_info(crypt_info);
goto retry;
}
return 0;
out:
if (keyring_key)
key_put(keyring_key);
if (res < 0)
crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
if (res == -ENOKEY)
res = 0;
ext4_free_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
int ext4_has_encryption_key(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
return (ei->i_crypt_info != NULL);
}
......@@ -51,6 +51,10 @@ static int ext4_create_encryption_context_from_policy(
struct ext4_encryption_context ctx;
int res = 0;
res = ext4_convert_inline_data(inode);
if (res)
return res;
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE);
......@@ -89,6 +93,8 @@ int ext4_process_policy(const struct ext4_encryption_policy *policy,
return -EINVAL;
if (!ext4_inode_has_encryption_context(inode)) {
if (!S_ISDIR(inode->i_mode))
return -EINVAL;
if (!ext4_empty_dir(inode))
return -ENOTEMPTY;
return ext4_create_encryption_context_from_policy(inode,
......@@ -126,7 +132,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
int ext4_is_child_context_consistent_with_parent(struct inode *parent,
struct inode *child)
{
struct ext4_encryption_context parent_ctx, child_ctx;
struct ext4_crypt_info *parent_ci, *child_ci;
int res;
if ((parent == NULL) || (child == NULL)) {
......@@ -136,26 +142,28 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
/* no restrictions if the parent directory is not encrypted */
if (!ext4_encrypted_inode(parent))
return 1;
res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&parent_ctx, sizeof(parent_ctx));
if (res != sizeof(parent_ctx))
return 0;
/* if the child directory is not encrypted, this is always a problem */
if (!ext4_encrypted_inode(child))
return 0;
res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&child_ctx, sizeof(child_ctx));
if (res != sizeof(child_ctx))
res = ext4_get_encryption_info(parent);
if (res)
return 0;
return (memcmp(parent_ctx.master_key_descriptor,
child_ctx.master_key_descriptor,
res = ext4_get_encryption_info(child);
if (res)
return 0;
parent_ci = EXT4_I(parent)->i_crypt_info;
child_ci = EXT4_I(child)->i_crypt_info;
if (!parent_ci && !child_ci)
return 1;
if (!parent_ci || !child_ci)
return 0;
return (memcmp(parent_ci->ci_master_key,
child_ci->ci_master_key,
EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ctx.contents_encryption_mode ==
child_ctx.contents_encryption_mode) &&
(parent_ctx.filenames_encryption_mode ==
child_ctx.filenames_encryption_mode));
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags));
}
/**
......@@ -168,15 +176,19 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
int ext4_inherit_context(struct inode *parent, struct inode *child)
{
struct ext4_encryption_context ctx;
int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
struct ext4_crypt_info *ci;
int res;
res = ext4_get_encryption_info(parent);
if (res < 0)
return res;
ci = EXT4_I(parent)->i_crypt_info;
if (ci == NULL)
return -ENOKEY;
if (res != sizeof(ctx)) {
if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_XTS;
if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode =
EXT4_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
......@@ -184,15 +196,20 @@ int ext4_inherit_context(struct inode *parent, struct inode *child)
EXT4_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
goto out;
}
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
EXT4_KEY_DESCRIPTOR_SIZE);
}
get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), 0);
out:
if (!res)
if (!res) {
ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA);
res = ext4_get_encryption_info(child);
}
return res;
}
......@@ -110,7 +110,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
int dir_has_error = 0;
struct ext4_fname_crypto_ctx *enc_ctx = NULL;
struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
if (is_dx_dir(inode)) {
......@@ -134,17 +133,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
if (IS_ERR(enc_ctx))
return PTR_ERR(enc_ctx);
if (enc_ctx) {
err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
if (ext4_encrypted_inode(inode)) {
err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN,
&fname_crypto_str);
if (err < 0) {
ext4_put_fname_crypto_ctx(&enc_ctx);
if (err < 0)
return err;
}
}
offset = ctx->pos & (sb->s_blocksize - 1);
......@@ -239,17 +233,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
if (le32_to_cpu(de->inode)) {
if (enc_ctx == NULL) {
/* Directory is not encrypted */
if (!ext4_encrypted_inode(inode)) {
if (!dir_emit(ctx, de->name,
de->name_len,
le32_to_cpu(de->inode),
get_dtype(sb, de->file_type)))
goto done;
} else {
int save_len = fname_crypto_str.len;
/* Directory is encrypted */
err = ext4_fname_disk_to_usr(enc_ctx,
err = ext4_fname_disk_to_usr(inode,
NULL, de, &fname_crypto_str);
fname_crypto_str.len = save_len;
if (err < 0)
goto errout;
if (!dir_emit(ctx,
......@@ -272,7 +268,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
err = 0;
errout:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
ext4_put_fname_crypto_ctx(&enc_ctx);
ext4_fname_crypto_free_buffer(&fname_crypto_str);
#endif
brelse(bh);
......@@ -598,6 +593,13 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
static int ext4_dir_open(struct inode * inode, struct file * filp)
{
if (ext4_encrypted_inode(inode))
return ext4_get_encryption_info(inode) ? -EACCES : 0;
return 0;
}
static int ext4_release_dir(struct inode *inode, struct file *filp)
{
if (filp->private_data)
......@@ -640,5 +642,6 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
.open = ext4_dir_open,
.release = ext4_release_dir,
};
This diff is collapsed.
......@@ -66,24 +66,39 @@ struct ext4_encryption_context {
#define EXT4_KEY_DESC_PREFIX "ext4:"
#define EXT4_KEY_DESC_PREFIX_SIZE 5
/* This is passed in from userspace into the kernel keyring */
struct ext4_encryption_key {
uint32_t mode;
__u32 mode;
char raw[EXT4_MAX_KEY_SIZE];
uint32_t size;
__u32 size;
} __attribute__((__packed__));
struct ext4_crypt_info {
char ci_data_mode;
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};
#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002
#define EXT4_WRITE_PATH_FL 0x00000002
struct ext4_crypto_ctx {
struct crypto_tfm *tfm; /* Crypto API context */
struct page *bounce_page; /* Ciphertext page on write path */
struct page *control_page; /* Original page on write path */
struct bio *bio; /* The bio for this context */
struct work_struct work; /* Work queue for read complete path */
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
int flags; /* Flags */
int mode; /* Encryption mode for tfm */
};
char flags; /* Flags */
char mode; /* Encryption mode for tfm */
};
struct ext4_completion_result {
......@@ -121,18 +136,6 @@ struct ext4_str {
u32 len;
};
struct ext4_fname_crypto_ctx {
u32 lim;
char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
struct crypto_ablkcipher *ctfm;
struct crypto_hash *htfm;
struct page *workpage;
struct ext4_encryption_key key;
unsigned flags : 8;
unsigned has_valid_key : 1;
unsigned ctfm_key_is_ready : 1;
};
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
......
This diff is collapsed.
......@@ -223,9 +223,11 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file->f_mapping->host;
if (ext4_encrypted_inode(inode)) {
int err = ext4_generate_encryption_key(inode);
int err = ext4_get_encryption_info(inode);
if (err)
return 0;
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
file_accessed(file);
if (IS_DAX(file_inode(file))) {
......@@ -278,6 +280,13 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
ext4_journal_stop(handle);
}
}
if (ext4_encrypted_inode(inode)) {
ret = ext4_get_encryption_info(inode);
if (ret)
return -EACCES;
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
......@@ -287,13 +296,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret < 0)
return ret;
}
ret = dquot_file_open(inode, filp);
if (!ret && ext4_encrypted_inode(inode)) {
ret = ext4_generate_encryption_key(inode);
if (ret)
ret = -EACCES;
}
return ret;
return dquot_file_open(inode, filp);
}
/*
......
......@@ -726,11 +726,25 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ext4_group_t i;
ext4_group_t flex_group;
struct ext4_group_info *grp;
int encrypt = 0;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
err = ext4_get_encryption_info(dir);
if (err)
return ERR_PTR(err);
if (ext4_encryption_info(dir) == NULL)
return ERR_PTR(-EPERM);
if (!handle)
nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
......@@ -996,12 +1010,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
/* If the directory encrypted, then we should encrypt the inode. */
if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
(ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(sbi)))
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
......@@ -1034,28 +1042,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
(sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
ei->i_inline_off = 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_set_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
} else {
/* Inline data and encryption are incompatible
* We turn off inline data since encryption is enabled */
ei->i_inline_off = 1;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
}
#else
ei->i_inline_off = 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
#endif
ret = inode;
err = dquot_alloc_inode(inode);
if (err)
......@@ -1082,6 +1071,12 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ei->i_datasync_tid = handle->h_transaction->t_tid;
}
if (encrypt) {
err = ext4_inherit_context(dir, inode);
if (err)
goto fail_free_drop;
}
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
ext4_std_error(sb, err);
......
......@@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
return -ENOSPC;
return -EUCLEAN;
}
/* Set up for the direct block allocation */
......@@ -576,6 +576,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = EXT4_MB_HINT_DATA;
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ar.flags |= EXT4_MB_DELALLOC_RESERVED;
if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
ar.flags |= EXT4_MB_USE_RESERVED;
ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
......
......@@ -995,20 +995,18 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
* and -EEXIST if directory entry already exists.
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
struct dentry *dentry,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
struct inode *dir = d_inode(dentry->d_parent);
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
int err;
struct ext4_dir_entry_2 *de;
err = ext4_find_dest_de(dir, inode, iloc->bh,
inline_start, inline_size,
name, namelen, &de);
err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
inline_size, fname, &de);
if (err)
return err;
......@@ -1016,8 +1014,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
err = ext4_journal_get_write_access(handle, iloc->bh);
if (err)
return err;
ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
name, namelen);
ext4_insert_dentry(dir, inode, de, inline_size, fname);
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
......@@ -1248,8 +1245,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
* If succeeds, return 0. If not, extended the inline dir and copied data to
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
struct dentry *dentry, struct inode *inode)
{
int ret, inline_size;
void *inline_start;
......@@ -1268,7 +1265,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
......@@ -1289,8 +1286,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
inline_start, inline_size);
ret = ext4_add_dirent_to_inline(handle, fname, dentry,
inode, &iloc, inline_start,
inline_size);
if (ret != -ENOSPC)
goto out;
......@@ -1611,6 +1609,7 @@ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
}
struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
......@@ -1632,8 +1631,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = search_dir(iloc.bh, inline_start, inline_size,
dir, d_name, 0, res_dir);
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
dir, fname, d_name, 0, res_dir);
if (ret == 1)
goto out_find;
if (ret < 0)
......@@ -1645,8 +1644,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
ret = search_dir(iloc.bh, inline_start, inline_size,
dir, d_name, 0, res_dir);
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
dir, fname, d_name, 0, res_dir);
if (ret == 1)
goto out_find;
......
......@@ -731,18 +731,18 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
* `handle' can be NULL if create is zero
*/
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int create)
ext4_lblk_t block, int map_flags)
{
struct ext4_map_blocks map;
struct buffer_head *bh;
int create = map_flags & EXT4_GET_BLOCKS_CREATE;
int err;
J_ASSERT(handle != NULL || create == 0);
map.m_lblk = block;
map.m_len = 1;
err = ext4_map_blocks(handle, inode, &map,
create ? EXT4_GET_BLOCKS_CREATE : 0);
err = ext4_map_blocks(handle, inode, &map, map_flags);
if (err == 0)
return create ? ERR_PTR(-ENOSPC) : NULL;
......@@ -788,11 +788,11 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
}
struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
ext4_lblk_t block, int create)
ext4_lblk_t block, int map_flags)
{
struct buffer_head *bh;
bh = ext4_getblk(handle, inode, block, create);
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
if (!bh || buffer_uptodate(bh))
......@@ -1261,13 +1261,12 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
* Reserve a single cluster located at lblock
* Reserve space for a single cluster
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
static int ext4_da_reserve_space(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
/*
......@@ -1279,25 +1278,14 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
if (ret)
return ret;
/*
* recalculate the amount of metadata blocks to reserve
* in order to allocate nrblocks
* worse case is one extent per block
*/
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
* to be prepared undo if we fail to claim space.
*/
md_needed = 0;
trace_ext4_da_reserve_space(inode, 0);
if (ext4_claim_free_clusters(sbi, 1, 0)) {
spin_unlock(&ei->i_block_reservation_lock);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
ei->i_reserved_data_blocks++;
trace_ext4_da_reserve_space(inode);
spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
......@@ -1566,9 +1554,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* then we don't need to reserve it again. However we still need
* to reserve metadata for every block we're going to write.
*/
if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
!ext4_find_delalloc_cluster(inode, map->m_lblk)) {
ret = ext4_da_reserve_space(inode, iblock);
ret = ext4_da_reserve_space(inode);
if (ret) {
/* not enough space to reserve */
retval = ret;
......@@ -1701,19 +1689,32 @@ static int __ext4_journalled_writepage(struct page *page,
ext4_walk_page_buffers(handle, page_bufs, 0, len,
NULL, bget_one);
}
/* As soon as we unlock the page, it can go away, but we have
* references to buffers so we are safe */
/*
* We need to release the page lock before we start the
* journal, so grab a reference so the page won't disappear
* out from under us.
*/
get_page(page);
unlock_page(page);
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
ext4_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
put_page(page);
goto out_no_pagelock;
}
BUG_ON(!ext4_handle_valid(handle));
lock_page(page);
put_page(page);
if (page->mapping != mapping) {
/* The page got truncated from under us */
ext4_journal_stop(handle);
ret = 0;
goto out;
}
if (inline_data) {
BUFFER_TRACE(inode_bh, "get write access");
ret = ext4_journal_get_write_access(handle, inode_bh);
......@@ -1739,6 +1740,8 @@ static int __ext4_journalled_writepage(struct page *page,
NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
brelse(inode_bh);
return ret;
}
......@@ -4681,8 +4684,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_journal_stop(handle);
}
if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
......@@ -4690,24 +4695,26 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size > sbi->s_bitmap_maxbytes)
return -EFBIG;
}
if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
if (S_ISREG(inode->i_mode) &&
if (ext4_should_order_data(inode) &&
(attr->ia_size < inode->i_size)) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
if (error)
goto err_out;
}
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
}
if (ext4_handle_valid(handle)) {
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
orphan = 1;
}
......@@ -4726,15 +4733,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
if (error) {
if (orphan)
ext4_orphan_del(NULL, inode);
goto err_out;
}
} else {
loff_t oldsize = inode->i_size;
i_size_write(inode, attr->ia_size);
pagecache_isize_extended(inode, oldsize, inode->i_size);
}
if (!shrink)
pagecache_isize_extended(inode, oldsize, inode->i_size);
/*
* Blocks are going to be removed from the inode. Wait
......@@ -4754,13 +4759,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
}
/*
* We want to call ext4_truncate() even if attr->ia_size ==
* inode->i_size for cases like truncation of fallocated space
*/
if (attr->ia_valid & ATTR_SIZE)
if (shrink)
ext4_truncate(inode);
}
if (!rc) {
setattr_copy(inode, attr);
......
......@@ -31,14 +31,11 @@
static void memswap(void *a, void *b, size_t len)
{
unsigned char *ap, *bp;
unsigned char tmp;
ap = (unsigned char *)a;
bp = (unsigned char *)b;
while (len-- > 0) {
tmp = *ap;
*ap = *bp;
*bp = tmp;
swap(*ap, *bp);
ap++;
bp++;
}
......@@ -675,8 +672,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err)
return err;
}
if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
16))
if (copy_to_user((void __user *) arg,
sbi->s_es->s_encrypt_pw_salt, 16))
return -EFAULT;
return 0;
}
......@@ -690,7 +687,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_get_policy(inode, &policy);
if (err)
return err;
if (copy_to_user((void *)arg, &policy, sizeof(policy)))
if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
#else
......
......@@ -882,10 +882,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* wait for I/O completion */
for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i]))
err = -EIO;
goto out;
}
}
first_block = page->index * blocks_per_page;
......@@ -898,6 +896,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* skip initialized uptodate buddy */
continue;
if (!buffer_verified(bh[group - first_group]))
/* Skip faulty bitmaps */
continue;
err = 0;
/*
* data carry information regarding this
* particular group in the format specified
......@@ -2008,7 +2011,12 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
}
}
/* This is now called BEFORE we load the buddy bitmap. */
/*
* This is now called BEFORE we load the buddy bitmap.
* Returns either 1 or 0 indicating that the group is either suitable
* for the allocation or not. In addition it can also return negative
* error code when something goes wrong.
*/
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr)
{
......@@ -2031,7 +2039,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
int ret = ext4_mb_init_group(ac->ac_sb, group);
if (ret)
return 0;
return ret;
}
fragments = grp->bb_fragments;
......@@ -2078,7 +2086,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t ngroups, group, i;
int cr;
int err = 0;
int err = 0, first_err = 0;
struct ext4_sb_info *sbi;
struct super_block *sb;
struct ext4_buddy e4b;
......@@ -2145,6 +2153,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = ac->ac_g_ex.fe_group;
for (i = 0; i < ngroups; group++, i++) {
int ret = 0;
cond_resched();
/*
* Artificially restricted ngroups for non-extent
......@@ -2154,8 +2163,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = 0;
/* This now checks without needing the buddy page */
if (!ext4_mb_good_group(ac, group, cr))
ret = ext4_mb_good_group(ac, group, cr);
if (ret <= 0) {
if (!first_err)
first_err = ret;
continue;
}
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err)
......@@ -2167,9 +2180,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* We need to check again after locking the
* block group
*/
if (!ext4_mb_good_group(ac, group, cr)) {
ret = ext4_mb_good_group(ac, group, cr);
if (ret <= 0) {
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
if (!first_err)
first_err = ret;
continue;
}
......@@ -2216,6 +2232,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}
}
out:
if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
err = first_err;
return err;
}
......@@ -2257,12 +2275,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
group--;
if (group == 0)
seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
"%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
"group", "free", "frags", "first",
"2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
"2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
seq_puts(seq, "#group: free frags first ["
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
" 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);
......
......@@ -166,12 +166,9 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
*/
wait_on_page_writeback(page[0]);
wait_on_page_writeback(page[1]);
if (inode1 > inode2) {
struct page *tmp;
tmp = page[0];
page[0] = page[1];
page[1] = tmp;
}
if (inode1 > inode2)
swap(page[0], page[1]);
return 0;
}
......@@ -574,12 +571,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* TODO: This is non obvious task to swap blocks for inodes with full
jornaling enabled */
/* TODO: it's not obvious how to swap blocks for inodes with full
journaling enabled */
if (ext4_should_journal_data(orig_inode) ||
ext4_should_journal_data(donor_inode)) {
return -EINVAL;
ext4_msg(orig_inode->i_sb, KERN_ERR,
"Online defrag not supported with data journaling");
return -EOPNOTSUPP;
}
/* Protect orig and donor inodes against a truncate */
lock_two_nondirectories(orig_inode, donor_inode);
......
This diff is collapsed.
......@@ -84,7 +84,7 @@ static void ext4_finish_bio(struct bio *bio)
/* The bounce data pages are unmapped. */
data_page = page;
ctx = (struct ext4_crypto_ctx *)page_private(data_page);
page = ctx->control_page;
page = ctx->w.control_page;
}
#endif
......
......@@ -54,8 +54,8 @@ static void completion_pages(struct work_struct *work)
{
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct ext4_crypto_ctx *ctx =
container_of(work, struct ext4_crypto_ctx, work);
struct bio *bio = ctx->bio;
container_of(work, struct ext4_crypto_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
......@@ -109,9 +109,9 @@ static void mpage_end_io(struct bio *bio, int err)
if (err) {
ext4_release_crypto_ctx(ctx);
} else {
INIT_WORK(&ctx->work, completion_pages);
ctx->bio = bio;
queue_work(ext4_read_workqueue, &ctx->work);
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(ext4_read_workqueue, &ctx->r.work);
return;
}
}
......
......@@ -591,14 +591,17 @@ void __ext4_msg(struct super_block *sb,
va_end(args);
}
#define ext4_warning_ratelimit(sb) \
___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
"EXT4-fs warning")
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
"EXT4-fs warning"))
if (!ext4_warning_ratelimit(sb))
return;
va_start(args, fmt);
......@@ -609,6 +612,24 @@ void __ext4_warning(struct super_block *sb, const char *function,
va_end(args);
}
void __ext4_warning_inode(const struct inode *inode, const char *function,
unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (!ext4_warning_ratelimit(inode->i_sb))
return;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
function, line, inode->i_ino, current->comm, &vaf);
va_end(args);
}
void __ext4_grp_locked_error(const char *function, unsigned int line,
struct super_block *sb, ext4_group_t grp,
unsigned long ino, ext4_fsblk_t block,
......@@ -807,6 +828,7 @@ static void ext4_put_super(struct super_block *sb)
dump_orphan_list(sb, sbi);
J_ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
/*
......@@ -879,9 +901,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
ei->i_crypt_info = NULL;
#endif
return &ei->vfs_inode;
}
......@@ -958,6 +979,10 @@ void ext4_clear_inode(struct inode *inode)
jbd2_free_inode(EXT4_I(inode)->jinode);
EXT4_I(inode)->jinode = NULL;
}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (EXT4_I(inode)->i_crypt_info)
ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info);
#endif
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
......@@ -3449,11 +3474,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_bdev->bd_part)
sbi->s_sectors_written_start =
part_stat_read(sb->s_bdev->bd_part, sectors[1]);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Modes of operations for file and directory encryption. */
sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
#endif
/* Cleanup superblock name */
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
......@@ -4067,7 +4087,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
if ((DUMMY_ENCRYPTION_ENABLED(sbi) ||
EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) &&
(blocksize != PAGE_CACHE_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Unsupported blocksize for fs encryption");
goto failed_mount_wq;
}
if (DUMMY_ENCRYPTION_ENABLED(sbi) &&
!(sb->s_flags & MS_RDONLY) &&
!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
......@@ -4943,6 +4971,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
if (*flags & MS_LAZYTIME)
sb->s_flags |= MS_LAZYTIME;
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
......@@ -5410,6 +5441,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err, offset = off & (sb->s_blocksize - 1);
int retries = 0;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
......@@ -5430,7 +5462,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
return -EIO;
}
bh = ext4_bread(handle, inode, blk, 1);
do {
bh = ext4_bread(handle, inode, blk,
EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
} while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
ext4_should_retry_alloc(inode->i_sb, &retries));
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh)
......@@ -5647,6 +5684,7 @@ static int __init ext4_init_fs(void)
static void __exit ext4_exit_fs(void)
{
ext4_exit_crypto();
ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();
......
......@@ -23,31 +23,28 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct ext4_str cstr, pstr;
struct inode *inode = d_inode(dentry);
struct ext4_fname_crypto_ctx *ctx = NULL;
struct ext4_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
if (IS_ERR(ctx))
return ERR_CAST(ctx);
res = ext4_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
if (ext4_inode_is_fast_symlink(inode)) {
caddr = (char *) EXT4_I(inode)->i_data;
max_size = sizeof(EXT4_I(inode)->i_data);
} else {
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage)) {
ext4_put_fname_crypto_ctx(&ctx);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
}
caddr = kmap(cpage);
caddr[size] = 0;
}
......@@ -71,20 +68,19 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
goto errout;
}
pstr.name = paddr;
res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr);
pstr.len = plen;
res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
if (res < 0)
goto errout;
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
kunmap(cpage);
page_cache_release(cpage);
}
return *cookie = paddr;
errout:
ext4_put_fname_crypto_ctx(&ctx);
if (cpage) {
kunmap(cpage);
page_cache_release(cpage);
......@@ -95,7 +91,7 @@ static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
.follow_link = ext4_encrypted_follow_link,
.put_link = kfree_put_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
......
......@@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
unsigned long blocknr;
if (is_journal_aborted(journal))
return 1;
return -EIO;
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
......@@ -405,10 +405,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
__jbd2_update_log_tail(journal, first_tid, blocknr);
return 0;
return __jbd2_update_log_tail(journal, first_tid, blocknr);
}
......
......@@ -371,16 +371,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
*/
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
retry_alloc:
new_bh = alloc_buffer_head(GFP_NOFS);
if (!new_bh) {
/*
* Failure is not an option, but __GFP_NOFAIL is going
* away; so we retry ourselves here.
*/
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_alloc;
}
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
/* keep subsequent assertions sane */
atomic_set(&new_bh->b_count, 1);
......@@ -885,9 +876,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
*
* Requires j_checkpoint_mutex
*/
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{
unsigned long freed;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
......@@ -897,7 +889,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data.
*/
jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
if (ret)
goto out;
write_lock(&journal->j_state_lock);
freed = block - journal->j_tail;
if (block < journal->j_tail)
......@@ -913,6 +908,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
journal->j_tail_sequence = tid;
journal->j_tail = block;
write_unlock(&journal->j_state_lock);
out:
return ret;
}
/*
......@@ -1331,7 +1329,7 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
static void jbd2_write_superblock(journal_t *journal, int write_op)
static int jbd2_write_superblock(journal_t *journal, int write_op)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
......@@ -1370,7 +1368,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret,
journal->j_devname);
jbd2_journal_abort(journal, ret);
}
return ret;
}
/**
......@@ -1383,10 +1384,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
* Update a journal's superblock information about log tail and write it to
* disk, waiting for the IO to complete.
*/
void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
unsigned long tail_block, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
......@@ -1395,13 +1397,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
jbd2_write_superblock(journal, write_op);
ret = jbd2_write_superblock(journal, write_op);
if (ret)
goto out;
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
out:
return ret;
}
/**
......@@ -1950,7 +1957,14 @@ int jbd2_journal_flush(journal_t *journal)
return -EIO;
mutex_lock(&journal->j_checkpoint_mutex);
jbd2_cleanup_journal_tail(journal);
if (!err) {
err = jbd2_cleanup_journal_tail(journal);
if (err < 0) {
mutex_unlock(&journal->j_checkpoint_mutex);
goto out;
}
err = 0;
}
/* Finally, mark the journal as really needing no recovery.
* This sets s_start==0 in the underlying superblock, which is
......@@ -1966,7 +1980,8 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
write_unlock(&journal->j_state_lock);
return 0;
out:
return err;
}
/**
......@@ -2330,7 +2345,7 @@ static int jbd2_journal_init_journal_head_cache(void)
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head),
0, /* offset */
SLAB_TEMPORARY, /* flags */
SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
NULL); /* ctor */
retval = 0;
if (!jbd2_journal_head_cache) {
......@@ -2362,10 +2377,8 @@ static struct journal_head *journal_alloc_journal_head(void)
if (!ret) {
jbd_debug(1, "out of memory for journal_head\n");
pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
while (!ret) {
yield();
ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
}
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
return ret;
}
......
......@@ -141,11 +141,13 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
{
struct list_head *hash_list;
struct jbd2_revoke_record_s *record;
gfp_t gfp_mask = GFP_NOFS;
repeat:
record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS);
if (journal_oom_retry)
gfp_mask |= __GFP_NOFAIL;
record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
if (!record)
goto oom;
return -ENOMEM;
record->sequence = seq;
record->blocknr = blocknr;
......@@ -154,13 +156,6 @@ static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
list_add(&record->hash, hash_list);
spin_unlock(&journal->j_revoke_lock);
return 0;
oom:
if (!journal_oom_retry)
return -ENOMEM;
jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
yield();
goto repeat;
}
/* Find a revoke record in the journal's hash table. */
......
This diff is collapsed.
......@@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block);
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
/* Commit management */
......@@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *);
extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
unsigned long, int);
extern void __jbd2_journal_abort_hard (journal_t *);
extern void jbd2_journal_abort (journal_t *, int);
......
......@@ -1185,15 +1185,14 @@ TRACE_EVENT(ext4_da_update_reserve_space,
);
TRACE_EVENT(ext4_da_reserve_space,
TP_PROTO(struct inode *inode, int md_needed),
TP_PROTO(struct inode *inode),
TP_ARGS(inode, md_needed),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, i_blocks )
__field( int, md_needed )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( __u16, mode )
......@@ -1203,18 +1202,17 @@ TRACE_EVENT(ext4_da_reserve_space,
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->i_blocks = inode->i_blocks;
__entry->md_needed = md_needed;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
"reserved_data_blocks %d reserved_meta_blocks %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
__entry->reserved_data_blocks,
__entry->reserved_meta_blocks)
);
......@@ -2478,6 +2476,31 @@ TRACE_EVENT(ext4_collapse_range,
__entry->offset, __entry->len)
);
TRACE_EVENT(ext4_insert_range,
TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
TP_ARGS(inode, offset, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(loff_t, offset)
__field(loff_t, len)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->offset = offset;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu offset %lld len %lld",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->offset, __entry->len)
);
TRACE_EVENT(ext4_es_shrink,
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
int nr_skipped, int retried),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment