Commit 0732f877 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: clean up jbd2_journal_try_to_free_buffers()
  ext4: Don't update ctime for non-extent-mapped inodes
  ext4: Fix up whitespace issues in fs/ext4/inode.c
  ext4: Fix 64-bit block type problem on 32-bit platforms
  ext4: teach the inode allocator to use a goal inode number
  ext4: Use a hash of the topdir directory name for the Orlov parent group
  ext4: document the "abort" mount option
  ext4: move the abort flag from s_mount_opts to s_mount_flags
  ext4: update the s_last_mounted field in the superblock
  ext4: change s_mount_opt to be an unsigned int
  ext4: online defrag -- Add EXT4_IOC_MOVE_EXT ioctl
  ext4: avoid unnecessary spinlock in critical POSIX ACL path
  ext3: avoid unnecessary spinlock in critical POSIX ACL path
  ext4: convert instrumentation from markers to tracepoints
  jbd2: convert instrumentation from markers to tracepoints
parents 15fc204a 536fc240
......@@ -79,3 +79,13 @@ Description:
This file is read-only and shows the number of
kilobytes of data that have been written to this
filesystem since it was mounted.
What: /sys/fs/ext4/<disk>/inode_goal
Date: June 2008
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
Tuning parameter which (if non-zero) controls the goal
inode used by the inode allocator in p0reference to
all other allocation hueristics. This is intended for
debugging use only, and should be 0 on production
systems.
......@@ -235,6 +235,10 @@ minixdf Make 'df' act like Minix.
debug Extra debugging information is sent to syslog.
abort Simulate the effects of calling ext4_abort() for
debugging purposes. This is normally used while
remounting a filesystem which is already mounted.
errors=remount-ro Remount the filesystem read-only on an error.
errors=continue Keep going on a filesystem error.
errors=panic Panic and halt the machine if an error occurs.
......
......@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
......
......@@ -352,6 +352,7 @@ struct ext4_new_group_data {
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
/*
* ioctl commands in 32 bit emulation
......@@ -447,6 +448,15 @@ struct ext4_inode {
__le32 i_version_hi; /* high 32 bits for 64-bit version */
};
struct move_extent {
__u32 reserved; /* should be zero */
__u32 donor_fd; /* donor file descriptor */
__u64 orig_start; /* logical start offset in block for orig */
__u64 donor_start; /* logical start offset in block for donor */
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
......@@ -674,7 +684,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
......@@ -696,17 +705,10 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
EXT4_MOUNT_##opt)
#else
#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
#endif
#define ext4_set_bit ext2_set_bit
#define ext4_set_bit_atomic ext2_set_bit_atomic
......@@ -824,6 +826,13 @@ struct ext4_super_block {
};
#ifdef __KERNEL__
/*
* run-time mount flags
*/
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
/*
* fourth extended-fs super-block data in memory
*/
......@@ -842,7 +851,8 @@ struct ext4_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
unsigned long s_mount_opt;
unsigned int s_mount_opt;
unsigned int s_mount_flags;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
gid_t s_resgid;
......@@ -853,6 +863,7 @@ struct ext4_sb_info {
int s_inode_size;
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
......@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
/* ialloc.c */
extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
const struct qstr *qstr, __u32 goal);
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
......@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
unsigned long, unsigned long, int, unsigned long *);
ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
......@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
/* move_extent.c */
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
/*
* Add new method to test wether block and inode bitmaps are properly
......
......@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
struct ext4_ext_path *path);
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
......
......@@ -49,7 +49,7 @@
* ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
......@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
return err;
}
static int
int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
......
......@@ -21,6 +21,8 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
......@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
static int ext4_file_open(struct inode * inode, struct file * filp)
{
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
/*
* Sample where the filesystem has been mounted and
* store it in the superblock for sysadmin convenience
* when trying to sort through large numbers of block
* devices or filesystem images.
*/
memset(buf, 0, sizeof(buf));
path.mnt = mnt->mnt_parent;
path.dentry = mnt->mnt_mountpoint;
path_get(&path);
cp = d_path(&path, buf, sizeof(buf));
path_put(&path);
if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
sb->s_dirt = 1;
}
}
return generic_file_open(inode, filp);
}
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
......@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
.open = generic_file_open,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
.splice_read = generic_file_splice_read,
......
......@@ -28,10 +28,12 @@
#include <linux/writeback.h>
#include <linux/jbd2.h>
#include <linux/blkdev.h>
#include <linux/marker.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include <trace/events/ext4.h>
/*
* akpm: A new design for ext4_sync_file().
*
......@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL);
trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
inode->i_sb->s_id, datasync, inode->i_ino,
dentry->d_parent->d_inode->i_ino);
trace_ext4_sync_file(file, dentry, datasync);
/*
* data=writeback:
......
......@@ -23,11 +23,14 @@
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <asm/byteorder.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include <trace/events/ext4.h>
/*
* ialloc.c contains the inodes allocation and deallocation routines
*/
......@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ino = inode->i_ino;
ext4_debug("freeing inode %lu\n", ino);
trace_mark(ext4_free_inode,
"dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
sb->s_id, inode->i_ino, inode->i_mode,
(unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
(unsigned long long) inode->i_blocks);
trace_ext4_free_inode(inode);
/*
* Note: we must free any quota before locking the superblock,
......@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*/
static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group, int mode)
ext4_group_t *group, int mode,
const struct qstr *qstr)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct ext4_group_desc *desc;
struct orlov_stats stats;
int flex_size = ext4_flex_bg_size(sbi);
struct dx_hash_info hinfo;
ngroups = real_ngroups;
if (flex_size > 1) {
......@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
int best_ndir = inodes_per_group;
int ret = -1;
get_random_bytes(&grp, sizeof(grp));
if (qstr) {
hinfo.hash_version = DX_HASH_HALF_MD4;
hinfo.seed = sbi->s_hash_seed;
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
grp = hinfo.hash;
} else
get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
......@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
return find_group_orlov(sb, parent, group, mode);
return find_group_orlov(sb, parent, group, mode, 0);
}
/*
......@@ -791,7 +798,8 @@ static int ext4_claim_inode(struct super_block *sb,
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
const struct qstr *qstr, __u32 goal)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
......@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
dir->i_ino, mode);
trace_ext4_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);
sbi = EXT4_SB(sb);
if (!goal)
goal = sbi->s_inode_goal;
if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
ret2 = 0;
goto got_group;
}
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
......@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group);
else
ret2 = find_group_orlov(sb, dir, &group, mode);
ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
} else
ret2 = find_group_other(sb, dir, &group, mode);
......@@ -851,7 +868,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (ret2 == -1)
goto out;
for (i = 0; i < ngroups; i++) {
for (i = 0; i < ngroups; i++, ino = 0) {
err = -EIO;
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
......@@ -863,8 +880,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (!inode_bitmap_bh)
goto fail;
ino = 0;
repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
......@@ -1047,8 +1062,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
}
ext4_debug("allocating inode %lu\n", inode->i_ino);
trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
sb->s_id, inode->i_ino, dir->i_ino, mode);
trace_ext4_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext4_std_error(sb, err);
......
......@@ -37,11 +37,14 @@
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "ext4_extents.h"
#include <trace/events/ext4.h>
#define MPAGE_DA_EXTENT_TAIL 0x01
static inline int ext4_begin_ordered_truncate(struct inode *inode,
......@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
* If the handle isn't valid we're not journaling so there's nothing to do.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr)
struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;
......@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
"data mode %lx\n",
"data mode %x\n",
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));
......@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
*/
static int ext4_block_to_path(struct inode *inode,
ext4_lblk_t i_block,
ext4_lblk_t offsets[4], int *boundary)
ext4_lblk_t i_block,
ext4_lblk_t offsets[4], int *boundary)
{
int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
......@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode,
final = ptrs;
} else {
ext4_warning(inode->i_sb, "ext4_block_to_path",
"block %lu > max in inode %lu",
i_block + direct_blocks +
indirect_blocks + double_blocks, inode->i_ino);
"block %lu > max in inode %lu",
i_block + direct_blocks +
indirect_blocks + double_blocks, inode->i_ino);
}
if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1));
......@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
ext4_error(inode->i_sb, function,
"invalid block reference %u "
"in inode #%lu", blk, inode->i_ino);
return -EIO;
}
}
return 0;
return -EIO;
}
}
return 0;
}
#define ext4_check_indirect_blockref(inode, bh) \
__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
EXT4_ADDR_PER_BLOCK((inode)->i_sb))
#define ext4_check_inode_blockref(inode) \
__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
EXT4_NDIR_BLOCKS)
/**
......@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
bh = sb_getblk(sb, le32_to_cpu(p->key));
if (unlikely(!bh))
goto failure;
if (!bh_uptodate_or_lock(bh)) {
if (bh_submit_read(bh) < 0) {
put_bh(bh);
......@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
goto failure;
}
}
add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
/* Reader: end */
if (!p->key)
......@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
* returns it.
*/
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
Indirect *partial)
Indirect *partial)
{
/*
* XXX need to get goal block from mballoc's data structures
......@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
* direct and indirect blocks.
*/
static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
int blocks_to_boundary)
int blocks_to_boundary)
{
unsigned int count = 0;
......@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
* direct blocks
*/
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
int indirect_blks, int blks,
ext4_fsblk_t new_blocks[4], int *err)
ext4_lblk_t iblock, ext4_fsblk_t goal,
int indirect_blks, int blks,
ext4_fsblk_t new_blocks[4], int *err)
{
struct ext4_allocation_request ar;
int target, i;
......@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
}
if (!*err) {
if (target == blks) {
/*
* save the new block number
* for the first direct block
*/
/*
* save the new block number
* for the first direct block
*/
new_blocks[index] = current_block;
}
blk_allocated += ar.len;
......@@ -728,9 +731,9 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
* as described above and return 0.
*/
static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, int indirect_blks,
int *blks, ext4_fsblk_t goal,
ext4_lblk_t *offsets, Indirect *branch)
ext4_lblk_t iblock, int indirect_blks,
int *blks, ext4_fsblk_t goal,
ext4_lblk_t *offsets, Indirect *branch)
{
int blocksize = inode->i_sb->s_blocksize;
int i, n = 0;
......@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* the chain to point to the new allocated
* data blocks numbers
*/
for (i=1; i < num; i++)
for (i = 1; i < num; i++)
*(branch[n].p + i) = cpu_to_le32(++current_block);
}
BUFFER_TRACE(bh, "marking uptodate");
......@@ -820,7 +823,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* chain to new block and return 0.
*/
static int ext4_splice_branch(handle_t *handle, struct inode *inode,
ext4_lblk_t block, Indirect *where, int num, int blks)
ext4_lblk_t block, Indirect *where, int num,
int blks)
{
int i;
int err = 0;
......@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
}
/* We are done with atomic stuff, now do the rest of housekeeping */
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */
if (where->bh) {
/*
......@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
} else {
/*
* OK, we spliced it into the inode itself on a direct block.
* Inode was dirtied above.
*/
ext4_mark_inode_dirty(handle, inode);
jbd_debug(5, "splicing direct\n");
}
return err;
......@@ -921,9 +921,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* blocks.
*/
static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, unsigned int maxblocks,
struct buffer_head *bh_result,
int flags)
ext4_lblk_t iblock, unsigned int maxblocks,
struct buffer_head *bh_result,
int flags)
{
int err = -EIO;
ext4_lblk_t offsets[4];
......@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, iblock, offsets,
&blocks_to_boundary);
&blocks_to_boundary);
if (depth == 0)
goto out;
......@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
* Block out ext4_truncate while we alter the tree
*/
err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
&count, goal,
offsets + (partial - chain), partial);
&count, goal,
offsets + (partial - chain), partial);
/*
* The ext4_splice_branch call will free and forget any buffers
......@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
*/
if (!err)
err = ext4_splice_branch(handle, inode, iblock,
partial, indirect_blks, count);
else
partial, indirect_blks, count);
else
goto cleanup;
set_buffer_new(bh_result);
......@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, block,
int ret = check_block_validity(inode, block,
bh->b_blocknr, retval);
if (ret != 0)
return ret;
......@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, block,
int ret = check_block_validity(inode, block,
bh->b_blocknr, retval);
if (ret != 0)
return ret;
......@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
for (bh = head, block_start = 0;
ret == 0 && (bh != head || !block_start);
block_start = block_end, bh = next)
{
block_start = block_end, bh = next) {
next = bh->b_this_page;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
......@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle,
* write.
*/
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
struct buffer_head *bh)
{
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
......@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle,
}
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
int ret, needed_blocks;
handle_t *handle;
int retries = 0;
struct page *page;
pgoff_t index;
pgoff_t index;
unsigned from, to;
trace_mark(ext4_write_begin,
"dev %s ino %lu pos %llu len %u flags %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, flags);
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
* we allocate blocks but write fails for some reason
*/
needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
index = pos >> PAGE_CACHE_SHIFT;
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
......@@ -1523,7 +1519,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
ext4_journal_stop(handle);
if (pos + len > inode->i_size) {
vmtruncate(inode, inode->i_size);
/*
/*
* If vmtruncate failed early the inode might
* still be on the orphan list; we need to
* make sure the inode is removed from the
......@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
}
static int ext4_generic_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
int i_size_changed = 0;
struct inode *inode = mapping->host;
......@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file,
* buffers are managed internally.
*/
static int ext4_ordered_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
int ret = 0, ret2;
trace_mark(ext4_ordered_write_end,
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
trace_ext4_ordered_write_end(inode, pos, len, copied);
ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) {
......@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file,
if (pos + len > inode->i_size) {
vmtruncate(inode, inode->i_size);
/*
/*
* If vmtruncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
......@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file,
}
static int ext4_writeback_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
int ret = 0, ret2;
trace_mark(ext4_writeback_write_end,
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
trace_ext4_writeback_write_end(inode, pos, len, copied);
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
copied = ret2;
......@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file,
if (pos + len > inode->i_size) {
vmtruncate(inode, inode->i_size);
/*
/*
* If vmtruncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
......@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file,
}
static int ext4_journalled_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
......@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
loff_t new_i_size;
trace_mark(ext4_journalled_write_end,
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
......@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
vmtruncate(inode, inode->i_size);
/*
/*
* If vmtruncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
* is removed from the orphan list in that case.
......@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
}
static void ext4_da_page_release_reservation(struct page *page,
unsigned long offset)
unsigned long offset)
{
int to_release = 0;
struct buffer_head *head, *bh;
......@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
struct buffer_head *page_bufs;
struct inode *inode = page->mapping->host;
trace_mark(ext4_da_writepage,
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
trace_ext4_da_writepage(inode, page);
size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
......@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
trace_mark(ext4_da_writepages,
"dev %s ino %lu nr_t_write %ld "
"pages_skipped %ld range_start %llu "
"range_end %llu nonblocking %d "
"for_kupdate %d for_reclaim %d "
"for_writepages %d range_cyclic %d",
inode->i_sb->s_id, inode->i_ino,
wbc->nr_to_write, wbc->pages_skipped,
(unsigned long long) wbc->range_start,
(unsigned long long) wbc->range_end,
wbc->nonblocking, wbc->for_kupdate,
wbc->for_reclaim, wbc->for_writepages,
wbc->range_cyclic);
trace_ext4_da_writepages(inode, wbc);
/*
* No pages to write? This is mainly a kludge to avoid starting
......@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
* EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
* EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_da_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS;
/*
......@@ -2845,14 +2818,7 @@ static int ext4_da_writepages(struct address_space *mapping,
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump;
trace_mark(ext4_da_writepage_result,
"dev %s ino %lu ret %d pages_written %d "
"pages_skipped %ld congestion %d "
"more_io %d no_nrwrite_index_update %d",
inode->i_sb->s_id, inode->i_ino, ret,
pages_written, wbc->pages_skipped,
wbc->encountered_congestion, wbc->more_io,
wbc->no_nrwrite_index_update);
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret;
}
......@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb)
}
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
int ret, retries = 0;
struct page *page;
......@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata);
}
*fsdata = (void *)0;
trace_mark(ext4_da_write_begin,
"dev %s ino %lu pos %llu len %u flags %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, flags);
trace_ext4_da_write_begin(inode, pos, len, flags);
retry:
/*
* With delayed allocation, we don't log the i_disksize update
......@@ -2959,7 +2921,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
* when write to the end of file but not require block allocation
*/
static int ext4_da_should_update_i_disksize(struct page *page,
unsigned long offset)
unsigned long offset)
{
struct buffer_head *bh;
struct inode *inode = page->mapping->host;
......@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
}
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
int ret = 0, ret2;
......@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
}
}
trace_mark(ext4_da_write_end,
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
trace_ext4_da_write_end(inode, pos, len, copied);
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
......@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
* not strictly speaking necessary (and for users of
* laptop_mode, not even desirable). However, to do otherwise
* would require replicating code paths in:
*
*
* ext4_da_writepages() ->
* write_cache_pages() ---> (via passed in callback function)
* __mpage_da_writepage() -->
......@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
* write out the pages, but rather only collect contiguous
* logical block extents, call the multi-block allocator, and
* then update the buffer heads with the block allocations.
*
*
* For now, though, we'll cheat by calling filemap_flush(),
* which will map the blocks, and start the I/O, but not
* actually wait for the I/O to complete.
......@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
*
*/
static int __ext4_normal_writepage(struct page *page,
struct writeback_control *wbc)
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
......@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page,
}
static int ext4_normal_writepage(struct page *page,
struct writeback_control *wbc)
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
loff_t size = i_size_read(inode);
loff_t len;
trace_mark(ext4_normal_writepage,
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
trace_ext4_normal_writepage(inode, page);
J_ASSERT(PageLocked(page));
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
......@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page,
}
static int __ext4_journalled_writepage(struct page *page,
struct writeback_control *wbc)
struct writeback_control *wbc)
{
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
......@@ -3337,15 +3294,13 @@ static int __ext4_journalled_writepage(struct page *page,
}
static int ext4_journalled_writepage(struct page *page,
struct writeback_control *wbc)
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
loff_t size = i_size_read(inode);
loff_t len;
trace_mark(ext4_journalled_writepage,
"dev %s ino %lu page_index %lu",
inode->i_sb->s_id, inode->i_ino, page->index);
trace_ext4_journalled_writepage(inode, page);
J_ASSERT(PageLocked(page));
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
......@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
* VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
......@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
* (no partially truncated stuff there). */
static Indirect *ext4_find_shared(struct inode *inode, int depth,
ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top)
ext4_lblk_t offsets[4], Indirect chain[4],
__le32 *top)
{
Indirect *partial, *p;
int k, err;
......@@ -3819,8 +3775,10 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
* than `count' because there can be holes in there.
*/
static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block_to_free,
unsigned long count, __le32 *first, __le32 *last)
struct buffer_head *bh,
ext4_fsblk_t block_to_free,
unsigned long count, __le32 *first,
__le32 *last)
{
__le32 *p;
if (try_to_extend_transaction(handle, inode)) {
......@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
}
/*
* Any buffers which are on the journal will be in memory. We find
* them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget()
* on them. We've already detached each block from the file, so
* bforget() in jbd2_journal_forget() should be safe.
* Any buffers which are on the journal will be in memory. We
* find them on the hash table so jbd2_journal_revoke() will
* run jbd2_journal_forget() on them. We've already detached
* each block from the file, so bforget() in
* jbd2_journal_forget() should be safe.
*
* AKPM: turn on bforget in jbd2_journal_forget()!!!
*/
......@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
(__le32*)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
BUFFER_TRACE(partial->bh, "call brelse");
brelse (partial->bh);
brelse(partial->bh);
partial--;
}
do_indirects:
......@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
struct ext4_inode_info *ei)
{
blkcnt_t i_blocks ;
struct inode *inode = &(ei->vfs_inode);
......@@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
EXT4_GOOD_OLD_INODE_SIZE +
ei->i_extra_isize;
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
ei->i_state |= EXT4_STATE_XATTR;
ei->i_state |= EXT4_STATE_XATTR;
}
} else
ei->i_extra_isize = 0;
......@@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
((ei->i_file_acl <
((ei->i_file_acl <
(le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
EXT4_SB(sb)->s_gdb_count)) ||
(ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
......@@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
!ext4_inode_is_fast_symlink(inode)))
/* Validate extent which is part of inode */
ret = ext4_ext_check_inode(inode);
} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode))) {
/* Validate block references which are part of inode */
/* Validate block references which are part of inode */
ret = ext4_check_inode_blockref(inode);
}
if (ret) {
brelse(bh);
goto bad_inode;
brelse(bh);
goto bad_inode;
}
if (S_ISREG(inode->i_mode)) {
......@@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
} else {
brelse(bh);
ret = -EIO;
ext4_error(inode->i_sb, __func__,
ext4_error(inode->i_sb, __func__,
"bogus i_mode (%o) for inode=%lu",
inode->i_mode, inode->i_ino);
goto bad_inode;
......@@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le32(new_encode_dev(inode->i_rdev));
raw_inode->i_block[2] = 0;
}
} else for (block = 0; block < EXT4_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
} else
for (block = 0; block < EXT4_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
if (ei->i_extra_isize) {
......@@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
* Give this, we know that the caller already has write access to iloc->bh.
*/
int ext4_mark_iloc_dirty(handle_t *handle,
struct inode *inode, struct ext4_iloc *iloc)
struct inode *inode, struct ext4_iloc *iloc)
{
int err = 0;
......
......@@ -14,6 +14,7 @@
#include <linux/compat.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
......@@ -213,6 +214,41 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err;
}
case EXT4_IOC_MOVE_EXT: {
struct move_extent me;
struct file *donor_filp;
int err;
if (copy_from_user(&me,
(struct move_extent __user *)arg, sizeof(me)))
return -EFAULT;
donor_filp = fget(me.donor_fd);
if (!donor_filp)
return -EBADF;
if (!capable(CAP_DAC_OVERRIDE)) {
if ((current->real_cred->fsuid != inode->i_uid) ||
!(inode->i_mode & S_IRUSR) ||
!(donor_filp->f_dentry->d_inode->i_mode &
S_IRUSR)) {
fput(donor_filp);
return -EACCES;
}
}
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
fput(donor_filp);
if (!err)
if (copy_to_user((struct move_extent *)arg,
&me, sizeof(me)))
return -EFAULT;
return err;
}
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
......
......@@ -22,6 +22,8 @@
*/
#include "mballoc.h"
#include <trace/events/ext4.h>
/*
* MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right()
......@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
......@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+ entry->start_blk
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u",
sb->s_id, (unsigned long long) discard_block,
entry->count);
trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
entry->count);
sb_issue_discard(sb, discard_block, entry->count);
kmem_cache_free(ext4_free_ext_cachep, entry);
......@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_mark(ext4_mb_new_inode_pa,
"dev %s ino %lu pstart %llu len %u lstart %u",
sb->s_id, ac->ac_inode->i_ino,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_ext4_mb_new_inode_pa(ac, pa);
ext4_mb_use_inode_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
......@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_type = MB_GROUP_PA;
mb_debug("new group pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u",
sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_ext4_mb_new_group_pa(ac, pa);
ext4_mb_use_group_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
......@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_mb_store_history(ac);
}
trace_mark(ext4_mb_release_inode_pa,
"dev %s ino %lu block %llu count %u",
sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
next - bit);
trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
......@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (ac)
ac->ac_op = EXT4_MB_HISTORY_DISCARD;
trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d",
sb->s_id, pa->pa_pstart, pa->pa_len);
trace_ext4_mb_release_group_pa(ac, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
......@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
......@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
}
mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id,
inode->i_ino);
trace_ext4_discard_preallocations(inode);
INIT_LIST_HEAD(&list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = inode;
}
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
......@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
INIT_LIST_HEAD(&discard_list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac)
ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
......@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
int ret;
int freed = 0;
trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
sb->s_id, needed);
trace_ext4_mb_discard_preallocations(sb, needed);
for (i = 0; i < ngroups && needed > 0; i++) {
ret = ext4_mb_discard_group_preallocations(sb, i, needed);
freed += ret;
......@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu "
"lblk %llu goal %llu lleft %llu lright %llu "
"pleft %llu pright %llu ",
sb->s_id, ar->flags, ar->len,
ar->inode ? ar->inode->i_ino : 0,
(unsigned long long) ar->logical,
(unsigned long long) ar->goal,
(unsigned long long) ar->lleft,
(unsigned long long) ar->lright,
(unsigned long long) ar->pleft,
(unsigned long long) ar->pright);
trace_ext4_request_blocks(ar);
/*
* For delayed allocation, we could skip the ENOSPC and
......@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) {
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
} else {
ar->len = 0;
*errp = -ENOMEM;
goto out1;
......@@ -4594,18 +4585,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
reserv_blks);
}
trace_mark(ext4_allocate_blocks,
"dev %s block %llu flags %u len %u ino %lu "
"logical %llu goal %llu lleft %llu lright %llu "
"pleft %llu pright %llu ",
sb->s_id, (unsigned long long) block,
ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
(unsigned long long) ar->logical,
(unsigned long long) ar->goal,
(unsigned long long) ar->lleft,
(unsigned long long) ar->lright,
(unsigned long long) ar->pleft,
(unsigned long long) ar->pright);
trace_ext4_allocate_blocks(ar, (unsigned long long)block);
return block;
}
......@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* Main entry point into mballoc to free blocks
*/
void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
unsigned long block, unsigned long count,
ext4_fsblk_t block, unsigned long count,
int metadata, unsigned long *freed)
{
struct buffer_head *bitmap_bh = NULL;
......@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
block + count > ext4_blocks_count(es)) {
ext4_error(sb, __func__,
"Freeing blocks not in datazone - "
"block = %lu, count = %lu", block, count);
"block = %llu, count = %lu", block, count);
goto error_return;
}
ext4_debug("freeing block %lu\n", block);
trace_mark(ext4_free_blocks,
"dev %s block %llu count %lu metadata %d ino %lu",
sb->s_id, (unsigned long long) block, count, metadata,
inode ? inode->i_ino : 0);
ext4_debug("freeing block %llu\n", block);
trace_ext4_free_blocks(inode, block, count, metadata);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
......@@ -4784,7 +4761,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
ext4_error(sb, __func__,
"Freeing blocks in system zone - "
"Block = %lu, count = %lu", block, count);
"Block = %llu, count = %lu", block, count);
/* err = 0. ext4_std_error should be a no op */
goto error_return;
}
......
......@@ -19,7 +19,6 @@
#include <linux/seq_file.h>
#include <linux/version.h>
#include <linux/blkdev.h>
#include <linux/marker.h>
#include <linux/mutex.h>
#include "ext4_jbd2.h"
#include "ext4.h"
......
......@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL;
struct list_blocks_struct lb;
unsigned long max_entries;
__u32 goal;
/*
* If the filesystem does not support extents, or the inode
......@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
retval = PTR_ERR(handle);
return retval;
}
tmp_inode = ext4_new_inode(handle,
inode->i_sb->s_root->d_inode,
S_IFREG);
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, 0, goal);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
......
/*
* Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
* Written by Takashi Sato <t-sato@yk.jp.nec.com>
* Akira Fujita <a-fujita@rs.jp.nec.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/fs.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "ext4.h"
#define get_ext_path(path, inode, block, ret) \
do { \
path = ext4_ext_find_extent(inode, block, path); \
if (IS_ERR(path)) { \
ret = PTR_ERR(path); \
path = NULL; \
} \
} while (0)
/**
* copy_extent_status - Copy the extent's initialization status
*
* @src: an extent for getting initialize status
* @dest: an extent to be set the status
*/
static void
copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
{
if (ext4_ext_is_uninitialized(src))
ext4_ext_mark_uninitialized(dest);
else
dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
}
/**
* mext_next_extent - Search for the next extent and set it to "extent"
*
* @inode: inode which is searched
* @path: this will obtain data for the next extent
* @extent: pointer to the next extent we have just gotten
*
* Search the next extent in the array of ext4_ext_path structure (@path)
* and set it to ext4_extent structure (@extent). In addition, the member of
* @path (->p_ext) also points the next extent. Return 0 on success, 1 if
* ext4_ext_path structure refers to the last extent, or a negative error
* value on failure.
*/
static int
mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent **extent)
{
int ppos, leaf_ppos = path->p_depth;
ppos = leaf_ppos;
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
return 0;
}
while (--ppos >= 0) {
if (EXT_LAST_INDEX(path[ppos].p_hdr) >
path[ppos].p_idx) {
int cur_ppos = ppos;
/* index block */
path[ppos].p_idx++;
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
sb_bread(inode->i_sb, path[ppos].p_block);
if (!path[ppos+1].p_bh)
return -EIO;
path[ppos+1].p_hdr =
ext_block_hdr(path[ppos+1].p_bh);
/* Halfway index block */
while (++cur_ppos < leaf_ppos) {
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
path[cur_ppos].p_block);
if (!path[cur_ppos+1].p_bh)
return -EIO;
path[cur_ppos+1].p_hdr =
ext_block_hdr(path[cur_ppos+1].p_bh);
}
/* leaf block */
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
return 0;
}
}
/* We found the last extent */
return 1;
}
/**
* mext_double_down_read - Acquire two inodes' read semaphore
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
*/
static void
mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_read(&EXT4_I(first)->i_data_sem);
down_read(&EXT4_I(second)->i_data_sem);
}
/**
* mext_double_down_write - Acquire two inodes' write semaphore
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
*/
static void
mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_write(&EXT4_I(first)->i_data_sem);
down_write(&EXT4_I(second)->i_data_sem);
}
/**
* mext_double_up_read - Release two inodes' read semaphore
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release read semaphore of two inodes (orig and donor).
*/
static void
mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
{
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_read(&EXT4_I(orig_inode)->i_data_sem);
up_read(&EXT4_I(donor_inode)->i_data_sem);
}
/**
* mext_double_up_write - Release two inodes' write semaphore
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release write semaphore of two inodes (orig and donor).
*/
static void
mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
{
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_write(&EXT4_I(orig_inode)->i_data_sem);
up_write(&EXT4_I(donor_inode)->i_data_sem);
}
/**
* mext_insert_across_blocks - Insert extents across leaf block
*
* @handle: journal handle
* @orig_inode: original inode
* @o_start: first original extent to be changed
* @o_end: last original extent to be changed
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
*
* Allocate a new leaf block and insert extents into it. Return 0 on success,
* or a negative error value on failure.
*/
static int
mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
struct ext4_extent *o_start, struct ext4_extent *o_end,
struct ext4_extent *start_ext, struct ext4_extent *new_ext,
struct ext4_extent *end_ext)
{
struct ext4_ext_path *orig_path = NULL;
ext4_lblk_t eblock = 0;
int new_flag = 0;
int end_flag = 0;
int err = 0;
if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
if (o_start == o_end) {
/* start_ext new_ext end_ext
* donor |---------|-----------|--------|
* orig |------------------------------|
*/
end_flag = 1;
} else {
/* start_ext new_ext end_ext
* donor |---------|----------|---------|
* orig |---------------|--------------|
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
new_flag = 1;
} else if (start_ext->ee_len && new_ext->ee_len &&
!end_ext->ee_len && o_start == o_end) {
/* start_ext new_ext
* donor |--------------|---------------|
* orig |------------------------------|
*/
o_start->ee_len = start_ext->ee_len;
new_flag = 1;
} else if (!start_ext->ee_len && new_ext->ee_len &&
end_ext->ee_len && o_start == o_end) {
/* new_ext end_ext
* donor |--------------|---------------|
* orig |------------------------------|
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
* the first block.
*/
if (new_ext->ee_block)
eblock = le32_to_cpu(new_ext->ee_block);
new_flag = 1;
} else {
ext4_debug("ext4 move extent: Unexpected insert case\n");
return -EIO;
}
if (new_flag) {
get_ext_path(orig_path, orig_inode, eblock, err);
if (orig_path == NULL)
goto out;
if (ext4_ext_insert_extent(handle, orig_inode,
orig_path, new_ext))
goto out;
}
if (end_flag) {
get_ext_path(orig_path, orig_inode,
le32_to_cpu(end_ext->ee_block) - 1, err);
if (orig_path == NULL)
goto out;
if (ext4_ext_insert_extent(handle, orig_inode,
orig_path, end_ext))
goto out;
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
return err;
}
/**
* mext_insert_inside_block - Insert new extent to the extent block
*
* @o_start: first original extent to be moved
* @o_end: last original extent to be moved
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
* @eh: extent header of target leaf block
* @range_to_move: used to decide how to insert extent
*
* Insert extents into the leaf block. The extent (@o_start) is overwritten
* by inserted extents.
*/
static void
mext_insert_inside_block(struct ext4_extent *o_start,
struct ext4_extent *o_end,
struct ext4_extent *start_ext,
struct ext4_extent *new_ext,
struct ext4_extent *end_ext,
struct ext4_extent_header *eh,
int range_to_move)
{
int i = 0;
unsigned long len;
/* Move the existing extents */
if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
(unsigned long)(o_end + 1);
memmove(o_end + 1 + range_to_move, o_end + 1, len);
}
/* Insert start entry */
if (start_ext->ee_len)
o_start[i++].ee_len = start_ext->ee_len;
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
}
/* Insert end entry */
if (end_ext->ee_len)
o_start[i] = *end_ext;
/* Increment the total entries counter on the extent block */
le16_add_cpu(&eh->eh_entries, range_to_move);
}
/**
* mext_insert_extents - Insert new extent
*
* @handle: journal handle
* @orig_inode: original inode
* @orig_path: path indicates first extent to be changed
* @o_start: first original extent to be changed
* @o_end: last original extent to be changed
* @start_ext: first new extent to be inserted
* @new_ext: middle of new extent to be inserted
* @end_ext: last new extent to be inserted
*
* Call the function to insert extents. If we cannot add more extents into
* the leaf block, we call mext_insert_across_blocks() to create a
* new leaf block. Otherwise call mext_insert_inside_block(). Return 0
* on success, or a negative error value on failure.
*/
static int
mext_insert_extents(handle_t *handle, struct inode *orig_inode,
struct ext4_ext_path *orig_path,
struct ext4_extent *o_start,
struct ext4_extent *o_end,
struct ext4_extent *start_ext,
struct ext4_extent *new_ext,
struct ext4_extent *end_ext)
{
struct ext4_extent_header *eh;
unsigned long need_slots, slots_range;
int range_to_move, depth, ret;
/*
* The extents need to be inserted
* start_extent + new_extent + end_extent.
*/
need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
(new_ext->ee_len ? 1 : 0);
/* The number of slots between start and end */
slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
/ sizeof(struct ext4_extent);
/* Range to move the end of extent */
range_to_move = need_slots - slots_range;
depth = orig_path->p_depth;
orig_path += depth;
eh = orig_path->p_hdr;
if (depth) {
/* Register to journal */
ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
if (ret)
return ret;
}
/* Expansion */
if (range_to_move > 0 &&
(range_to_move > le16_to_cpu(eh->eh_max)
- le16_to_cpu(eh->eh_entries))) {
ret = mext_insert_across_blocks(handle, orig_inode, o_start,
o_end, start_ext, new_ext, end_ext);
if (ret < 0)
return ret;
} else
mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
end_ext, eh, range_to_move);
if (depth) {
ret = ext4_handle_dirty_metadata(handle, orig_inode,
orig_path->p_bh);
if (ret)
return ret;
} else {
ret = ext4_mark_inode_dirty(handle, orig_inode);
if (ret < 0)
return ret;
}
return 0;
}
/**
* mext_leaf_block - Move one leaf extent block into the inode.
*
* @handle: journal handle
* @orig_inode: original inode
* @orig_path: path indicates first extent to be changed
* @dext: donor extent
* @from: start offset on the target file
*
* In order to insert extents into the leaf block, we must divide the extent
* in the leaf block into three extents. The one is located to be inserted
* extents, and the others are located around it.
*
* Therefore, this function creates structures to save extents of the leaf
* block, and inserts extents by calling mext_insert_extents() with
* created extents. Return 0 on success, or a negative error value on failure.
*/
static int
mext_leaf_block(handle_t *handle, struct inode *orig_inode,
struct ext4_ext_path *orig_path, struct ext4_extent *dext,
ext4_lblk_t *from)
{
struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
struct ext4_extent new_ext, start_ext, end_ext;
ext4_lblk_t new_ext_end;
ext4_fsblk_t new_phys_end;
int oext_alen, new_ext_alen, end_ext_alen;
int depth = ext_depth(orig_inode);
int ret;
o_start = o_end = oext = orig_path[depth].p_ext;
oext_alen = ext4_ext_get_actual_len(oext);
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
/*
* Case: original extent is first
* oext |--------|
* new_ext |--|
* start_ext |--|
*/
if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
le32_to_cpu(new_ext.ee_block) <
le32_to_cpu(oext->ee_block) + oext_alen) {
start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
le32_to_cpu(oext->ee_block));
copy_extent_status(oext, &start_ext);
} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
prev_ext = oext - 1;
/*
* We can merge new_ext into previous extent,
* if these are contiguous and same extent type.
*/
if (ext4_can_extents_be_merged(orig_inode, prev_ext,
&new_ext)) {
o_start = prev_ext;
start_ext.ee_len = cpu_to_le16(
ext4_ext_get_actual_len(prev_ext) +
new_ext_alen);
copy_extent_status(prev_ext, &start_ext);
new_ext.ee_len = 0;
}
}
/*
* Case: new_ext_end must be less than oext
* oext |-----------|
* new_ext |-------|
*/
BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
/*
* Case: new_ext is smaller than original extent
* oext |---------------|
* new_ext |-----------|
* end_ext |---|
*/
if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
end_ext.ee_len =
cpu_to_le16(le32_to_cpu(oext->ee_block) +
oext_alen - 1 - new_ext_end);
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
(ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
}
ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
o_end, &start_ext, &new_ext, &end_ext);
return ret;
}
/**
* mext_calc_swap_extents - Calculate extents for extent swapping.
*
* @tmp_dext: the extent that will belong to the original inode
* @tmp_oext: the extent that will belong to the donor inode
* @orig_off: block offset of original inode
* @donor_off: block offset of donor inode
* @max_count: the maximun length of extents
*/
static void
mext_calc_swap_extents(struct ext4_extent *tmp_dext,
struct ext4_extent *tmp_oext,
ext4_lblk_t orig_off, ext4_lblk_t donor_off,
ext4_lblk_t max_count)
{
ext4_lblk_t diff, orig_diff;
struct ext4_extent dext_old, oext_old;
dext_old = *tmp_dext;
oext_old = *tmp_oext;
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
if (max_count < ext4_ext_get_actual_len(tmp_dext))
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >
ext4_ext_get_actual_len(tmp_oext) - orig_diff)
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
orig_diff);
tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
copy_extent_status(&oext_old, tmp_dext);
copy_extent_status(&dext_old, tmp_oext);
}
/**
* mext_replace_branches - Replace original extents with new extents
*
* @handle: journal handle
* @orig_inode: original inode
* @donor_inode: donor inode
* @from: block offset of orig_inode
* @count: block count to be replaced
*
* Replace original inode extents and donor inode extents page by page.
* We implement this replacement in the following three steps:
* 1. Save the block information of original and donor inodes into
* dummy extents.
* 2. Change the block information of original inode to point at the
* donor inode blocks.
* 3. Change the block information of donor inode to point at the saved
* original inode blocks in the dummy extents.
*
* Return 0 on success, or a negative error value on failure.
*/
static int
mext_replace_branches(handle_t *handle, struct inode *orig_inode,
struct inode *donor_inode, ext4_lblk_t from,
ext4_lblk_t count)
{
struct ext4_ext_path *orig_path = NULL;
struct ext4_ext_path *donor_path = NULL;
struct ext4_extent *oext, *dext;
struct ext4_extent tmp_dext, tmp_oext;
ext4_lblk_t orig_off = from, donor_off = from;
int err = 0;
int depth;
int replaced_count = 0;
int dext_alen;
mext_double_down_write(orig_inode, donor_inode);
/* Get the original extent for the block "orig_off" */
get_ext_path(orig_path, orig_inode, orig_off, err);
if (orig_path == NULL)
goto out;
/* Get the donor extent for the head */
get_ext_path(donor_path, donor_inode, donor_off, err);
if (donor_path == NULL)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
tmp_oext = *oext;
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, count);
/* Loop for the donor extents */
while (1) {
/* The extent for donor must be found. */
BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
/* Set donor extent to orig extent */
err = mext_leaf_block(handle, orig_inode,
orig_path, &tmp_dext, &orig_off);
if (err < 0)
goto out;
/* Set orig extent to donor extent */
err = mext_leaf_block(handle, donor_inode,
donor_path, &tmp_oext, &donor_off);
if (err < 0)
goto out;
dext_alen = ext4_ext_get_actual_len(&tmp_dext);
replaced_count += dext_alen;
donor_off += dext_alen;
orig_off += dext_alen;
/* Already moved the expected blocks */
if (replaced_count >= count)
break;
if (orig_path)
ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, orig_off, err);
if (orig_path == NULL)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
if (le32_to_cpu(oext->ee_block) +
ext4_ext_get_actual_len(oext) <= orig_off) {
err = 0;
goto out;
}
tmp_oext = *oext;
if (donor_path)
ext4_ext_drop_refs(donor_path);
get_ext_path(donor_path, donor_inode,
donor_off, err);
if (donor_path == NULL)
goto out;
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
if (le32_to_cpu(dext->ee_block) +
ext4_ext_get_actual_len(dext) <= donor_off) {
err = 0;
goto out;
}
tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off,
count - replaced_count);
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
if (donor_path) {
ext4_ext_drop_refs(donor_path);
kfree(donor_path);
}
mext_double_up_write(orig_inode, donor_inode);
return err;
}
/**
* move_extent_per_page - Move extent data per page
*
* @o_filp: file structure of original file
* @donor_inode: donor inode
* @orig_page_offset: page index on original file
* @data_offset_in_page: block index where data swapping starts
* @block_len_in_page: the number of blocks to be swapped
* @uninit: orig extent is uninitialized or not
*
* Save the data in original inode blocks and replace original inode extents
* with donor inode extents by calling mext_replace_branches().
* Finally, write out the saved data in new original inode blocks. Return 0
* on success, or a negative error value on failure.
*/
static int
move_extent_par_page(struct file *o_filp, struct inode *donor_inode,
pgoff_t orig_page_offset, int data_offset_in_page,
int block_len_in_page, int uninit)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct address_space *mapping = orig_inode->i_mapping;
struct buffer_head *bh;
struct page *page = NULL;
const struct address_space_operations *a_ops = mapping->a_ops;
handle_t *handle;
ext4_lblk_t orig_blk_offset;
long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int w_flags = 0;
unsigned int tmp_data_len, data_len;
void *fsdata;
int ret, i, jblocks;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
/*
* It needs twice the amount of ordinary journal buffers because
* inode and donor_inode may change each different metadata blocks.
*/
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
handle = ext4_journal_start(orig_inode, jblocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
return ret;
}
if (segment_eq(get_fs(), KERNEL_DS))
w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
/*
* If orig extent is uninitialized one,
* it's not necessary force the page into memory
* and then force it to be written out again.
* Just swap data blocks between orig and donor.
*/
if (uninit) {
ret = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page);
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
goto out2;
}
offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
/* Calculate data_len */
if ((orig_blk_offset + block_len_in_page - 1) ==
((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
/* Replace the last block */
tmp_data_len = orig_inode->i_size & (blocksize - 1);
/*
* If data_len equal zero, it shows data_len is multiples of
* blocksize. So we set appropriate value.
*/
if (tmp_data_len == 0)
tmp_data_len = blocksize;
data_len = tmp_data_len +
((block_len_in_page - 1) << orig_inode->i_blkbits);
} else {
data_len = block_len_in_page << orig_inode->i_blkbits;
}
ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
&page, &fsdata);
if (unlikely(ret < 0))
goto out;
if (!PageUptodate(page)) {
mapping->a_ops->readpage(o_filp, page);
lock_page(page);
}
/*
* try_to_release_page() doesn't call releasepage in writeback mode.
* We should care about the order of writing to the same file
* by multiple move extent processes.
* It needs to call wait_on_page_writeback() to wait for the
* writeback of the page.
*/
if (PageWriteback(page))
wait_on_page_writeback(page);
/* Release old bh and drop refs */
try_to_release_page(page, 0);
ret = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset, block_len_in_page);
if (ret < 0)
goto out;
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
bh = page_buffers(page);
for (i = 0; i < data_offset_in_page; i++)
bh = bh->b_this_page;
for (i = 0; i < block_len_in_page; i++) {
ret = ext4_get_block(orig_inode,
(sector_t)(orig_blk_offset + i), bh, 0);
if (ret < 0)
goto out;
if (bh->b_this_page != NULL)
bh = bh->b_this_page;
}
ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
page, fsdata);
page = NULL;
out:
if (unlikely(page)) {
if (PageLocked(page))
unlock_page(page);
page_cache_release(page);
}
out2:
ext4_journal_stop(handle);
return ret < 0 ? ret : 0;
}
/**
* mext_check_argumants - Check whether move extent can be done
*
* @orig_inode: original inode
* @donor_inode: donor inode
* @orig_start: logical start offset in block for orig
* @donor_start: logical start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* Check the arguments of ext4_move_extents() whether the files can be
* exchanged with each other.
* Return 0 on success, or a negative error value on failure.
*/
static int
mext_check_arguments(struct inode *orig_inode,
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len, __u64 moved_len)
{
/* Regular file check */
if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
ext4_debug("ext4 move extent: The argument files should be "
"regular file [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
"not be swapfile [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Files should be in the same ext4 FS */
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
"should be in same FS [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* orig and donor should be different file */
if (orig_inode->i_ino == donor_inode->i_ino) {
ext4_debug("ext4 move extent: The argument files should not "
"be same file [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent supports only extent based file */
if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
ext4_debug("ext4 move extent: orig file is not extents "
"based file [ino:orig %lu]\n", orig_inode->i_ino);
return -EOPNOTSUPP;
} else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
ext4_debug("ext4 move extent: donor file is not extents "
"based file [ino:donor %lu]\n", donor_inode->i_ino);
return -EOPNOTSUPP;
}
if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
ext4_debug("ext4 move extent: File size is 0 byte\n");
return -EINVAL;
}
/* Start offset should be same */
if (orig_start != donor_start) {
ext4_debug("ext4 move extent: orig and donor's start "
"offset are not same [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (moved_len) {
ext4_debug("ext4 move extent: moved_len should be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
}
if ((orig_start > MAX_DEFRAG_SIZE) ||
(donor_start > MAX_DEFRAG_SIZE) ||
(*len > MAX_DEFRAG_SIZE) ||
(orig_start + *len > MAX_DEFRAG_SIZE)) {
ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
"[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_inode->i_size > donor_inode->i_size) {
if (orig_start >= donor_inode->i_size) {
ext4_debug("ext4 move extent: orig start offset "
"[%llu] should be less than donor file size "
"[%lld] [ino:orig %lu, donor_inode %lu]\n",
orig_start, donor_inode->i_size,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_start + *len > donor_inode->i_size) {
ext4_debug("ext4 move extent: End offset [%llu] should "
"be less than donor file size [%lld]."
"So adjust length from %llu to %lld "
"[ino:orig %lu, donor %lu]\n",
orig_start + *len, donor_inode->i_size,
*len, donor_inode->i_size - orig_start,
orig_inode->i_ino, donor_inode->i_ino);
*len = donor_inode->i_size - orig_start;
}
} else {
if (orig_start >= orig_inode->i_size) {
ext4_debug("ext4 move extent: start offset [%llu] "
"should be less than original file size "
"[%lld] [inode:orig %lu, donor %lu]\n",
orig_start, orig_inode->i_size,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
if (orig_start + *len > orig_inode->i_size) {
ext4_debug("ext4 move extent: Adjust length "
"from %llu to %lld. Because it should be "
"less than original file size "
"[ino:orig %lu, donor %lu]\n",
*len, orig_inode->i_size - orig_start,
orig_inode->i_ino, donor_inode->i_ino);
*len = orig_inode->i_size - orig_start;
}
}
if (!*len) {
ext4_debug("ext4 move extent: len shoudld not be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
}
return 0;
}
/**
* mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
*
* Lock two inodes' i_mutex by i_ino order. This function is moved from
* fs/inode.c.
*/
static void
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
{
if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
if (inode1)
mutex_lock(&inode1->i_mutex);
else if (inode2)
mutex_lock(&inode2->i_mutex);
return;
}
if (inode1->i_ino < inode2->i_ino) {
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
}
}
/**
* mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
*
* @inode1: the inode that is released first
* @inode2: the inode that is released second
*
* This function is moved from fs/inode.c.
*/
static void
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
if (inode1)
mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex);
}
/**
* ext4_move_extents - Exchange the specified range of a file
*
* @o_filp: file structure of the original file
* @d_filp: file structure of the donor file
* @orig_start: start offset in block for orig
* @donor_start: start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* This function returns 0 and moved block length is set in moved_len
* if succeed, otherwise returns error value.
*
* Note: ext4_move_extents() proceeds the following order.
* 1:ext4_move_extents() calculates the last block number of moving extent
* function by the start block number (orig_start) and the number of blocks
* to be moved (len) specified as arguments.
* If the {orig, donor}_start points a hole, the extent's start offset
* pointed by ext_cur (current extent), holecheck_path, orig_path are set
* after hole behind.
* 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
* or the ext_cur exceeds the block_end which is last logical block number.
* 3:To get the length of continues area, call mext_next_extent()
* specified with the ext_cur (initial value is holecheck_path) re-cursive,
* until find un-continuous extent, the start logical block number exceeds
* the block_end or the extent points to the last extent.
* 4:Exchange the original inode data with donor inode data
* from orig_page_offset to seq_end_page.
* The start indexes of data are specified as arguments.
* That of the original inode is orig_page_offset,
* and the donor inode is also orig_page_offset
* (To easily handle blocksize != pagesize case, the offset for the
* donor inode is block unit).
* 5:Update holecheck_path and orig_path to points a next proceeding extent,
* then returns to step 2.
* 6:Release holecheck_path, orig_path and set the len to moved_len
* which shows the number of moved blocks.
* The moved_len is useful for the command to calculate the file offset
* for starting next move extent ioctl.
* 7:Return 0 on success, or a negative error value on failure.
*/
int
ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 orig_start, __u64 donor_start, __u64 len,
__u64 *moved_len)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct inode *donor_inode = d_filp->f_dentry->d_inode;
struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
ext4_lblk_t block_start = orig_start;
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
ext4_lblk_t rest_blocks;
pgoff_t orig_page_offset = 0, seq_end_page;
int ret, depth, last_extent = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
int data_offset_in_page;
int block_len_in_page;
int uninit;
/* protect orig and donor against a truncate */
mext_inode_double_lock(orig_inode, donor_inode);
mext_double_down_read(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len, *moved_len);
mext_double_up_read(orig_inode, donor_inode);
if (ret)
goto out2;
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
block_end = block_start + len - 1;
if (file_end < block_end)
len -= block_end - file_end;
get_ext_path(orig_path, orig_inode, block_start, ret);
if (orig_path == NULL)
goto out2;
/* Get path structure to check the hole */
get_ext_path(holecheck_path, orig_inode, block_start, ret);
if (holecheck_path == NULL)
goto out;
depth = ext_depth(orig_inode);
ext_cur = holecheck_path[depth].p_ext;
if (ext_cur == NULL) {
ret = -EINVAL;
goto out;
}
/*
* Get proper extent whose ee_block is beyond block_start
* if block_start was within the hole.
*/
if (le32_to_cpu(ext_cur->ee_block) +
ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
last_extent = mext_next_extent(orig_inode,
holecheck_path, &ext_cur);
if (last_extent < 0) {
ret = last_extent;
goto out;
}
last_extent = mext_next_extent(orig_inode, orig_path,
&ext_dummy);
if (last_extent < 0) {
ret = last_extent;
goto out;
}
}
seq_start = block_start;
/* No blocks within the specified range. */
if (le32_to_cpu(ext_cur->ee_block) > block_end) {
ext4_debug("ext4 move extent: The specified range of file "
"may be the hole\n");
ret = -EINVAL;
goto out;
}
/* Adjust start blocks */
add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
ext4_ext_get_actual_len(ext_cur), block_end + 1) -
max(le32_to_cpu(ext_cur->ee_block), block_start);
while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
seq_blocks += add_blocks;
/* Adjust tail blocks */
if (seq_start + seq_blocks - 1 > block_end)
seq_blocks = block_end - seq_start + 1;
ext_prev = ext_cur;
last_extent = mext_next_extent(orig_inode, holecheck_path,
&ext_cur);
if (last_extent < 0) {
ret = last_extent;
break;
}
add_blocks = ext4_ext_get_actual_len(ext_cur);
/*
* Extend the length of contiguous block (seq_blocks)
* if extents are contiguous.
*/
if (ext4_can_extents_be_merged(orig_inode,
ext_prev, ext_cur) &&
block_end >= le32_to_cpu(ext_cur->ee_block) &&
!last_extent)
continue;
/* Is original extent is uninitialized */
uninit = ext4_ext_is_uninitialized(ext_prev);
data_offset_in_page = seq_start % blocks_per_page;
/*
* Calculate data blocks count that should be swapped
* at the first page.
*/
if (data_offset_in_page + seq_blocks > blocks_per_page) {
/* Swapped blocks are across pages */
block_len_in_page =
blocks_per_page - data_offset_in_page;
} else {
/* Swapped blocks are in a page */
block_len_in_page = seq_blocks;
}
orig_page_offset = seq_start >>
(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
seq_end_page = (seq_start + seq_blocks - 1) >>
(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
seq_start = le32_to_cpu(ext_cur->ee_block);
rest_blocks = seq_blocks;
/* Discard preallocations of two inodes */
down_write(&EXT4_I(orig_inode)->i_data_sem);
ext4_discard_preallocations(orig_inode);
up_write(&EXT4_I(orig_inode)->i_data_sem);
down_write(&EXT4_I(donor_inode)->i_data_sem);
ext4_discard_preallocations(donor_inode);
up_write(&EXT4_I(donor_inode)->i_data_sem);
while (orig_page_offset <= seq_end_page) {
/* Swap original branches with new branches */
ret = move_extent_par_page(o_filp, donor_inode,
orig_page_offset,
data_offset_in_page,
block_len_in_page, uninit);
if (ret < 0)
goto out;
orig_page_offset++;
/* Count how many blocks we have exchanged */
*moved_len += block_len_in_page;
BUG_ON(*moved_len > len);
data_offset_in_page = 0;
rest_blocks -= block_len_in_page;
if (rest_blocks > blocks_per_page)
block_len_in_page = blocks_per_page;
else
block_len_in_page = rest_blocks;
}
/* Decrease buffer counter */
if (holecheck_path)
ext4_ext_drop_refs(holecheck_path);
get_ext_path(holecheck_path, orig_inode,
seq_start, ret);
if (holecheck_path == NULL)
break;
depth = holecheck_path->p_depth;
/* Decrease buffer counter */
if (orig_path)
ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, seq_start, ret);
if (orig_path == NULL)
break;
ext_cur = holecheck_path[depth].p_ext;
add_blocks = ext4_ext_get_actual_len(ext_cur);
seq_blocks = 0;
}
out:
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
}
if (holecheck_path) {
ext4_ext_drop_refs(holecheck_path);
kfree(holecheck_path);
}
out2:
mext_inode_double_unlock(orig_inode, donor_inode);
if (ret)
return ret;
/* All of the specified blocks must be exchanged in succeed */
BUG_ON(*moved_len != len);
return 0;
}
......@@ -1782,7 +1782,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode (handle, dir, mode);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
......@@ -1816,7 +1816,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
......@@ -1853,7 +1853,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
&dentry->d_name, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
......@@ -2264,7 +2265,8 @@ static int ext4_symlink(struct inode *dir,
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
......
......@@ -37,7 +37,6 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/ctype.h>
#include <linux/marker.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <asm/uaccess.h>
......@@ -47,6 +46,9 @@
#include "xattr.h"
#include "acl.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
static int default_mb_history_length = 1000;
module_param_named(default_mb_history_length, default_mb_history_length,
......@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (journal)
jbd2_journal_abort(journal, -EIO);
}
......@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
......@@ -1474,7 +1476,7 @@ static int parse_options(char *options, struct super_block *sb,
break;
#endif
case Opt_abort:
set_opt(sbi->s_mount_opt, ABORT);
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
clear_opt(sbi->s_mount_opt, BARRIER);
......@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
"bpg=%lu, ipg=%lu, mo=%04lx]\n",
"bpg=%lu, ipg=%lu, mo=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
......@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
......@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
ATTR_LIST(mb_max_to_scan),
ATTR_LIST(mb_min_to_scan),
......@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
int ret = 0;
tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
trace_ext4_sync_fs(sb, wait);
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
if (wait)
jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
......@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, __func__, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
......@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) {
if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
goto restore_opts;
}
......
......@@ -20,9 +20,9 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/marker.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <trace/events/jbd2.h>
/*
* Unlink a buffer from a transaction checkpoint list.
......@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* journal straight away.
*/
result = jbd2_cleanup_journal_tail(journal);
trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d",
journal->j_devname, result);
trace_jbd2_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
......
......@@ -16,7 +16,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/marker.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
......@@ -26,6 +25,7 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <trace/events/jbd2.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
......@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
* block allocation with delalloc. We need to write
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
err = journal_submit_inode_data_buffers(mapping);
if (!ret)
ret = err;
......@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
trace_mark(jbd2_start_commit, "dev %s transaction %d",
journal->j_devname, commit_transaction->t_tid);
trace_jbd2_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
......@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
if (commit_transaction->t_synchronous_commit)
write_op = WRITE_SYNC_PLUG;
trace_jbd2_commit_locking(journal, commit_transaction);
stats.u.run.rs_wait = commit_transaction->t_max_wait;
stats.u.run.rs_locked = jiffies;
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
......@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_switch_revoke_table(journal);
trace_jbd2_commit_flushing(journal, commit_transaction);
stats.u.run.rs_flushing = jiffies;
stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
stats.u.run.rs_flushing);
......@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction);
stats.u.run.rs_logging = jiffies;
stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
stats.u.run.rs_logging);
......@@ -1054,9 +1057,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction);
trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
journal->j_devname, commit_transaction->t_tid,
journal->j_tail_sequence);
trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free)
......
......@@ -38,6 +38,10 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/math64.h>
#include <linux/hash.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
......@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
jbd2_journal_destroy_caches();
}
/*
* jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*
* The caller should use rcu_read_lock() in order to make sure the
* device name stays valid until its done with it. We use
* rcu_read_lock() as well to make sure we're safe in case the caller
* gets sloppy, and because rcu_read_lock() is cheap and can be safely
* nested.
*/
struct devname_cache {
struct rcu_head rcu;
dev_t device;
char devname[BDEVNAME_SIZE];
};
#define CACHE_SIZE_BITS 6
static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
static DEFINE_SPINLOCK(devname_cache_lock);
static void free_devcache(struct rcu_head *rcu)
{
kfree(rcu);
}
const char *jbd2_dev_to_name(dev_t device)
{
int i = hash_32(device, CACHE_SIZE_BITS);
char *ret;
struct block_device *bd;
rcu_read_lock();
if (devcache[i] && devcache[i]->device == device) {
ret = devcache[i]->devname;
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
call_rcu(&devcache[i]->rcu, free_devcache);
}
devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
if (!devcache[i]) {
spin_unlock(&devname_cache_lock);
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
}
devcache[i]->device = device;
bd = bdget(device);
if (bd) {
bdevname(bd, devcache[i]->devname);
bdput(bd);
} else
__bdevname(device, devcache[i]->devname);
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
EXPORT_SYMBOL(jbd2_dev_to_name);
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
......
......@@ -1547,36 +1547,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
return;
}
/*
* jbd2_journal_try_to_free_buffers() could race with
* jbd2_journal_commit_transaction(). The later might still hold the
* reference count to the buffers when inspecting them on
* t_syncdata_list or t_locked_list.
*
* jbd2_journal_try_to_free_buffers() will call this function to
* wait for the current transaction to finish syncing data buffers, before
* try to free that buffer.
*
* Called with journal->j_state_lock hold.
*/
static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
{
transaction_t *transaction;
tid_t tid;
spin_lock(&journal->j_state_lock);
transaction = journal->j_committing_transaction;
if (!transaction) {
spin_unlock(&journal->j_state_lock);
return;
}
tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid);
}
/**
* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
......@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
ret = try_to_free_buffers(page);
/*
* There are a number of places where jbd2_journal_try_to_free_buffers()
* could race with jbd2_journal_commit_transaction(), the later still
* holds the reference to the buffers to free while processing them.
* try_to_free_buffers() failed to free those buffers. Some of the
* caller of releasepage() request page buffers to be dropped, otherwise
* treat the fail-to-free as errors (such as generic_file_direct_IO())
*
* So, if the caller of try_to_release_page() wants the synchronous
* behaviour(i.e make sure buffers are dropped upon return),
* let's wait for the current transaction to finish flush of
* dirty data buffers, then try to free those buffers again,
* with the journal locked.
*/
if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
jbd2_journal_wait_for_transaction_sync_data(journal);
ret = try_to_free_buffers(page);
}
busy:
return ret;
}
......
......@@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode);
#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
#define JBUFFER_TRACE(jh, info) do {} while (0)
/*
* jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*/
extern const char *jbd2_dev_to_name(dev_t device);
#endif /* __KERNEL__ */
#endif /* _LINUX_JBD2_H */
#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXT4_H
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ext4
#include <linux/writeback.h>
#include "../../../fs/ext4/ext4.h"
#include "../../../fs/ext4/mballoc.h"
#include <linux/tracepoint.h>
TRACE_EVENT(ext4_free_inode,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
__field( gid_t, gid )
__field( blkcnt_t, blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
__entry->gid = inode->i_gid;
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode,
__entry->uid, __entry->gid, __entry->blocks)
);
TRACE_EVENT(ext4_request_inode,
TP_PROTO(struct inode *dir, int mode),
TP_ARGS(dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_allocate_inode,
TP_PROTO(struct inode *inode, struct inode *dir, int mode),
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %s ino %lu dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_write_begin,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int flags),
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->flags)
);
TRACE_EVENT(ext4_ordered_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_writeback_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_journalled_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_da_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_da_writepages,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
TP_ARGS(inode, wbc),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( long, nr_to_write )
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
__field( char, nonblocking )
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, for_writepages )
__field( char, range_cyclic )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->range_start = wbc->range_start;
__entry->range_end = wbc->range_end;
__entry->nonblocking = wbc->nonblocking;
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_reclaim = wbc->for_reclaim;
__entry->for_writepages = wbc->for_writepages;
__entry->range_cyclic = wbc->range_cyclic;
),
TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
__entry->range_end, __entry->nonblocking,
__entry->for_kupdate, __entry->for_reclaim,
__entry->for_writepages, __entry->range_cyclic)
);
TRACE_EVENT(ext4_da_writepages_result,
TP_PROTO(struct inode *inode, struct writeback_control *wbc,
int ret, int pages_written),
TP_ARGS(inode, wbc, ret, pages_written),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, ret )
__field( int, pages_written )
__field( long, pages_skipped )
__field( char, encountered_congestion )
__field( char, more_io )
__field( char, no_nrwrite_index_update )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->pages_skipped = wbc->pages_skipped;
__entry->encountered_congestion = wbc->encountered_congestion;
__entry->more_io = wbc->more_io;
__entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
),
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->encountered_congestion, __entry->more_io,
__entry->no_nrwrite_index_update)
);
TRACE_EVENT(ext4_da_write_begin,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int flags),
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->flags)
);
TRACE_EVENT(ext4_da_write_end,
TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
unsigned int copied),
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
__field( unsigned int, copied )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
__entry->copied)
);
TRACE_EVENT(ext4_normal_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_journalled_writepage,
TP_PROTO(struct inode *inode, struct page *page),
TP_ARGS(inode, page),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_discard_blocks,
TP_PROTO(struct super_block *sb, unsigned long long blk,
unsigned long long count),
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u64, blk )
__field( __u64, count )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->blk = blk;
__entry->count = count;
),
TP_printk("dev %s blk %llu count %llu",
jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
);
TRACE_EVENT(ext4_mb_new_inode_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
__field( __u64, pa_lstart )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
__entry->pa_len, __entry->pa_lstart)
);
TRACE_EVENT(ext4_mb_new_group_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
__field( __u64, pa_lstart )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
__entry->pa_len, __entry->pa_lstart)
);
TRACE_EVENT(ext4_mb_release_inode_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa,
unsigned long long block, unsigned int count),
TP_ARGS(ac, pa, block, count),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( __u32, count )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->block = block;
__entry->count = count;
),
TP_printk("dev %s ino %lu block %llu count %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
__entry->count)
);
TRACE_EVENT(ext4_mb_release_group_pa,
TP_PROTO(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
),
TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
),
TP_printk("dev %s pstart %llu len %u",
jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len)
);
TRACE_EVENT(ext4_discard_preallocations,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
TP_PROTO(struct super_block *sb, int needed),
TP_ARGS(sb, needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, needed )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->needed = needed;
),
TP_printk("dev %s needed %d",
jbd2_dev_to_name(__entry->dev), __entry->needed)
);
TRACE_EVENT(ext4_request_blocks,
TP_PROTO(struct ext4_allocation_request *ar),
TP_ARGS(ar),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u64, logical )
__field( __u64, goal )
__field( __u64, lleft )
__field( __u64, lright )
__field( __u64, pleft )
__field( __u64, pright )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
__entry->lleft = ar->lleft;
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
__entry->len,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
(unsigned long long) __entry->lright,
(unsigned long long) __entry->pleft,
(unsigned long long) __entry->pright)
);
TRACE_EVENT(ext4_allocate_blocks,
TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block),
TP_ARGS(ar, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u64, logical )
__field( __u64, goal )
__field( __u64, lleft )
__field( __u64, lright )
__field( __u64, pleft )
__field( __u64, pright )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
__entry->lleft = ar->lleft;
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
),
TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
__entry->len, __entry->block,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
(unsigned long long) __entry->lright,
(unsigned long long) __entry->pleft,
(unsigned long long) __entry->pright)
);
TRACE_EVENT(ext4_free_blocks,
TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
int metadata),
TP_ARGS(inode, block, count, metadata),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned long, count )
__field( int, metadata )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
__entry->metadata = metadata;
),
TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
__entry->count, __entry->metadata)
);
TRACE_EVENT(ext4_sync_file,
TP_PROTO(struct file *file, struct dentry *dentry, int datasync),
TP_ARGS(file, dentry, datasync),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
),
TP_fast_assign(
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
TP_printk("dev %s ino %ld parent %ld datasync %d ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent,
__entry->datasync)
);
TRACE_EVENT(ext4_sync_fs,
TP_PROTO(struct super_block *sb, int wait),
TP_ARGS(sb, wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, wait )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->wait = wait;
),
TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
__entry->wait)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
#if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_JBD2_H
#include <linux/jbd2.h>
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM jbd2
TRACE_EVENT(jbd2_checkpoint,
TP_PROTO(journal_t *journal, int result),
TP_ARGS(journal, result),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, result )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->result = result;
),
TP_printk("dev %s result %d",
jbd2_dev_to_name(__entry->dev), __entry->result)
);
TRACE_EVENT(jbd2_start_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_locking,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_flushing,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_commit_logging,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
TP_printk("dev %s transaction %d sync %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit)
);
TRACE_EVENT(jbd2_end_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
__entry->head = journal->j_tail_sequence;
),
TP_printk("dev %s transaction %d sync %d head %d",
jbd2_dev_to_name(__entry->dev), __entry->transaction,
__entry->sync_commit, __entry->head)
);
TRACE_EVENT(jbd2_submit_inode_data,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino)
);
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment