Commit 4dfa66bd authored by Tristan Ye's avatar Tristan Ye

Ocfs2/move_extents: Let defrag handle partial extent moving.

We're going to support partial extent moving, which may split entire extent
movement into pieces to compromise the insuffice allocations, it eases the
'ENSPC' pain and makes the whole moving much less likely to fail, the downside
is it may make the fs even more fragmented before moving, just let the userspace
make a trade-off here.
Signed-off-by: default avatarTristan Ye <tristan.ye@oracle.com>
parent 53069d4e
...@@ -44,6 +44,7 @@ struct ocfs2_move_extents_context { ...@@ -44,6 +44,7 @@ struct ocfs2_move_extents_context {
struct inode *inode; struct inode *inode;
struct file *file; struct file *file;
int auto_defrag; int auto_defrag;
int partial;
int credits; int credits;
u32 new_phys_cpos; u32 new_phys_cpos;
u32 clusters_moved; u32 clusters_moved;
...@@ -221,9 +222,9 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, ...@@ -221,9 +222,9 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
* crash happens anywhere. * crash happens anywhere.
*/ */
static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
u32 cpos, u32 phys_cpos, u32 len, int ext_flags) u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
{ {
int ret, credits = 0, extra_blocks = 0; int ret, credits = 0, extra_blocks = 0, partial = context->partial;
handle_t *handle; handle_t *handle;
struct inode *inode = context->inode; struct inode *inode = context->inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
...@@ -232,7 +233,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -232,7 +233,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
u32 new_phys_cpos, new_len; u32 new_phys_cpos, new_len;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
OCFS2_HAS_REFCOUNT_FL)); OCFS2_HAS_REFCOUNT_FL));
...@@ -249,7 +250,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -249,7 +250,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
ret = ocfs2_prepare_refcount_change_for_del(inode, ret = ocfs2_prepare_refcount_change_for_del(inode,
context->refcount_loc, context->refcount_loc,
phys_blkno, phys_blkno,
len, *len,
&credits, &credits,
&extra_blocks); &extra_blocks);
if (ret) { if (ret) {
...@@ -258,7 +259,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -258,7 +259,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
} }
} }
ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
&context->meta_ac, &context->meta_ac,
&context->data_ac, &context->data_ac,
extra_blocks, &credits); extra_blocks, &credits);
...@@ -291,7 +292,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -291,7 +292,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
goto out_unlock_mutex; goto out_unlock_mutex;
} }
ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, len, ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
&new_phys_cpos, &new_len); &new_phys_cpos, &new_len);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
...@@ -299,33 +300,36 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, ...@@ -299,33 +300,36 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
} }
/* /*
* we're not quite patient here to make multiple attempts for claiming * allowing partial extent moving is kind of 'pros and cons', it makes
* enough clusters, failure to claim clusters per-requested is not a * whole defragmentation less likely to fail, on the contrary, the bad
* disaster though, it can only mean partial range of defragmentation * thing is it may make the fs even more fragmented after moving, let
* or extent movements gets gone, users anyway is able to have another * userspace make a good decision here.
* try as they wish anytime, since they're going to be returned a
* '-ENOSPC' and completed length of this movement.
*/ */
if (new_len != len) { if (new_len != *len) {
mlog(0, "len_claimed: %u, len: %u\n", new_len, len); mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; if (!partial) {
ret = -ENOSPC; context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
goto out_commit; ret = -ENOSPC;
goto out_commit;
}
} }
mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
phys_cpos, new_phys_cpos); phys_cpos, new_phys_cpos);
ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
new_phys_cpos, ext_flags); new_phys_cpos, ext_flags);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
if (partial && (new_len != *len))
*len = new_len;
/* /*
* Here we should write the new page out first if we are * Here we should write the new page out first if we are
* in write-back mode. * in write-back mode.
*/ */
ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
...@@ -926,7 +930,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, ...@@ -926,7 +930,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
cpos, phys_cpos, alloc_size, len_defraged); cpos, phys_cpos, alloc_size, len_defraged);
ret = ocfs2_defrag_extent(context, cpos, phys_cpos, ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
alloc_size, flags); &alloc_size, flags);
} else { } else {
ret = ocfs2_move_extent(context, cpos, phys_cpos, ret = ocfs2_move_extent(context, cpos, phys_cpos,
&new_phys_cpos, alloc_size, &new_phys_cpos, alloc_size,
...@@ -1101,6 +1105,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) ...@@ -1101,6 +1105,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
* any thought? * any thought?
*/ */
range.me_threshold = 1024 * 1024; range.me_threshold = 1024 * 1024;
if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
context->partial = 1;
} else { } else {
/* /*
* first best-effort attempt to validate and adjust the goal * first best-effort attempt to validate and adjust the goal
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment