Commit b46637d5 authored by Ryan Ding, committed by Linus Torvalds

ocfs2: use c_new to indicate newly allocated extents

This change is needed to support direct I/O in ocfs2_write_begin_nolock()
and ocfs2_write_end_nolock().

There is a problem in ocfs2's direct I/O implementation: if the system
crashes after extents have been allocated but before the data has been
written, we are left with an extent containing dirty data on disk.  This
violates the data=ordered journaling semantics, which require that
metadata changes take effect only after the data has been written to
disk.  To resolve this issue, a direct write can use the UNWRITTEN flag
to mark an extent during direct data writeback.  The direct write
procedure should act in the following order:

phase 1: alloc extent with UNWRITTEN flag
phase 2: submit direct data to disk, add zero page to page cache
phase 3: clear UNWRITTEN flag when data has been written to disk

This patch renames the 'c_unwritten' member of
ocfs2_write_cluster_desc to 'c_clear_unwritten', which indicates whether
the UNWRITTEN flag should be cleared, regardless of whether the extent
has already been allocated.  'c_new' is now used to indicate a newly
allocated extent.  This lets the direct I/O procedure use
c_clear_unwritten to control the UNWRITTEN bit on an extent.
Signed-off-by: Ryan Ding <ryan.ding@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c1ad1e3c
...@@ -1212,7 +1212,7 @@ struct ocfs2_write_cluster_desc { ...@@ -1212,7 +1212,7 @@ struct ocfs2_write_cluster_desc {
* filled. * filled.
*/ */
unsigned c_new; unsigned c_new;
unsigned c_unwritten; unsigned c_clear_unwritten;
unsigned c_needs_zero; unsigned c_needs_zero;
}; };
...@@ -1588,19 +1588,19 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, ...@@ -1588,19 +1588,19 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
* Prepare a single cluster for write one cluster into the file. * Prepare a single cluster for write one cluster into the file.
*/ */
static int ocfs2_write_cluster(struct address_space *mapping, static int ocfs2_write_cluster(struct address_space *mapping,
u32 phys, unsigned int unwritten, u32 phys, unsigned int new,
unsigned int clear_unwritten,
unsigned int should_zero, unsigned int should_zero,
struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *meta_ac,
struct ocfs2_write_ctxt *wc, u32 cpos, struct ocfs2_write_ctxt *wc, u32 cpos,
loff_t user_pos, unsigned user_len) loff_t user_pos, unsigned user_len)
{ {
int ret, i, new; int ret, i;
u64 v_blkno, p_blkno; u64 v_blkno, p_blkno;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct ocfs2_extent_tree et; struct ocfs2_extent_tree et;
new = phys == 0 ? 1 : 0;
if (new) { if (new) {
u32 tmp_pos; u32 tmp_pos;
...@@ -1610,9 +1610,9 @@ static int ocfs2_write_cluster(struct address_space *mapping, ...@@ -1610,9 +1610,9 @@ static int ocfs2_write_cluster(struct address_space *mapping,
*/ */
tmp_pos = cpos; tmp_pos = cpos;
ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
&tmp_pos, 1, 0, wc->w_di_bh, &tmp_pos, 1, !clear_unwritten,
wc->w_handle, data_ac, wc->w_di_bh, wc->w_handle,
meta_ac, NULL); data_ac, meta_ac, NULL);
/* /*
* This shouldn't happen because we must have already * This shouldn't happen because we must have already
* calculated the correct meta data allocation required. The * calculated the correct meta data allocation required. The
...@@ -1629,7 +1629,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, ...@@ -1629,7 +1629,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} }
} else if (unwritten) { } else if (clear_unwritten) {
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
wc->w_di_bh); wc->w_di_bh);
ret = ocfs2_mark_extent_written(inode, &et, ret = ocfs2_mark_extent_written(inode, &et,
...@@ -1712,7 +1712,8 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, ...@@ -1712,7 +1712,8 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
local_len = osb->s_clustersize - cluster_off; local_len = osb->s_clustersize - cluster_off;
ret = ocfs2_write_cluster(mapping, desc->c_phys, ret = ocfs2_write_cluster(mapping, desc->c_phys,
desc->c_unwritten, desc->c_new,
desc->c_clear_unwritten,
desc->c_needs_zero, desc->c_needs_zero,
data_ac, meta_ac, data_ac, meta_ac,
wc, desc->c_cpos, pos, local_len); wc, desc->c_cpos, pos, local_len);
...@@ -1857,11 +1858,12 @@ static int ocfs2_populate_write_desc(struct inode *inode, ...@@ -1857,11 +1858,12 @@ static int ocfs2_populate_write_desc(struct inode *inode,
if (phys == 0) { if (phys == 0) {
desc->c_new = 1; desc->c_new = 1;
desc->c_needs_zero = 1; desc->c_needs_zero = 1;
desc->c_clear_unwritten = 1;
*clusters_to_alloc = *clusters_to_alloc + 1; *clusters_to_alloc = *clusters_to_alloc + 1;
} }
if (ext_flags & OCFS2_EXT_UNWRITTEN) { if (ext_flags & OCFS2_EXT_UNWRITTEN) {
desc->c_unwritten = 1; desc->c_clear_unwritten = 1;
desc->c_needs_zero = 1; desc->c_needs_zero = 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment