Commit 88bd02c9 authored by Jaegeuk Kim's avatar Jaegeuk Kim

f2fs: fix conditions to remain recovery information in f2fs_sync_file

This patch revisited whole the recovery information during the f2fs_sync_file.

In this patch, there are three information to make a decision.

a) IS_CHECKPOINTED,	/* is it checkpointed before? */
b) HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
c) HAS_LAST_FSYNC,	/* has the latest node fsync mark? */

And, the scenarios for our rule are based on:

[Term] F: fsync_mark, D: dentry_mark

1. inode(x) | CP | inode(x) | dnode(F)
2. inode(x) | CP | inode(F) | dnode(F)
3. inode(x) | CP | dnode(F) | inode(x) | inode(F)
4. inode(x) | CP | dnode(F) | inode(F)
5. CP | inode(x) | dnode(F) | inode(DF)
6. CP | inode(DF) | dnode(F)
7. CP | dnode(F) | inode(DF)
8. CP | dnode(F) | inode(x) | inode(DF)

For example, #3, the three conditions should be changed as follows.

   inode(x) | CP | dnode(F) | inode(x) | inode(F)
a)    x       o      o          o          o
b)    x       x      x          x          o
c)    x       o      o          x          o

If f2fs_sync_file stops   ------^,
 it should write inode(F)    --------------^

So, the need_inode_block_update should return true, since
 c) get_nat_flag(e, HAS_LAST_FSYNC), is false.

For example, #8,
      CP | alloc | dnode(F) | inode(x) | inode(DF)
a)    o      x        x          x          x
b)    x               x          x          o
c)    o               o          x          o

If f2fs_sync_file stops   -------^,
 it should write inode(DF)    --------------^

Note that, the roll-forward policy should follow this rule, which means,
if there are any missing blocks, we doesn't need to recover that inode.
Signed-off-by: default avatarHuang Ying <ying.huang@intel.com>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 7ef35e3b
...@@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, ...@@ -1089,9 +1089,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
if (check_direct_IO(inode, rw, iter, offset)) if (check_direct_IO(inode, rw, iter, offset))
return 0; return 0;
/* clear fsync mark to recover these blocks */
fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);
trace_f2fs_direct_IO_enter(inode, offset, count, rw); trace_f2fs_direct_IO_enter(inode, offset, count, rw);
err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
......
...@@ -1224,9 +1224,9 @@ struct dnode_of_data; ...@@ -1224,9 +1224,9 @@ struct dnode_of_data;
struct node_info; struct node_info;
bool available_free_memory(struct f2fs_sb_info *, int); bool available_free_memory(struct f2fs_sb_info *, int);
int is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
bool fsync_mark_done(struct f2fs_sb_info *, nid_t); bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
void fsync_mark_clear(struct f2fs_sb_info *, nid_t); bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_inode_blocks(struct inode *, pgoff_t);
......
...@@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -207,15 +207,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
up_write(&fi->i_sem); up_write(&fi->i_sem);
} }
} else { } else {
/* if there is no written node page, write its inode page */ sync_nodes:
while (!sync_node_pages(sbi, ino, &wbc)) { sync_node_pages(sbi, ino, &wbc);
if (fsync_mark_done(sbi, ino))
goto out; if (need_inode_block_update(sbi, ino)) {
mark_inode_dirty_sync(inode); mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL); ret = f2fs_write_inode(inode, NULL);
if (ret) if (ret)
goto out; goto out;
goto sync_nodes;
} }
ret = wait_on_node_pages_writeback(sbi, ino); ret = wait_on_node_pages_writeback(sbi, ino);
if (ret) if (ret)
goto out; goto out;
......
...@@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) ...@@ -123,44 +123,48 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
kmem_cache_free(nat_entry_slab, e); kmem_cache_free(nat_entry_slab, e);
} }
int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{ {
struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e; struct nat_entry *e;
int is_cp = 1; bool is_cp = true;
read_lock(&nm_i->nat_tree_lock); read_lock(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid); e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED)) if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = 0; is_cp = false;
read_unlock(&nm_i->nat_tree_lock); read_unlock(&nm_i->nat_tree_lock);
return is_cp; return is_cp;
} }
bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
{ {
struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e; struct nat_entry *e;
bool fsync_done = false; bool fsynced = false;
read_lock(&nm_i->nat_tree_lock); read_lock(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid); e = __lookup_nat_cache(nm_i, ino);
if (e) if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
fsync_done = get_nat_flag(e, HAS_FSYNC_MARK); fsynced = true;
read_unlock(&nm_i->nat_tree_lock); read_unlock(&nm_i->nat_tree_lock);
return fsync_done; return fsynced;
} }
void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{ {
struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e; struct nat_entry *e;
bool need_update = true;
write_lock(&nm_i->nat_tree_lock); read_lock(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid); e = __lookup_nat_cache(nm_i, ino);
if (e) if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
set_nat_flag(e, HAS_FSYNC_MARK, false); (get_nat_flag(e, IS_CHECKPOINTED) ||
write_unlock(&nm_i->nat_tree_lock); get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
read_unlock(&nm_i->nat_tree_lock);
return need_update;
} }
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
...@@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) ...@@ -176,7 +180,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
} }
memset(new, 0, sizeof(struct nat_entry)); memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid); nat_set_nid(new, nid);
set_nat_flag(new, IS_CHECKPOINTED, true); nat_reset_flag(new);
list_add_tail(&new->list, &nm_i->nat_entries); list_add_tail(&new->list, &nm_i->nat_entries);
nm_i->nat_cnt++; nm_i->nat_cnt++;
return new; return new;
...@@ -244,12 +248,17 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, ...@@ -244,12 +248,17 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */ /* change address */
nat_set_blkaddr(e, new_blkaddr); nat_set_blkaddr(e, new_blkaddr);
if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e); __set_nat_cache_dirty(nm_i, e);
/* update fsync_mark if its inode nat entry is still alive */ /* update fsync_mark if its inode nat entry is still alive */
e = __lookup_nat_cache(nm_i, ni->ino); e = __lookup_nat_cache(nm_i, ni->ino);
if (e) if (e) {
set_nat_flag(e, HAS_FSYNC_MARK, fsync_done); if (fsync_done && ni->nid == ni->ino)
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
write_unlock(&nm_i->nat_tree_lock); write_unlock(&nm_i->nat_tree_lock);
} }
...@@ -1121,10 +1130,14 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, ...@@ -1121,10 +1130,14 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
/* called by fsync() */ /* called by fsync() */
if (ino && IS_DNODE(page)) { if (ino && IS_DNODE(page)) {
int mark = !is_checkpointed_node(sbi, ino);
set_fsync_mark(page, 1); set_fsync_mark(page, 1);
if (IS_INODE(page)) if (IS_INODE(page)) {
set_dentry_mark(page, mark); if (!is_checkpointed_node(sbi, ino) &&
!has_fsynced_inode(sbi, ino))
set_dentry_mark(page, 1);
else
set_dentry_mark(page, 0);
}
nwritten++; nwritten++;
} else { } else {
set_fsync_mark(page, 0); set_fsync_mark(page, 0);
...@@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) ...@@ -1912,6 +1925,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
write_unlock(&nm_i->nat_tree_lock); write_unlock(&nm_i->nat_tree_lock);
} else { } else {
write_lock(&nm_i->nat_tree_lock); write_lock(&nm_i->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(nm_i, ne); __clear_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock); write_unlock(&nm_i->nat_tree_lock);
} }
......
...@@ -41,7 +41,8 @@ struct node_info { ...@@ -41,7 +41,8 @@ struct node_info {
enum { enum {
IS_CHECKPOINTED, /* is it checkpointed before? */ IS_CHECKPOINTED, /* is it checkpointed before? */
HAS_FSYNC_MARK, /* has the latest node fsync mark? */ HAS_FSYNCED_INODE, /* is the inode fsynced before? */
HAS_LAST_FSYNC, /* has the latest node fsync mark? */
}; };
struct nat_entry { struct nat_entry {
...@@ -60,15 +61,9 @@ struct nat_entry { ...@@ -60,15 +61,9 @@ struct nat_entry {
#define nat_set_version(nat, v) (nat->ni.version = v) #define nat_set_version(nat, v) (nat->ni.version = v)
#define __set_nat_cache_dirty(nm_i, ne) \ #define __set_nat_cache_dirty(nm_i, ne) \
do { \ list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
set_nat_flag(ne, IS_CHECKPOINTED, false); \
list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
} while (0)
#define __clear_nat_cache_dirty(nm_i, ne) \ #define __clear_nat_cache_dirty(nm_i, ne) \
do { \ list_move_tail(&ne->list, &nm_i->nat_entries);
set_nat_flag(ne, IS_CHECKPOINTED, true); \
list_move_tail(&ne->list, &nm_i->nat_entries); \
} while (0)
#define inc_node_version(version) (++version) #define inc_node_version(version) (++version)
static inline void set_nat_flag(struct nat_entry *ne, static inline void set_nat_flag(struct nat_entry *ne,
...@@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) ...@@ -87,6 +82,14 @@ static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
return ne->flag & mask; return ne->flag & mask;
} }
static inline void nat_reset_flag(struct nat_entry *ne)
{
/* these states can be set only after checkpoint was done */
set_nat_flag(ne, IS_CHECKPOINTED, true);
set_nat_flag(ne, HAS_FSYNCED_INODE, false);
set_nat_flag(ne, HAS_LAST_FSYNC, true);
}
static inline void node_info_from_raw_nat(struct node_info *ni, static inline void node_info_from_raw_nat(struct node_info *ni,
struct f2fs_nat_entry *raw_ne) struct f2fs_nat_entry *raw_ne)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment