Commit de285c52, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ext3: fix data=journal mode

ext3's fully data-journalled mode has been broken for a year.  This patch
fixes it up.

The prepare_write/commit_write/writepage implementations have been split up.
Instead of having each function handle all three journalling modes, we now have
three separate sets of address_space_operations.

The problematic part of data=journal is MAP_SHARED writepage traffic: pages
which don't have buffers.  In 2.4 these were cheatingly treated as
data-ordered buffers and that caused several nasty problems.

Here we do it properly: writepage traffic is fully journalled.  This means
that the various workarounds for the 2.4 scheme can be removed, when I
remember where they all are.

The PG_checked flag has been borrowed: it is set in the atomic set_page_dirty
a_op to tell the subsequent writepage() that this page needs to have buffers
attached, dirtied and journalled.

This rather defines PG_checked as "fs-private info in page->flags" and it
should be renamed sometime.
parent 8b7eec3b
This diff is collapsed.
......@@ -1644,10 +1644,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
if (!IS_ERR(inode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
if (ext3_should_writeback_data(inode))
inode->i_mapping->a_ops = &ext3_writeback_aops;
else
inode->i_mapping->a_ops = &ext3_aops;
ext3_set_aops(inode);
err = ext3_add_nondir(handle, dentry, inode);
}
ext3_journal_stop(handle);
......@@ -2100,10 +2097,7 @@ static int ext3_symlink (struct inode * dir,
if (l > sizeof (EXT3_I(inode)->i_data)) {
inode->i_op = &ext3_symlink_inode_operations;
if (ext3_should_writeback_data(inode))
inode->i_mapping->a_ops = &ext3_writeback_aops;
else
inode->i_mapping->a_ops = &ext3_aops;
ext3_set_aops(inode);
/*
* page_symlink() calls into ext3_prepare/commit_write.
* We have a transaction open. All is sweetness. It also sets
......
......@@ -1120,7 +1120,6 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
/* And this case is illegal: we can't reuse another
* transaction's data buffer, ever. */
/* FIXME: writepage() should be journalled */
J_ASSERT_JH(jh, jh->b_jlist != BJ_SyncData);
goto done_locked;
}
......
......@@ -735,6 +735,7 @@ extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_set_aops(struct inode *inode);
/* ioctl.c */
extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
......@@ -783,10 +784,6 @@ extern struct file_operations ext3_dir_operations;
extern struct inode_operations ext3_file_inode_operations;
extern struct file_operations ext3_file_operations;
/* inode.c */
extern struct address_space_operations ext3_aops;
extern struct address_space_operations ext3_writeback_aops;
/* namei.c */
extern struct inode_operations ext3_dir_inode_operations;
extern struct inode_operations ext3_special_inode_operations;
......
......@@ -200,6 +200,7 @@ extern void get_full_page_state(struct page_state *ret);
#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)
#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment