[PATCH] ext3: fix data=journal mode

ext3's fully data-journalled mode has been broken for a year. This patch fixes it up. The prepare_write/commit_write/writepage implementations have been split up. Instead of having each function handle all three journalling mode we now have three separate sets of address_space_operations. The problematic part of data=journal is MAP_SHARED writepage traffic: pages which don't have buffers. In 2.4 these were cheatingly treated as data-ordered buffers and that caused several nasty problems. Here we do it properly: writepage traffic is fully journalled. This means that the various workarounds for the 2.4 scheme can be removed, when I remember where they all are. The PG_checked flag has been borrowed: it it set in the atomic set_page_dirty a_op to tell the subsequent writepage() that this page needs to have buffers attached, dirtied and journalled. This rather defines PG_checked as "fs-private info in page->flags" and it should be renamed sometime.

[PATCH] ext3: fix data=journal mode
ext3's fully data-journalled mode has been broken for a year. This patch fixes it up. The prepare_write/commit_write/writepage implementations have been split up. Instead of having each function handle all three journalling mode we now have three separate sets of address_space_operations. The problematic part of data=journal is MAP_SHARED writepage traffic: pages which don't have buffers. In 2.4 these were cheatingly treated as data-ordered buffers and that caused several nasty problems. Here we do it properly: writepage traffic is fully journalled. This means that the various workarounds for the 2.4 scheme can be removed, when I remember where they all are. The PG_checked flag has been borrowed: it it set in the atomic set_page_dirty a_op to tell the subsequent writepage() that this page needs to have buffers attached, dirtied and journalled. This rather defines PG_checked as "fs-private info in page->flags" and it should be renamed sometime.
de285c52 · Andrew Morton · Linus Torvalds · 8b7eec3b · de285c52 · de285c52
Commit de285c52 authored Jun 17, 2003 by Andrew Morton Committed by Linus Torvalds Jun 17, 2003
5 changed files
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1130,52 +1130,79 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
 * buffers are managed internally.
 */

-static int ext3_commit_write(struct file *file, struct page *page,
+static int ext3_ordered_commit_write(struct file *file, struct page *page,
 			     unsigned from, unsigned to)
 {
 	handle_t *handle = ext3_journal_current_handle();
 	struct inode *inode = page->mapping->host;
 	int ret = 0, ret2;

-	if (ext3_should_journal_data(inode)) {
+	ret = walk_page_buffers(handle, page_buffers(page),
+		from, to, NULL, ext3_journal_dirty_data);
+
+	if (ret == 0) {
 		/*
-		 * Here we duplicate the generic_commit_write() functionality
+		 * generic_commit_write() will run mark_inode_dirty() if i_size
+		 * changes.  So let's piggyback the i_disksize mark_inode_dirty
+		 * into that.
 		 */
-		int partial = 0;
-		loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+		loff_t new_i_size;

-		ret = walk_page_buffers(handle, page_buffers(page),
-			from, to, &partial, commit_write_fn);
-		if (!partial)
-			SetPageUptodate(page);
-		if (pos > inode->i_size)
-			inode->i_size = pos;
-		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
-		if (inode->i_size > EXT3_I(inode)->i_disksize) {
-			EXT3_I(inode)->i_disksize = inode->i_size;
-			ret2 = ext3_mark_inode_dirty(handle, inode);
-			if (!ret) 
-				ret = ret2;
-		}
-	} else {
-		if (ext3_should_order_data(inode)) {
-			ret = walk_page_buffers(handle, page_buffers(page),
-				from, to, NULL, ext3_journal_dirty_data);
-		}
-		/* Be careful here if generic_commit_write becomes a
-		 * required invocation after block_prepare_write. */
-		if (ret == 0) {
-			/*
-			 * generic_commit_write() will run mark_inode_dirty()
-			 * if i_size changes.  So let's piggyback the
-			 * i_disksize mark_inode_dirty into that.
-			 */
-			loff_t new_i_size =
-				((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-			if (new_i_size > EXT3_I(inode)->i_disksize)
-				EXT3_I(inode)->i_disksize = new_i_size;
-			ret = generic_commit_write(file, page, from, to);
-		}
+		new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+		if (new_i_size > EXT3_I(inode)->i_disksize)
+			EXT3_I(inode)->i_disksize = new_i_size;
+		ret = generic_commit_write(file, page, from, to);
+	}
+	ret2 = ext3_journal_stop(handle);
+	if (!ret)
+		ret = ret2;
+	return ret;
+}
+
+static int ext3_writeback_commit_write(struct file *file, struct page *page,
+			     unsigned from, unsigned to)
+{
+	handle_t *handle = ext3_journal_current_handle();
+	struct inode *inode = page->mapping->host;
+	int ret = 0, ret2;
+	loff_t new_i_size;
+
+	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	if (new_i_size > EXT3_I(inode)->i_disksize)
+		EXT3_I(inode)->i_disksize = new_i_size;
+	ret = generic_commit_write(file, page, from, to);
+	ret2 = ext3_journal_stop(handle);
+	if (!ret)
+		ret = ret2;
+	return ret;
+}
+
+static int ext3_journalled_commit_write(struct file *file,
+			struct page *page, unsigned from, unsigned to)
+{
+	handle_t *handle = ext3_journal_current_handle();
+	struct inode *inode = page->mapping->host;
+	int ret = 0, ret2;
+	int partial = 0;
+	loff_t pos;
+
+	/*
+	 * Here we duplicate the generic_commit_write() functionality
+	 */
+	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	ret = walk_page_buffers(handle, page_buffers(page), from,
+				to, &partial, commit_write_fn);
+	if (!partial)
+		SetPageUptodate(page);
+	if (pos > inode->i_size)
+		inode->i_size = pos;
+	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+	if (inode->i_size > EXT3_I(inode)->i_disksize) {
+		EXT3_I(inode)->i_disksize = inode->i_size;
+		ret2 = ext3_mark_inode_dirty(handle, inode);
+		if (!ret) 
+			ret = ret2;
 	}
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
@@ -1302,52 +1329,44 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
 * We don't honour synchronous mounts for writepage().  That would be
 * disastrous.  Any write() or metadata operation will sync the fs for
 * us.
+ *
+ * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
+ * we don't need to open a transaction here.
 */
-static int ext3_writepage(struct page *page, struct writeback_control *wbc)
+static int ext3_ordered_writepage(struct page *page,
+			struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
 	struct buffer_head *page_bufs;
 	handle_t *handle = NULL;
-	int ret = 0, err;
-	int needed;
-	int order_data;
+	int ret = 0;
+	int err;

 	J_ASSERT(PageLocked(page));
 	
 	/*
-	 * We give up here if we're reentered, because it might be
-	 * for a different filesystem.  One *could* look for a
-	 * nested transaction opportunity.
+	 * We give up here if we're reentered, because it might be for a
+	 * different filesystem.
 	 */
 	if (ext3_journal_current_handle())
 		goto out_fail;

-	needed = ext3_writepage_trans_blocks(inode);
-	handle = ext3_journal_start(inode, needed);
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
 				
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		goto out_fail;
 	}

-	order_data = ext3_should_order_data(inode) ||
-			ext3_should_journal_data(inode);
-
-	page_bufs = NULL;	/* Purely to prevent compiler warning */
-
-	/* bget() all the buffers */
-	if (order_data) {
-		if (!page_has_buffers(page)) {
-			if (!PageUptodate(page))
-				buffer_error();
-			create_empty_buffers(page,
-				inode->i_sb->s_blocksize,
+	if (!page_has_buffers(page)) {
+		if (!PageUptodate(page))
+			buffer_error();
+		create_empty_buffers(page, inode->i_sb->s_blocksize,
 				(1 << BH_Dirty)|(1 << BH_Uptodate));
-		}
-		page_bufs = page_buffers(page);
-		walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, bget_one);
 	}
+	page_bufs = page_buffers(page);
+	walk_page_buffers(handle, page_bufs, 0,
+			PAGE_CACHE_SIZE, NULL, bget_one);

 	ret = block_write_full_page(page, ext3_get_block, wbc);

@@ -1358,42 +1377,116 @@ static int ext3_writepage(struct page *page, struct writeback_control *wbc)
 	 * safe due to elevated refcount.
 	 */

-	handle = ext3_journal_current_handle();
-
 	/*
 	 * And attach them to the current transaction.  But only if 
 	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
 	 * and generally junk.
 	 */
-	if (order_data) {
-		if (ret == 0) {
-			err = walk_page_buffers(handle, page_bufs,
-				0, PAGE_CACHE_SIZE, NULL,
-				journal_dirty_data_fn);
-			if (!ret)
-				ret = err;
-		}
-		walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, bput_one);
+	if (ret == 0) {
+		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
+					NULL, journal_dirty_data_fn);
+		if (!ret)
+			ret = err;
+	}
+	walk_page_buffers(handle, page_bufs, 0,
+			PAGE_CACHE_SIZE, NULL, bput_one);
+	err = ext3_journal_stop(handle);
+	if (!ret)
+		ret = err;
+	return ret;
+
+out_fail:
+	__set_page_dirty_nobuffers(page);
+	unlock_page(page);
+	return ret;
+}
+
+static int ext3_writeback_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	if (ext3_journal_current_handle())
+		goto out_fail;
+
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_fail;
 	}

+	ret = block_write_full_page(page, ext3_get_block, wbc);
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
 	return ret;

 out_fail:
-	
-	/*
-	 * We have to fail this writepage to avoid cross-fs transactions.
-	 * Put the page back on mapping->dirty_pages.  The page's buffers'
-	 * dirty state will be left as-is.
-	 */
 	__set_page_dirty_nobuffers(page);
 	unlock_page(page);
 	return ret;
 }

+static int ext3_journalled_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	if (ext3_journal_current_handle())
+		goto no_write;
+
+	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto no_write;
+	}
+
+	if (!page_has_buffers(page) || PageChecked(page)) {
+		/*
+		 * It's mmapped pagecache.  Add buffers and journal it.  There
+		 * doesn't seem much point in redirtying the page here.
+		 */
+		ClearPageChecked(page);
+		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+					ext3_get_block);
+		if (ret != 0)
+			goto out_unlock;
+		ret = walk_page_buffers(handle, page_buffers(page), 0,
+			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
+
+		err = walk_page_buffers(handle, page_buffers(page), 0,
+				PAGE_CACHE_SIZE, NULL, commit_write_fn);
+		if (ret == 0)
+			ret = err;
+		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+		unlock_page(page);
+	} else {
+		/*
+		 * It may be a page full of checkpoint-mode buffers.  We don't
+		 * really know unless we go poke around in the buffer_heads.
+		 * But block_write_full_page will do the right thing.
+		 */
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+	}
+	err = ext3_journal_stop(handle);
+	if (!ret)
+		ret = err;
+out:
+	return ret;
+
+no_write:
+	__set_page_dirty_nobuffers(page);
+out_unlock:
+	unlock_page(page);
+	goto out;
+}
+
 static int ext3_readpage(struct file *file, struct page *page)
 {
 	return mpage_readpage(page, ext3_get_block);
@@ -1409,12 +1502,21 @@ ext3_readpages(struct file *file, struct address_space *mapping,
 static int ext3_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+
+	/*
+	 * If it's a full truncate we just forget about the pending dirtying
+	 */
+	if (offset == 0)
+		ClearPageChecked(page);
+
 	return journal_invalidatepage(journal, page, offset);
 }

 static int ext3_releasepage(struct page *page, int wait)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+
+	WARN_ON(PageChecked(page));
 	return journal_try_to_free_buffers(journal, page, wait);
 }

@@ -1482,41 +1584,75 @@ static int ext3_direct_IO(int rw, struct kiocb *iocb,
 	return ret;
 }

-struct address_space_operations ext3_aops = {
-	.readpage	= ext3_readpage,	/* BKL not held.  Don't need */
-	.readpages	= ext3_readpages,	/* BKL not held.  Don't need */
-	.writepage	= ext3_writepage,	/* BKL not held.  We take it */
+/*
+ * Pages can be marked dirty completely asynchronously from ext3's journalling
+ * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
+ * much here because ->set_page_dirty is called under VFS locks.  The page is
+ * not necessarily locked.
+ *
+ * We cannot just dirty the page and leave attached buffers clean, because the
+ * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
+ * or jbddirty because all the journalling code will explode.
+ *
+ * So what we do is to mark the page "pending dirty" and next time writepage
+ * is called, propagate that into the buffers appropriately.
+ */
+static int ext3_journalled_set_page_dirty(struct page *page)
+{
+	SetPageChecked(page);
+	return __set_page_dirty_nobuffers(page);
+}
+
+static struct address_space_operations ext3_ordered_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_ordered_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,	/* BKL not held.  We take it */
-	.commit_write	= ext3_commit_write,	/* BKL not held.  We take it */
-	.bmap		= ext3_bmap,		/* BKL held */
-	.invalidatepage	= ext3_invalidatepage,	/* BKL not held.  Don't need */
-	.releasepage	= ext3_releasepage,	/* BKL not held.  Don't need */
-	.direct_IO	= ext3_direct_IO,	/* BKL not held.  Don't need */
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_ordered_commit_write,
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+	.direct_IO	= ext3_direct_IO,
 };

-/* For writeback mode, we can use mpage_writepages() */
-#if 0	/* Doesn't work for shared mappings */
-static int
-ext3_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
-	return mpage_writepages(mapping, wbc, ext3_get_block);
-}
-#endif
+static struct address_space_operations ext3_writeback_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_writeback_writepage,
+	.sync_page	= block_sync_page,
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_writeback_commit_write,
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+	.direct_IO	= ext3_direct_IO,
+};

-struct address_space_operations ext3_writeback_aops = {
-	.readpage	= ext3_readpage,	/* BKL not held.  Don't need */
-	.readpages	= ext3_readpages,	/* BKL not held.  Don't need */
-	.writepage	= ext3_writepage,	/* BKL not held.  We take it */
+static struct address_space_operations ext3_journalled_aops = {
+	.readpage	= ext3_readpage,
+	.readpages	= ext3_readpages,
+	.writepage	= ext3_journalled_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,	/* BKL not held.  We take it */
-	.commit_write	= ext3_commit_write,	/* BKL not held.  We take it */
-	.bmap		= ext3_bmap,		/* BKL held */
-	.invalidatepage	= ext3_invalidatepage,	/* BKL not held.  Don't need */
-	.releasepage	= ext3_releasepage,	/* BKL not held.  Don't need */
-	.direct_IO	= ext3_direct_IO,	/* BKL not held.  Don't need */
+	.prepare_write	= ext3_prepare_write,
+	.commit_write	= ext3_journalled_commit_write,
+	.set_page_dirty	= ext3_journalled_set_page_dirty,
+	.bmap		= ext3_bmap,
+	.invalidatepage	= ext3_invalidatepage,
+	.releasepage	= ext3_releasepage,
+	.direct_IO	= ext3_direct_IO,
 };

+void ext3_set_aops(struct inode *inode)
+{
+	if (ext3_should_order_data(inode))
+		inode->i_mapping->a_ops = &ext3_ordered_aops;
+	else if (ext3_should_writeback_data(inode))
+		inode->i_mapping->a_ops = &ext3_writeback_aops;
+	else
+		inode->i_mapping->a_ops = &ext3_journalled_aops;
+}
+
 /*
 * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
 * up to the end of the block which corresponds to `from'.
@@ -2307,10 +2443,7 @@ void ext3_read_inode(struct inode * inode)
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext3_file_inode_operations;
 		inode->i_fop = &ext3_file_operations;
-		if (ext3_should_writeback_data(inode))
-			inode->i_mapping->a_ops = &ext3_writeback_aops;
-		else
-			inode->i_mapping->a_ops = &ext3_aops;
+		ext3_set_aops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext3_dir_inode_operations;
 		inode->i_fop = &ext3_dir_operations;
@@ -2319,10 +2452,7 @@ void ext3_read_inode(struct inode * inode)
 			inode->i_op = &ext3_fast_symlink_inode_operations;
 		else {
 			inode->i_op = &ext3_symlink_inode_operations;
-			if (ext3_should_writeback_data(inode))
-				inode->i_mapping->a_ops = &ext3_writeback_aops;
-			else
-				inode->i_mapping->a_ops = &ext3_aops;
+			ext3_set_aops(inode);
 		}
 	} else {
 		inode->i_op = &ext3_special_inode_operations;
@@ -2804,61 +2934,3 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val)
 	
 	return err;
 }
-
-
-/*
- * ext3_aops_journal_start().
- *
- * <This function died, but the comment lives on>
- *
- * We need to take the inode semaphore *outside* the
- * journal_start/journal_stop.  Otherwise, a different task could do a
- * wait_for_commit() while holding ->i_sem, which deadlocks.  The rule
- * is: transaction open/closes are considered to be a locking operation
- * and they nest *inside* ->i_sem.
- * ----------------------------------------------------------------------------
- * Possible problem:
- *	ext3_file_write()
- *	-> generic_file_write()
- *	   -> __alloc_pages()
- *	      -> page_launder()
- *		 -> ext3_writepage()
- *
- * And the writepage can be on a different fs while we have a
- * transaction open against this one!  Bad.
- *
- * I tried making the task PF_MEMALLOC here, but that simply results in
- * 0-order allocation failures passed back to generic_file_write().
- * Instead, we rely on the reentrancy protection in ext3_writepage().
- * ----------------------------------------------------------------------------
- * When we do the journal_start() here we don't really need to reserve
- * any blocks - we won't need any until we hit ext3_prepare_write(),
- * which does all the needed journal extending.  However!  There is a
- * problem with quotas:
- *
- * Thread 1:
- * sys_sync
- * ->sync_dquots
- *   ->commit_dquot
- *     ->lock_dquot
- *     ->write_dquot
- *       ->ext3_file_write
- *         ->journal_start
- *         ->ext3_prepare_write
- *           ->journal_extend
- *           ->journal_start
- * Thread 2:
- * ext3_create		(for example)
- * ->ext3_new_inode
- *   ->dquot_initialize
- *     ->lock_dquot
- *
- * Deadlock.  Thread 1's journal_start blocks because thread 2 has a
- * transaction open.  Thread 2's transaction will never close because
- * thread 2 is stuck waiting for the dquot lock.
- *
- * So.  We must ensure that thread 1 *never* needs to extend the journal
- * for quota writes.  We do that by reserving enough journal blocks
- * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we
- * need to extend" test in ext3_prepare_write() succeeds.  
- */
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1644,10 +1644,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext3_file_inode_operations;
 		inode->i_fop = &ext3_file_operations;
-		if (ext3_should_writeback_data(inode))
-			inode->i_mapping->a_ops = &ext3_writeback_aops;
-		else
-			inode->i_mapping->a_ops = &ext3_aops;
+		ext3_set_aops(inode);
 		err = ext3_add_nondir(handle, dentry, inode);
 	}
 	ext3_journal_stop(handle);
@@ -2100,10 +2097,7 @@ static int ext3_symlink (struct inode * dir,

 	if (l > sizeof (EXT3_I(inode)->i_data)) {
 		inode->i_op = &ext3_symlink_inode_operations;
-		if (ext3_should_writeback_data(inode))
-			inode->i_mapping->a_ops = &ext3_writeback_aops;
-		else
-			inode->i_mapping->a_ops = &ext3_aops;
+		ext3_set_aops(inode);
 		/*
 		 * page_symlink() calls into ext3_prepare/commit_write.
 		 * We have a transaction open.  All is sweetness.  It also sets

--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1120,7 +1120,6 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		/* And this case is illegal: we can't reuse another
 		 * transaction's data buffer, ever. */
 		/* FIXME: writepage() should be journalled */
-		J_ASSERT_JH(jh, jh->b_jlist != BJ_SyncData);
 		goto done_locked;
 	}


--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -735,6 +735,7 @@ extern void ext3_dirty_inode(struct inode *);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
 extern void ext3_truncate (struct inode *);
 extern void ext3_set_inode_flags(struct inode *);
+extern void ext3_set_aops(struct inode *inode);

 /* ioctl.c */
 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
@@ -783,10 +784,6 @@ extern struct file_operations ext3_dir_operations;
 extern struct inode_operations ext3_file_inode_operations;
 extern struct file_operations ext3_file_operations;

-/* inode.c */
-extern struct address_space_operations ext3_aops;
-extern struct address_space_operations ext3_writeback_aops;
-
 /* namei.c */
 extern struct inode_operations ext3_dir_inode_operations;
 extern struct inode_operations ext3_special_inode_operations;

--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -200,6 +200,7 @@ extern void get_full_page_state(struct page_state *ret);

 #define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
 #define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
+#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)

 #define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
 #define SetPageReserved(page)	set_bit(PG_reserved, &(page)->flags)