Commit 0f9bfc48 authored by Anton Altaparmakov

NTFS: - Add mapping of unmapped buffers to all remaining code paths, i.e.

        fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(),
        and write_mft_record_nolock().  From now on we require that the
        complete runlist for the mft mirror is always mapped into memory.
      - Add creation of buffers to fs/ntfs/mft.c::ntfs_sync_mft_mirror().
      - Do not check for the page being uptodate in mark_ntfs_record_dirty()
        as we now call this after marking the page not uptodate during mft
        mirror synchronisation (fs/ntfs/mft.c::ntfs_sync_mft_mirror()).
      - Improve error handling in fs/ntfs/aops.c::ntfs_{read,write}_block().
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
parent 453b5e0c
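
All three write paths named in the commit message gain the same vcn-to-lcn buffer mapping step before i/o is submitted. The condensed sketch below is illustrative only; the helper name ntfs_map_one_buffer() and its return convention are not part of the patch, and the caller is assumed to hold the relevant runlist lock and to have already converted the buffer's block number into a vcn plus intra-cluster offset:

/*
 * Illustrative sketch, not code from the patch: map buffer head @bh, which
 * covers @vcn at byte offset @vcn_ofs within that cluster, by walking the
 * runlist @rl.  The caller holds the runlist lock for reading.
 */
static int ntfs_map_one_buffer(ntfs_volume *vol, runlist_element *rl,
                struct buffer_head *bh, const VCN vcn,
                const unsigned int vcn_ofs, const unsigned char blocksize_bits)
{
        LCN lcn;

        /* Seek to the runlist element containing the target vcn. */
        while (rl->length && rl[1].vcn <= vcn)
                rl++;
        lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
        if (likely(lcn >= 0)) {
                /* Point the buffer head at the correct on-disk block. */
                bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
                                vcn_ofs) >> blocksize_bits;
                set_buffer_mapped(bh);
                return 0;
        }
        /*
         * LCN_HOLE, LCN_RL_NOT_MAPPED, etc.: the caller decides whether to
         * remap the runlist and retry or to fail the record with -EIO.
         */
        bh->b_blocknr = -1;
        return (int)lcn;
}

In ntfs_write_mst_block() these steps are open coded with the ntfs_map_runlist() retry logic wrapped around them, while in ntfs_sync_mft_mirror() and write_mft_record_nolock() a negative lcn is a hard error because the $MFT and $MFTMirr runlists are required to be fully mapped at all times.
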
......@@ -84,6 +84,15 @@ ToDo/Notes:
- Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date()
where we failed to release i_sem on the $Quota/$Q attribute inode.
- Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup().
- Add mapping of unmapped buffers to all remaining code paths, i.e.
fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(),
and write_mft_record_nolock(). From now on we require that the
complete runlist for the mft mirror is always mapped into memory.
- Add creation of buffers to fs/ntfs/mft.c::ntfs_sync_mft_mirror().
- Do not check for the page being uptodate in mark_ntfs_record_dirty()
as we now call this after marking the page not uptodate during mft
mirror synchronisation (fs/ntfs/mft.c::ntfs_sync_mft_mirror()).
- Improve error handling in fs/ntfs/aops.c::ntfs_{read,write}_block().
2.1.21 - Fix some races and bugs, rewrite mft write code, add mft allocator.
......
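
Regarding the mark_ntfs_record_dirty() bullet above, the new ordering inside fs/ntfs/mft.c::ntfs_sync_mft_mirror() is, condensed from the hunks further down (the closing steps are not visible in this excerpt):

        lock_page(page);
        BUG_ON(!PageUptodate(page));
        ClearPageUptodate(page);
        page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
        kmirr = page_address(page) + page_ofs;
        memcpy(kmirr, m, vol->mft_record_size);
        /*
         * Creates any missing buffers and dirties the ones covering the
         * mirror record.  The page is intentionally not uptodate here, which
         * is why the BUG_ON(!PageUptodate(page)) check was removed from
         * mark_ntfs_record_dirty().
         */
        mark_ntfs_record_dirty(page, page_ofs);
        /*
         * ... the buffers are then mapped and written out synchronously and
         * the page is made uptodate again before being unlocked (not shown).
         */
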
......@@ -175,6 +175,9 @@ static int ntfs_read_block(struct page *page)
ni = NTFS_I(page->mapping->host);
vol = ni->vol;
/* $MFT/$DATA must have its complete runlist in memory at all times. */
BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
blocksize_bits = VFS_I(ni)->i_blkbits;
blocksize = 1 << blocksize_bits;
......@@ -190,12 +193,6 @@ static int ntfs_read_block(struct page *page)
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
#ifdef DEBUG
if (unlikely(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni)))
panic("NTFS: $MFT/$DATA runlist has been unmapped! This is a "
"very serious bug! Cannot continue...");
#endif
/* Loop through all the buffers in the page. */
rl = NULL;
nr = i = 0;
......@@ -249,24 +246,30 @@ static int ntfs_read_block(struct page *page)
goto handle_hole;
/* If first try and runlist unmapped, map and retry. */
if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
int err;
is_retry = TRUE;
/*
* Attempt to map runlist, dropping lock for
* the duration.
*/
up_read(&ni->runlist.lock);
if (!ntfs_map_runlist(ni, vcn))
err = ntfs_map_runlist(ni, vcn);
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
lcn = err;
}
/* Hard error, zero out region. */
bh->b_blocknr = -1;
SetPageError(page);
ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn(vcn = 0x%llx) "
"failed with error code 0x%llx%s.",
(unsigned long long)vcn,
(unsigned long long)-lcn,
is_retry ? " even after retrying" : "");
// FIXME: Depending on vol->on_errors, do something.
ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, "
"offset 0x%x because its location on "
"disk could not be determined%s "
"(error code %lli).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
"retrying" : "", (long long)lcn);
}
/*
* Either iblock was outside lblock limits or
......@@ -437,8 +440,8 @@ static int ntfs_readpage(struct file *file, struct page *page)
/**
* ntfs_write_block - write a @page to the backing store
* @wbc: writeback control structure
* @page: page cache page to write out
* @wbc: writeback control structure
*
* This function is for writing pages belonging to non-resident, non-mst
* protected attributes to their backing store.
......@@ -457,7 +460,7 @@ static int ntfs_readpage(struct file *file, struct page *page)
*
* Based on ntfs_read_block() and __block_write_full_page().
*/
static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
VCN vcn;
LCN lcn;
......@@ -477,7 +480,7 @@ static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
vol = ni->vol;
ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
"0x%lx.", vi->i_ino, ni->type, page->index);
"0x%lx.", ni->mft_no, ni->type, page->index);
BUG_ON(!NInoNonResident(ni));
BUG_ON(NInoMstProtected(ni));
......@@ -618,9 +621,9 @@ static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
bh->b_bdev = vol->sb->s_bdev;
/* Convert block into corresponding vcn and offset. */
vcn = (VCN)block << blocksize_bits >> vol->cluster_size_bits;
vcn_ofs = ((VCN)block << blocksize_bits) &
vol->cluster_size_mask;
vcn = (VCN)block << blocksize_bits;
vcn_ofs = vcn & vol->cluster_size_mask;
vcn >>= vol->cluster_size_bits;
if (!rl) {
lock_retry_remap:
down_read(&ni->runlist.lock);
......@@ -663,15 +666,17 @@ static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
lcn = err;
}
/* Failed to map the buffer, even after retrying. */
bh->b_blocknr = -1UL;
ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn(vcn = 0x%llx) failed "
"with error code 0x%llx%s.",
(unsigned long long)vcn,
(unsigned long long)-lcn,
is_retry ? " even after retrying" : "");
// FIXME: Depending on vol->on_errors, do something.
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
"because its location on disk could not be "
"determined%s (error code %lli).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
"retrying" : "", (long long)lcn);
if (!err)
err = -EIO;
break;
......@@ -767,8 +772,8 @@ static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
/**
* ntfs_write_mst_block - write a @page to the backing store
* @wbc: writeback control structure
* @page: page cache page to write out
* @wbc: writeback control structure
*
* This function is for writing pages belonging to non-resident, mst protected
* attributes to their backing store. The only supported attributes are index
......@@ -789,22 +794,24 @@ static int ntfs_write_block(struct writeback_control *wbc, struct page *page)
* Based on ntfs_write_block(), ntfs_mft_writepage(), and
* write_mft_record_nolock().
*/
static int ntfs_write_mst_block(struct writeback_control *wbc,
struct page *page)
static int ntfs_write_mst_block(struct page *page,
struct writeback_control *wbc)
{
sector_t block, dblock, rec_block;
struct inode *vi = page->mapping->host;
ntfs_inode *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
u8 *kaddr;
unsigned int bh_size = 1 << vi->i_blkbits;
unsigned char bh_size_bits = vi->i_blkbits;
unsigned int bh_size = 1 << bh_size_bits;
unsigned int rec_size = ni->itype.index.block_size;
ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
struct buffer_head *bh, *head, *tbh;
struct buffer_head *bh, *head, *tbh, *rec_start_bh;
int max_bhs = PAGE_CACHE_SIZE / bh_size;
struct buffer_head *bhs[max_bhs];
int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err;
unsigned char bh_size_bits, rec_size_bits;
runlist_element *rl;
int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
unsigned rec_size_bits;
BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
......@@ -824,7 +831,6 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
bh = head = page_buffers(page);
BUG_ON(!bh);
bh_size_bits = vi->i_blkbits;
rec_size_bits = ni->itype.index.block_size_bits;
BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
bhs_per_rec = rec_size >> bh_size_bits;
......@@ -837,25 +843,18 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
/* The first out of bounds block for the data size. */
dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;
err = nr_bhs = nr_recs = nr_locked_nis = 0;
rl = NULL;
err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
page_is_dirty = rec_is_dirty = FALSE;
rec_start_bh = NULL;
do {
if (unlikely(block >= dblock)) {
/*
* Mapped buffers outside i_size will occur, because
* this page can be outside i_size when there is a
* truncate in progress. The contents of such buffers
* were zeroed by ntfs_writepage().
*
* FIXME: What about the small race window where
* ntfs_writepage() has not done any clearing because
* the page was within i_size but before we get here,
* vmtruncate() modifies i_size?
*/
clear_buffer_dirty(bh);
continue;
}
BOOL is_retry = FALSE;
if (likely(block < rec_block)) {
if (unlikely(block >= dblock)) {
clear_buffer_dirty(bh);
continue;
}
/*
* This block is not the first one in the record. We
* ignore the buffer's dirty state because we could
......@@ -863,22 +862,121 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
*/
if (!rec_is_dirty)
continue;
if (unlikely(err2)) {
if (err2 != -ENOMEM)
clear_buffer_dirty(bh);
continue;
}
} else /* if (block == rec_block) */ {
BUG_ON(block > rec_block);
/* This block is the first one in the record. */
rec_block += bhs_per_rec;
err2 = 0;
if (unlikely(block >= dblock)) {
clear_buffer_dirty(bh);
continue;
}
if (!buffer_dirty(bh)) {
/* Clean records are not written out. */
rec_is_dirty = FALSE;
continue;
}
rec_is_dirty = TRUE;
rec_start_bh = bh;
}
/* Need to map the buffer if it is not mapped already. */
if (unlikely(!buffer_mapped(bh))) {
VCN vcn;
LCN lcn;
unsigned int vcn_ofs;
/* Obtain the vcn and offset of the current block. */
vcn = (VCN)block << bh_size_bits;
vcn_ofs = vcn & vol->cluster_size_mask;
vcn >>= vol->cluster_size_bits;
if (!rl) {
lock_retry_remap:
down_read(&ni->runlist.lock);
rl = ni->runlist.rl;
}
if (likely(rl != NULL)) {
/* Seek to element containing target vcn. */
while (rl->length && rl[1].vcn <= vcn)
rl++;
lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
} else
lcn = LCN_RL_NOT_MAPPED;
/* Successful remap. */
if (likely(lcn >= 0)) {
/* Setup buffer head to correct block. */
bh->b_blocknr = ((lcn <<
vol->cluster_size_bits) +
vcn_ofs) >> bh_size_bits;
set_buffer_mapped(bh);
} else {
/*
* Remap failed. Retry to map the runlist once
* unless we are working on $MFT which always
* has the whole of its runlist in memory.
*/
if (!is_mft && !is_retry &&
lcn == LCN_RL_NOT_MAPPED) {
is_retry = TRUE;
/*
* Attempt to map runlist, dropping
* lock for the duration.
*/
up_read(&ni->runlist.lock);
err2 = ntfs_map_runlist(ni, vcn);
if (likely(!err2))
goto lock_retry_remap;
if (err2 == -ENOMEM)
page_is_dirty = TRUE;
lcn = err2;
} else
err2 = -EIO;
/* Hard error. Abort writing this record. */
if (!err || err == -ENOMEM)
err = err2;
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Cannot write ntfs record "
"0x%llx (inode 0x%lx, "
"attribute type 0x%x) because "
"its location on disk could "
"not be determined (error "
"code %lli).", (s64)block <<
bh_size_bits >>
vol->mft_record_size_bits,
ni->mft_no, ni->type,
(long long)lcn);
/*
* If this is not the first buffer, remove the
* buffers in this record from the list of
* buffers to write and clear their dirty bit
* if not error -ENOMEM.
*/
if (rec_start_bh != bh) {
while (bhs[--nr_bhs] != rec_start_bh)
;
if (err2 != -ENOMEM) {
do {
clear_buffer_dirty(
rec_start_bh);
} while ((rec_start_bh =
rec_start_bh->
b_this_page) !=
bh);
}
}
continue;
}
}
BUG_ON(!buffer_mapped(bh));
BUG_ON(!buffer_uptodate(bh));
BUG_ON(nr_bhs >= max_bhs);
bhs[nr_bhs++] = bh;
BUG_ON(nr_bhs > max_bhs);
} while (block++, (bh = bh->b_this_page) != head);
if (unlikely(rl))
up_read(&ni->runlist.lock);
/* If there were no dirty buffers, we are done. */
if (!nr_bhs)
goto done;
......@@ -930,9 +1028,11 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
locked_nis[nr_locked_nis++] = tni;
}
/* Apply the mst protection fixups. */
err = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
rec_size);
if (unlikely(err)) {
if (unlikely(err2)) {
if (!err || err == -ENOMEM)
err = -EIO;
ntfs_error(vol->sb, "Failed to apply mst fixups "
"(inode 0x%lx, attribute type 0x%x, "
"page index 0x%lx, page offset 0x%x)!"
......@@ -986,7 +1086,8 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
"0x%lx, page offset 0x%lx)! Unmount "
"and run chkdsk.", vi->i_ino, ni->type,
page->index, bh_offset(tbh));
err = -EIO;
if (!err || err == -ENOMEM)
err = -EIO;
/*
* Set the buffer uptodate so the page and buffer
* states do not become out of sync.
......@@ -1056,13 +1157,18 @@ static int ntfs_write_mst_block(struct writeback_control *wbc,
atomic_dec(&tni->count);
iput(VFS_I(base_tni));
}
if (unlikely(err)) {
SetPageError(page);
NVolSetErrors(vol);
}
SetPageUptodate(page);
kunmap(page);
done:
if (unlikely(err && err != -ENOMEM)) {
/*
* Set page error if there is only one ntfs record in the page.
* Otherwise we would lose per-record granularity.
*/
if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
SetPageError(page);
NVolSetErrors(vol);
}
if (page_is_dirty) {
ntfs_debug("Page still contains one or more dirty ntfs "
"records. Redirtying the page starting at "
......@@ -1182,9 +1288,9 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
return ntfs_write_mst_block(wbc, page);
return ntfs_write_mst_block(page, wbc);
/* Normal data stream. */
return ntfs_write_block(wbc, page);
return ntfs_write_block(page, wbc);
}
/*
* Attribute is resident, implying it is not compressed, encrypted,
......@@ -1343,7 +1449,7 @@ static int ntfs_prepare_nonresident_write(struct page *page,
vol = ni->vol;
ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
page->index, from, to);
BUG_ON(!NInoNonResident(ni));
......@@ -1537,21 +1643,24 @@ static int ntfs_prepare_nonresident_write(struct page *page,
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
lcn = err;
}
/*
* Failed to map the buffer, even after
* retrying.
*/
bh->b_blocknr = -1UL;
ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn(vcn = "
"0x%llx) failed with error "
"code 0x%llx%s.",
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Failed to write to inode "
"0x%lx, attribute type 0x%x, "
"vcn 0x%llx, offset 0x%x "
"because its location on disk "
"could not be determined%s "
"(error code %lli).",
ni->mft_no, ni->type,
(unsigned long long)vcn,
(unsigned long long)-lcn,
is_retry ? " even after "
"retrying" : "");
// FIXME: Depending on vol->on_errors, do
// something.
vcn_ofs, is_retry ? " even "
"after retrying" : "",
(long long)lcn);
if (!err)
err = -EIO;
goto err_out;
......@@ -2173,7 +2282,6 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
struct buffer_head *bh, *head, *buffers_to_free = NULL;
unsigned int end, bh_size, bh_ofs;
BUG_ON(!PageUptodate(page));
end = ofs + ni->itype.index.block_size;
bh_size = 1 << VFS_I(ni)->i_blkbits;
spin_lock(&mapping->private_lock);
......
......@@ -2358,8 +2358,8 @@ int ntfs_truncate(struct inode *vi)
done:
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(ni);
ntfs_debug("Done.");
NInoClearTruncateFailed(ni);
ntfs_debug("Done.");
return 0;
err_out:
if (err != -ENOMEM) {
......@@ -2608,6 +2608,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode "
"as bad. You should run chkdsk.", -err);
make_bad_inode(vi);
NVolSetErrors(ni->vol);
}
return err;
}
......
......@@ -466,8 +466,10 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
struct buffer_head *bhs[max_bhs];
struct buffer_head *bh, *head;
u8 *kmirr;
unsigned int block_start, block_end, m_start, m_end;
runlist_element *rl;
unsigned int block_start, block_end, m_start, m_end, page_ofs;
int i_bhs, nr_bhs, err = 0;
unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits;
ntfs_debug("Entering for inode 0x%lx.", mft_no);
BUG_ON(!max_bhs);
......@@ -486,24 +488,24 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
err = PTR_ERR(page);
goto err_out;
}
/*
* Exclusion against other writers. This should never be a problem
* since the page in which the mft record @m resides is also locked and
* hence any other writers would be held up there but it is better to
* make sure no one is writing from elsewhere.
*/
lock_page(page);
BUG_ON(!PageUptodate(page));
ClearPageUptodate(page);
/* Offset of the mft mirror record inside the page. */
page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
/* The address in the page of the mirror copy of the mft record @m. */
kmirr = page_address(page) + ((mft_no << vol->mft_record_size_bits) &
~PAGE_CACHE_MASK);
kmirr = page_address(page) + page_ofs;
/* Copy the mst protected mft record to the mirror. */
memcpy(kmirr, m, vol->mft_record_size);
/* Make sure we have mapped buffers. */
/*
* Create buffers if not present and mark the ones belonging to the mft
* mirror record dirty.
*/
mark_ntfs_record_dirty(page, page_ofs);
BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
rl = NULL;
nr_bhs = 0;
block_start = 0;
m_start = kmirr - (u8*)page_address(page);
......@@ -511,15 +513,61 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
do {
block_end = block_start + blocksize;
/* If the buffer is outside the mft record, skip it. */
if ((block_end <= m_start) || (block_start >= m_end))
if (block_end <= m_start)
continue;
BUG_ON(!buffer_mapped(bh));
if (unlikely(block_start >= m_end))
break;
/* Need to map the buffer if it is not mapped already. */
if (unlikely(!buffer_mapped(bh))) {
VCN vcn;
LCN lcn;
unsigned int vcn_ofs;
/* Obtain the vcn and offset of the current block. */
vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
(block_start - m_start);
vcn_ofs = vcn & vol->cluster_size_mask;
vcn >>= vol->cluster_size_bits;
if (!rl) {
down_read(&NTFS_I(vol->mftmirr_ino)->
runlist.lock);
rl = NTFS_I(vol->mftmirr_ino)->runlist.rl;
/*
* $MFTMirr always has the whole of its runlist
* in memory.
*/
BUG_ON(!rl);
}
/* Seek to element containing target vcn. */
while (rl->length && rl[1].vcn <= vcn)
rl++;
lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
/* For $MFTMirr, only lcn >= 0 is a successful remap. */
if (likely(lcn >= 0)) {
/* Setup buffer head to correct block. */
bh->b_blocknr = ((lcn <<
vol->cluster_size_bits) +
vcn_ofs) >> blocksize_bits;
set_buffer_mapped(bh);
} else {
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Cannot write mft mirror "
"record 0x%lx because its "
"location on disk could not "
"be determined (error code "
"%lli).", mft_no,
(long long)lcn);
err = -EIO;
}
}
BUG_ON(!buffer_uptodate(bh));
BUG_ON(!nr_bhs && (m_start != block_start));
BUG_ON(nr_bhs >= max_bhs);
bhs[nr_bhs++] = bh;
BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
} while (block_start = block_end, (bh = bh->b_this_page) != head);
if (unlikely(rl))
up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock);
if (likely(!err)) {
/* Lock buffers and start synchronous write i/o on them. */
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
......@@ -528,8 +576,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
if (unlikely(test_set_buffer_locked(tbh)))
BUG();
BUG_ON(!buffer_uptodate(tbh));
if (buffer_dirty(tbh))
clear_buffer_dirty(tbh);
clear_buffer_dirty(tbh);
get_bh(tbh);
tbh->b_end_io = end_buffer_write_sync;
submit_bh(WRITE, tbh);
......@@ -613,13 +660,14 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
ntfs_volume *vol = ni->vol;
struct page *page = ni->page;
unsigned int blocksize = vol->sb->s_blocksize;
unsigned char blocksize_bits = vol->mft_ino->i_blkbits;
unsigned int blocksize = 1 << blocksize_bits;
int max_bhs = vol->mft_record_size / blocksize;
struct buffer_head *bhs[max_bhs];
struct buffer_head *bh, *head;
runlist_element *rl;
unsigned int block_start, block_end, m_start, m_end;
int i_bhs, nr_bhs, err = 0;
BOOL rec_is_dirty = TRUE;
ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
BUG_ON(NInoAttr(ni));
......@@ -636,6 +684,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
rl = NULL;
nr_bhs = 0;
block_start = 0;
m_start = ni->page_ofs;
......@@ -647,31 +696,65 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
continue;
if (unlikely(block_start >= m_end))
break;
/*
* If this block is not the first one in the record, we ignore
* the buffer's dirty state because we could have raced with a
* parallel mark_ntfs_record_dirty().
*/
if (block_start == m_start) {
/* This block is the first one in the record. */
if (!buffer_dirty(bh)) {
BUG_ON(nr_bhs);
/* Clean records are not written out. */
rec_is_dirty = FALSE;
continue;
break;
}
}
/* Need to map the buffer if it is not mapped already. */
if (unlikely(!buffer_mapped(bh))) {
VCN vcn;
LCN lcn;
unsigned int vcn_ofs;
/* Obtain the vcn and offset of the current block. */
vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
(block_start - m_start);
vcn_ofs = vcn & vol->cluster_size_mask;
vcn >>= vol->cluster_size_bits;
if (!rl) {
down_read(&NTFS_I(vol->mft_ino)->runlist.lock);
rl = NTFS_I(vol->mft_ino)->runlist.rl;
BUG_ON(!rl);
}
/* Seek to element containing target vcn. */
while (rl->length && rl[1].vcn <= vcn)
rl++;
lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
/* For $MFT, only lcn >= 0 is a successful remap. */
if (likely(lcn >= 0)) {
/* Setup buffer head to correct block. */
bh->b_blocknr = ((lcn <<
vol->cluster_size_bits) +
vcn_ofs) >> blocksize_bits;
set_buffer_mapped(bh);
} else {
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Cannot write mft record "
"0x%lx because its location "
"on disk could not be "
"determined (error code %lli).",
ni->mft_no, (long long)lcn);
err = -EIO;
}
rec_is_dirty = TRUE;
} else {
/*
* This block is not the first one in the record. We
* ignore the buffer's dirty state because we could
* have raced with a parallel mark_ntfs_record_dirty().
*/
if (!rec_is_dirty)
continue;
}
BUG_ON(!buffer_mapped(bh));
BUG_ON(!buffer_uptodate(bh));
BUG_ON(!nr_bhs && (m_start != block_start));
BUG_ON(nr_bhs >= max_bhs);
bhs[nr_bhs++] = bh;
BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
} while (block_start = block_end, (bh = bh->b_this_page) != head);
if (!rec_is_dirty)
if (unlikely(rl))
up_read(&NTFS_I(vol->mft_ino)->runlist.lock);
if (!nr_bhs)
goto done;
if (unlikely(err))
goto cleanup_out;
......@@ -745,7 +828,8 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
"Redirtying so the write is retried later.");
mark_mft_record_dirty(ni);
err = 0;
}
} else
NVolSetErrors(vol);
return err;
}
......
......@@ -983,6 +983,10 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
* @vol: ntfs super block describing device whose mft mirror to check
*
* Return TRUE on success or FALSE on error.
*
* Note, this function also results in the mft mirror runlist being completely
* mapped into memory. The mft mirror write code requires this and will BUG()
* should it find an unmapped runlist element.
*/
static BOOL check_mft_mirror(ntfs_volume *vol)
{
......
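
Tying the added note back to the write path: because check_mft_mirror() leaves the whole $MFTMirr runlist mapped at mount time, ntfs_sync_mft_mirror() only takes the runlist read lock and asserts that the runlist is present rather than falling back to ntfs_map_runlist(), roughly (condensed from the mft.c hunk above):

        down_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock);
        rl = NTFS_I(vol->mftmirr_ino)->runlist.rl;
        /* $MFTMirr always has the whole of its runlist in memory. */
        BUG_ON(!rl);
        /*
         * The vcn lookup then proceeds without the LCN_RL_NOT_MAPPED retry
         * path used for ordinary attributes; any negative lcn is a hard
         * -EIO error.
         */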