Commit 6ab455ee authored by Christoph Hellwig, committed by Lachlan McIlroy

[XFS] Fix memory corruption with small buffer reads

When we have multiple buffers in a single page for a blocksize == pagesize
filesystem we might overwrite the page contents if two callers hit it
shortly after each other. To prevent that we need to keep the page locked
until I/O is completed and the page marked uptodate.

Thanks to Eric Sandeen for triaging this bug and finding a reproducible
testcase and Dave Chinner for additional advice.

This should fix kernel.org bz #10421.
Tested-by: Eric Sandeen <sandeen@sandeen.net>

SGI-PV: 981813
SGI-Modid: xfs-linux-melb:xfs-kern:31173a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
parent c8f5f12e
...@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( ...@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
if (unlikely(page == NULL)) { if (unlikely(page == NULL)) {
if (flags & XBF_READ_AHEAD) { if (flags & XBF_READ_AHEAD) {
bp->b_page_count = i; bp->b_page_count = i;
for (i = 0; i < bp->b_page_count; i++)
unlock_page(bp->b_pages[i]);
return -ENOMEM; return -ENOMEM;
} }
...@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( ...@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
ASSERT(!PagePrivate(page)); ASSERT(!PagePrivate(page));
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
page_count--; page_count--;
if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { if (blocksize >= PAGE_CACHE_SIZE) {
if (flags & XBF_READ)
bp->b_flags |= _XBF_PAGE_LOCKED;
} else if (!PagePrivate(page)) {
if (test_page_region(page, offset, nbytes)) if (test_page_region(page, offset, nbytes))
page_count++; page_count++;
} }
} }
unlock_page(page);
bp->b_pages[i] = page; bp->b_pages[i] = page;
offset = 0; offset = 0;
} }
if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
for (i = 0; i < bp->b_page_count; i++)
unlock_page(bp->b_pages[i]);
}
if (page_count == bp->b_page_count) if (page_count == bp->b_page_count)
bp->b_flags |= XBF_DONE; bp->b_flags |= XBF_DONE;
...@@ -746,6 +755,7 @@ xfs_buf_associate_memory( ...@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
bp->b_count_desired = len; bp->b_count_desired = len;
bp->b_buffer_length = buflen; bp->b_buffer_length = buflen;
bp->b_flags |= XBF_MAPPED; bp->b_flags |= XBF_MAPPED;
bp->b_flags &= ~_XBF_PAGE_LOCKED;
return 0; return 0;
} }
...@@ -1093,8 +1103,10 @@ _xfs_buf_ioend( ...@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
xfs_buf_t *bp, xfs_buf_t *bp,
int schedule) int schedule)
{ {
if (atomic_dec_and_test(&bp->b_io_remaining) == 1) if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
bp->b_flags &= ~_XBF_PAGE_LOCKED;
xfs_buf_ioend(bp, schedule); xfs_buf_ioend(bp, schedule);
}
} }
STATIC void STATIC void
...@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( ...@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
if (--bvec >= bio->bi_io_vec) if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags); prefetchw(&bvec->bv_page->flags);
if (bp->b_flags & _XBF_PAGE_LOCKED)
unlock_page(page);
} while (bvec >= bio->bi_io_vec); } while (bvec >= bio->bi_io_vec);
_xfs_buf_ioend(bp, 1); _xfs_buf_ioend(bp, 1);
...@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( ...@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
* filesystem block size is not smaller than the page size. * filesystem block size is not smaller than the page size.
*/ */
if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
(bp->b_flags & XBF_READ) && ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
(XBF_READ|_XBF_PAGE_LOCKED)) &&
(blocksize >= PAGE_CACHE_SIZE)) { (blocksize >= PAGE_CACHE_SIZE)) {
bio = bio_alloc(GFP_NOIO, 1); bio = bio_alloc(GFP_NOIO, 1);
......
...@@ -66,6 +66,25 @@ typedef enum { ...@@ -66,6 +66,25 @@ typedef enum {
_XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
_XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
/*
* Special flag for supporting metadata blocks smaller than a FSB.
*
* In this case we can have multiple xfs_buf_t on a single page and
* need to lock out concurrent xfs_buf_t readers as they only
* serialise access to the buffer.
*
 * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
* between reads of the page. Hence we can have one thread read the
* page and modify it, but then race with another thread that thinks
* the page is not up-to-date and hence reads it again.
*
 * The result is that the first modification to the page is lost.
* This sort of AGF/AGI reading race can happen when unlinking inodes
* that require truncation and results in the AGI unlinked list
* modifications being lost.
*/
_XBF_PAGE_LOCKED = (1 << 22),
} xfs_buf_flags_t; } xfs_buf_flags_t;
typedef enum { typedef enum {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment