Commit 676ce6d5 authored by Hugh Dickins's avatar Hugh Dickins Committed by Jens Axboe

block: replace __getblk_slow misfix by grow_dev_page fix

Commit 91f68c89 ("block: fix infinite loop in __getblk_slow")
is not good: a successful call to grow_buffers() cannot guarantee
that the page won't be reclaimed before the immediate next call to
__find_get_block(), which is why there was always a loop there.

Yesterday I got "EXT4-fs error (device loop0): __ext4_get_inode_loc:3595:
inode #19278: block 664: comm cc1: unable to read itable block" on console,
which pointed to this commit.

I've been trying to bisect for weeks, why kbuild-on-ext4-on-loop-on-tmpfs
sometimes fails from a missing header file, under memory pressure on
ppc G5.  I've never seen this on x86, and I've never seen it on 3.5-rc7
itself, despite that commit being in there: bisection pointed to an
irrelevant pinctrl merge, but hard to tell when failure takes between
18 minutes and 38 hours (but so far it's happened quicker on 3.6-rc2).

(I've since found such __ext4_get_inode_loc errors in /var/log/messages
from previous weeks: why the message never appeared on console until
yesterday morning is a mystery for another day.)

Revert 91f68c89, restoring __getblk_slow() to how it was (plus
a checkpatch nitfix).  Simplify the interface between grow_buffers()
and grow_dev_page(), and avoid the infinite loop beyond end of device
by instead checking init_page_buffers()'s end_block there (I presume
that's more efficient than a repeated call to blkdev_max_block()),
returning -ENXIO to __getblk_slow() in that case.

And remove akpm's ten-year-old "__getblk() cannot fail ... weird"
comment, but that is worrying: are all users of __getblk() really
now prepared for a NULL bh beyond end of device, or will some oops??
Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org # 3.0 3.2 3.4 3.5
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 79df9b40
...@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) ...@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
/* /*
* Initialise the state of a blockdev page's buffers. * Initialise the state of a blockdev page's buffers.
*/ */
static void static sector_t
init_page_buffers(struct page *page, struct block_device *bdev, init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size) sector_t block, int size)
{ {
...@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, ...@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
block++; block++;
bh = bh->b_this_page; bh = bh->b_this_page;
} while (bh != head); } while (bh != head);
/*
* Caller needs to validate requested block against end of device.
*/
return end_block;
} }
/* /*
* Create the page-cache page that contains the requested block. * Create the page-cache page that contains the requested block.
* *
* This is user purely for blockdev mappings. * This is used purely for blockdev mappings.
*/ */
static struct page * static int
grow_dev_page(struct block_device *bdev, sector_t block, grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size) pgoff_t index, int size, int sizebits)
{ {
struct inode *inode = bdev->bd_inode; struct inode *inode = bdev->bd_inode;
struct page *page; struct page *page;
struct buffer_head *bh; struct buffer_head *bh;
sector_t end_block;
int ret = 0; /* Will call free_more_memory() */
page = find_or_create_page(inode->i_mapping, index, page = find_or_create_page(inode->i_mapping, index,
(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page) if (!page)
return NULL; return ret;
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) { if (page_has_buffers(page)) {
bh = page_buffers(page); bh = page_buffers(page);
if (bh->b_size == size) { if (bh->b_size == size) {
init_page_buffers(page, bdev, block, size); end_block = init_page_buffers(page, bdev,
return page; index << sizebits, size);
goto done;
} }
if (!try_to_free_buffers(page)) if (!try_to_free_buffers(page))
goto failed; goto failed;
...@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, ...@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/ */
spin_lock(&inode->i_mapping->private_lock); spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh); link_dev_buffers(page, bh);
init_page_buffers(page, bdev, block, size); end_block = init_page_buffers(page, bdev, index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock); spin_unlock(&inode->i_mapping->private_lock);
return page; done:
ret = (block < end_block) ? 1 : -ENXIO;
failed: failed:
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
return NULL; return ret;
} }
/* /*
...@@ -999,7 +1007,6 @@ grow_dev_page(struct block_device *bdev, sector_t block, ...@@ -999,7 +1007,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
static int static int
grow_buffers(struct block_device *bdev, sector_t block, int size) grow_buffers(struct block_device *bdev, sector_t block, int size)
{ {
struct page *page;
pgoff_t index; pgoff_t index;
int sizebits; int sizebits;
...@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) ...@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
bdevname(bdev, b)); bdevname(bdev, b));
return -EIO; return -EIO;
} }
block = index << sizebits;
/* Create a page with the proper size buffers.. */ /* Create a page with the proper size buffers.. */
page = grow_dev_page(bdev, block, index, size); return grow_dev_page(bdev, block, index, size, sizebits);
if (!page)
return 0;
unlock_page(page);
page_cache_release(page);
return 1;
} }
static struct buffer_head * static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size) __getblk_slow(struct block_device *bdev, sector_t block, int size)
{ {
int ret;
struct buffer_head *bh;
/* Size must be multiple of hard sectorsize */ /* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) || if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) { (size < 512 || size > PAGE_SIZE))) {
...@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) ...@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
return NULL; return NULL;
} }
retry: for (;;) {
bh = __find_get_block(bdev, block, size); struct buffer_head *bh;
if (bh) int ret;
return bh;
ret = grow_buffers(bdev, block, size);
if (ret == 0) {
free_more_memory();
goto retry;
} else if (ret > 0) {
bh = __find_get_block(bdev, block, size); bh = __find_get_block(bdev, block, size);
if (bh) if (bh)
return bh; return bh;
ret = grow_buffers(bdev, block, size);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
} }
return NULL;
} }
/* /*
...@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); ...@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
* which corresponds to the passed block_device, block and size. The * which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented. * returned buffer has its reference count incremented.
* *
* __getblk() cannot fail - it just keeps trying. If you pass it an
* illegal block number, __getblk() will happily return a buffer_head
* which represents the non-existent block. Very weird.
*
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps? * attempt is failing. FIXME, perhaps?
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment