Commit f6d6d4fc authored by Christoph Hellwig, committed by Nathan Scott

[XFS] Initial pass at going directly-to-bio on the buffered IO path.

This allows us to submit much larger I/Os instead of sending down lots of small
buffer_heads.  To do this we need a rather complicated I/O submission and
completion tracking infrastructure.  Part of the latter was merged a long time
ago for direct I/O support.  Part of the problem is that we need to track
sub-page-size regions, and for that we still need buffer_heads for the time
being.  Long term I hope we can move to better data structures and/or move this
to fs/mpage.c instead of keeping it in XFS.  Original patch from Nathan Scott
with various updates from David Chinner and Christoph Hellwig.

SGI-PV: 947118
SGI-Modid: xfs-linux-melb:xfs-kern:203822a
Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
parent ce8e922c
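
The completion-tracking idea described in the commit message, visible throughout the diff below, is: each ioend carries a reference count (io_remaining) that starts at 1 for the submitter's hold, is bumped once per bio submitted against it, and is dropped on each bio completion; whichever drop takes the count to zero runs the end-of-I/O work. A minimal user-space sketch of that pattern follows — illustrative names only, not the XFS code:

```c
/*
 * Hypothetical sketch of the ioend reference-counting pattern.
 * The struct and function names here are invented for illustration;
 * the real code lives in fs/xfs/linux-2.6/xfs_aops.c.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ioend {
	atomic_int remaining;   /* submitter's hold + one per in-flight bio */
	bool       uptodate;    /* cleared if any piece of the I/O fails */
};

/* Called once per completed bio, and once by the submitter after it has
 * issued all bios; the caller that drops the last reference performs the
 * end-of-I/O processing (in XFS, queueing work to an xfsdatad thread). */
static void finish_ioend(struct ioend *io)
{
	if (atomic_fetch_sub(&io->remaining, 1) == 1)
		printf("all I/O complete, uptodate=%d\n", io->uptodate);
}

int main(void)
{
	struct ioend io = { .uptodate = true };
	atomic_init(&io.remaining, 1);          /* the submitter's hold */

	for (int bio = 0; bio < 3; bio++)       /* pretend to submit 3 bios */
		atomic_fetch_add(&io.remaining, 1);

	for (int bio = 0; bio < 3; bio++)       /* pretend they all complete */
		finish_ioend(&io);

	finish_ioend(&io);                      /* drop the submitter's hold */
	return 0;
}
```

The initial hold is what prevents the ioend from completing while bios are still being built and submitted, mirroring atomic_set(&ioend->io_remaining, 1) in xfs_alloc_ioend and the final xfs_finish_ioend call in xfs_submit_ioend.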
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
#include <linux/writeback.h> #include <linux/writeback.h>
STATIC void xfs_count_page_state(struct page *, int *, int *, int *); STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
struct writeback_control *wbc, void *, int, int);
#if defined(XFS_RW_TRACE) #if defined(XFS_RW_TRACE)
void void
...@@ -58,7 +56,7 @@ xfs_page_trace( ...@@ -58,7 +56,7 @@ xfs_page_trace(
bhv_desc_t *bdp; bhv_desc_t *bdp;
vnode_t *vp = LINVFS_GET_VP(inode); vnode_t *vp = LINVFS_GET_VP(inode);
loff_t isize = i_size_read(inode); loff_t isize = i_size_read(inode);
loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; loff_t offset = page_offset(page);
int delalloc = -1, unmapped = -1, unwritten = -1; int delalloc = -1, unmapped = -1, unwritten = -1;
if (page_has_buffers(page)) if (page_has_buffers(page))
...@@ -103,15 +101,56 @@ xfs_finish_ioend( ...@@ -103,15 +101,56 @@ xfs_finish_ioend(
queue_work(xfsdatad_workqueue, &ioend->io_work); queue_work(xfsdatad_workqueue, &ioend->io_work);
} }
/*
* We're now finished for good with this ioend structure.
* Update the page state via the associated buffer_heads,
* release holds on the inode and bio, and finally free
* up memory. Do not use the ioend after this.
*/
STATIC void STATIC void
xfs_destroy_ioend( xfs_destroy_ioend(
xfs_ioend_t *ioend) xfs_ioend_t *ioend)
{ {
struct buffer_head *bh, *next;
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io(bh, ioend->io_uptodate);
}
vn_iowake(ioend->io_vnode); vn_iowake(ioend->io_vnode);
mempool_free(ioend, xfs_ioend_pool); mempool_free(ioend, xfs_ioend_pool);
} }
/* /*
* Buffered IO write completion for delayed allocate extents.
* TODO: Update ondisk isize now that we know the file data
* has been flushed (i.e. the notorious "NULL file" problem).
*/
STATIC void
xfs_end_bio_delalloc(
void *data)
{
xfs_ioend_t *ioend = data;
xfs_destroy_ioend(ioend);
}
/*
* Buffered IO write completion for regular, written extents.
*/
STATIC void
xfs_end_bio_written(
void *data)
{
xfs_ioend_t *ioend = data;
xfs_destroy_ioend(ioend);
}
/*
* IO write completion for unwritten extents.
*
* Issue transactions to convert a buffer range from unwritten * Issue transactions to convert a buffer range from unwritten
* to written extents. * to written extents.
*/ */
...@@ -123,21 +162,10 @@ xfs_end_bio_unwritten( ...@@ -123,21 +162,10 @@ xfs_end_bio_unwritten(
vnode_t *vp = ioend->io_vnode; vnode_t *vp = ioend->io_vnode;
xfs_off_t offset = ioend->io_offset; xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size; size_t size = ioend->io_size;
struct buffer_head *bh, *next;
int error; int error;
if (ioend->io_uptodate) if (ioend->io_uptodate)
VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
/* ioend->io_buffer_head is only non-NULL for buffered I/O */
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io = NULL;
clear_buffer_unwritten(bh);
end_buffer_async_write(bh, ioend->io_uptodate);
}
xfs_destroy_ioend(ioend); xfs_destroy_ioend(ioend);
} }
...@@ -149,7 +177,8 @@ xfs_end_bio_unwritten( ...@@ -149,7 +177,8 @@ xfs_end_bio_unwritten(
*/ */
STATIC xfs_ioend_t * STATIC xfs_ioend_t *
xfs_alloc_ioend( xfs_alloc_ioend(
struct inode *inode) struct inode *inode,
unsigned int type)
{ {
xfs_ioend_t *ioend; xfs_ioend_t *ioend;
...@@ -162,45 +191,25 @@ xfs_alloc_ioend( ...@@ -162,45 +191,25 @@ xfs_alloc_ioend(
*/ */
atomic_set(&ioend->io_remaining, 1); atomic_set(&ioend->io_remaining, 1);
ioend->io_uptodate = 1; /* cleared if any I/O fails */ ioend->io_uptodate = 1; /* cleared if any I/O fails */
ioend->io_list = NULL;
ioend->io_type = type;
ioend->io_vnode = LINVFS_GET_VP(inode); ioend->io_vnode = LINVFS_GET_VP(inode);
ioend->io_buffer_head = NULL; ioend->io_buffer_head = NULL;
ioend->io_buffer_tail = NULL;
atomic_inc(&ioend->io_vnode->v_iocount); atomic_inc(&ioend->io_vnode->v_iocount);
ioend->io_offset = 0; ioend->io_offset = 0;
ioend->io_size = 0; ioend->io_size = 0;
if (type == IOMAP_UNWRITTEN)
INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
else if (type == IOMAP_DELAY)
INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
else
INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
return ioend; return ioend;
} }
void
linvfs_unwritten_done(
struct buffer_head *bh,
int uptodate)
{
xfs_ioend_t *ioend = bh->b_private;
static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
unsigned long flags;
ASSERT(buffer_unwritten(bh));
bh->b_end_io = NULL;
if (!uptodate)
ioend->io_uptodate = 0;
/*
* Deep magic here. We reuse b_private in the buffer_heads to build
* a chain for completing the I/O from user context after we've issued
* a transaction to convert the unwritten extent.
*/
spin_lock_irqsave(&unwritten_done_lock, flags);
bh->b_private = ioend->io_buffer_head;
ioend->io_buffer_head = bh;
spin_unlock_irqrestore(&unwritten_done_lock, flags);
xfs_finish_ioend(ioend);
}
STATIC int STATIC int
xfs_map_blocks( xfs_map_blocks(
struct inode *inode, struct inode *inode,
...@@ -228,7 +237,7 @@ xfs_offset_to_map( ...@@ -228,7 +237,7 @@ xfs_offset_to_map(
xfs_iomap_t *iomapp, xfs_iomap_t *iomapp,
unsigned long offset) unsigned long offset)
{ {
loff_t full_offset; /* offset from start of file */ xfs_off_t full_offset; /* offset from start of file */
ASSERT(offset < PAGE_CACHE_SIZE); ASSERT(offset < PAGE_CACHE_SIZE);
...@@ -243,16 +252,223 @@ xfs_offset_to_map( ...@@ -243,16 +252,223 @@ xfs_offset_to_map(
return NULL; return NULL;
} }
/*
* BIO completion handler for buffered IO.
*/
STATIC int
xfs_end_bio(
struct bio *bio,
unsigned int bytes_done,
int error)
{
xfs_ioend_t *ioend = bio->bi_private;
if (bio->bi_size)
return 1;
ASSERT(ioend);
ASSERT(atomic_read(&bio->bi_cnt) >= 1);
/* Toss bio and pass work off to an xfsdatad thread */
if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
ioend->io_uptodate = 0;
bio->bi_private = NULL;
bio->bi_end_io = NULL;
bio_put(bio);
xfs_finish_ioend(ioend);
return 0;
}
STATIC void
xfs_submit_ioend_bio(
xfs_ioend_t *ioend,
struct bio *bio)
{
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio;
submit_bio(WRITE, bio);
ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
bio_put(bio);
}
STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
struct bio *bio;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
do {
bio = bio_alloc(GFP_NOIO, nvecs);
nvecs >>= 1;
} while (!bio);
ASSERT(bio->bi_private == NULL);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio_get(bio);
return bio;
}
STATIC void
xfs_start_buffer_writeback(
struct buffer_head *bh)
{
ASSERT(buffer_mapped(bh));
ASSERT(buffer_locked(bh));
ASSERT(!buffer_delay(bh));
ASSERT(!buffer_unwritten(bh));
mark_buffer_async_write(bh);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
}
STATIC void
xfs_start_page_writeback(
struct page *page,
struct writeback_control *wbc,
int clear_dirty,
int buffers)
{
ASSERT(PageLocked(page));
ASSERT(!PageWriteback(page));
set_page_writeback(page);
if (clear_dirty)
clear_page_dirty(page);
unlock_page(page);
if (!buffers) {
end_page_writeback(page);
wbc->pages_skipped++; /* We didn't write this page */
}
}
static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}
/*
* Submit all of the bios for all of the ioends we have saved up,
* covering the initial writepage page and also any probed pages.
*/
STATIC void
xfs_submit_ioend(
xfs_ioend_t *ioend)
{
xfs_ioend_t *next;
struct buffer_head *bh;
struct bio *bio;
sector_t lastblock = 0;
do {
next = ioend->io_list;
bio = NULL;
for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
xfs_start_buffer_writeback(bh);
if (!bio) {
retry:
bio = xfs_alloc_ioend_bio(bh);
} else if (bh->b_blocknr != lastblock + 1) {
xfs_submit_ioend_bio(ioend, bio);
goto retry;
}
if (bio_add_buffer(bio, bh) != bh->b_size) {
xfs_submit_ioend_bio(ioend, bio);
goto retry;
}
lastblock = bh->b_blocknr;
}
if (bio)
xfs_submit_ioend_bio(ioend, bio);
xfs_finish_ioend(ioend);
} while ((ioend = next) != NULL);
}
/*
* Cancel submission of all buffer_heads so far in this endio.
* Toss the endio too. Only ever called for the initial page
* in a writepage request, so only ever one page.
*/
STATIC void
xfs_cancel_ioend(
xfs_ioend_t *ioend)
{
xfs_ioend_t *next;
struct buffer_head *bh, *next_bh;
do {
next = ioend->io_list;
bh = ioend->io_buffer_head;
do {
next_bh = bh->b_private;
clear_buffer_async_write(bh);
unlock_buffer(bh);
} while ((bh = next_bh) != NULL);
vn_iowake(ioend->io_vnode);
mempool_free(ioend, xfs_ioend_pool);
} while ((ioend = next) != NULL);
}
/*
* Test to see if we've been building up a completion structure for
* earlier buffers -- if so, we try to append to this ioend if we
* can, otherwise we finish off any current ioend and start another.
* Return true if we've finished the given ioend.
*/
STATIC void
xfs_add_to_ioend(
struct inode *inode,
struct buffer_head *bh,
unsigned int p_offset,
unsigned int type,
xfs_ioend_t **result,
int need_ioend)
{
xfs_ioend_t *ioend = *result;
if (!ioend || need_ioend || type != ioend->io_type) {
xfs_ioend_t *previous = *result;
xfs_off_t offset;
offset = (xfs_off_t)bh->b_page->index << PAGE_CACHE_SHIFT;
offset += p_offset;
ioend = xfs_alloc_ioend(inode, type);
ioend->io_offset = offset;
ioend->io_buffer_head = bh;
ioend->io_buffer_tail = bh;
if (previous)
previous->io_list = ioend;
*result = ioend;
} else {
ioend->io_buffer_tail->b_private = bh;
ioend->io_buffer_tail = bh;
}
bh->b_private = NULL;
ioend->io_size += bh->b_size;
}
STATIC void STATIC void
xfs_map_at_offset( xfs_map_at_offset(
struct page *page, struct page *page,
struct buffer_head *bh, struct buffer_head *bh,
unsigned long offset, unsigned long offset,
int block_bits, int block_bits,
xfs_iomap_t *iomapp) xfs_iomap_t *iomapp,
xfs_ioend_t *ioend)
{ {
xfs_daddr_t bn; xfs_daddr_t bn;
loff_t delta; xfs_off_t delta;
int sector_shift; int sector_shift;
ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
...@@ -276,60 +492,7 @@ xfs_map_at_offset( ...@@ -276,60 +492,7 @@ xfs_map_at_offset(
bh->b_bdev = iomapp->iomap_target->bt_bdev; bh->b_bdev = iomapp->iomap_target->bt_bdev;
set_buffer_mapped(bh); set_buffer_mapped(bh);
clear_buffer_delay(bh); clear_buffer_delay(bh);
} clear_buffer_unwritten(bh);
/*
* Look for a page at index which is unlocked and contains our
* unwritten extent flagged buffers at its head. Returns page
* locked and with an extra reference count, and length of the
* unwritten extent component on this page that we can write,
* in units of filesystem blocks.
*/
STATIC struct page *
xfs_probe_unwritten_page(
struct address_space *mapping,
pgoff_t index,
xfs_iomap_t *iomapp,
xfs_ioend_t *ioend,
unsigned long max_offset,
unsigned long *fsbs,
unsigned int bbits)
{
struct page *page;
page = find_trylock_page(mapping, index);
if (!page)
return NULL;
if (PageWriteback(page))
goto out;
if (page->mapping && page_has_buffers(page)) {
struct buffer_head *bh, *head;
unsigned long p_offset = 0;
*fsbs = 0;
bh = head = page_buffers(page);
do {
if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
break;
if (!xfs_offset_to_map(page, iomapp, p_offset))
break;
if (p_offset >= max_offset)
break;
xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
set_buffer_unwritten_io(bh);
bh->b_private = ioend;
p_offset += bh->b_size;
(*fsbs)++;
} while ((bh = bh->b_this_page) != head);
if (p_offset)
return page;
}
out:
unlock_page(page);
return NULL;
} }
/* /*
...@@ -372,15 +535,16 @@ xfs_probe_unmapped_page( ...@@ -372,15 +535,16 @@ xfs_probe_unmapped_page(
return ret; return ret;
} }
STATIC unsigned int STATIC size_t
xfs_probe_unmapped_cluster( xfs_probe_unmapped_cluster(
struct inode *inode, struct inode *inode,
struct page *startpage, struct page *startpage,
struct buffer_head *bh, struct buffer_head *bh,
struct buffer_head *head) struct buffer_head *head)
{ {
size_t len, total = 0;
pgoff_t tindex, tlast, tloff; pgoff_t tindex, tlast, tloff;
unsigned int pg_offset, len, total = 0; unsigned int pg_offset;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
/* First sum forwards in this page */ /* First sum forwards in this page */
...@@ -414,14 +578,15 @@ xfs_probe_unmapped_cluster( ...@@ -414,14 +578,15 @@ xfs_probe_unmapped_cluster(
} }
/* /*
* Probe for a given page (index) in the inode and test if it is delayed * Probe for a given page (index) in the inode and test if it is suitable
* and without unwritten buffers. Returns page locked and with an extra * for writing as part of an unwritten or delayed allocate extent.
* reference count. * Returns page locked and with an extra reference count if so, else NULL.
*/ */
STATIC struct page * STATIC struct page *
xfs_probe_delalloc_page( xfs_probe_delayed_page(
struct inode *inode, struct inode *inode,
pgoff_t index) pgoff_t index,
unsigned int type)
{ {
struct page *page; struct page *page;
...@@ -437,12 +602,12 @@ xfs_probe_delalloc_page( ...@@ -437,12 +602,12 @@ xfs_probe_delalloc_page(
bh = head = page_buffers(page); bh = head = page_buffers(page);
do { do {
if (buffer_unwritten(bh)) { if (buffer_unwritten(bh))
acceptable = 0; acceptable = (type == IOMAP_UNWRITTEN);
else if (buffer_delay(bh))
acceptable = (type == IOMAP_DELAY);
else
break; break;
} else if (buffer_delay(bh)) {
acceptable = 1;
}
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
if (acceptable) if (acceptable)
...@@ -454,161 +619,30 @@ xfs_probe_delalloc_page( ...@@ -454,161 +619,30 @@ xfs_probe_delalloc_page(
return NULL; return NULL;
} }
STATIC int
xfs_map_unwritten(
struct inode *inode,
struct page *start_page,
struct buffer_head *head,
struct buffer_head *curr,
unsigned long p_offset,
int block_bits,
xfs_iomap_t *iomapp,
struct writeback_control *wbc,
int startio,
int all_bh)
{
struct buffer_head *bh = curr;
xfs_iomap_t *tmp;
xfs_ioend_t *ioend;
loff_t offset;
unsigned long nblocks = 0;
offset = start_page->index;
offset <<= PAGE_CACHE_SHIFT;
offset += p_offset;
ioend = xfs_alloc_ioend(inode);
/* First map forwards in the page consecutive buffers
* covering this unwritten extent
*/
do {
if (!buffer_unwritten(bh))
break;
tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
if (!tmp)
break;
xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
set_buffer_unwritten_io(bh);
bh->b_private = ioend;
p_offset += bh->b_size;
nblocks++;
} while ((bh = bh->b_this_page) != head);
atomic_add(nblocks, &ioend->io_remaining);
/* If we reached the end of the page, map forwards in any
* following pages which are also covered by this extent.
*/
if (bh == head) {
struct address_space *mapping = inode->i_mapping;
pgoff_t tindex, tloff, tlast;
unsigned long bs;
unsigned int pg_offset, bbits = inode->i_blkbits;
struct page *page;
tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
tloff = min(tlast, tloff);
for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
page = xfs_probe_unwritten_page(mapping,
tindex, iomapp, ioend,
PAGE_CACHE_SIZE, &bs, bbits);
if (!page)
break;
nblocks += bs;
atomic_add(bs, &ioend->io_remaining);
xfs_convert_page(inode, page, iomapp, wbc, ioend,
startio, all_bh);
/* stop if converting the next page might add
* enough blocks that the corresponding byte
* count won't fit in our ulong page buf length */
if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
goto enough;
}
if (tindex == tlast &&
(pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
page = xfs_probe_unwritten_page(mapping,
tindex, iomapp, ioend,
pg_offset, &bs, bbits);
if (page) {
nblocks += bs;
atomic_add(bs, &ioend->io_remaining);
xfs_convert_page(inode, page, iomapp, wbc, ioend,
startio, all_bh);
if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
goto enough;
}
}
}
enough:
ioend->io_size = (xfs_off_t)nblocks << block_bits;
ioend->io_offset = offset;
xfs_finish_ioend(ioend);
return 0;
}
STATIC void
xfs_submit_page(
struct page *page,
struct writeback_control *wbc,
struct buffer_head *bh_arr[],
int bh_count,
int probed_page,
int clear_dirty)
{
struct buffer_head *bh;
int i;
BUG_ON(PageWriteback(page));
if (bh_count)
set_page_writeback(page);
if (clear_dirty)
clear_page_dirty(page);
unlock_page(page);
if (bh_count) {
for (i = 0; i < bh_count; i++) {
bh = bh_arr[i];
mark_buffer_async_write(bh);
if (buffer_unwritten(bh))
set_buffer_unwritten_io(bh);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
}
for (i = 0; i < bh_count; i++)
submit_bh(WRITE, bh_arr[i]);
if (probed_page && clear_dirty)
wbc->nr_to_write--; /* Wrote an "extra" page */
}
}
/* /*
* Allocate & map buffers for page given the extent map. Write it out. * Allocate & map buffers for page given the extent map. Write it out.
* except for the original page of a writepage, this is called on * except for the original page of a writepage, this is called on
* delalloc/unwritten pages only, for the original page it is possible * delalloc/unwritten pages only, for the original page it is possible
* that the page has no mapping at all. * that the page has no mapping at all.
*/ */
STATIC void STATIC int
xfs_convert_page( xfs_convert_page(
struct inode *inode, struct inode *inode,
struct page *page, struct page *page,
xfs_iomap_t *iomapp, xfs_iomap_t *iomapp,
xfs_ioend_t **ioendp,
struct writeback_control *wbc, struct writeback_control *wbc,
void *private, void *private,
int startio, int startio,
int all_bh) int all_bh)
{ {
struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; struct buffer_head *bh, *head;
xfs_iomap_t *mp = iomapp, *tmp; xfs_iomap_t *mp = iomapp, *tmp;
unsigned long offset, end_offset; unsigned long p_offset, end_offset;
int index = 0; unsigned int type;
int bbits = inode->i_blkbits; int bbits = inode->i_blkbits;
int len, page_dirty; int len, page_dirty;
int count = 0, done = 0, uptodate = 1;
end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
...@@ -621,59 +655,66 @@ xfs_convert_page( ...@@ -621,59 +655,66 @@ xfs_convert_page(
end_offset = roundup(end_offset, len); end_offset = roundup(end_offset, len);
page_dirty = end_offset / len; page_dirty = end_offset / len;
offset = 0; p_offset = 0;
bh = head = page_buffers(page); bh = head = page_buffers(page);
do { do {
if (offset >= end_offset) if (p_offset >= end_offset)
break; break;
if (!(PageUptodate(page) || buffer_uptodate(bh))) if (!buffer_uptodate(bh))
uptodate = 0;
if (!(PageUptodate(page) || buffer_uptodate(bh))) {
done = 1;
continue; continue;
if (buffer_mapped(bh) && all_bh && }
!(buffer_unwritten(bh) || buffer_delay(bh))) {
if (startio) { if (buffer_unwritten(bh))
type = IOMAP_UNWRITTEN;
else if (buffer_delay(bh))
type = IOMAP_DELAY;
else {
type = 0;
if (!(buffer_mapped(bh) && all_bh && startio)) {
done = 1;
} else if (startio) {
lock_buffer(bh); lock_buffer(bh);
bh_arr[index++] = bh; xfs_add_to_ioend(inode, bh, p_offset,
type, ioendp, done);
count++;
page_dirty--; page_dirty--;
} }
continue; continue;
} }
tmp = xfs_offset_to_map(page, mp, offset); tmp = xfs_offset_to_map(page, mp, p_offset);
if (!tmp) if (!tmp) {
done = 1;
continue; continue;
}
ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
/* If this is a new unwritten extent buffer (i.e. one xfs_map_at_offset(page, bh, p_offset, bbits, tmp, *ioendp);
* that we haven't passed in private data for, we must
* now map this buffer too.
*/
if (buffer_unwritten(bh) && !bh->b_end_io) {
ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
xfs_map_unwritten(inode, page, head, bh, offset,
bbits, tmp, wbc, startio, all_bh);
} else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
xfs_map_at_offset(page, bh, offset, bbits, tmp);
if (buffer_unwritten(bh)) {
set_buffer_unwritten_io(bh);
bh->b_private = private;
ASSERT(private);
}
}
if (startio) { if (startio) {
bh_arr[index++] = bh; xfs_add_to_ioend(inode, bh, p_offset,
type, ioendp, done);
count++;
} else { } else {
set_buffer_dirty(bh); set_buffer_dirty(bh);
unlock_buffer(bh); unlock_buffer(bh);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
} }
page_dirty--; page_dirty--;
} while (offset += len, (bh = bh->b_this_page) != head); } while (p_offset += len, (bh = bh->b_this_page) != head);
if (startio && index) { if (uptodate && bh == head)
xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty); SetPageUptodate(page);
} else {
unlock_page(page); if (startio) {
if (count)
wbc->nr_to_write--;
xfs_start_page_writeback(page, wbc, !page_dirty, count);
} }
return done;
} }
/* /*
...@@ -685,19 +726,22 @@ xfs_cluster_write( ...@@ -685,19 +726,22 @@ xfs_cluster_write(
struct inode *inode, struct inode *inode,
pgoff_t tindex, pgoff_t tindex,
xfs_iomap_t *iomapp, xfs_iomap_t *iomapp,
xfs_ioend_t **ioendp,
struct writeback_control *wbc, struct writeback_control *wbc,
int startio, int startio,
int all_bh, int all_bh,
pgoff_t tlast) pgoff_t tlast)
{ {
struct page *page; struct page *page;
unsigned int type = (*ioendp)->io_type;
int done;
for (; tindex <= tlast; tindex++) { for (done = 0; tindex <= tlast && !done; tindex++) {
page = xfs_probe_delalloc_page(inode, tindex); page = xfs_probe_delayed_page(inode, tindex, type);
if (!page) if (!page)
break; break;
xfs_convert_page(inode, page, iomapp, wbc, NULL, done = xfs_convert_page(inode, page, iomapp, ioendp,
startio, all_bh); wbc, NULL, startio, all_bh);
} }
} }
...@@ -728,18 +772,21 @@ xfs_page_state_convert( ...@@ -728,18 +772,21 @@ xfs_page_state_convert(
int startio, int startio,
int unmapped) /* also implies page uptodate */ int unmapped) /* also implies page uptodate */
{ {
struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; struct buffer_head *bh, *head;
xfs_iomap_t *iomp, iomap; xfs_iomap_t *iomp, iomap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset; loff_t offset;
unsigned long p_offset = 0; unsigned long p_offset = 0;
unsigned int type;
__uint64_t end_offset; __uint64_t end_offset;
pgoff_t end_index, last_index, tlast; pgoff_t end_index, last_index, tlast;
int len, err, i, cnt = 0, uptodate = 1; int flags, len, err, done = 1;
int flags; int uptodate = 1;
int page_dirty; int page_dirty, count = 0, trylock_flag = 0;
/* wait for other IO threads? */ /* wait for other IO threads? */
flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; if (startio && wbc->sync_mode != WB_SYNC_NONE)
trylock_flag |= BMAPI_TRYLOCK;
/* Is this page beyond the end of the file? */ /* Is this page beyond the end of the file? */
offset = i_size_read(inode); offset = i_size_read(inode);
...@@ -754,98 +801,98 @@ xfs_page_state_convert( ...@@ -754,98 +801,98 @@ xfs_page_state_convert(
} }
} }
end_offset = min_t(unsigned long long,
(loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
/* /*
* page_dirty is initially a count of buffers on the page before * page_dirty is initially a count of buffers on the page before
* EOF and is decrememted as we move each into a cleanable state. * EOF and is decrememted as we move each into a cleanable state.
*
* Derivation:
*
* End offset is the highest offset that this page should represent.
* If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
* will evaluate non-zero and be less than PAGE_CACHE_SIZE and
* hence give us the correct page_dirty count. On any other page,
* it will be zero and in that case we need page_dirty to be the
* count of buffers on the page.
*/ */
end_offset = min_t(unsigned long long,
(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
len = 1 << inode->i_blkbits; len = 1 << inode->i_blkbits;
p_offset = max(p_offset, PAGE_CACHE_SIZE); p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
p_offset = roundup(p_offset, len); PAGE_CACHE_SIZE);
p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
page_dirty = p_offset / len; page_dirty = p_offset / len;
iomp = NULL; iomp = NULL;
p_offset = 0;
bh = head = page_buffers(page); bh = head = page_buffers(page);
offset = page_offset(page);
/* TODO: fix up "done" variable and iomap pointer (boolean) */
/* TODO: cleanup count and page_dirty */
do { do {
if (offset >= end_offset) if (offset >= end_offset)
break; break;
if (!buffer_uptodate(bh)) if (!buffer_uptodate(bh))
uptodate = 0; uptodate = 0;
if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
done = 1;
continue; continue;
}
if (iomp) { if (iomp) {
iomp = xfs_offset_to_map(page, &iomap, p_offset); iomp = xfs_offset_to_map(page, &iomap, p_offset);
done = (iomp == NULL);
} }
/* /*
* First case, map an unwritten extent and prepare for * First case, map an unwritten extent and prepare for
* extent state conversion transaction on completion. * extent state conversion transaction on completion.
*
* Second case, allocate space for a delalloc buffer.
* We can return EAGAIN here in the release page case.
*/ */
if (buffer_unwritten(bh) || buffer_delay(bh)) {
if (buffer_unwritten(bh)) { if (buffer_unwritten(bh)) {
if (!startio) type = IOMAP_UNWRITTEN;
continue; flags = BMAPI_WRITE|BMAPI_IGNSTATE;
if (!iomp) {
err = xfs_map_blocks(inode, offset, len, &iomap,
BMAPI_WRITE|BMAPI_IGNSTATE);
if (err) {
goto error;
}
iomp = xfs_offset_to_map(page, &iomap,
p_offset);
}
if (iomp) {
if (!bh->b_end_io) {
err = xfs_map_unwritten(inode, page,
head, bh, p_offset,
inode->i_blkbits, iomp,
wbc, startio, unmapped);
if (err) {
goto error;
}
} else { } else {
set_bit(BH_Lock, &bh->b_state); type = IOMAP_DELAY;
} flags = BMAPI_ALLOCATE;
BUG_ON(!buffer_locked(bh)); if (!startio)
bh_arr[cnt++] = bh; flags |= trylock_flag;
page_dirty--;
} }
/*
* Second case, allocate space for a delalloc buffer.
* We can return EAGAIN here in the release page case.
*/
} else if (buffer_delay(bh)) {
if (!iomp) { if (!iomp) {
done = 1;
err = xfs_map_blocks(inode, offset, len, &iomap, err = xfs_map_blocks(inode, offset, len, &iomap,
BMAPI_ALLOCATE | flags); flags);
if (err) { if (err)
goto error; goto error;
}
iomp = xfs_offset_to_map(page, &iomap, iomp = xfs_offset_to_map(page, &iomap,
p_offset); p_offset);
done = (iomp == NULL);
} }
if (iomp) { if (iomp) {
xfs_map_at_offset(page, bh, p_offset, xfs_map_at_offset(page, bh, p_offset,
inode->i_blkbits, iomp); inode->i_blkbits, iomp, ioend);
if (startio) { if (startio) {
bh_arr[cnt++] = bh; xfs_add_to_ioend(inode, bh, p_offset,
type, &ioend, done);
} else { } else {
set_buffer_dirty(bh); set_buffer_dirty(bh);
unlock_buffer(bh); unlock_buffer(bh);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
} }
page_dirty--; page_dirty--;
count++;
} else {
done = 1;
} }
} else if ((buffer_uptodate(bh) || PageUptodate(page)) && } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
(unmapped || startio)) { (unmapped || startio)) {
type = 0;
if (!buffer_mapped(bh)) { if (!buffer_mapped(bh)) {
int size;
/* /*
* Getting here implies an unmapped buffer * Getting here implies an unmapped buffer
...@@ -853,6 +900,8 @@ xfs_page_state_convert( ...@@ -853,6 +900,8 @@ xfs_page_state_convert(
* need to write the whole page out. * need to write the whole page out.
*/ */
if (!iomp) { if (!iomp) {
int size;
size = xfs_probe_unmapped_cluster( size = xfs_probe_unmapped_cluster(
inode, page, bh, head); inode, page, bh, head);
err = xfs_map_blocks(inode, offset, err = xfs_map_blocks(inode, offset,
...@@ -863,52 +912,70 @@ xfs_page_state_convert( ...@@ -863,52 +912,70 @@ xfs_page_state_convert(
} }
iomp = xfs_offset_to_map(page, &iomap, iomp = xfs_offset_to_map(page, &iomap,
p_offset); p_offset);
done = (iomp == NULL);
} }
if (iomp) { if (iomp) {
xfs_map_at_offset(page, xfs_map_at_offset(page, bh, p_offset,
bh, p_offset, inode->i_blkbits, iomp,
inode->i_blkbits, iomp); ioend);
if (startio) { if (startio) {
bh_arr[cnt++] = bh; xfs_add_to_ioend(inode,
bh, p_offset, type,
&ioend, done);
} else { } else {
set_buffer_dirty(bh); set_buffer_dirty(bh);
unlock_buffer(bh); unlock_buffer(bh);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
} }
page_dirty--; page_dirty--;
count++;
} else {
done = 1;
} }
} else if (startio) { } else if (startio) {
if (buffer_uptodate(bh) && if (buffer_uptodate(bh) &&
!test_and_set_bit(BH_Lock, &bh->b_state)) { !test_and_set_bit(BH_Lock, &bh->b_state)) {
bh_arr[cnt++] = bh; ASSERT(buffer_mapped(bh));
xfs_add_to_ioend(inode,
bh, p_offset, type,
&ioend, done);
page_dirty--; page_dirty--;
count++;
} else {
done = 1;
} }
} else {
done = 1;
} }
} }
} while (offset += len, p_offset += len,
((bh = bh->b_this_page) != head)); if (!iohead)
iohead = ioend;
} while (offset += len, ((bh = bh->b_this_page) != head));
if (uptodate && bh == head) if (uptodate && bh == head)
SetPageUptodate(page); SetPageUptodate(page);
if (startio) { if (startio)
xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); xfs_start_page_writeback(page, wbc, 1, count);
}
if (iomp) { if (ioend && iomp && !done) {
offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
PAGE_CACHE_SHIFT; PAGE_CACHE_SHIFT;
tlast = min_t(pgoff_t, offset, last_index); tlast = min_t(pgoff_t, offset, last_index);
xfs_cluster_write(inode, page->index + 1, iomp, wbc, xfs_cluster_write(inode, page->index + 1, iomp, &ioend,
startio, unmapped, tlast); wbc, startio, unmapped, tlast);
} }
if (iohead)
xfs_submit_ioend(iohead);
return page_dirty; return page_dirty;
error: error:
for (i = 0; i < cnt; i++) { if (iohead)
unlock_buffer(bh_arr[i]); xfs_cancel_ioend(iohead);
}
/* /*
* If it's delalloc and we have nowhere to put it, * If it's delalloc and we have nowhere to put it,
...@@ -916,9 +983,8 @@ xfs_page_state_convert( ...@@ -916,9 +983,8 @@ xfs_page_state_convert(
* us to try again. * us to try again.
*/ */
if (err != -EAGAIN) { if (err != -EAGAIN) {
if (!unmapped) { if (!unmapped)
block_invalidatepage(page, 0); block_invalidatepage(page, 0);
}
ClearPageUptodate(page); ClearPageUptodate(page);
} }
return err; return err;
...@@ -1094,7 +1160,7 @@ linvfs_direct_IO( ...@@ -1094,7 +1160,7 @@ linvfs_direct_IO(
if (error) if (error)
return -error; return -error;
iocb->private = xfs_alloc_ioend(inode); iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
ret = blockdev_direct_IO_own_locking(rw, iocb, inode, ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
iomap.iomap_target->bt_bdev, iomap.iomap_target->bt_bdev,
......
...@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool; ...@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
typedef void (*xfs_ioend_func_t)(void *); typedef void (*xfs_ioend_func_t)(void *);
/*
* xfs_ioend struct manages large extent writes for XFS.
* It can manage several multi-page bio's at once.
*/
typedef struct xfs_ioend { typedef struct xfs_ioend {
struct xfs_ioend *io_list; /* next ioend in chain */
unsigned int io_type; /* delalloc / unwritten */
unsigned int io_uptodate; /* I/O status register */ unsigned int io_uptodate; /* I/O status register */
atomic_t io_remaining; /* hold count */ atomic_t io_remaining; /* hold count */
struct vnode *io_vnode; /* file being written to */ struct vnode *io_vnode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
size_t io_size; /* size of the extent */ size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */ xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */ struct work_struct io_work; /* xfsdatad work queue */
} xfs_ioend_t; } xfs_ioend_t;
extern struct address_space_operations linvfs_aops;
extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
#endif /* __XFS_IOPS_H__ */ #endif /* __XFS_IOPS_H__ */
...@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations; ...@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
extern struct file_operations linvfs_invis_file_operations; extern struct file_operations linvfs_invis_file_operations;
extern struct file_operations linvfs_dir_operations; extern struct file_operations linvfs_dir_operations;
extern struct address_space_operations linvfs_aops;
extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void linvfs_unwritten_done(struct buffer_head *, int);
extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
int, unsigned int, void __user *); int, unsigned int, void __user *);
......
...@@ -110,10 +110,6 @@ ...@@ -110,10 +110,6 @@
* delalloc and these ondisk-uninitialised buffers. * delalloc and these ondisk-uninitialised buffers.
*/ */
BUFFER_FNS(PrivateStart, unwritten); BUFFER_FNS(PrivateStart, unwritten);
static inline void set_buffer_unwritten_io(struct buffer_head *bh)
{
bh->b_end_io = linvfs_unwritten_done;
}
#define restricted_chown xfs_params.restrict_chown.val #define restricted_chown xfs_params.restrict_chown.val
#define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_sgid_inherit xfs_params.sgid_inherit.val
......