Commit 3198c19a authored by Andrew Morton, committed by Linus Torvalds

[PATCH] reiserfs direct-IO support

From: Oleg Drokin <green@namesys.com>

This patch implements DirectIO support for reiserfs v3.  This is mostly a
port from 2.4.

Thanks to Mingming Cao from IBM for some clues in porting.
parent f261ecb2
...@@ -485,6 +485,11 @@ int reiserfs_allocate_blocks_for_region( ...@@ -485,6 +485,11 @@ int reiserfs_allocate_blocks_for_region(
/* Now the final thing, if we have grew the file, we must update it's size*/ /* Now the final thing, if we have grew the file, we must update it's size*/
if ( pos + write_bytes > inode->i_size) { if ( pos + write_bytes > inode->i_size) {
inode->i_size = pos + write_bytes; // Set new size inode->i_size = pos + write_bytes; // Set new size
/* If the file have grown so much that tail packing is no longer possible, reset
"need to pack" flag */
if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) ||
(have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
} }
/* Amount of on-disk blocks used by file have changed, update it */ /* Amount of on-disk blocks used by file have changed, update it */
...@@ -999,9 +1004,41 @@ ssize_t reiserfs_file_write( struct file *file, /* the file we are going to writ ...@@ -999,9 +1004,41 @@ ssize_t reiserfs_file_write( struct file *file, /* the file we are going to writ
struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
/* To simplify coding at this time, we store /* To simplify coding at this time, we store
locked pages in array for now */ locked pages in array for now */
if ( count <= PAGE_CACHE_SIZE || file->f_flags & O_DIRECT) if ( count <= PAGE_CACHE_SIZE )
return generic_file_write(file, buf, count, ppos); return generic_file_write(file, buf, count, ppos);
if ( file->f_flags & O_DIRECT) { // Direct IO needs some special threating.
int result, after_file_end = 0;
if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) {
/* If we are appending a file, we need to put this savelink in here.
If we will crash while doing direct io, finish_unfinished will
cut the garbage from the file end. */
struct reiserfs_transaction_handle th;
reiserfs_write_lock(inode->i_sb);
journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
reiserfs_update_inode_transaction(inode);
add_save_link (&th, inode, 1 /* Truncate */);
journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
reiserfs_write_unlock(inode->i_sb);
after_file_end = 1;
}
result = generic_file_write(file, buf, count, ppos);
if ( after_file_end ) { /* Now update i_size and remove the savelink */
struct reiserfs_transaction_handle th;
reiserfs_write_lock(inode->i_sb);
journal_begin(&th, inode->i_sb, 1);
reiserfs_update_inode_transaction(inode);
reiserfs_update_sd(&th, inode);
journal_end(&th, inode->i_sb, 1);
remove_save_link (inode, 1/* truncate */);
reiserfs_write_unlock(inode->i_sb);
}
return result;
}
if ( unlikely((ssize_t) count < 0 )) if ( unlikely((ssize_t) count < 0 ))
return -EINVAL; return -EINVAL;
......
...@@ -306,7 +306,7 @@ static int _get_block_create_0 (struct inode * inode, long block, ...@@ -306,7 +306,7 @@ static int _get_block_create_0 (struct inode * inode, long block,
** read old data off disk. Set the up to date bit on the buffer instead ** read old data off disk. Set the up to date bit on the buffer instead
** and jump to the end ** and jump to the end
*/ */
if (PageUptodate(bh_result->b_page)) { if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
set_buffer_uptodate(bh_result); set_buffer_uptodate(bh_result);
goto finished ; goto finished ;
} }
...@@ -420,6 +420,45 @@ static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, ...@@ -420,6 +420,45 @@ static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block,
return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
} }
/*
 * Block-mapping callback used for direct-IO requests (it is the routine
 * passed to blockdev_direct_IO()).  It prepares bh_result and then lets
 * reiserfs_get_block() do the actual mapping of block "iblock".
 *
 * max_blocks is accepted as part of the callback signature but is not
 * consulted here.
 *
 * Returns the result of reiserfs_get_block(), except that a successful
 * mapping whose b_blocknr is 0 is undone and reported as -EINVAL, so
 * that direct io onto tail pages is refused.
 */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 unsigned long max_blocks,
					 struct buffer_head *bh_result,
					 int create)
{
	int status;

	/* No page is attached to a buffer head used for direct IO. */
	bh_result->b_page = NULL;

	/*
	 * b_size has to be filled in before reiserfs_get_block() is
	 * called: convert_tail_for_hole(), which may be reached from
	 * there, reads it.
	 */
	bh_result->b_size = (1 << inode->i_blkbits);

	status = reiserfs_get_block(inode, iblock, bh_result, create);

	if (status == 0) {
		/* don't allow direct io onto tail pages */
		if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
			/*
			 * Unmap the buffer so that later direct-IO calls
			 * for this file offset fail as well.
			 */
			reiserfs_unmap_buffer(bh_result);
			status = -EINVAL;
		}
	}

	/*
	 * The file may carry an unpacked tail.  Flush the data before the
	 * pages have disappeared, then drop the "pack on close" mark.
	 */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		lock_kernel();
		reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		unlock_kernel();
	}

	return status;
}
/* /*
** helper function for when reiserfs_get_block is called for a hole ** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item ** but the file tail is still in a direct item
...@@ -448,7 +487,10 @@ static int convert_tail_for_hole(struct inode *inode, ...@@ -448,7 +487,10 @@ static int convert_tail_for_hole(struct inode *inode,
tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
index = tail_offset >> PAGE_CACHE_SHIFT ; index = tail_offset >> PAGE_CACHE_SHIFT ;
if (index != hole_page->index) { /* hole_page can be zero in case of direct_io, we are sure
that we cannot get here if we write with O_DIRECT into
tail page */
if (!hole_page || index != hole_page->index) {
tail_page = grab_cache_page(inode->i_mapping, index) ; tail_page = grab_cache_page(inode->i_mapping, index) ;
retval = -ENOMEM; retval = -ENOMEM;
if (!tail_page) { if (!tail_page) {
...@@ -554,7 +596,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -554,7 +596,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
return ret; return ret;
} }
REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; /* If file is of such a size, that it might have a tail and tails are enabled
** we should mark it as possibly needing tail packing on close
*/
if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) ||
(have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) )
REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
windex = push_journal_writer("reiserfs_get_block") ; windex = push_journal_writer("reiserfs_get_block") ;
...@@ -745,21 +792,26 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -745,21 +792,26 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
*/ */
set_buffer_uptodate (unbh); set_buffer_uptodate (unbh);
/* we've converted the tail, so we must /* unbh->b_page == NULL in case of DIRECT_IO request, this means
** flush unbh before the transaction commits buffer will disappear shortly, so it should not be added to
*/
add_to_flushlist(inode, unbh) ;
/* mark it dirty now to prevent commit_write from adding
** this buffer to the inode's dirty buffer list
*/ */
if ( unbh->b_page ) {
/* we've converted the tail, so we must
** flush unbh before the transaction commits
*/
add_to_flushlist(inode, unbh) ;
/* mark it dirty now to prevent commit_write from adding
** this buffer to the inode's dirty buffer list
*/
/* /*
* AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
* It's still atomic, but it sets the page dirty too, * It's still atomic, but it sets the page dirty too,
* which makes it eligible for writeback at any time by the * which makes it eligible for writeback at any time by the
* VM (which was also the case with __mark_buffer_dirty()) * VM (which was also the case with __mark_buffer_dirty())
*/ */
mark_buffer_dirty(unbh) ; mark_buffer_dirty(unbh) ;
}
//inode->i_blocks += inode->i_sb->s_blocksize / 512; //inode->i_blocks += inode->i_sb->s_blocksize / 512;
//mark_tail_converted (inode); //mark_tail_converted (inode);
...@@ -2204,6 +2256,13 @@ static int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2204,6 +2256,13 @@ static int reiserfs_commit_write(struct file *f, struct page *page,
if (pos > inode->i_size) { if (pos > inode->i_size) {
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
reiserfs_write_lock(inode->i_sb); reiserfs_write_lock(inode->i_sb);
/* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail
packing */
if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) ||
(have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
journal_begin(&th, inode->i_sb, 1) ; journal_begin(&th, inode->i_sb, 1) ;
reiserfs_update_inode_transaction(inode) ; reiserfs_update_inode_transaction(inode) ;
inode->i_size = pos ; inode->i_size = pos ;
...@@ -2310,6 +2369,19 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) ...@@ -2310,6 +2369,19 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
return ret ; return ret ;
} }
/*
 * direct_IO address-space operation for reiserfs.
 *
 * Looks up the inode that hosts the mapping of the file behind "iocb"
 * and passes the request on to blockdev_direct_IO(), using
 * reiserfs_get_blocks_direct_io as the block-mapping callback and NULL
 * for the final argument.  The value returned by blockdev_direct_IO()
 * is handed back unchanged.
 *
 * We thank Mingming Cao for helping us understand in great detail what
 * to do in this section of the code.
 */
static int reiserfs_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *filp = iocb->ki_filp;
	struct inode *host_inode = filp->f_dentry->d_inode->i_mapping->host;

	return blockdev_direct_IO(rw, iocb, host_inode,
				  host_inode->i_sb->s_bdev, iov, offset,
				  nr_segs, reiserfs_get_blocks_direct_io,
				  NULL);
}
struct address_space_operations reiserfs_address_space_operations = { struct address_space_operations reiserfs_address_space_operations = {
.writepage = reiserfs_writepage, .writepage = reiserfs_writepage,
.readpage = reiserfs_readpage, .readpage = reiserfs_readpage,
...@@ -2318,5 +2390,6 @@ struct address_space_operations reiserfs_address_space_operations = { ...@@ -2318,5 +2390,6 @@ struct address_space_operations reiserfs_address_space_operations = {
.sync_page = block_sync_page, .sync_page = block_sync_page,
.prepare_write = reiserfs_prepare_write, .prepare_write = reiserfs_prepare_write,
.commit_write = reiserfs_commit_write, .commit_write = reiserfs_commit_write,
.bmap = reiserfs_aop_bmap .bmap = reiserfs_aop_bmap,
.direct_IO = reiserfs_direct_IO
} ; } ;
...@@ -104,8 +104,10 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod ...@@ -104,8 +104,10 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod
/* we only send the unbh pointer if the buffer is not up to date. /* we only send the unbh pointer if the buffer is not up to date.
** this avoids overwriting good data from writepage() with old data ** this avoids overwriting good data from writepage() with old data
** from the disk or buffer cache ** from the disk or buffer cache
** Special case: unbh->b_page will be NULL if we are coming through
** DIRECT_IO handler here.
*/ */
if (buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) { if (!unbh->b_page || buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) {
up_to_date_bh = NULL ; up_to_date_bh = NULL ;
} else { } else {
up_to_date_bh = unbh ; up_to_date_bh = unbh ;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment