Commit 124d8831 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] generic_file_write() cleanup

Fixes all the goto spaghetti in generic_file_write() and turns it into
something which humans can understand.

Andi tells me that gcc3 does a decent job of relocating blocks out of
line anyway.  This patch gives the compiler a helping hand with
appropriate use of likely() and unlikely().
parent fd6dee02
...@@ -2074,48 +2074,43 @@ inline void remove_suid(struct dentry *dentry) ...@@ -2074,48 +2074,43 @@ inline void remove_suid(struct dentry *dentry)
/* /*
* Write to a file through the page cache. * Write to a file through the page cache.
* *
* We currently put everything into the page cache prior to writing it. * We put everything into the page cache prior to writing it. This is not a
* This is not a problem when writing full pages. With partial pages, * problem when writing full pages. With partial pages, however, we first have
* however, we first have to read the data into the cache, then * to read the data into the cache, then dirty the page, and finally schedule
* dirty the page, and finally schedule it for writing. Alternatively, we * it for writing by marking it dirty.
* could write-through just the portion of data that would go into that
* page, but that would kill performance for applications that write data
* line by line, and it's prone to race conditions.
*
* Note that this routine doesn't try to keep track of dirty pages. Each
* file system has to do this all by itself, unfortunately.
* okir@monad.swb.de * okir@monad.swb.de
*/ */
ssize_t ssize_t
generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) generic_file_write(struct file *file, const char *buf,
size_t count, loff_t *ppos)
{ {
struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct address_space * mapping = file->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host; struct address_space_operations *a_ops = mapping->a_ops;
struct inode *inode = mapping->host;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
long status = 0;
loff_t pos; loff_t pos;
struct page *page, *cached_page; struct page *page;
struct page *cached_page = NULL;
ssize_t written; ssize_t written;
long status = 0;
int err; int err;
unsigned bytes; unsigned bytes;
if ((ssize_t) count < 0) if (unlikely((ssize_t) count < 0))
return -EINVAL; return -EINVAL;
if (!access_ok(VERIFY_READ, buf, count)) if (unlikely(!access_ok(VERIFY_READ, buf, count)))
return -EFAULT; return -EFAULT;
cached_page = NULL;
down(&inode->i_sem); down(&inode->i_sem);
pos = *ppos; pos = *ppos;
err = -EINVAL; if (unlikely(pos < 0)) {
if (pos < 0) err = -EINVAL;
goto out; goto out;
}
err = file->f_error; if (unlikely(file->f_error)) {
if (err) { err = file->f_error;
file->f_error = 0; file->f_error = 0;
goto out; goto out;
} }
...@@ -2129,11 +2124,10 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2129,11 +2124,10 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
/* /*
* Check whether we've reached the file size limit. * Check whether we've reached the file size limit.
*/ */
err = -EFBIG; if (unlikely(limit != RLIM_INFINITY)) {
if (limit != RLIM_INFINITY) {
if (pos >= limit) { if (pos >= limit) {
send_sig(SIGXFSZ, current, 0); send_sig(SIGXFSZ, current, 0);
err = -EFBIG;
goto out; goto out;
} }
if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) { if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
...@@ -2143,11 +2137,13 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2143,11 +2137,13 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
} }
/* /*
* LFS rule * LFS rule
*/ */
if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { if (unlikely(pos + count > MAX_NON_LFS &&
!(file->f_flags & O_LARGEFILE))) {
if (pos >= MAX_NON_LFS) { if (pos >= MAX_NON_LFS) {
send_sig(SIGXFSZ, current, 0); send_sig(SIGXFSZ, current, 0);
err = -EFBIG;
goto out; goto out;
} }
if (count > MAX_NON_LFS - (u32)pos) { if (count > MAX_NON_LFS - (u32)pos) {
...@@ -2157,18 +2153,14 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2157,18 +2153,14 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
} }
/* /*
* Are we about to exceed the fs block limit ? * Are we about to exceed the fs block limit ?
*
* If we have written data it becomes a short write
* If we have exceeded without writing data we send
* a signal and give them an EFBIG.
* *
* Linus frestrict idea will clean these up nicely.. * If we have written data it becomes a short write. If we have
* exceeded without writing data we send a signal and return EFBIG.
* Linus frestrict idea will clean these up nicely..
*/ */
if (likely(!S_ISBLK(inode->i_mode))) {
if (!S_ISBLK(inode->i_mode)) { if (unlikely(pos >= inode->i_sb->s_maxbytes)) {
if (pos >= inode->i_sb->s_maxbytes)
{
if (count || pos > inode->i_sb->s_maxbytes) { if (count || pos > inode->i_sb->s_maxbytes) {
send_sig(SIGXFSZ, current, 0); send_sig(SIGXFSZ, current, 0);
err = -EFBIG; err = -EFBIG;
...@@ -2177,7 +2169,7 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2177,7 +2169,7 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
/* zero-length writes at ->s_maxbytes are OK */ /* zero-length writes at ->s_maxbytes are OK */
} }
if (pos + count > inode->i_sb->s_maxbytes) if (unlikely(pos + count > inode->i_sb->s_maxbytes))
count = inode->i_sb->s_maxbytes - pos; count = inode->i_sb->s_maxbytes - pos;
} else { } else {
if (is_read_only(inode->i_rdev)) { if (is_read_only(inode->i_rdev)) {
...@@ -2200,21 +2192,37 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2200,21 +2192,37 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
goto out; goto out;
remove_suid(file->f_dentry); remove_suid(file->f_dentry);
inode->i_ctime = inode->i_mtime = CURRENT_TIME; inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
mark_inode_dirty_sync(inode); mark_inode_dirty_sync(inode);
if (file->f_flags & O_DIRECT) if (unlikely(file->f_flags & O_DIRECT)) {
goto o_direct; written = generic_file_direct_IO(WRITE, file,
(char *) buf, count, pos);
if (written > 0) {
loff_t end = pos + written;
if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
inode->i_size = end;
mark_inode_dirty(inode);
}
*ppos = end;
invalidate_inode_pages2(mapping);
}
/*
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
*/
if (written >= 0 && file->f_flags & O_SYNC)
status = generic_osync_inode(inode, OSYNC_METADATA);
goto out_status;
}
do { do {
unsigned long index, offset; unsigned long index;
unsigned long offset;
long page_fault; long page_fault;
char *kaddr; char *kaddr;
/*
* Try to find the page in the cache. If it isn't there,
* allocate a free page.
*/
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
bytes = PAGE_CACHE_SIZE - offset; bytes = PAGE_CACHE_SIZE - offset;
...@@ -2232,96 +2240,67 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos) ...@@ -2232,96 +2240,67 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
__get_user(dummy, buf+bytes-1); __get_user(dummy, buf+bytes-1);
} }
status = -ENOMEM; /* we'll assign it later anyway */
page = __grab_cache_page(mapping, index, &cached_page); page = __grab_cache_page(mapping, index, &cached_page);
if (!page) if (!page) {
status = -ENOMEM;
break; break;
/* We have exclusive IO access to the page.. */
if (!PageLocked(page)) {
PAGE_BUG(page);
} }
kaddr = kmap(page); kaddr = kmap(page);
status = mapping->a_ops->prepare_write(file, page, offset, offset+bytes); status = a_ops->prepare_write(file, page, offset, offset+bytes);
if (status) if (unlikely(status)) {
goto sync_failure; /*
page_fault = __copy_from_user(kaddr+offset, buf, bytes); * prepare_write() may have instantiated a few blocks
* outside i_size. Trim these off again.
*/
kunmap(page);
unlock_page(page);
page_cache_release(page);
if (pos + bytes > inode->i_size)
vmtruncate(inode, inode->i_size);
break;
}
page_fault = __copy_from_user(kaddr + offset, buf, bytes);
flush_dcache_page(page); flush_dcache_page(page);
status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); status = a_ops->commit_write(file, page, offset, offset+bytes);
if (page_fault) if (unlikely(page_fault)) {
goto fail_write; status = -EFAULT;
if (!status) } else {
status = bytes; if (!status)
status = bytes;
if (status >= 0) {
written += status; if (status >= 0) {
count -= status; written += status;
pos += status; count -= status;
buf += status; pos += status;
buf += status;
}
} }
unlock:
kunmap(page); kunmap(page);
/* Mark it unlocked again and drop the page.. */
SetPageReferenced(page); SetPageReferenced(page);
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
if (status < 0) if (status < 0)
break; break;
balance_dirty_pages_ratelimited(mapping); balance_dirty_pages_ratelimited(mapping);
} while (count); } while (count);
done:
*ppos = pos; *ppos = pos;
if (cached_page) if (cached_page)
page_cache_release(cached_page); page_cache_release(cached_page);
/* For now, when the user asks for O_SYNC, we'll actually /*
* provide O_DSYNC. */ * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
*/
if (status >= 0) { if (status >= 0) {
if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA); status = generic_osync_inode(inode,
OSYNC_METADATA|OSYNC_DATA);
} }
out_status: out_status:
err = written ? written : status; err = written ? written : status;
out: out:
up(&inode->i_sem); up(&inode->i_sem);
return err; return err;
fail_write:
status = -EFAULT;
goto unlock;
sync_failure:
/*
* If blocksize < pagesize, prepare_write() may have instantiated a
* few blocks outside i_size. Trim these off again.
*/
kunmap(page);
unlock_page(page);
page_cache_release(page);
if (pos + bytes > inode->i_size)
vmtruncate(inode, inode->i_size);
goto done;
o_direct:
written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
if (written > 0) {
loff_t end = pos + written;
if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
inode->i_size = end;
mark_inode_dirty(inode);
}
*ppos = end;
invalidate_inode_pages2(mapping);
}
/*
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
*/
if (written >= 0 && file->f_flags & O_SYNC)
status = generic_osync_inode(inode, OSYNC_METADATA);
goto out_status;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment