Commit af61a2bd authored by Linus Torvalds

Merge master.kernel.org:/home/hch/BK/xfs/linux-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 91af0978 ed7fa26b
......@@ -659,8 +659,7 @@ void buffer_insert_list(spinlock_t *lock,
struct buffer_head *bh, struct list_head *list)
{
spin_lock(lock);
list_del(&bh->b_assoc_buffers);
list_add(&bh->b_assoc_buffers, list);
list_move_tail(&bh->b_assoc_buffers, list);
spin_unlock(lock);
}
......
......@@ -53,12 +53,15 @@ map_blocks(
count = max_t(ssize_t, count, XFS_WRITE_IO_LOG);
retry:
VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
if (flags & PBF_WRITE) {
if (unlikely((flags & PBF_DIRECT) && nmaps &&
(pbmapp->pbm_flags & PBMF_DELAY))) {
flags = PBF_FILE_ALLOCATE;
goto retry;
}
if (error == EAGAIN)
return -error;
if (unlikely((flags & (PBF_WRITE|PBF_DIRECT)) ==
(PBF_WRITE|PBF_DIRECT) && nmaps &&
(pbmapp->pbm_flags & PBMF_DELAY))) {
flags = PBF_FILE_ALLOCATE;
goto retry;
}
if (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
VMODIFY(vp);
}
return -error;
......@@ -309,6 +312,7 @@ convert_page(
if (startio && (offset < end)) {
bh_arr[index++] = bh;
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
}
} while (i++, (bh = bh->b_this_page) != head);
......@@ -365,9 +369,9 @@ cluster_write(
STATIC int
delalloc_convert(
struct page *page,
int startio,
int allocate_space)
struct page *page,
int startio,
int unmapped) /* also implies page uptodate */
{
struct inode *inode = page->mapping->host;
struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
......@@ -375,6 +379,9 @@ delalloc_convert(
unsigned long p_offset = 0, end_index;
loff_t offset, end_offset;
int len, err, i, cnt = 0, uptodate = 1;
int flags = startio ? 0 : PBF_TRYLOCK;
int page_dirty = 1;
/* Are we off the end of the file ? */
end_index = inode->i_size >> PAGE_CACHE_SHIFT;
......@@ -390,9 +397,6 @@ delalloc_convert(
if (end_offset > inode->i_size)
end_offset = inode->i_size;
if (startio && !page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
bh = head = page_buffers(page);
mp = NULL;
......@@ -406,10 +410,14 @@ delalloc_convert(
mp = match_offset_to_mapping(page, &map, p_offset);
}
/*
* First case, allocate space for delalloc buffer head
* we can return EAGAIN here in the release page case.
*/
if (buffer_delay(bh)) {
if (!mp) {
err = map_blocks(inode, offset, len, &map,
PBF_FILE_ALLOCATE);
PBF_FILE_ALLOCATE | flags);
if (err) {
goto error;
}
......@@ -422,14 +430,17 @@ delalloc_convert(
if (startio) {
bh_arr[cnt++] = bh;
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
}
page_dirty = 0;
}
} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
(allocate_space || startio)) {
(unmapped || startio)) {
if (!buffer_mapped(bh)) {
int size;
/*
* Getting here implies an unmapped buffer
* was found, and we are in a path where we
......@@ -454,13 +465,16 @@ delalloc_convert(
if (startio) {
bh_arr[cnt++] = bh;
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
}
page_dirty = 0;
}
} else if (startio && buffer_mapped(bh)) {
if (buffer_uptodate(bh) && allocate_space) {
} else if (startio) {
if (buffer_uptodate(bh)) {
lock_buffer(bh);
bh_arr[cnt++] = bh;
page_dirty = 0;
}
}
}
......@@ -482,10 +496,10 @@ delalloc_convert(
if (mp) {
cluster_write(inode, page->index + 1, mp,
startio, allocate_space);
startio, unmapped);
}
return 0;
return page_dirty;
error:
for (i = 0; i < cnt; i++) {
......@@ -494,12 +508,15 @@ delalloc_convert(
/*
* If it's delalloc and we have nowhere to put it,
* throw it away.
* throw it away, unless the lower layers told
* us to try again.
*/
if (!allocate_space) {
block_invalidatepage(page, 0);
if (err != -EAGAIN) {
if (!unmapped) {
block_invalidatepage(page, 0);
}
ClearPageUptodate(page);
}
ClearPageUptodate(page);
return err;
}
......@@ -679,109 +696,172 @@ linvfs_readpages(
}
STATIC int
STATIC void
count_page_state(
struct page *page,
int *nr_delalloc,
int *nr_unmapped)
int *delalloc,
int *unmapped)
{
*nr_delalloc = *nr_unmapped = 0;
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
do {
if (buffer_uptodate(bh) && !buffer_mapped(bh))
(*nr_unmapped)++;
else if (buffer_delay(bh))
(*nr_delalloc)++;
} while ((bh = bh->b_this_page) != head);
*delalloc = *unmapped = 0;
return 1;
}
return 0;
bh = head = page_buffers(page);
do {
if (buffer_uptodate(bh) && !buffer_mapped(bh))
(*unmapped) = 1;
else if (buffer_delay(bh))
(*delalloc) = 1;
} while ((bh = bh->b_this_page) != head);
}
/*
* writepage: Called from one of two places:
*
* 1. we are flushing a delalloc buffer head.
*
* 2. we are writing out a dirty page. Typically the page dirty
* state is cleared before we get here. In this case it is
* conceivable we have no buffer heads.
*
* For delalloc space on the page we need to allocate space and
* flush it. For unmapped buffer heads on the page we should
* allocate space if the page is uptodate. For any other dirty
* buffer heads on the page we should flush them.
*
* If we detect that a transaction would be required to flush
* the page, we have to check the process flags first, if we
* are already in a transaction or disk I/O during allocations
* is off, we need to fail the writepage and redirty the page.
* We also need to set PF_NOIO ourselves.
*/
STATIC int
linvfs_writepage(
struct page *page,
struct writeback_control *wbc)
{
int error;
int need_trans = 1;
int nr_delalloc, nr_unmapped;
int need_trans;
int delalloc, unmapped;
struct inode *inode = page->mapping->host;
if (count_page_state(page, &nr_delalloc, &nr_unmapped))
need_trans = nr_delalloc + nr_unmapped;
/*
* We need a transaction if:
* 1. There are delalloc buffers on the page
* 2. The page is up to date and we have unmapped buffers
* 3. The page is up to date and we have no buffers
*/
if (!page_has_buffers(page)) {
unmapped = 1;
need_trans = 1;
} else {
count_page_state(page, &delalloc, &unmapped);
if (!PageUptodate(page))
unmapped = 0;
need_trans = delalloc + unmapped;
}
/*
* If we need a transaction and the process flags say
* we are already in a transaction, or no IO is allowed
* then mark the page dirty again and leave the page
* as is.
*/
if ((current->flags & (PF_FSTRANS)) && need_trans)
goto out_fail;
/*
* Delay hooking up buffer heads until we have
* made our go/no-go decision.
*/
if (!page_has_buffers(page)) {
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
}
/*
* Convert delalloc or unmapped space to real space and flush out
* to disk.
*/
error = delalloc_convert(page, 1, nr_delalloc == 0);
if (unlikely(error))
unlock_page(page);
return error;
error = delalloc_convert(page, 1, unmapped);
if (error == -EAGAIN)
goto out_fail;
if (unlikely(error < 0))
goto out_unlock;
return 0;
out_fail:
set_page_dirty(page);
unlock_page(page);
return 0;
}
STATIC int
linvfs_prepare_write(
struct file *file,
struct page *page,
unsigned int from,
unsigned int to)
{
if (file && (file->f_flags & O_SYNC)) {
return block_prepare_write(page, from, to,
linvfs_get_block_sync);
} else {
return block_prepare_write(page, from, to,
linvfs_get_block);
}
out_unlock:
unlock_page(page);
return error;
}
/*
* This gets a page into cleanable state - page locked on entry
* kept locked on exit. If the page is marked dirty we should
* not come this way.
* Called to move a page into cleanable state - and from there
* to be released. Possibly the page is already clean. We always
* have buffer heads in this call.
*
* Returns 0 if the page is ok to release, 1 otherwise.
*
* Possible scenarios are:
*
* 1. We are being called to release a page which has been written
* to via regular I/O. buffer heads will be dirty and possibly
* delalloc. If no delalloc buffer heads in this case then we
* can just return zero.
*
* 2. We are called to release a page which has been written via
* mmap, all we need to do is ensure there is no delalloc
* state in the buffer heads, if not we can let the caller
* free them and we should come back later via writepage.
*/
STATIC int
linvfs_release_page(
struct page *page,
int gfp_mask)
{
int nr_delalloc, nr_unmapped;
int delalloc, unmapped;
if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
if (!nr_delalloc)
goto free_buffers;
}
count_page_state(page, &delalloc, &unmapped);
if (!delalloc)
goto free_buffers;
if (gfp_mask & __GFP_FS) {
/*
* Convert delalloc space to real space, do not flush the
* data out to disk, that will be done by the caller.
*/
if (delalloc_convert(page, 0, 0) == 0)
goto free_buffers;
}
if (!(gfp_mask & __GFP_FS))
return 0;
/*
* Convert delalloc space to real space, do not flush the
* data out to disk, that will be done by the caller.
* Never need to allocate space here - we will always
* come back to writepage in that case.
*/
if (delalloc_convert(page, 0, 0) == 0)
goto free_buffers;
return 0;
free_buffers:
return try_to_free_buffers(page);
}
/*
 * prepare_write: thin wrapper around block_prepare_write().
 *
 * The only decision made here is which block-mapping callback to hand
 * down: linvfs_get_block_sync when a file is supplied and it was opened
 * with O_SYNC, linvfs_get_block in every other case (including a NULL
 * file pointer).
 *
 * Returns whatever block_prepare_write() returns.
 */
STATIC int
linvfs_prepare_write(
	struct file		*file,
	struct page		*page,
	unsigned int		from,
	unsigned int		to)
{
	/* A NULL file is treated the same as a non-O_SYNC file. */
	int			sync_write = file && (file->f_flags & O_SYNC);

	if (!sync_write)
		return block_prepare_write(page, from, to, linvfs_get_block);

	return block_prepare_write(page, from, to, linvfs_get_block_sync);
}
struct address_space_operations linvfs_aops = {
.readpage = linvfs_readpage,
......
......@@ -120,7 +120,13 @@ xfs_iomap(
case PBF_FILE_ALLOCATE:
lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
bmap_flags = XFS_BMAPI_ENTIRE;
XFS_ILOCK(mp, io, lockmode);
/* Attempt non-blocking lock */
if (flags & PBF_TRYLOCK) {
if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
return XFS_ERROR(EAGAIN);
} else {
XFS_ILOCK(mp, io, lockmode);
}
break;
case PBF_FILE_UNWRITTEN:
lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
......
......@@ -1009,13 +1009,13 @@ init_xfs_fs( void )
if (error < 0)
return error;
si_meminfo(&si);
xfs_physmem = si.totalram;
error = pagebuf_init();
if (error < 0)
goto out;
si_meminfo(&si);
xfs_physmem = si.totalram;
vn_init();
xfs_init();
dmapi_init();
......
......@@ -119,9 +119,9 @@ pb_trace_func(
STATIC kmem_cache_t *pagebuf_cache;
STATIC void pagebuf_daemon_wakeup(int);
STATIC void pagebuf_delwri_queue(page_buf_t *, int);
STATIC struct workqueue_struct *pagebuf_workqueue;
/*
* Pagebuf module configuration parameters, exported via
* /proc/sys/vm/pagebuf
......@@ -155,35 +155,37 @@ struct pbstats pbstats;
* Pagebuf hashing
*/
#define NBITS 5
#define NHASH (1<<NBITS)
/*
 * One bucket of the pagebuf hash table (pbhash is an array of these).
 * This structure must be a power of 2 long for the hash to work
 */
typedef struct {
struct list_head pb_hash; /* bucket list head; set up with INIT_LIST_HEAD() in pagebuf_init() */
int pb_count; /* presumably the number of pagebufs chained on pb_hash -- TODO confirm */
spinlock_t pb_hash_lock; /* per-bucket lock; set up with spin_lock_init() in pagebuf_init() */
} pb_hash_t;
STATIC pb_hash_t pbhash[NHASH];
static pb_hash_t *pbhash;
static unsigned int pb_hash_mask;
static unsigned int pb_hash_shift;
static unsigned int pb_order;
#define pb_hash(pb) &pbhash[pb->pb_hash_index]
STATIC int
/*
 * This hash is the same one as used on the Linux buffer cache,
 * see fs/buffer.c
 *
 * Mixes the device number and block number using shift amounts derived
 * from pb_hash_shift; the caller is expected to mask the result down to
 * the table size (see _bhash(), which applies pb_hash_mask).
 *
 * NOTE(review): pb_hash_shift is an unsigned int, so each
 * "pb_hash_shift - N" below is computed in unsigned arithmetic and wraps
 * to a very large value whenever pb_hash_shift < N.  pagebuf_init() caps
 * the table "at 2K buckets", which suggests pb_hash_shift never exceeds
 * 11, i.e. "pb_hash_shift - 12" would always wrap.  A shift count that
 * is not smaller than the width of the promoted operand is undefined
 * behaviour in C -- confirm the intended range of pb_hash_shift and
 * whether these shift amounts need clamping.
 */
#define _hashfn(dev,block) \
((((dev)<<(pb_hash_shift - 6)) ^ ((dev)<<(pb_hash_shift - 9))) ^ \
(((block)<<(pb_hash_shift - 6)) ^ ((block) >> 13) ^ \
((block) << (pb_hash_shift - 12))))
static inline int
_bhash(
dev_t dev,
loff_t base)
{
int bit, hval;
base >>= 9;
/*
* dev_t is 16 bits, loff_t is always 64 bits
*/
base ^= dev;
for (bit = hval = 0; base != 0 && bit < sizeof(base) * 8; bit += NBITS) {
hval ^= (int)base & (NHASH-1);
base >>= NBITS;
}
return hval;
return (_hashfn(dev, base) & pb_hash_mask);
}
/*
......@@ -1516,7 +1518,7 @@ STATIC int pbd_active = 1;
STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;
void
STATIC void
pagebuf_delwri_queue(
page_buf_t *pb,
int unlock)
......@@ -1862,7 +1864,39 @@ pagebuf_shaker(void)
int __init
pagebuf_init(void)
{
int i;
int order, mempages, i;
unsigned int nr_hash;
extern int xfs_physmem;
mempages = xfs_physmem >>= 16;
mempages *= sizeof(pb_hash_t);
for (order = 0; (1 << order) < mempages; order++)
;
if (order > 3) order = 3; /* cap us at 2K buckets */
do {
unsigned long tmp;
nr_hash = (PAGE_SIZE << order) / sizeof(pb_hash_t);
nr_hash = 1 << (ffs(nr_hash) - 1);
pb_hash_mask = (nr_hash - 1);
tmp = nr_hash;
pb_hash_shift = 0;
while((tmp >>= 1UL) != 0UL)
pb_hash_shift++;
pbhash = (pb_hash_t *)
__get_free_pages(GFP_KERNEL, order);
pb_order = order;
} while (pbhash == NULL && --order > 0);
printk("pagebuf cache hash table entries: %d (order: %d, %ld bytes)\n",
nr_hash, order, (PAGE_SIZE << order));
for(i = 0; i < nr_hash; i++) {
spin_lock_init(&pbhash[i].pb_hash_lock);
INIT_LIST_HEAD(&pbhash[i].pb_hash);
}
pagebuf_table_header = register_sysctl_table(pagebuf_root_table, 1);
......@@ -1880,11 +1914,6 @@ pagebuf_init(void)
return -ENOMEM;
}
for (i = 0; i < NHASH; i++) {
spin_lock_init(&pbhash[i].pb_hash_lock);
INIT_LIST_HEAD(&pbhash[i].pb_hash);
}
#ifdef PAGEBUF_TRACE
pb_trace.buf = (pagebuf_trace_t *)kmalloc(
PB_TRACE_BUFSIZE * sizeof(pagebuf_trace_t), GFP_KERNEL);
......@@ -1911,6 +1940,7 @@ pagebuf_terminate(void)
kmem_cache_destroy(pagebuf_cache);
kmem_shake_deregister(pagebuf_shaker);
free_pages((unsigned long)pbhash, pb_order);
unregister_sysctl_table(pagebuf_table_header);
#ifdef CONFIG_PROC_FS
......
......@@ -215,8 +215,8 @@ typedef struct page_buf_s {
unsigned short pb_error; /* error code on I/O */
unsigned short pb_page_count; /* size of page array */
unsigned short pb_offset; /* page offset in first page */
unsigned short pb_hash_index; /* hash table index */
unsigned char pb_locked; /* page array is locked */
unsigned char pb_hash_index; /* hash table index */
struct page **pb_pages; /* array of page pointers */
struct page *pb_page_array[PB_PAGES]; /* inline pages */
#ifdef PAGEBUF_LOCK_TRACKING
......@@ -350,7 +350,6 @@ extern int pagebuf_ispin( page_buf_t *); /* check if pagebuf is pinned */
/* Reading and writing pages */
extern void pagebuf_delwri_queue(page_buf_t *, int);
extern void pagebuf_delwri_dequeue(page_buf_t *);
#define PBDF_WAIT 0x01
......
......@@ -86,7 +86,7 @@ cmn_err(register int level, char *fmt, ...)
{
char *fp = fmt;
int len;
int flags;
unsigned long flags;
va_list ap;
level &= XFS_ERR_MASK;
......
......@@ -46,7 +46,6 @@
typedef spinlock_t lock_t;
#define spinlock_init(lock, name) spin_lock_init(lock)
#define init_spinlock(lock, name, ll) spin_lock_init(lock)
#define spinlock_destroy(lock)
static inline unsigned long mutex_spinlock(lock_t *lock)
......
......@@ -648,7 +648,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
XFS_DATA_FORK);
if (retval)
return(retval);
goto out;
ASSERT(bp != NULL);
memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
leaf = (xfs_dir_leafblock_t *)tmpbuffer;
......
......@@ -33,7 +33,7 @@
#define __XFS_ERROR_H__
#define prdev(fmt,dev,args...) \
printk("XFS: device 0x%x- " fmt "\n", dev, ## args)
printk("XFS: device 0x%x- " fmt "\n", (unsigned)dev, ## args)
#define XFS_ERECOVER 1 /* Failure to recover log */
#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
......
......@@ -378,17 +378,26 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
iclog->ic_callback_tail = &(cb->cb_next);
}
LOG_UNLOCK(log, spl);
if (!abortflg) {
if (xlog_state_release_iclog(log, iclog)) {
xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
return EIO;
}
} else {
if (abortflg) {
cb->cb_func(cb->cb_arg, abortflg);
}
return 0;
} /* xfs_log_notify */
/*
 * Release the in-core log buffer identified by iclog_hndl.
 *
 * The opaque handle is interpreted as an xlog_in_core_t and passed,
 * together with the mount's log (mp->m_log), to
 * xlog_state_release_iclog().  If that call reports a failure (non-zero),
 * the filesystem is shut down with XFS_LOG_IO_ERROR.
 *
 * Returns 0 on success, or EIO (positive) after forcing the shutdown.
 */
int
xfs_log_release_iclog(xfs_mount_t *mp,
		      void *iclog_hndl)
{
	xlog_in_core_t	*iclog = (xlog_in_core_t *)iclog_hndl;
	xlog_t		*log = mp->m_log;

	if (!xlog_state_release_iclog(log, iclog))
		return 0;

	/* Releasing the iclog failed: treat it as a log I/O error. */
	xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
	return EIO;
}
/*
* Initialize log manager data. This routine is intended to be called when
......
......@@ -164,6 +164,8 @@ void xfs_log_move_tail(struct xfs_mount *mp,
int xfs_log_notify(struct xfs_mount *mp,
void *iclog,
xfs_log_callback_t *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
void *iclog_hndl);
int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
......
......@@ -1287,10 +1287,6 @@ xlog_recover_add_to_trans(xlog_recover_t *trans,
if (!len)
return 0;
ptr = kmem_zalloc(len, 0);
memcpy(ptr, dp, len);
in_f = (xfs_inode_log_format_t *)ptr;
item = trans->r_itemq;
if (item == 0) {
ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
......@@ -1299,6 +1295,11 @@ xlog_recover_add_to_trans(xlog_recover_t *trans,
memcpy(&trans->r_theader, dp, len); /* d, s, l */
return 0;
}
ptr = kmem_alloc(len, 0);
memcpy(ptr, dp, len);
in_f = (xfs_inode_log_format_t *)ptr;
if (item->ri_prev->ri_total != 0 &&
item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
xlog_recover_add_item(&trans->r_itemq);
......
......@@ -272,7 +272,7 @@ xfs_mount_validate_sb(
cmn_err(CE_WARN,
"XFS: Only page-sized (%d) or less blocksizes currently work.",
PAGE_SIZE);
return XFS_ERROR(EWRONGFS);
return XFS_ERROR(ENOSYS);
}
return 0;
......@@ -459,10 +459,22 @@ xfs_readsb(xfs_mount_t *mp)
}
/*
* Re-read the superblock so that our buffer is correctly sized.
* We only need to do this if sector size on-disk is different.
* We must be able to do sector-sized and sector-aligned IO.
*/
if (sector_size > mp->m_sb.sb_sectsize) {
cmn_err(CE_WARN,
"XFS: device supports only %u byte sectors (not %u)",
sector_size, mp->m_sb.sb_sectsize);
XFS_BUF_UNMANAGE(bp);
xfs_buf_relse(bp);
return XFS_ERROR(ENOSYS);
}
/*
* If device sector size is smaller than the superblock size,
* re-read the superblock so the buffer is correctly sized.
*/
if (sector_size != mp->m_sb.sb_sectsize) {
if (sector_size < mp->m_sb.sb_sectsize) {
XFS_BUF_UNMANAGE(bp);
xfs_buf_relse(bp);
sector_size = mp->m_sb.sb_sectsize;
......
......@@ -808,19 +808,6 @@ xfs_trans_commit(
return XFS_ERROR(EIO);
}
/*
* Once all the items of the transaction have been copied
* to the in core log we can release them. Do that here.
* This will free descriptors pointing to items which were
* not logged since there is nothing more to do with them.
* For items which were logged, we will keep pointers to them
* so they can be unpinned after the transaction commits to disk.
* This will also stamp each modified meta-data item with
* the commit lsn of this transaction for dependency tracking
* purposes.
*/
xfs_trans_unlock_items(tp, commit_lsn);
/*
* Once the transaction has committed, unused
* reservations need to be released and changes to
......@@ -856,12 +843,36 @@ xfs_trans_commit(
tp->t_logcb.cb_arg = tp;
/* We need to pass the iclog buffer which was used for the
* transaction commit record into this function, attach
* the callback to it, and then release it. This will guarantee
* that we do callbacks on the transaction in the correct order.
* transaction commit record into this function, and attach
* the callback to it. The callback must be attached before
* the items are unlocked to avoid racing with other threads
* waiting for an item to unlock.
*/
error = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
#endif
/*
* Once all the items of the transaction have been copied
* to the in core log and the callback is attached, the
* items can be unlocked.
*
* This will free descriptors pointing to items which were
* not logged since there is nothing more to do with them.
* For items which were logged, we will keep pointers to them
* so they can be unpinned after the transaction commits to disk.
* This will also stamp each modified meta-data item with
* the commit lsn of this transaction for dependency tracking
* purposes.
*/
xfs_trans_unlock_items(tp, commit_lsn);
/*
* Now that the xfs_trans_committed callback has been attached,
* and the items are released we can finally allow the iclog to
* go to disk.
*/
error = xfs_log_release_iclog(mp, commit_iclog);
/*
* If the transaction needs to be synchronous, then force the
* log out now and wait for it.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment