Commit af61a2bd authored by Linus Torvalds

Merge master.kernel.org:/home/hch/BK/xfs/linux-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 91af0978 ed7fa26b
@@ -659,8 +659,7 @@ void buffer_insert_list(spinlock_t *lock,
 		struct buffer_head *bh, struct list_head *list)
 {
 	spin_lock(lock);
-	list_del(&bh->b_assoc_buffers);
-	list_add(&bh->b_assoc_buffers, list);
+	list_move_tail(&bh->b_assoc_buffers, list);
 	spin_unlock(lock);
 }
...
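Note that this is not a pure one-for-one rewrite: list_del() followed by list_add() put the buffer back at the *front* of the association list, while list_move_tail() re-queues it at the *tail*. For reference, list_move_tail() in include/linux/list.h of this era looks roughly like the sketch below (quoted from memory, so treat the exact body as an approximation):

	static inline void list_move_tail(struct list_head *list,
					  struct list_head *head)
	{
		__list_del(list->prev, list->next);	/* unlink from current list */
		list_add_tail(list, head);		/* re-insert at the tail of head */
	}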
@@ -53,12 +53,15 @@ map_blocks(
 	count = max_t(ssize_t, count, XFS_WRITE_IO_LOG);
 retry:
 	VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
-	if (flags & PBF_WRITE) {
-		if (unlikely((flags & PBF_DIRECT) && nmaps &&
-			    (pbmapp->pbm_flags & PBMF_DELAY))) {
-			flags = PBF_FILE_ALLOCATE;
-			goto retry;
-		}
+	if (error == EAGAIN)
+		return -error;
+	if (unlikely((flags & (PBF_WRITE|PBF_DIRECT)) ==
+	    (PBF_WRITE|PBF_DIRECT) && nmaps &&
+	    (pbmapp->pbm_flags & PBMF_DELAY))) {
+		flags = PBF_FILE_ALLOCATE;
+		goto retry;
+	}
+	if (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
 		VMODIFY(vp);
 	}
 	return -error;
@@ -309,6 +312,7 @@ convert_page(
 		if (startio && (offset < end)) {
 			bh_arr[index++] = bh;
 		} else {
+			set_buffer_dirty(bh);
 			unlock_buffer(bh);
 		}
 	} while (i++, (bh = bh->b_this_page) != head);
@@ -365,9 +369,9 @@ cluster_write(
 STATIC int
 delalloc_convert(
 	struct page		*page,
 	int			startio,
-	int			allocate_space)
+	int			unmapped) /* also implies page uptodate */
 {
 	struct inode		*inode = page->mapping->host;
 	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
@@ -375,6 +379,9 @@ delalloc_convert(
 	unsigned long		p_offset = 0, end_index;
 	loff_t			offset, end_offset;
 	int			len, err, i, cnt = 0, uptodate = 1;
+	int			flags = startio ? 0 : PBF_TRYLOCK;
+	int			page_dirty = 1;
+
 	/* Are we off the end of the file ? */
 	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -390,9 +397,6 @@ delalloc_convert(
 	if (end_offset > inode->i_size)
 		end_offset = inode->i_size;
 
-	if (startio && !page_has_buffers(page))
-		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
 	bh = head = page_buffers(page);
 	mp = NULL;
@@ -406,10 +410,14 @@ delalloc_convert(
 			mp = match_offset_to_mapping(page, &map, p_offset);
 		}
 
+		/*
+		 * First case, allocate space for delalloc buffer head
+		 * we can return EAGAIN here in the release page case.
+		 */
 		if (buffer_delay(bh)) {
 			if (!mp) {
 				err = map_blocks(inode, offset, len, &map,
-						PBF_FILE_ALLOCATE);
+						PBF_FILE_ALLOCATE | flags);
 				if (err) {
 					goto error;
 				}
@@ -422,14 +430,17 @@ delalloc_convert(
 				if (startio) {
 					bh_arr[cnt++] = bh;
 				} else {
+					set_buffer_dirty(bh);
 					unlock_buffer(bh);
 				}
+				page_dirty = 0;
 			}
 		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-			   (allocate_space || startio)) {
+			   (unmapped || startio)) {
 			if (!buffer_mapped(bh)) {
 				int	size;
 
 				/*
 				 * Getting here implies an unmapped buffer
 				 * was found, and we are in a path where we
@@ -454,13 +465,16 @@ delalloc_convert(
 				if (startio) {
 					bh_arr[cnt++] = bh;
 				} else {
+					set_buffer_dirty(bh);
 					unlock_buffer(bh);
 				}
+				page_dirty = 0;
 			}
-		} else if (startio && buffer_mapped(bh)) {
-			if (buffer_uptodate(bh) && allocate_space) {
+		} else if (startio) {
+			if (buffer_uptodate(bh)) {
 				lock_buffer(bh);
 				bh_arr[cnt++] = bh;
+				page_dirty = 0;
 			}
 		}
 	}
@@ -482,10 +496,10 @@ delalloc_convert(
 	if (mp) {
 		cluster_write(inode, page->index + 1, mp,
-				startio, allocate_space);
+				startio, unmapped);
 	}
 
-	return 0;
+	return page_dirty;
 
 error:
 	for (i = 0; i < cnt; i++) {
@@ -494,12 +508,15 @@ delalloc_convert(
 	/*
 	 * If it's delalloc and we have nowhere to put it,
-	 * throw it away.
+	 * throw it away, unless the lower layers told
+	 * us to try again.
 	 */
-	if (!allocate_space) {
-		block_invalidatepage(page, 0);
+	if (err != -EAGAIN) {
+		if (!unmapped) {
+			block_invalidatepage(page, 0);
+		}
+		ClearPageUptodate(page);
 	}
-	ClearPageUptodate(page);
 	return err;
 }
@@ -679,109 +696,172 @@ linvfs_readpages(
 }
 
-STATIC int
+STATIC void
 count_page_state(
 	struct page		*page,
-	int			*nr_delalloc,
-	int			*nr_unmapped)
+	int			*delalloc,
+	int			*unmapped)
 {
-	*nr_delalloc = *nr_unmapped = 0;
+	struct buffer_head	*bh, *head;
 
-	if (page_has_buffers(page)) {
-		struct buffer_head	*bh, *head;
+	*delalloc = *unmapped = 0;
 
-		bh = head = page_buffers(page);
-		do {
-			if (buffer_uptodate(bh) && !buffer_mapped(bh))
-				(*nr_unmapped)++;
-			else if (buffer_delay(bh))
-				(*nr_delalloc)++;
-		} while ((bh = bh->b_this_page) != head);
-		return 1;
-	}
-
-	return 0;
+	bh = head = page_buffers(page);
+	do {
+		if (buffer_uptodate(bh) && !buffer_mapped(bh))
+			(*unmapped) = 1;
+		else if (buffer_delay(bh))
+			(*delalloc) = 1;
+	} while ((bh = bh->b_this_page) != head);
 }
 
+/*
+ * writepage: Called from one of two places:
+ *
+ * 1. we are flushing a delalloc buffer head.
+ *
+ * 2. we are writing out a dirty page. Typically the page dirty
+ *    state is cleared before we get here. In this case is it
+ *    conceivable we have no buffer heads.
+ *
+ * For delalloc space on the page we need to allocate space and
+ * flush it. For unmapped buffer heads on the page we should
+ * allocate space if the page is uptodate. For any other dirty
+ * buffer heads on the page we should flush them.
+ *
+ * If we detect that a transaction would be required to flush
+ * the page, we have to check the process flags first, if we
+ * are already in a transaction or disk I/O during allocations
+ * is off, we need to fail the writepage and redirty the page.
+ * We also need to set PF_NOIO ourselves.
+ */
 STATIC int
 linvfs_writepage(
 	struct page		*page,
 	struct writeback_control *wbc)
 {
 	int			error;
-	int			need_trans = 1;
-	int			nr_delalloc, nr_unmapped;
+	int			need_trans;
+	int			delalloc, unmapped;
+	struct inode		*inode = page->mapping->host;
 
-	if (count_page_state(page, &nr_delalloc, &nr_unmapped))
-		need_trans = nr_delalloc + nr_unmapped;
+	/*
+	 * We need a transaction if:
+	 * 1. There are delalloc buffers on the page
+	 * 2. The page is upto date and we have unmapped buffers
+	 * 3. The page is upto date and we have no buffers
+	 */
+	if (!page_has_buffers(page)) {
+		unmapped = 1;
+		need_trans = 1;
+	} else {
+		count_page_state(page, &delalloc, &unmapped);
+		if (!PageUptodate(page))
+			unmapped = 0;
+		need_trans = delalloc + unmapped;
+	}
 
+	/*
+	 * If we need a transaction and the process flags say
+	 * we are already in a transaction, or no IO is allowed
+	 * then mark the page dirty again and leave the page
+	 * as is.
+	 */
 	if ((current->flags & (PF_FSTRANS)) && need_trans)
 		goto out_fail;
 
+	/*
+	 * Delay hooking up buffer heads until we have
+	 * made our go/no-go decision.
+	 */
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+	}
+
 	/*
 	 * Convert delalloc or unmapped space to real space and flush out
 	 * to disk.
 	 */
-	error = delalloc_convert(page, 1, nr_delalloc == 0);
-	if (unlikely(error))
-		unlock_page(page);
-	return error;
+	error = delalloc_convert(page, 1, unmapped);
+	if (error == -EAGAIN)
+		goto out_fail;
+	if (unlikely(error < 0))
+		goto out_unlock;
+
+	return 0;
 
 out_fail:
 	set_page_dirty(page);
 	unlock_page(page);
 	return 0;
-}
-
-STATIC int
-linvfs_prepare_write(
-	struct file		*file,
-	struct page		*page,
-	unsigned int		from,
-	unsigned int		to)
-{
-	if (file && (file->f_flags & O_SYNC)) {
-		return block_prepare_write(page, from, to,
-						linvfs_get_block_sync);
-	} else {
-		return block_prepare_write(page, from, to,
-						linvfs_get_block);
-	}
+out_unlock:
+	unlock_page(page);
+	return error;
 }
 
 /*
- * This gets a page into cleanable state - page locked on entry
- * kept locked on exit. If the page is marked dirty we should
- * not come this way.
+ * Called to move a page into cleanable state - and from there
+ * to be released. Possibly the page is already clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 0 if the page is ok to release, 1 otherwise.
+ *
+ * Possible scenarios are:
+ *
+ * 1. We are being called to release a page which has been written
+ *    to via regular I/O. buffer heads will be dirty and possibly
+ *    delalloc. If no delalloc buffer heads in this case then we
+ *    can just return zero.
+ *
+ * 2. We are called to release a page which has been written via
+ *    mmap, all we need to do is ensure there is no delalloc
+ *    state in the buffer heads, if not we can let the caller
+ *    free them and we should come back later via writepage.
 */
 STATIC int
 linvfs_release_page(
 	struct page		*page,
 	int			gfp_mask)
 {
-	int			nr_delalloc, nr_unmapped;
+	int			delalloc, unmapped;
 
-	if (count_page_state(page, &nr_delalloc, &nr_unmapped)) {
-		if (!nr_delalloc)
-			goto free_buffers;
-	}
+	count_page_state(page, &delalloc, &unmapped);
+	if (!delalloc)
+		goto free_buffers;
 
-	if (gfp_mask & __GFP_FS) {
-		/*
-		 * Convert delalloc space to real space, do not flush the
-		 * data out to disk, that will be done by the caller.
-		 */
-		if (delalloc_convert(page, 0, 0) == 0)
-			goto free_buffers;
-	}
+	if (!(gfp_mask & __GFP_FS))
+		return 0;
+
+	/*
+	 * Convert delalloc space to real space, do not flush the
+	 * data out to disk, that will be done by the caller.
+	 * Never need to allocate space here - we will always
+	 * come back to writepage in that case.
+	 */
+	if (delalloc_convert(page, 0, 0) == 0)
+		goto free_buffers;
 	return 0;
 
 free_buffers:
 	return try_to_free_buffers(page);
 }
 
+STATIC int
+linvfs_prepare_write(
+	struct file		*file,
+	struct page		*page,
+	unsigned int		from,
+	unsigned int		to)
+{
+	if (file && (file->f_flags & O_SYNC)) {
+		return block_prepare_write(page, from, to,
+						linvfs_get_block_sync);
+	} else {
+		return block_prepare_write(page, from, to,
+						linvfs_get_block);
+	}
+}
+
 struct address_space_operations linvfs_aops = {
 	.readpage		= linvfs_readpage,
...
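As a reading aid only (not part of the commit), the transaction go/no-go test that the new linvfs_writepage() performs inline can be pictured as a small helper. count_page_state(), page_has_buffers() and PageUptodate() are exactly the calls used in the hunk above; the helper itself is an illustrative sketch:

	/* Illustrative sketch: nonzero when writing this page may need an
	 * allocation transaction, mirroring the new linvfs_writepage() logic. */
	static int page_needs_transaction(struct page *page)
	{
		int	delalloc, unmapped;

		if (!page_has_buffers(page))
			return 1;	/* no buffer heads: treat page as unmapped */

		count_page_state(page, &delalloc, &unmapped);
		if (!PageUptodate(page))
			unmapped = 0;	/* only uptodate pages get space allocated */

		return delalloc + unmapped;
	}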
@@ -120,7 +120,13 @@ xfs_iomap(
 	case PBF_FILE_ALLOCATE:
 		lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
 		bmap_flags = XFS_BMAPI_ENTIRE;
-		XFS_ILOCK(mp, io, lockmode);
+		/* Attempt non-blocking lock */
+		if (flags & PBF_TRYLOCK) {
+			if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
+				return XFS_ERROR(EAGAIN);
+		} else {
+			XFS_ILOCK(mp, io, lockmode);
+		}
 		break;
 	case PBF_FILE_UNWRITTEN:
 		lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
...
@@ -1009,13 +1009,13 @@ init_xfs_fs( void )
 	if (error < 0)
 		return error;
 
+	si_meminfo(&si);
+	xfs_physmem = si.totalram;
+
 	error = pagebuf_init();
 	if (error < 0)
 		goto out;
 
-	si_meminfo(&si);
-	xfs_physmem = si.totalram;
-
 	vn_init();
 	xfs_init();
 	dmapi_init();
...
@@ -119,9 +119,9 @@ pb_trace_func(
 STATIC kmem_cache_t *pagebuf_cache;
 STATIC void pagebuf_daemon_wakeup(int);
+STATIC void pagebuf_delwri_queue(page_buf_t *, int);
 STATIC struct workqueue_struct *pagebuf_workqueue;
 
 /*
  * Pagebuf module configuration parameters, exported via
  * /proc/sys/vm/pagebuf
@@ -155,35 +155,37 @@ struct pbstats pbstats;
  * Pagebuf hashing
  */
 
-#define NBITS	5
-#define NHASH	(1<<NBITS)
-
+/* This structure must be a power of 2 long for the hash to work */
 typedef struct {
 	struct list_head	pb_hash;
 	int			pb_count;
 	spinlock_t		pb_hash_lock;
 } pb_hash_t;
 
-STATIC pb_hash_t	pbhash[NHASH];
+static pb_hash_t	*pbhash;
+static unsigned int	pb_hash_mask;
+static unsigned int	pb_hash_shift;
+static unsigned int	pb_order;
+
 #define pb_hash(pb)	&pbhash[pb->pb_hash_index]
 
-STATIC int
+/*
+ * This hash is the same one as used on the Linux buffer cache,
+ * see fs/buffer.c
+ */
+#define _hashfn(dev,block)	\
+	((((dev)<<(pb_hash_shift - 6)) ^ ((dev)<<(pb_hash_shift - 9))) ^ \
+	 (((block)<<(pb_hash_shift - 6)) ^ ((block) >> 13) ^ \
+	  ((block) << (pb_hash_shift - 12))))
+
+static inline int
 _bhash(
 	dev_t		dev,
 	loff_t		base)
 {
-	int		bit, hval;
-
 	base >>= 9;
-	/*
-	 * dev_t is 16 bits, loff_t is always 64 bits
-	 */
-	base ^= dev;
-	for (bit = hval = 0; base != 0 && bit < sizeof(base) * 8; bit += NBITS) {
-		hval ^= (int)base & (NHASH-1);
-		base >>= NBITS;
-	}
-	return hval;
+	return (_hashfn(dev, base) & pb_hash_mask);
 }
 
 /*
@@ -1516,7 +1518,7 @@ STATIC int pbd_active = 1;
 STATIC LIST_HEAD(pbd_delwrite_queue);
 STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;
 
-void
+STATIC void
 pagebuf_delwri_queue(
 	page_buf_t		*pb,
 	int			unlock)
@@ -1862,7 +1864,39 @@ pagebuf_shaker(void)
 int __init
 pagebuf_init(void)
 {
-	int			i;
+	int			order, mempages, i;
+	unsigned int		nr_hash;
+	extern int		xfs_physmem;
+
+	mempages = xfs_physmem >>= 16;
+	mempages *= sizeof(pb_hash_t);
+	for (order = 0; (1 << order) < mempages; order++)
+		;
+	if (order > 3) order = 3;	/* cap us at 2K buckets */
+
+	do {
+		unsigned long	tmp;
+
+		nr_hash = (PAGE_SIZE << order) / sizeof(pb_hash_t);
+		nr_hash = 1 << (ffs(nr_hash) - 1);
+		pb_hash_mask = (nr_hash - 1);
+		tmp = nr_hash;
+		pb_hash_shift = 0;
+		while ((tmp >>= 1UL) != 0UL)
+			pb_hash_shift++;
+
+		pbhash = (pb_hash_t *)
+				__get_free_pages(GFP_KERNEL, order);
+		pb_order = order;
+	} while (pbhash == NULL && --order > 0);
+
+	printk("pagebuf cache hash table entries: %d (order: %d, %ld bytes)\n",
+		nr_hash, order, (PAGE_SIZE << order));
+
+	for (i = 0; i < nr_hash; i++) {
+		spin_lock_init(&pbhash[i].pb_hash_lock);
+		INIT_LIST_HEAD(&pbhash[i].pb_hash);
+	}
 
 	pagebuf_table_header = register_sysctl_table(pagebuf_root_table, 1);
@@ -1880,11 +1914,6 @@ pagebuf_init(void)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < NHASH; i++) {
-		spin_lock_init(&pbhash[i].pb_hash_lock);
-		INIT_LIST_HEAD(&pbhash[i].pb_hash);
-	}
-
 #ifdef PAGEBUF_TRACE
 	pb_trace.buf = (pagebuf_trace_t *)kmalloc(
 			PB_TRACE_BUFSIZE * sizeof(pagebuf_trace_t), GFP_KERNEL);
@@ -1911,6 +1940,7 @@ pagebuf_terminate(void)
 
 	kmem_cache_destroy(pagebuf_cache);
 	kmem_shake_deregister(pagebuf_shaker);
+	free_pages((unsigned long)pbhash, pb_order);
 
 	unregister_sysctl_table(pagebuf_table_header);
 #ifdef CONFIG_PROC_FS
...
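The sizing arithmetic in the new pagebuf_init() is easiest to follow with concrete numbers. Below is a stand-alone sketch of the same computation; PAGE_SIZE, the size of pb_hash_t and the amount of memory are illustrative assumptions, not values taken from the commit:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	#define PAGE_SIZE	4096UL		/* assumed 4K pages */
	#define PB_HASH_SIZE	16		/* assumed sizeof(pb_hash_t) */

	int main(void)
	{
		unsigned long xfs_physmem = 131072;	/* assumed: 512MB in 4K pages */
		unsigned int nr_hash, pb_hash_mask, pb_hash_shift, tmp;
		int order, mempages;

		/* Same scaling as pagebuf_init(): more memory, more buckets... */
		mempages = (xfs_physmem >> 16) * PB_HASH_SIZE;
		for (order = 0; (1 << order) < mempages; order++)
			;
		if (order > 3)
			order = 3;			/* ...capped at 2K buckets */

		nr_hash = (PAGE_SIZE << order) / PB_HASH_SIZE;
		nr_hash = 1 << (ffs(nr_hash) - 1);	/* force a power of two */
		pb_hash_mask = nr_hash - 1;
		for (pb_hash_shift = 0, tmp = nr_hash; (tmp >>= 1) != 0; )
			pb_hash_shift++;

		/* With the assumptions above: order 3, 2048 buckets, mask 0x7ff, shift 11 */
		printf("order %d, %u buckets, mask 0x%x, shift %u\n",
			order, nr_hash, pb_hash_mask, pb_hash_shift);
		return 0;
	}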
@@ -215,8 +215,8 @@ typedef struct page_buf_s {
 	unsigned short		pb_error;	/* error code on I/O */
 	unsigned short		pb_page_count;	/* size of page array */
 	unsigned short		pb_offset;	/* page offset in first page */
+	unsigned short		pb_hash_index;	/* hash table index */
 	unsigned char		pb_locked;	/* page array is locked */
-	unsigned char		pb_hash_index;	/* hash table index */
 	struct page		**pb_pages;	/* array of page pointers */
 	struct page		*pb_page_array[PB_PAGES]; /* inline pages */
 #ifdef PAGEBUF_LOCK_TRACKING
@@ -350,7 +350,6 @@ extern int pagebuf_ispin( page_buf_t *);	/* check if pagebuf is pinned */
 
 /* Reading and writing pages */
 
-extern void pagebuf_delwri_queue(page_buf_t *, int);
 extern void pagebuf_delwri_dequeue(page_buf_t *);
 
 #define PBDF_WAIT	0x01
...
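The widening of pb_hash_index from unsigned char to unsigned short presumably follows from the new table sizing above: with up to 2048 buckets (the "2K buckets" cap in pagebuf_init()) the bucket index needs 11 bits, and an unsigned char holds at most 255 (2^8 - 1 < 2048 <= 2^16 - 1), so a 16-bit field is the smallest one that still fits.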
@@ -86,7 +86,7 @@ cmn_err(register int level, char *fmt, ...)
 {
 	char		*fp = fmt;
 	int		len;
-	int		flags;
+	unsigned long	flags;
 	va_list		ap;
 
 	level &= XFS_ERR_MASK;
...
@@ -46,7 +46,6 @@
 typedef spinlock_t lock_t;
 
 #define spinlock_init(lock, name)	spin_lock_init(lock)
-#define init_spinlock(lock, name, ll)	spin_lock_init(lock)
 #define spinlock_destroy(lock)
 
 static inline unsigned long mutex_spinlock(lock_t *lock)
...
@@ -648,7 +648,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
 	retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
 					       XFS_DATA_FORK);
 	if (retval)
-		return(retval);
+		goto out;
 	ASSERT(bp != NULL);
 	memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
 	leaf = (xfs_dir_leafblock_t *)tmpbuffer;
...
@@ -33,7 +33,7 @@
 #define __XFS_ERROR_H__
 
 #define prdev(fmt,dev,args...) \
-	printk("XFS: device 0x%x- " fmt "\n", dev, ## args)
+	printk("XFS: device 0x%x- " fmt "\n", (unsigned)dev, ## args)
 
 #define XFS_ERECOVER	1	/* Failure to recover log */
 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
...
@@ -378,17 +378,26 @@ xfs_log_notify(xfs_mount_t	  *mp,		/* mount of partition */
 		iclog->ic_callback_tail = &(cb->cb_next);
 	}
 	LOG_UNLOCK(log, spl);
-	if (!abortflg) {
-		if (xlog_state_release_iclog(log, iclog)) {
-			xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
-			return EIO;
-		}
-	} else {
+	if (abortflg) {
 		cb->cb_func(cb->cb_arg, abortflg);
 	}
 	return 0;
 }	/* xfs_log_notify */
 
+int
+xfs_log_release_iclog(xfs_mount_t *mp,
+		      void	  *iclog_hndl)
+{
+	xlog_t		*log = mp->m_log;
+	xlog_in_core_t	*iclog = (xlog_in_core_t *)iclog_hndl;
+
+	if (xlog_state_release_iclog(log, iclog)) {
+		xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
+		return(EIO);
+	}
+
+	return 0;
+}
+
 /*
  * Initialize log manager data.  This routine is intended to be called when
...
@@ -164,6 +164,8 @@ void	  xfs_log_move_tail(struct xfs_mount	*mp,
 int	  xfs_log_notify(struct xfs_mount	*mp,
			 void			*iclog,
			 xfs_log_callback_t	*callback_entry);
+int	  xfs_log_release_iclog(struct xfs_mount *mp,
+			 void			*iclog_hndl);
 int	  xfs_log_reserve(struct xfs_mount	*mp,
			  int			length,
			  int			count,
...
@@ -1287,10 +1287,6 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 	if (!len)
 		return 0;
 
-	ptr = kmem_zalloc(len, 0);
-	memcpy(ptr, dp, len);
-	in_f = (xfs_inode_log_format_t *)ptr;
-
 	item = trans->r_itemq;
 	if (item == 0) {
 		ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
@@ -1299,6 +1295,11 @@ xlog_recover_add_to_trans(xlog_recover_t	*trans,
 		memcpy(&trans->r_theader, dp, len); /* d, s, l */
 		return 0;
 	}
+
+	ptr = kmem_alloc(len, 0);
+	memcpy(ptr, dp, len);
+	in_f = (xfs_inode_log_format_t *)ptr;
+
 	if (item->ri_prev->ri_total != 0 &&
 	     item->ri_prev->ri_total == item->ri_prev->ri_cnt) {
 		xlog_recover_add_item(&trans->r_itemq);
...
@@ -272,7 +272,7 @@ xfs_mount_validate_sb(
 		cmn_err(CE_WARN,
 	"XFS: Only page-sized (%d) or less blocksizes currently work.",
 			PAGE_SIZE);
-		return XFS_ERROR(EWRONGFS);
+		return XFS_ERROR(ENOSYS);
 	}
 
 	return 0;
@@ -459,10 +459,22 @@ xfs_readsb(xfs_mount_t *mp)
 	}
 
 	/*
-	 * Re-read the superblock so that our buffer is correctly sized.
-	 * We only need to do this if sector size on-disk is different.
+	 * We must be able to do sector-sized and sector-aligned IO.
+	 */
+	if (sector_size > mp->m_sb.sb_sectsize) {
+		cmn_err(CE_WARN,
+			"XFS: device supports only %u byte sectors (not %u)",
+			sector_size, mp->m_sb.sb_sectsize);
+		XFS_BUF_UNMANAGE(bp);
+		xfs_buf_relse(bp);
+		return XFS_ERROR(ENOSYS);
+	}
+
+	/*
+	 * If device sector size is smaller than the superblock size,
+	 * re-read the superblock so the buffer is correctly sized.
 	 */
-	if (sector_size != mp->m_sb.sb_sectsize) {
+	if (sector_size < mp->m_sb.sb_sectsize) {
 		XFS_BUF_UNMANAGE(bp);
 		xfs_buf_relse(bp);
 		sector_size = mp->m_sb.sb_sectsize;
...
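Concretely (an illustrative example, not text from the commit): a device that can only do 4096-byte sectors cannot service a filesystem whose superblock records sb_sectsize = 512, so the mount now fails with ENOSYS rather than issuing IO the device cannot perform; the opposite case, a 512-byte-sector device mounting a filesystem with sb_sectsize = 4096, is still handled by re-reading the superblock into a correctly sized buffer.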
@@ -808,19 +808,6 @@ xfs_trans_commit(
 		return XFS_ERROR(EIO);
 	}
 
-	/*
-	 * Once all the items of the transaction have been copied
-	 * to the in core log we can release them.  Do that here.
-	 * This will free descriptors pointing to items which were
-	 * not logged since there is nothing more to do with them.
-	 * For items which were logged, we will keep pointers to them
-	 * so they can be unpinned after the transaction commits to disk.
-	 * This will also stamp each modified meta-data item with
-	 * the commit lsn of this transaction for dependency tracking
-	 * purposes.
-	 */
-	xfs_trans_unlock_items(tp, commit_lsn);
-
 	/*
 	 * Once the transaction has committed, unused
 	 * reservations need to be released and changes to
@@ -856,12 +843,36 @@ xfs_trans_commit(
 	tp->t_logcb.cb_arg = tp;
 
 	/* We need to pass the iclog buffer which was used for the
-	 * transaction commit record into this function, attach
-	 * the callback to it, and then release it.  This will guarantee
-	 * that we do callbacks on the transaction in the correct order.
+	 * transaction commit record into this function, and attach
+	 * the callback to it.  The callback must be attached before
+	 * the items are unlocked to avoid racing with other threads
+	 * waiting for an item to unlock.
	 */
 	error = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
 #endif
 
+	/*
+	 * Once all the items of the transaction have been copied
+	 * to the in core log and the callback is attached, the
+	 * items can be unlocked.
+	 *
+	 * This will free descriptors pointing to items which were
+	 * not logged since there is nothing more to do with them.
+	 * For items which were logged, we will keep pointers to them
+	 * so they can be unpinned after the transaction commits to disk.
+	 * This will also stamp each modified meta-data item with
+	 * the commit lsn of this transaction for dependency tracking
+	 * purposes.
+	 */
+	xfs_trans_unlock_items(tp, commit_lsn);
+
+	/*
+	 * Now that the xfs_trans_committed callback has been attached,
+	 * and the items are released we can finally allow the iclog to
+	 * go to disk.
+	 */
+	error = xfs_log_release_iclog(mp, commit_iclog);
+
 	/*
 	 * If the transaction needs to be synchronous, then force the
 	 * log out now and wait for it.
...
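The net effect on the commit path, paraphrased as a sketch (identifiers are taken from the hunks above; error handling and the conditional compilation around xfs_log_notify() are omitted): per the commit's own comment, the callback has to be in place before the items are unlocked, so that threads waiting on those items cannot race ahead of it, and only then is the iclog released to disk.

	/* 1. attach the transaction's committed callback to the commit iclog */
	error = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));

	/* 2. unlock/free the logged items (stamps them with commit_lsn) */
	xfs_trans_unlock_items(tp, commit_lsn);

	/* 3. finally release the iclog so the commit record can go to disk */
	error = xfs_log_release_iclog(mp, commit_iclog);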