Commit a77febbe authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: obey minleft values during extent allocation correctly
  xfs: reset buffer pointers before freeing them
  xfs: avoid getting stuck during async inode flushes
  xfs: fix xfs_itruncate_start tracing
  xfs: fix duplicate workqueue initialisation
  xfs: kill off xfs_printk()
  xfs: fix race condition in AIL push trigger
  xfs: make AIL target updates and compares 32bit safe.
  xfs: always push the AIL to the target
  xfs: exit AIL push work correctly when AIL is empty
  xfs: ensure reclaim cursor is reset correctly at end of AG
  xfs: add an x86 compat handler for XFS_IOC_ZERO_RANGE
  xfs: fix compiler warning in xfs_trace.h
  xfs: cleanup duplicate initializations
  xfs: reduce the number of pagb_lock roundtrips in xfs_alloc_clear_busy
  xfs: exact busy extent tracking
  xfs: do not immediately reuse busy extent ranges
  xfs: optimize AGFL refills
parents 42cd71bf bf59170a
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include <linux/migrate.h> #include <linux/migrate.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/list_sort.h>
#include "xfs_sb.h" #include "xfs_sb.h"
#include "xfs_inum.h" #include "xfs_inum.h"
...@@ -709,6 +708,27 @@ xfs_buf_get_empty( ...@@ -709,6 +708,27 @@ xfs_buf_get_empty(
return bp; return bp;
} }
/*
* Return a buffer allocated as an empty buffer and associated to external
* memory via xfs_buf_associate_memory() back to it's empty state.
*/
void
xfs_buf_set_empty(
struct xfs_buf *bp,
size_t len)
{
if (bp->b_pages)
_xfs_buf_free_pages(bp);
bp->b_pages = NULL;
bp->b_page_count = 0;
bp->b_addr = NULL;
bp->b_file_offset = 0;
bp->b_buffer_length = bp->b_count_desired = len;
bp->b_bn = XFS_BUF_DADDR_NULL;
bp->b_flags &= ~XBF_MAPPED;
}
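The new helper is put to use at the end of this series when the log tears down its extra buffer (see the xlog_dealloc_log hunk further down). A minimal sketch of the intended calling pattern, assuming bp was previously pointed at memory owned by another buffer via xfs_buf_associate_memory():

	/* sketch only: reset the aliased buffer before freeing it */
	xfs_buf_set_empty(bp, len);	/* drop the borrowed mapping and page state */
	xfs_buf_free(bp);		/* now frees only what bp itself owns */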
static inline struct page *
mem_to_page(
void *addr)
......
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
extern void xfs_buf_hold(xfs_buf_t *);
......
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
case XFS_IOC_RESVSP_32:
case XFS_IOC_UNRESVSP_32:
case XFS_IOC_RESVSP64_32:
case XFS_IOC_UNRESVSP64_32: {
case XFS_IOC_UNRESVSP64_32:
case XFS_IOC_ZERO_RANGE_32: {
struct xfs_flock64 bf;
if (xfs_compat_flock64_copyin(&bf, arg))
......
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
......
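For context, a hedged sketch (not part of this commit) of how a 32-bit program would reach the new compat case; XFS_IOC_ZERO_RANGE and struct xfs_flock64 are assumed to come from the userspace xfs headers, and the field layout mirrors the existing RESVSP/UNRESVSP calls:

	#include <xfs/xfs.h>		/* assumed: XFS_IOC_ZERO_RANGE, struct xfs_flock64 */
	#include <sys/ioctl.h>
	#include <sys/types.h>
	#include <string.h>
	#include <unistd.h>

	static int zero_range(int fd, off_t start, off_t len)
	{
		struct xfs_flock64 bf;

		memset(&bf, 0, sizeof(bf));
		bf.l_whence = SEEK_SET;	/* l_start is an absolute byte offset */
		bf.l_start = start;
		bf.l_len = len;		/* bytes to convert to zeroed, allocated space */
		return ioctl(fd, XFS_IOC_ZERO_RANGE, &bf);
	}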
@@ -70,6 +70,7 @@
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
#include <linux/list_sort.h>
#include <asm/page.h>
#include <asm/div64.h>
......
@@ -41,23 +41,6 @@ __xfs_printk(
printk("%sXFS: %pV\n", level, vaf);
}
void xfs_printk(
const char *level,
const struct xfs_mount *mp,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
__xfs_printk(level, mp, &vaf);
va_end(args);
}
#define define_xfs_printk_level(func, kern_level) \
void func(const struct xfs_mount *mp, const char *fmt, ...) \
{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
int do_panic = 0;
if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
xfs_printk(KERN_ALERT, mp,
"XFS: Transforming an alert into a BUG.");
xfs_alert(mp, "Transforming an alert into a BUG.");
do_panic = 1;
}
......
@@ -3,9 +3,6 @@
struct xfs_mount;
extern void xfs_printk(const char *level, const struct xfs_mount *mp,
const char *fmt, ...)
__attribute__ ((format (printf, 3, 4)));
extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
#else
static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
static inline void
__attribute__ ((format (printf, 2, 3)))
xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
{
}
#endif
......
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
if (error)
goto out_cleanup_procfs;
error = xfs_init_workqueues();
if (error)
goto out_sysctl_unregister;
vfs_initquota();
error = register_filesystem(&xfs_fs_type);
......
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
error = xfs_iflush(ip, flags);
/*
* We don't want to try again on non-blocking flushes that can't run
* again immediately. If an inode really must be written, then that's
* what the SYNC_WAIT flag is for.
*/
if (error == EAGAIN) {
ASSERT(!(flags & SYNC_WAIT));
error = 0;
}
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return error;
......
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
);

#define XFS_BUSY_SYNC \
{ 0, "async" }, \
{ 1, "sync" }
TRACE_EVENT(xfs_alloc_busy,
TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, int sync),
TP_ARGS(trans, agno, agbno, len, sync),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(struct xfs_trans *, tp)
__field(int, tid)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(int, sync)
),
TP_fast_assign(
__entry->dev = trans->t_mountp->m_super->s_dev;
__entry->tp = trans;
__entry->tid = trans->t_ticket->t_tid;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->sync = sync;
),
TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tp,
__entry->tid,
__entry->agno,
__entry->agbno,
__entry->len,
__print_symbolic(__entry->sync, XFS_BUSY_SYNC))
);
TRACE_EVENT(xfs_alloc_unbusy,

DECLARE_EVENT_CLASS(xfs_busy_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len),
TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
__entry->agbno,
__entry->len)
);
#define DEFINE_BUSY_EVENT(name) \
DEFINE_EVENT(xfs_busy_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
xfs_agblock_t agbno, xfs_extlen_t len), \
TP_ARGS(mp, agno, agbno, len))
DEFINE_BUSY_EVENT(xfs_alloc_busy);
DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
#define XFS_BUSY_STATES \
{ 0, "missing" }, \
{ 1, "found" }

TRACE_EVENT(xfs_alloc_busysearch,
TRACE_EVENT(xfs_alloc_busy_trim,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, int found),
TP_ARGS(mp, agno, agbno, len, found),
xfs_agblock_t agbno, xfs_extlen_t len,
xfs_agblock_t tbno, xfs_extlen_t tlen),
TP_ARGS(mp, agno, agbno, len, tbno, tlen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(int, found)
__field(xfs_agblock_t, tbno)
__field(xfs_extlen_t, tlen)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->found = found;
__entry->tbno = tbno;
__entry->tlen = tlen;
),
TP_printk("dev %d:%d agno %u agbno %u len %u %s",
TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__print_symbolic(__entry->found, XFS_BUSY_STATES))
__entry->tbno,
__entry->tlen)
);
TRACE_EVENT(xfs_trans_commit_lsn, TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__entry->wasfromfl,
__entry->isfl,
__entry->userdata,
__entry->firstblock)
(unsigned long long)__entry->firstblock)
)

#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
......
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
xfs_agnumber_t agno;
xfs_agblock_t bno;
xfs_extlen_t length;
xlog_tid_t tid; /* transaction that created this */
};

/*
......
@@ -41,19 +41,13 @@
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2

/*
* Prototypes for per-ag allocation routines
*/
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);

/*
* Internal functions.
*/

/*
* Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
xfs_extlen_t *reslen) /* result length */
{
xfs_agblock_t bno;
xfs_extlen_t diff;
xfs_extlen_t len;

if (args->alignment > 1 && foundlen >= args->minlen) {
bno = roundup(foundbno, args->alignment);
diff = bno - foundbno;
len = diff >= foundlen ? 0 : foundlen - diff;
} else {
bno = foundbno;
len = foundlen;
}
*resbno = bno;
*reslen = len;

/* Trim busy sections out of found extent */
xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);

if (args->alignment > 1 && len >= args->minlen) {
xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
xfs_extlen_t diff = aligned_bno - bno;

*resbno = aligned_bno;
*reslen = diff >= len ? 0 : len - diff;
} else {
*resbno = bno;
*reslen = len;
}
}
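To make the new flow concrete with hypothetical numbers (not taken from the source): if xfs_alloc_busy_trim() leaves bno = 7 and len = 20, and args->alignment is 4 with args->minlen no larger than 20, then aligned_bno = roundup(7, 4) = 8 and diff = 1, so *resbno = 8 and *reslen = 19; with an alignment of 1 the trimmed extent is passed through unchanged.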
/*
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
return 1;
agf = XFS_BUF_TO_AGF(args->agbp);
diff = be32_to_cpu(agf->agf_freeblks)
+ be32_to_cpu(agf->agf_flcount)
- args->len - args->minleft;
if (diff >= 0)
return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
if (error)
return error;

/*
* Search the busylist for these blocks and mark the
* transaction as synchronous if blocks are found. This
* avoids the need to block due to a synchronous log
* force to ensure correct ordering as the synchronous
* transaction will guarantee that for us.
*/
if (xfs_alloc_busy_search(args->mp, args->agno,
args->agbno, args->len))
xfs_trans_set_sync(args->tp);

ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
args->agbno, args->len));
}

if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
{
xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
xfs_agblock_t end; /* end of allocated extent */
int error;
xfs_agblock_t fbno; /* start block of found extent */
xfs_agblock_t fend; /* end block of found extent */
xfs_extlen_t flen; /* length of found extent */
xfs_agblock_t tbno; /* start block of trimmed extent */
xfs_extlen_t tlen; /* length of trimmed extent */
xfs_agblock_t tend; /* end block of trimmed extent */
xfs_agblock_t end; /* end of allocated extent */
int i; /* success/failure of operation */
xfs_agblock_t maxend; /* end of maximal extent */
xfs_agblock_t minend; /* end of minimal extent */
xfs_extlen_t rlen; /* length of returned extent */

ASSERT(args->alignment == 1);
...@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact( ...@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
goto error0; goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
ASSERT(fbno <= args->agbno); ASSERT(fbno <= args->agbno);
minend = args->agbno + args->minlen;
maxend = args->agbno + args->maxlen;
fend = fbno + flen;
/* /*
* Give up if the freespace isn't long enough for the minimum request. * Check for overlapping busy extents.
*/
xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
* long enough for the minimum request.
*/ */
if (fend < minend) if (tbno > args->agbno)
goto not_found;
if (tlen < args->minlen)
goto not_found;
tend = tbno + tlen;
if (tend < args->agbno + args->minlen)
goto not_found; goto not_found;
/* /*
...@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact( ...@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
* *
* Fix the length according to mod and prod if given. * Fix the length according to mod and prod if given.
*/ */
end = XFS_AGBLOCK_MIN(fend, maxend); end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
args->len = end - args->agbno; args->len = end - args->agbno;
xfs_alloc_fix_len(args); xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args)) if (!xfs_alloc_fix_minleft(args))
goto not_found; goto not_found;
rlen = args->len; rlen = args->len;
ASSERT(args->agbno + rlen <= fend); ASSERT(args->agbno + rlen <= tend);
end = args->agbno + rlen; end = args->agbno + rlen;
/* /*
...@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent( ...@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
struct xfs_btree_cur **scur, /* searching cursor */ struct xfs_btree_cur **scur, /* searching cursor */
xfs_agblock_t gdiff, /* difference for search comparison */ xfs_agblock_t gdiff, /* difference for search comparison */
xfs_agblock_t *sbno, /* extent found by search */ xfs_agblock_t *sbno, /* extent found by search */
xfs_extlen_t *slen, xfs_extlen_t *slen, /* extent length */
xfs_extlen_t *slena, /* aligned length */ xfs_agblock_t *sbnoa, /* aligned extent found by search */
xfs_extlen_t *slena, /* aligned extent length */
int dir) /* 0 = search right, 1 = search left */ int dir) /* 0 = search right, 1 = search left */
{ {
xfs_agblock_t bno;
xfs_agblock_t new; xfs_agblock_t new;
xfs_agblock_t sdiff; xfs_agblock_t sdiff;
int error; int error;
...@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent( ...@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
if (error) if (error)
goto error0; goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
/* /*
* The good extent is closer than this one. * The good extent is closer than this one.
*/ */
if (!dir) { if (!dir) {
if (bno >= args->agbno + gdiff) if (*sbnoa >= args->agbno + gdiff)
goto out_use_good; goto out_use_good;
} else { } else {
if (bno <= args->agbno - gdiff) if (*sbnoa <= args->agbno - gdiff)
goto out_use_good; goto out_use_good;
} }
...@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent( ...@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
xfs_alloc_fix_len(args); xfs_alloc_fix_len(args);
sdiff = xfs_alloc_compute_diff(args->agbno, args->len, sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, *sbno, args->alignment, *sbnoa,
*slen, &new); *slena, &new);
/* /*
* Choose closer size and invalidate other cursor. * Choose closer size and invalidate other cursor.
...@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near( ...@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t gtbnoa; /* aligned ... */ xfs_agblock_t gtbnoa; /* aligned ... */
xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtdiff; /* difference to right side entry */
xfs_extlen_t gtlen; /* length of right side entry */ xfs_extlen_t gtlen; /* length of right side entry */
xfs_extlen_t gtlena = 0; /* aligned ... */ xfs_extlen_t gtlena; /* aligned ... */
xfs_agblock_t gtnew; /* useful start bno of right side */ xfs_agblock_t gtnew; /* useful start bno of right side */
int error; /* error code */ int error; /* error code */
int i; /* result code, temporary */ int i; /* result code, temporary */
...@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near( ...@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t ltbnoa; /* aligned ... */ xfs_agblock_t ltbnoa; /* aligned ... */
xfs_extlen_t ltdiff; /* difference to left side entry */ xfs_extlen_t ltdiff; /* difference to left side entry */
xfs_extlen_t ltlen; /* length of left side entry */ xfs_extlen_t ltlen; /* length of left side entry */
xfs_extlen_t ltlena = 0; /* aligned ... */ xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_agblock_t ltnew; /* useful start bno of left side */
xfs_extlen_t rlen; /* length of returned extent */ xfs_extlen_t rlen; /* length of returned extent */
int forced = 0;
#if defined(DEBUG) && defined(__KERNEL__) #if defined(DEBUG) && defined(__KERNEL__)
/* /*
* Randomly don't execute the first algorithm. * Randomly don't execute the first algorithm.
...@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near( ...@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
dofirst = random32() & 1; dofirst = random32() & 1;
#endif #endif
restart:
bno_cur_lt = NULL;
bno_cur_gt = NULL;
ltlen = 0;
gtlena = 0;
ltlena = 0;
/* /*
* Get a cursor for the by-size btree. * Get a cursor for the by-size btree.
*/ */
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT); args->agno, XFS_BTNUM_CNT);
ltlen = 0;
bno_cur_lt = bno_cur_gt = NULL;
/* /*
* See if there are any free extents as big as maxlen. * See if there are any free extents as big as maxlen.
*/ */
...@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near( ...@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
goto error0; goto error0;
if (i == 0 || ltlen == 0) { if (i == 0 || ltlen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
trace_xfs_alloc_near_noentry(args);
return 0; return 0;
} }
ASSERT(i == 1); ASSERT(i == 1);
} }
args->wasfromfl = 0; args->wasfromfl = 0;
/* /*
* First algorithm. * First algorithm.
* If the requested extent is large wrt the freespaces available * If the requested extent is large wrt the freespaces available
...@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near( ...@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
if (args->len < blen) if (args->len < blen)
continue; continue;
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbno, ltlen, &ltnew); args->alignment, ltbnoa, ltlena, &ltnew);
if (ltnew != NULLAGBLOCK && if (ltnew != NULLAGBLOCK &&
(args->len > blen || ltdiff < bdiff)) { (args->len > blen || ltdiff < bdiff)) {
bdiff = ltdiff; bdiff = ltdiff;
...@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near( ...@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args); xfs_alloc_fix_len(args);
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, ltbno, ltlen, &ltnew); args->alignment, ltbnoa, ltlena, &ltnew);
error = xfs_alloc_find_best_extent(args, error = xfs_alloc_find_best_extent(args,
&bno_cur_lt, &bno_cur_gt, &bno_cur_lt, &bno_cur_gt,
ltdiff, &gtbno, &gtlen, &gtlena, ltdiff, &gtbno, &gtlen,
&gtbnoa, &gtlena,
0 /* search right */); 0 /* search right */);
} else { } else {
ASSERT(gtlena >= args->minlen); ASSERT(gtlena >= args->minlen);
...@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near( ...@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
xfs_alloc_fix_len(args); xfs_alloc_fix_len(args);
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
args->alignment, gtbno, gtlen, &gtnew); args->alignment, gtbnoa, gtlena, &gtnew);
error = xfs_alloc_find_best_extent(args, error = xfs_alloc_find_best_extent(args,
&bno_cur_gt, &bno_cur_lt, &bno_cur_gt, &bno_cur_lt,
gtdiff, &ltbno, &ltlen, &ltlena, gtdiff, &ltbno, &ltlen,
&ltbnoa, &ltlena,
1 /* search left */); 1 /* search left */);
} }
...@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near( ...@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
* If we couldn't get anything, give up. * If we couldn't get anything, give up.
*/ */
if (bno_cur_lt == NULL && bno_cur_gt == NULL) { if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
if (!forced++) {
trace_xfs_alloc_near_busy(args);
xfs_log_force(args->mp, XFS_LOG_SYNC);
goto restart;
}
trace_xfs_alloc_size_neither(args); trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK; args->agbno = NULLAGBLOCK;
return 0; return 0;
...@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near( ...@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
return 0; return 0;
} }
rlen = args->len; rlen = args->len;
(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
ltlen, &ltnew); ltbnoa, ltlena, &ltnew);
ASSERT(ltnew >= ltbno); ASSERT(ltnew >= ltbno);
ASSERT(ltnew + rlen <= ltbno + ltlen); ASSERT(ltnew + rlen <= ltbnoa + ltlena);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->agbno = ltnew; args->agbno = ltnew;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
ltnew, rlen, XFSA_FIXUP_BNO_OK))) ltnew, rlen, XFSA_FIXUP_BNO_OK)))
goto error0; goto error0;
...@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size( ...@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
int i; /* temp status variable */ int i; /* temp status variable */
xfs_agblock_t rbno; /* returned block number */ xfs_agblock_t rbno; /* returned block number */
xfs_extlen_t rlen; /* length of returned extent */ xfs_extlen_t rlen; /* length of returned extent */
int forced = 0;
restart:
/* /*
* Allocate and initialize a cursor for the by-size btree. * Allocate and initialize a cursor for the by-size btree.
*/ */
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
args->agno, XFS_BTNUM_CNT); args->agno, XFS_BTNUM_CNT);
bno_cur = NULL; bno_cur = NULL;
/* /*
* Look for an entry >= maxlen+alignment-1 blocks. * Look for an entry >= maxlen+alignment-1 blocks.
*/ */
if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
args->maxlen + args->alignment - 1, &i))) args->maxlen + args->alignment - 1, &i)))
goto error0; goto error0;
/* /*
* If none, then pick up the last entry in the tree unless the * If none or we have busy extents that we cannot allocate from, then
* tree is empty. * we have to settle for a smaller extent. In the case that there are
* no large extents, this will return the last entry in the tree unless
* the tree is empty. In the case that there are only busy large
* extents, this will return the largest small extent unless there
* are no smaller extents available.
*/ */
if (!i) { if (!i || forced > 1) {
if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, error = xfs_alloc_ag_vextent_small(args, cnt_cur,
&flen, &i))) &fbno, &flen, &i);
if (error)
goto error0; goto error0;
if (i == 0 || flen == 0) { if (i == 0 || flen == 0) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
...@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size( ...@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
return 0; return 0;
} }
ASSERT(i == 1); ASSERT(i == 1);
xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
} else {
/*
* Search for a non-busy extent that is large enough.
* If we are at low space, don't check, or if we fall of
* the end of the btree, turn off the busy check and
* restart.
*/
for (;;) {
error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen);
if (rlen >= args->maxlen)
break;
error = xfs_btree_increment(cnt_cur, 0, &i);
if (error)
goto error0;
if (i == 0) {
/*
* Our only valid extents must have been busy.
* Make it unbusy by forcing the log out and
* retrying. If we've been here before, forcing
* the log isn't making the extents available,
* which means they have probably been freed in
* this transaction. In that case, we have to
* give up on them and we'll attempt a minlen
* allocation the next time around.
*/
xfs_btree_del_cursor(cnt_cur,
XFS_BTREE_NOERROR);
trace_xfs_alloc_size_busy(args);
if (!forced++)
xfs_log_force(args->mp, XFS_LOG_SYNC);
goto restart;
}
}
} }
/*
* There's a freespace as big as maxlen+alignment-1, get it.
*/
else {
if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
}
/* /*
* In the first case above, we got the last entry in the * In the first case above, we got the last entry in the
* by-size btree. Now we check to see if the space hits maxlen * by-size btree. Now we check to see if the space hits maxlen
* once aligned; if not, we search left for something better. * once aligned; if not, we search left for something better.
* This can't happen in the second case above. * This can't happen in the second case above.
*/ */
xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
XFS_WANT_CORRUPTED_GOTO(rlen == 0 || XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
(rlen <= flen && rbno + rlen <= fbno + flen), error0); (rlen <= flen && rbno + rlen <= fbno + flen), error0);
...@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size( ...@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
* Fix up the length. * Fix up the length.
*/ */
args->len = rlen; args->len = rlen;
xfs_alloc_fix_len(args); if (rlen < args->minlen) {
if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { if (!forced++) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
trace_xfs_alloc_size_nominleft(args); trace_xfs_alloc_size_busy(args);
args->agbno = NULLAGBLOCK; xfs_log_force(args->mp, XFS_LOG_SYNC);
return 0; goto restart;
}
goto out_nominleft;
} }
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args))
goto out_nominleft;
rlen = args->len; rlen = args->len;
XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
/* /*
...@@ -1287,6 +1350,12 @@ xfs_alloc_ag_vextent_size( ...@@ -1287,6 +1350,12 @@ xfs_alloc_ag_vextent_size(
if (bno_cur) if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
return error; return error;
out_nominleft:
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
trace_xfs_alloc_size_nominleft(args);
args->agbno = NULLAGBLOCK;
return 0;
} }
/* /*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
if (error)
goto error0;
if (fbno != NULLAGBLOCK) {
xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
args->userdata);

if (args->userdata) {
xfs_buf_t *bp;
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
/*
* Since blocks move to the free list without the coordination
* used in xfs_bmap_finish, we can't allow block to be available
* for reallocation and non-transaction writing (user data)
* until we know that the transaction that moved it to the free
* list is permanently on disk. We track the blocks by declaring
* these blocks as "busy"; the busy list is maintained on a per-ag
* basis and each transaction records which entries should be removed
* when the iclog commits to disk. If a busy block is allocated,
* the iclog is pushed up to the LSN that freed the block.
*/
xfs_alloc_busy_insert(tp, agno, bno, len);
return 0;

error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
xfs_alloc_log_agf(tp, agbp, logflags);
*bnop = bno;
/*
* As blocks are freed, they are added to the per-ag busy list and
* remain there until the freeing transaction is committed to disk.
* Now that we have allocated blocks, this list must be searched to see
* if a block is being reused. If one is, then the freeing transaction
* must be pushed to disk before this transaction.
*
* We do this by setting the current transaction to a sync transaction
* which guarantees that the freeing transaction is on disk before this
* transaction. This is done instead of a synchronous log force here so
* that we don't sit and wait with the AGF locked in the transaction
* during the log force.
*/
if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
xfs_trans_set_sync(tp);
return 0;
}
@@ -2423,105 +2468,13 @@ xfs_free_extent(
}

error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
if (!error)
xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
error0:
xfs_perag_put(args.pag);
return error;
}
/*
* AG Busy list management
* The busy list contains block ranges that have been freed but whose
* transactions have not yet hit disk. If any block listed in a busy
* list is reused, the transaction that freed it must be forced to disk
* before continuing to use the block.
*
* xfs_alloc_busy_insert - add to the per-ag busy list
* xfs_alloc_busy_clear - remove an item from the per-ag busy list
* xfs_alloc_busy_search - search for a busy extent
*/
/*
* Insert a new extent into the busy tree.
*
* The busy extent tree is indexed by the start block of the busy extent.
* there can be multiple overlapping ranges in the busy extent tree but only
* ever one entry at a given start block. The reason for this is that
* multi-block extents can be freed, then smaller chunks of that extent
* allocated and freed again before the first transaction commit is on disk.
* If the exact same start block is freed a second time, we have to wait for
* that busy extent to pass out of the tree before the new extent is inserted.
* There are two main cases we have to handle here.
*
* The first case is a transaction that triggers a "free - allocate - free"
* cycle. This can occur during btree manipulations as a btree block is freed
* to the freelist, then allocated from the free list, then freed again. In
* this case, the second extent free is what triggers the duplicate and as
* such the transaction IDs should match. Because the extent was allocated in
* this transaction, the transaction must be marked as synchronous. This is
* true for all cases where the free/alloc/free occurs in the one transaction,
* hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
* This serves to catch violations of the second case quite effectively.
*
* The second case is where the free/alloc/free occur in different
* transactions. In this case, the thread freeing the extent the second time
* can't mark the extent busy immediately because it is already tracked in a
* transaction that may be committing. When the log commit for the existing
* busy extent completes, the busy extent will be removed from the tree. If we
* allow the second busy insert to continue using that busy extent structure,
* it can be freed before this transaction is safely in the log. Hence our
* only option in this case is to force the log to remove the existing busy
* extent from the list before we insert the new one with the current
* transaction ID.
*
* The problem we are trying to avoid in the free-alloc-free in separate
* transactions is most easily described with a timeline:
*
* Thread 1 Thread 2 Thread 3 xfslogd
* xact alloc
* free X
* mark busy
* commit xact
* free xact
* xact alloc
* alloc X
* busy search
* mark xact sync
* commit xact
* free xact
* force log
* checkpoint starts
* ....
* xact alloc
* free X
* mark busy
* finds match
* *** KABOOM! ***
* ....
* log IO completes
* unbusy X
* checkpoint completes
*
* By issuing a log force in thread 3 @ "KABOOM", the thread will block until
* the checkpoint completes, and the busy extent it matched will have been
* removed from the tree when it is woken. Hence it can then continue safely.
*
* However, to ensure this matching process is robust, we need to use the
* transaction ID for identifying transaction, as delayed logging results in
* the busy extent and transaction lifecycles being different. i.e. the busy
* extent is active for a lot longer than the transaction. Hence the
* transaction structure can be freed and reallocated, then mark the same
* extent busy again in the new transaction. In this case the new transaction
* will have a different tid but can have the same address, and hence we need
* to check against the tid.
*
* Future: for delayed logging, we could avoid the log force if the extent was
* first freed in the current checkpoint sequence. This, however, requires the
* ability to pin the current checkpoint in memory until this transaction
* commits to ensure that both the original free and the current one combine
* logically into the one checkpoint. If the checkpoint sequences are
* different, however, we still need to wait on a log force.
*/
void
xfs_alloc_busy_insert(
struct xfs_trans *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
struct xfs_busy_extent *busyp;
struct xfs_perag *pag;
struct rb_node **rbp;
struct rb_node *parent;
struct rb_node *parent = NULL;
int match;

new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
* block, make this a synchronous transaction to insure that
* the block is not reused before this transaction commits.
*/
trace_xfs_alloc_busy(tp, agno, bno, len, 1);
trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
xfs_trans_set_sync(tp);
return;
}
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
new->agno = agno;
new->bno = bno;
new->length = len;
new->tid = xfs_log_get_trans_ident(tp);
INIT_LIST_HEAD(&new->list);

/* trace before insert to be able to see failed inserts */
trace_xfs_alloc_busy(tp, agno, bno, len, 0);
trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);

pag = xfs_perag_get(tp->t_mountp, new->agno);
restart:
spin_lock(&pag->pagb_lock);
rbp = &pag->pagb_tree.rb_node;
parent = NULL;
busyp = NULL;
match = 0;
while (*rbp && match >= 0) {
while (*rbp) {
parent = *rbp;
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);

if (new->bno < busyp->bno) {
/* may overlap, but exact start block is lower */
rbp = &(*rbp)->rb_left;
if (new->bno + new->length > busyp->bno)
match = busyp->tid == new->tid ? 1 : -1;
ASSERT(new->bno + new->length <= busyp->bno);
} else if (new->bno > busyp->bno) {
/* may overlap, but exact start block is higher */
rbp = &(*rbp)->rb_right;
if (bno < busyp->bno + busyp->length)
match = busyp->tid == new->tid ? 1 : -1;
ASSERT(bno >= busyp->bno + busyp->length);
} else {
match = busyp->tid == new->tid ? 1 : -1;
break;
ASSERT(0);
}
}
if (match < 0) {
/* overlap marked busy in different transaction */
spin_unlock(&pag->pagb_lock);
xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
goto restart;
}
if (match > 0) {
/*
* overlap marked busy in same transaction. Update if exact
* start block match, otherwise combine the busy extents into
* a single range.
*/
if (busyp->bno == new->bno) {
busyp->length = max(busyp->length, new->length);
spin_unlock(&pag->pagb_lock);
ASSERT(tp->t_flags & XFS_TRANS_SYNC);
xfs_perag_put(pag);
kmem_free(new);
return;
}
rb_erase(&busyp->rb_node, &pag->pagb_tree);
new->length = max(busyp->bno + busyp->length,
new->bno + new->length) -
min(busyp->bno, new->bno);
new->bno = min(busyp->bno, new->bno);
} else
busyp = NULL;
rb_link_node(&new->rb_node, parent, rbp);
rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ xfs_alloc_busy_insert(
list_add(&new->list, &tp->t_busy);
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
kmem_free(busyp);
}
/*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
}
}
spin_unlock(&pag->pagb_lock);
trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
xfs_perag_put(pag);
return match;
}
/*
* The found free extent [fbno, fend] overlaps part or all of the given busy
* extent. If the overlap covers the beginning, the end, or all of the busy
* extent, the overlapping portion can be made unbusy and used for the
* allocation. We can't split a busy extent because we can't modify a
* transaction/CIL context busy list, but we can update an entries block
* number or length.
*
* Returns true if the extent can safely be reused, or false if the search
* needs to be restarted.
*/
STATIC bool
xfs_alloc_busy_update_extent(
struct xfs_mount *mp,
struct xfs_perag *pag,
struct xfs_busy_extent *busyp,
xfs_agblock_t fbno,
xfs_extlen_t flen,
bool userdata)
{
xfs_agblock_t fend = fbno + flen;
xfs_agblock_t bbno = busyp->bno;
xfs_agblock_t bend = bbno + busyp->length;
/*
* If there is a busy extent overlapping a user allocation, we have
* no choice but to force the log and retry the search.
*
* Fortunately this does not happen during normal operation, but
* only if the filesystem is very low on space and has to dip into
* the AGFL for normal allocations.
*/
if (userdata)
goto out_force_log;
if (bbno < fbno && bend > fend) {
/*
* Case 1:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +---------+
* fbno fend
*/
/*
* We would have to split the busy extent to be able to track
* it correct, which we cannot do because we would have to
* modify the list of busy extents attached to the transaction
* or CIL context, which is immutable.
*
* Force out the log to clear the busy extent and retry the
* search.
*/
goto out_force_log;
} else if (bbno >= fbno && bend <= fend) {
/*
* Case 2:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-----------------+
* fbno fend
*
* Case 3:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +--------------------------+
* fbno fend
*
* Case 4:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +--------------------------+
* fbno fend
*
* Case 5:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-----------------------------------+
* fbno fend
*
*/
/*
* The busy extent is fully covered by the extent we are
* allocating, and can simply be removed from the rbtree.
* However we cannot remove it from the immutable list
* tracking busy extents in the transaction or CIL context,
* so set the length to zero to mark it invalid.
*
* We also need to restart the busy extent search from the
* tree root, because erasing the node can rearrange the
* tree topology.
*/
rb_erase(&busyp->rb_node, &pag->pagb_tree);
busyp->length = 0;
return false;
} else if (fend < bend) {
/*
* Case 6:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +---------+
* fbno fend
*
* Case 7:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +------------------+
* fbno fend
*
*/
busyp->bno = fend;
} else if (bbno < fbno) {
/*
* Case 8:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-------------+
* fbno fend
*
* Case 9:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +----------------------+
* fbno fend
*/
busyp->length = fbno - busyp->bno;
} else {
ASSERT(0);
}
trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
return true;
out_force_log:
spin_unlock(&pag->pagb_lock);
xfs_log_force(mp, XFS_LOG_SYNC);
trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
spin_lock(&pag->pagb_lock);
return false;
}
/*
* For a given extent [fbno, flen], make sure we can reuse it safely.
*/
void
xfs_alloc_busy_clear(
struct xfs_mount *mp,
struct xfs_busy_extent *busyp)
{
struct xfs_perag *pag;

trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
busyp->length);
ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
busyp->length) == 1);
list_del_init(&busyp->list);

pag = xfs_perag_get(mp, busyp->agno);
spin_lock(&pag->pagb_lock);
rb_erase(&busyp->rb_node, &pag->pagb_tree);
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);

void
xfs_alloc_busy_reuse(
struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_agblock_t fbno,
xfs_extlen_t flen,
bool userdata)
{
struct xfs_perag *pag;
struct rb_node *rbp;

ASSERT(flen > 0);

pag = xfs_perag_get(mp, agno);
spin_lock(&pag->pagb_lock);
restart:
rbp = pag->pagb_tree.rb_node;
while (rbp) {
struct xfs_busy_extent *busyp =
rb_entry(rbp, struct xfs_busy_extent, rb_node);
xfs_agblock_t bbno = busyp->bno;
xfs_agblock_t bend = bbno + busyp->length;

if (fbno + flen <= bbno) {
rbp = rbp->rb_left;
continue;
} else if (fbno >= bend) {
rbp = rbp->rb_right;
continue;
}

if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
userdata))
goto restart;
}
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
}
/*
* For a given extent [fbno, flen], search the busy extent list to find a
* subset of the extent that is not busy. If *rlen is smaller than
* args->minlen no suitable extent could be found, and the higher level
* code needs to force out the log and retry the allocation.
*/
STATIC void
xfs_alloc_busy_trim(
struct xfs_alloc_arg *args,
xfs_agblock_t bno,
xfs_extlen_t len,
xfs_agblock_t *rbno,
xfs_extlen_t *rlen)
{
xfs_agblock_t fbno;
xfs_extlen_t flen;
struct rb_node *rbp;
ASSERT(len > 0);
spin_lock(&args->pag->pagb_lock);
restart:
fbno = bno;
flen = len;
rbp = args->pag->pagb_tree.rb_node;
while (rbp && flen >= args->minlen) {
struct xfs_busy_extent *busyp =
rb_entry(rbp, struct xfs_busy_extent, rb_node);
xfs_agblock_t fend = fbno + flen;
xfs_agblock_t bbno = busyp->bno;
xfs_agblock_t bend = bbno + busyp->length;
if (fend <= bbno) {
rbp = rbp->rb_left;
continue;
} else if (fbno >= bend) {
rbp = rbp->rb_right;
continue;
}
/*
* If this is a metadata allocation, try to reuse the busy
* extent instead of trimming the allocation.
*/
if (!args->userdata) {
if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
busyp, fbno, flen,
false))
goto restart;
continue;
}
if (bbno <= fbno) {
/* start overlap */
/*
* Case 1:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +---------+
* fbno fend
*
* Case 2:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-------------+
* fbno fend
*
* Case 3:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-------------+
* fbno fend
*
* Case 4:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-----------------+
* fbno fend
*
* No unbusy region in extent, return failure.
*/
if (fend <= bend)
goto fail;
/*
* Case 5:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +----------------------+
* fbno fend
*
* Case 6:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +--------------------------+
* fbno fend
*
* Needs to be trimmed to:
* +-------+
* fbno fend
*/
fbno = bend;
} else if (bend >= fend) {
/* end overlap */
/*
* Case 7:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +------------------+
* fbno fend
*
* Case 8:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +--------------------------+
* fbno fend
*
* Needs to be trimmed to:
* +-------+
* fbno fend
*/
fend = bbno;
} else {
/* middle overlap */
/*
* Case 9:
* bbno bend
* +BBBBBBBBBBBBBBBBB+
* +-----------------------------------+
* fbno fend
*
* Can be trimmed to:
* +-------+ OR +-------+
* fbno fend fbno fend
*
* Backward allocation leads to significant
* fragmentation of directories, which degrades
* directory performance, therefore we always want to
* choose the option that produces forward allocation
* patterns.
* Preferring the lower bno extent will make the next
* request use "fend" as the start of the next
* allocation; if the segment is no longer busy at
* that point, we'll get a contiguous allocation, but
* even if it is still busy, we will get a forward
* allocation.
* We try to avoid choosing the segment at "bend",
* because that can lead to the next allocation
* taking the segment at "fbno", which would be a
* backward allocation. We only use the segment at
* "fbno" if it is much larger than the current
* requested size, because in that case there's a
* good chance subsequent allocations will be
* contiguous.
*/
if (bbno - fbno >= args->maxlen) {
/* left candidate fits perfect */
fend = bbno;
} else if (fend - bend >= args->maxlen * 4) {
/* right candidate has enough free space */
fbno = bend;
} else if (bbno - fbno >= args->minlen) {
/* left candidate fits minimum requirement */
fend = bbno;
} else {
goto fail;
}
}
flen = fend - fbno;
}
spin_unlock(&args->pag->pagb_lock);
if (fbno != bno || flen != len) {
trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
fbno, flen);
}
*rbno = fbno;
*rlen = flen;
return;
fail:
/*
* Return a zero extent length as failure indications. All callers
* re-check if the trimmed extent satisfies the minlen requirement.
*/
spin_unlock(&args->pag->pagb_lock);
trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
*rbno = fbno;
*rlen = 0;
}
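A hypothetical worked example of the middle-overlap heuristic (the numbers are illustrative only): with args->maxlen = 16, a free extent covering blocks 0-63 and a busy extent covering blocks 40-47, the left candidate is 40 blocks, which is at least maxlen, so the trim returns [0, 40) and the allocation stays in front of the busy range; a later allocation can then continue forward at block 48 once that range is no longer busy.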
static void
xfs_alloc_busy_clear_one(
struct xfs_mount *mp,
struct xfs_perag *pag,
struct xfs_busy_extent *busyp)
{
if (busyp->length) {
trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
busyp->length);
rb_erase(&busyp->rb_node, &pag->pagb_tree);
}
list_del_init(&busyp->list);
kmem_free(busyp);
}
void
xfs_alloc_busy_clear(
struct xfs_mount *mp,
struct list_head *list)
{
struct xfs_busy_extent *busyp, *n;
struct xfs_perag *pag = NULL;
xfs_agnumber_t agno = NULLAGNUMBER;
list_for_each_entry_safe(busyp, n, list, list) {
if (busyp->agno != agno) {
if (pag) {
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
}
pag = xfs_perag_get(mp, busyp->agno);
spin_lock(&pag->pagb_lock);
agno = busyp->agno;
}
xfs_alloc_busy_clear_one(mp, pag, busyp);
}
if (pag) {
spin_unlock(&pag->pagb_lock);
xfs_perag_put(pag);
}
}
/*
* Callback for list_sort to sort busy extents by the AG they reside in.
*/
int
xfs_busy_extent_ag_cmp(
void *priv,
struct list_head *a,
struct list_head *b)
{
return container_of(a, struct xfs_busy_extent, list)->agno -
container_of(b, struct xfs_busy_extent, list)->agno;
}
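Callers are expected to sort the list first so that xfs_alloc_busy_clear() takes each AG's pagb_lock only once; the xlog_cil_committed() hunk below does exactly that. A minimal sketch of the calling pattern:

	/* sketch of the caller pattern used at CIL commit completion */
	xfs_alloc_busy_sort(&ctx->busy_extents);
	xfs_alloc_busy_clear(mp, &ctx->busy_extents);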
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len);

void
xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);

int
xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len);
void
xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
int
xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
static inline void xfs_alloc_busy_sort(struct list_head *list)
{
list_sort(NULL, list, xfs_busy_extent_ag_cmp);
}
#endif /* __KERNEL__ */

/*
......
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
return 0;
}

xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);

xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
if (error)
return error;
/*
* Since blocks move to the free list without the coordination used in
* xfs_bmap_finish, we can't allow block to be available for
* reallocation and non-transaction writing (user data) until we know
* that the transaction that moved it to the free list is permanently
* on disk. We track the blocks by declaring these blocks as "busy";
* the busy list is maintained on a per-ag basis and each transaction
* records which entries should be removed when the iclog commits to
* disk. If a busy block is allocated, the iclog is pushed up to the
* LSN that freed the block.
*/
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
return 0;
......
@@ -202,7 +202,7 @@ xfs_swap_extents(
xfs_inode_t *tip, /* tmp inode */
xfs_swapext_t *sxp)
{
xfs_mount_t *mp;
xfs_mount_t *mp = ip->i_mount;
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
int taforkblks = 0;
__uint64_t tmp;

mp = ip->i_mount;

tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
if (!tempifp) {
error = XFS_ERROR(ENOMEM);
goto out;
}

sbp = &sxp->sx_stat;

/*
* we have to do two separate lock calls here to keep lockdep
* happy. If we try to get all the locks in one call, lock will
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
return 0;
}
last_byte = xfs_file_last_byte(ip);
trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
if (last_byte > toss_start) {
if (flags & XFS_ITRUNC_DEFINITE) {
xfs_tosspages(ip, toss_start,
......
@@ -970,7 +970,6 @@ xfs_iflush_abort(
{
xfs_inode_log_item_t *iip = ip->i_itemp;

iip = ip->i_itemp;
if (iip) {
struct xfs_ail *ailp = iip->ili_item.li_ailp;
if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
......
...@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log) ...@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
xlog_cil_destroy(log); xlog_cil_destroy(log);
/*
* always need to ensure that the extra buffer does not point to memory
* owned by another log buffer before we free it.
*/
xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
xfs_buf_free(log->l_xbuf);
iclog = log->l_iclog; iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) { for (i=0; i<log->l_iclog_bufs; i++) {
xfs_buf_free(iclog->ic_bp); xfs_buf_free(iclog->ic_bp);
...@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log) ...@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
} }
spinlock_destroy(&log->l_icloglock); spinlock_destroy(&log->l_icloglock);
xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL; log->l_mp->m_log = NULL;
kmem_free(log); kmem_free(log);
} /* xlog_dealloc_log */ } /* xlog_dealloc_log */
...@@ -3248,13 +3254,6 @@ xfs_log_ticket_get( ...@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
return ticket; return ticket;
} }
xlog_tid_t
xfs_log_get_trans_ident(
struct xfs_trans *tp)
{
return tp->t_ticket->t_tid;
}
/* /*
* Allocate and initialise a new log ticket. * Allocate and initialise a new log ticket.
*/ */
......
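The xlog_dealloc_log() change above resets l_xbuf with xfs_buf_set_empty() and frees it before the iclog buffers, because (as the added comment says) that buffer can still point at memory owned by another log buffer; freeing it while it references borrowed pages would release memory it does not own. The same precaution in miniature, with a generic wrapper type: struct byte_buf, buf_borrow(), buf_reset() and buf_free() are made-up names used only for illustration.

/* Illustrative "detach borrowed memory before freeing the wrapper" pattern. */
#include <stdlib.h>
#include <string.h>

struct byte_buf {
	void	*data;
	size_t	len;
	int	owns_data;	/* did this buffer allocate data itself? */
};

/* Point the buffer at memory owned by someone else. */
static void buf_borrow(struct byte_buf *b, void *mem, size_t len)
{
	b->data = mem;
	b->len = len;
	b->owns_data = 0;
}

/* Return the buffer to an empty state, dropping any borrowed reference. */
static void buf_reset(struct byte_buf *b)
{
	b->data = NULL;
	b->len = 0;
	b->owns_data = 0;
}

static void buf_free(struct byte_buf *b)
{
	if (b->owns_data)
		free(b->data);	/* only ever free memory this buffer owns */
	free(b);
}

int main(void)
{
	char backing[128];
	struct byte_buf *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	buf_borrow(b, backing, sizeof(backing));
	memset(b->data, 0, b->len);

	buf_reset(b);	/* detach before free, as the fix above does */
	buf_free(b);
	return 0;
}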
...@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *); ...@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket);
xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_log_vec *log_vector, struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn, int flags); xfs_lsn_t *commit_lsn, int flags);
......
...@@ -361,13 +361,12 @@ xlog_cil_committed( ...@@ -361,13 +361,12 @@ xlog_cil_committed(
int abort) int abort)
{ {
struct xfs_cil_ctx *ctx = args; struct xfs_cil_ctx *ctx = args;
struct xfs_busy_extent *busyp, *n;
xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
ctx->start_lsn, abort); ctx->start_lsn, abort);
list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) xfs_alloc_busy_sort(&ctx->busy_extents);
xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
spin_lock(&ctx->cil->xc_cil_lock); spin_lock(&ctx->cil->xc_cil_lock);
list_del(&ctx->committing); list_del(&ctx->committing);
......
...@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i) ...@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
shutdown */ shutdown */
#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
typedef __uint32_t xlog_tid_t;
#ifdef __KERNEL__ #ifdef __KERNEL__
/* /*
* Below are states for covering allocation transactions. * Below are states for covering allocation transactions.
......
...@@ -204,6 +204,35 @@ xlog_bread( ...@@ -204,6 +204,35 @@ xlog_bread(
return 0; return 0;
} }
/*
* Read at an offset into the buffer. Returns with the buffer in its original
* state regardless of the result of the read.
*/
STATIC int
xlog_bread_offset(
xlog_t *log,
xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */
xfs_buf_t *bp,
xfs_caddr_t offset)
{
xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
int orig_len = bp->b_buffer_length;
int error, error2;
error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
if (error)
return error;
error = xlog_bread_noalign(log, blk_no, nbblks, bp);
/* must reset buffer pointer even on error */
error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
if (error)
return error;
return error2;
}
/* /*
* Write out the buffer at the given block for the given number of blocks. * Write out the buffer at the given block for the given number of blocks.
* The buffer is kept locked across the write and is returned locked. * The buffer is kept locked across the write and is returned locked.
...@@ -1229,20 +1258,12 @@ xlog_write_log_records( ...@@ -1229,20 +1258,12 @@ xlog_write_log_records(
*/ */
ealign = round_down(end_block, sectbb); ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) { if (j == 0 && (start_block + endcount > ealign)) {
offset = XFS_BUF_PTR(bp); offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
balign = BBTOB(ealign - start_block); error = xlog_bread_offset(log, ealign, sectbb,
error = XFS_BUF_SET_PTR(bp, offset + balign, bp, offset);
BBTOB(sectbb));
if (error) if (error)
break; break;
error = xlog_bread_noalign(log, ealign, sectbb, bp);
if (error)
break;
error = XFS_BUF_SET_PTR(bp, offset, bufblks);
if (error)
break;
} }
offset = xlog_align(log, start_block, endcount, bp); offset = xlog_align(log, start_block, endcount, bp);
...@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass( ...@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
* - order is important. * - order is important.
*/ */
wrapped_hblks = hblks - split_hblks; wrapped_hblks = hblks - split_hblks;
error = XFS_BUF_SET_PTR(hbp, error = xlog_bread_offset(log, 0,
offset + BBTOB(split_hblks), wrapped_hblks, hbp,
BBTOB(hblks - split_hblks)); offset + BBTOB(split_hblks));
if (error)
goto bread_err2;
error = xlog_bread_noalign(log, 0,
wrapped_hblks, hbp);
if (error)
goto bread_err2;
error = XFS_BUF_SET_PTR(hbp, offset,
BBTOB(hblks));
if (error) if (error)
goto bread_err2; goto bread_err2;
} }
...@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass( ...@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end) * _first_, then the log start (LR header end)
* - order is important. * - order is important.
*/ */
error = XFS_BUF_SET_PTR(dbp, error = xlog_bread_offset(log, 0,
offset + BBTOB(split_bblks), bblks - split_bblks, hbp,
BBTOB(bblks - split_bblks)); offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
error = xlog_bread_noalign(log, wrapped_hblks,
bblks - split_bblks,
dbp);
if (error)
goto bread_err2;
error = XFS_BUF_SET_PTR(dbp, offset, h_size);
if (error) if (error)
goto bread_err2; goto bread_err2;
} }
......
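The recovery changes above replace two open-coded XFS_BUF_SET_PTR / xlog_bread_noalign / XFS_BUF_SET_PTR sequences with the new xlog_bread_offset() helper, which restores the original buffer pointer even when the read fails and lets the read error take precedence over any error from the restore. (Worth noting: the second converted call site reads into hbp starting at block 0, where the code it replaces read into dbp starting at wrapped_hblks.) A hedged userspace sketch of the same restore-even-on-error idiom, using a file offset instead of a buffer pointer; read_at_pos() and its plumbing are illustrative, not kernel code.

/* Read nbytes at pos without disturbing the caller's file offset.
 * Mirrors the error precedence of xlog_bread_offset(): a read failure is
 * reported first; a restore failure is reported only if the read worked. */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>

static int read_at_pos(int fd, void *buf, size_t nbytes, off_t pos)
{
	off_t	orig = lseek(fd, 0, SEEK_CUR);
	int	error = 0, error2 = 0;

	if (orig == (off_t)-1)
		return -errno;
	if (lseek(fd, pos, SEEK_SET) == (off_t)-1)
		return -errno;

	if (read(fd, buf, nbytes) < 0)
		error = -errno;

	/* must restore the caller's offset even if the read failed */
	if (lseek(fd, orig, SEEK_SET) == (off_t)-1)
		error2 = -errno;

	if (error)
		return error;
	return error2;
}

int main(void)
{
	char	tmp[16];
	int	fd = open("/dev/zero", O_RDONLY);

	if (fd < 0)
		return 1;
	return read_at_pos(fd, tmp, sizeof(tmp), 0) ? 1 : 0;
}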
...@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch( ...@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
uint nmsb, uint nmsb,
int rsvd) int rsvd)
{ {
xfs_mod_sb_t *msbp = &msb[0]; xfs_mod_sb_t *msbp;
int error = 0; int error = 0;
/* /*
...@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch( ...@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
* changes will be atomic. * changes will be atomic.
*/ */
spin_lock(&mp->m_sb_lock); spin_lock(&mp->m_sb_lock);
for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { for (msbp = msb; msbp < (msb + nmsb); msbp++) {
ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
msbp->msb_field > XFS_SBS_FDBLOCKS); msbp->msb_field > XFS_SBS_FDBLOCKS);
......
...@@ -608,10 +608,8 @@ STATIC void ...@@ -608,10 +608,8 @@ STATIC void
xfs_trans_free( xfs_trans_free(
struct xfs_trans *tp) struct xfs_trans *tp)
{ {
struct xfs_busy_extent *busyp, *n; xfs_alloc_busy_sort(&tp->t_busy);
xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
xfs_alloc_busy_clear(tp->t_mountp, busyp);
atomic_dec(&tp->t_mountp->m_active_trans); atomic_dec(&tp->t_mountp->m_active_trans);
xfs_trans_free_dqinfo(tp); xfs_trans_free_dqinfo(tp);
......
...@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ ...@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
typedef __uint32_t xlog_tid_t; /* transaction ID type */
/* /*
* These types are 64 bits on disk but are either 32 or 64 bits in memory. * These types are 64 bits on disk but are either 32 or 64 bits in memory.
* Disk based types: * Disk based types:
......