Commit 788c1da0 authored by Linus Torvalds

Merge tag 'xfs-4.15-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Here are some bug fixes for 4.15-rc2.

   - fix memory leaks that appeared after removing ifork inline data
     buffer

   - recover deferred rmap update log items in correct order

   - fix memory leaks when buffer construction fails

   - fix memory leaks when bmbt is corrupt

   - fix some uninitialized variables and math problems in the quota
     scrubber

   - add some omitted attribution tags on the log replay commit

   - fix some UBSAN complaints about integer overflows with large sparse
     files

   - implement an effective inode mode check in online fsck

   - fix log's inability to retry quota item writeout due to transient
     errors"

* tag 'xfs-4.15-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Properly retry failed dquot items in case of error during buffer writeback
  xfs: scrub inode mode properly
  xfs: remove unused parameter from xfs_writepage_map
  xfs: ubsan fixes
  xfs: calculate correct offset in xfs_scrub_quota_item
  xfs: fix uninitialized variable in xfs_scrub_quota
  xfs: fix leaks on corruption errors in xfs_bmap.c
  xfs: fortify xfs_alloc_buftarg error handling
  xfs: log recovery should replay deferred ops in order
  xfs: always free inline data before resetting inode fork during ifree
parents e1ba1c99 373b0589
......@@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents(
*done = true;
goto del_cursor;
}
XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
del_cursor);
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
......@@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
}
XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
del_cursor);
if (stop_fsb >= got.br_startoff + got.br_blockcount) {
error = -EIO;
......
......@@ -318,8 +318,20 @@ xfs_scrub_dinode(
/* di_mode */
mode = be16_to_cpu(dip->di_mode);
if (mode & ~(S_IALLUGO | S_IFMT))
switch (mode & S_IFMT) {
case S_IFLNK:
case S_IFREG:
case S_IFDIR:
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
/* mode is recognized */
break;
default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
break;
}
/* v1/v2 fields */
switch (dip->di_version) {
......
......@@ -107,7 +107,7 @@ xfs_scrub_quota_item(
unsigned long long rcount;
xfs_ino_t fs_icount;
offset = id * qi->qi_dqperchunk;
offset = id / qi->qi_dqperchunk;
/*
* We fed $id and DQNEXT into the xfs_qm_dqget call, which means
......@@ -207,7 +207,7 @@ xfs_scrub_quota(
xfs_dqid_t id = 0;
uint dqtype;
int nimaps;
int error;
int error = 0;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return -ENOENT;
......
......@@ -399,7 +399,7 @@ xfs_map_blocks(
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
if (offset + count > mp->m_super->s_maxbytes)
if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
......@@ -896,13 +896,13 @@ xfs_writepage_map(
struct writeback_control *wbc,
struct inode *inode,
struct page *page,
loff_t offset,
uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
ssize_t len = i_blocksize(inode);
uint64_t offset;
int error = 0;
int count = 0;
int uptodate = 1;
......@@ -1146,7 +1146,7 @@ xfs_do_writepage(
end_offset = offset;
}
return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
......@@ -1265,7 +1265,7 @@ xfs_map_trim_size(
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
offset + mapping_size >= i_size_read(inode)) {
(xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
......@@ -1312,7 +1312,7 @@ xfs_get_blocks(
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
if (offset + size > mp->m_super->s_maxbytes)
if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
......
......@@ -389,7 +389,8 @@ xfs_bud_init(
int
xfs_bui_recover(
struct xfs_mount *mp,
struct xfs_bui_log_item *buip)
struct xfs_bui_log_item *buip,
struct xfs_defer_ops *dfops)
{
int error = 0;
unsigned int bui_type;
......@@ -404,9 +405,7 @@ xfs_bui_recover(
xfs_exntst_t state;
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
struct xfs_defer_ops dfops;
struct xfs_bmbt_irec irec;
xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
......@@ -464,7 +463,6 @@ xfs_bui_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
xfs_defer_init(&dfops, &firstfsb);
/* Process deferred bmap item. */
state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
......@@ -479,16 +477,16 @@ xfs_bui_recover(
break;
default:
error = -EFSCORRUPTED;
goto err_dfops;
goto err_inode;
}
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,
ip, whichfork, bmap->me_startoff,
bmap->me_startblock, &count, state);
if (error)
goto err_dfops;
goto err_inode;
if (count > 0) {
ASSERT(type == XFS_BMAP_UNMAP);
......@@ -496,16 +494,11 @@ xfs_bui_recover(
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
irec.br_state = state;
error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);
if (error)
goto err_dfops;
goto err_inode;
}
/* Finish transaction, free inodes. */
error = xfs_defer_finish(&tp, &dfops);
if (error)
goto err_dfops;
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
......@@ -513,8 +506,6 @@ xfs_bui_recover(
return error;
err_dfops:
xfs_defer_cancel(&dfops);
err_inode:
xfs_trans_cancel(tp);
if (ip) {
......
......@@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip);
int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip,
struct xfs_defer_ops *dfops);
#endif /* __XFS_BMAP_ITEM_H__ */
......@@ -1815,22 +1815,27 @@ xfs_alloc_buftarg(
btp->bt_daxdev = dax_dev;
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
goto error_free;
if (list_lru_init(&btp->bt_lru))
goto error;
goto error_free;
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
goto error;
goto error_lru;
btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
register_shrinker(&btp->bt_shrinker);
if (register_shrinker(&btp->bt_shrinker))
goto error_pcpu;
return btp;
error:
error_pcpu:
percpu_counter_destroy(&btp->bt_io_count);
error_lru:
list_lru_destroy(&btp->bt_lru);
error_free:
kmem_free(btp);
return NULL;
}
......
......@@ -970,15 +970,23 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if ((lip->li_flags & XFS_LI_IN_AIL) &&
lip->li_lsn == qip->qli_flush_lsn) {
((lip->li_lsn == qip->qli_flush_lsn) ||
(lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
if (lip->li_lsn == qip->qli_flush_lsn)
if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
else
} else {
/*
* Clear the failed state since we are about to drop the
* flush lock
*/
if (lip->li_flags & XFS_LI_FAILED)
xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
}
}
/*
* Release the dq's flush lock since we're done with it.
......
......@@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait(
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
/*
 * Log item error callback for dquots, invoked with the log item and the
 * buffer whose writeback failed.
 *
 * Flags the log item as failed through xfs_set_li_failed(), handing it the
 * backing buffer.  This tells the AIL on its next push that the dquot is
 * already flush locked, and is meant to keep a hold on the buffer so it is not
 * reclaimed before the dirty data reaches disk.
 *
 * The dquot must still be flush locked when this runs; the assertion on the
 * q_flush completion checks exactly that.
 */
STATIC void
xfs_dquot_item_error(
	struct xfs_log_item	*lip,
	struct xfs_buf		*bp)
{
	ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush));
	xfs_set_li_failed(lip, bp);
}
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
......@@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push(
__acquires(&lip->li_ailp->xa_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
struct xfs_buf *bp = NULL;
struct xfs_buf *bp = lip->li_buf;
uint rval = XFS_ITEM_SUCCESS;
int error;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
/*
* The buffer containing this item failed to be written back
* previously. Resubmit the buffer for IO
*/
if (lip->li_flags & XFS_LI_FAILED) {
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_unlock(bp);
return rval;
}
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
......@@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_unlock = xfs_qm_dquot_logitem_unlock,
.iop_committed = xfs_qm_dquot_logitem_committed,
.iop_push = xfs_qm_dquot_logitem_push,
.iop_committing = xfs_qm_dquot_logitem_committing
.iop_committing = xfs_qm_dquot_logitem_committing,
.iop_error = xfs_dquot_item_error
};
/*
......
......@@ -2400,6 +2400,24 @@ xfs_ifree_cluster(
return 0;
}
/*
 * Release the in-core buffer of a fork that is still in local (inline) format
 * so that nothing is left behind once the fork is reset to extents format.
 * Forks in any other format are left untouched.
 */
static inline void
xfs_ifree_local_data(
	struct xfs_inode	*ip,
	int			whichfork)
{
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		struct xfs_ifork	*fork = XFS_IFORK_PTR(ip, whichfork);

		/* A delta of -if_bytes shrinks the inline buffer to nothing. */
		xfs_idata_realloc(ip, -fork->if_bytes, whichfork);
	}
}
/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
......@@ -2437,6 +2455,9 @@ xfs_ifree(
if (error)
return error;
xfs_ifree_local_data(ip, XFS_DATA_FORK);
xfs_ifree_local_data(ip, XFS_ATTR_FORK);
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
......
......@@ -24,6 +24,7 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
......@@ -4716,7 +4717,8 @@ STATIC int
xlog_recover_process_cui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
struct xfs_log_item *lip)
struct xfs_log_item *lip,
struct xfs_defer_ops *dfops)
{
struct xfs_cui_log_item *cuip;
int error;
......@@ -4729,7 +4731,7 @@ xlog_recover_process_cui(
return 0;
spin_unlock(&ailp->xa_lock);
error = xfs_cui_recover(mp, cuip);
error = xfs_cui_recover(mp, cuip, dfops);
spin_lock(&ailp->xa_lock);
return error;
......@@ -4756,7 +4758,8 @@ STATIC int
xlog_recover_process_bui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
struct xfs_log_item *lip)
struct xfs_log_item *lip,
struct xfs_defer_ops *dfops)
{
struct xfs_bui_log_item *buip;
int error;
......@@ -4769,7 +4772,7 @@ xlog_recover_process_bui(
return 0;
spin_unlock(&ailp->xa_lock);
error = xfs_bui_recover(mp, buip);
error = xfs_bui_recover(mp, buip, dfops);
spin_lock(&ailp->xa_lock);
return error;
......@@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
}
}
/* Take all the collected deferred ops and finish them in order. */
static int
xlog_finish_defer_ops(
struct xfs_mount *mp,
struct xfs_defer_ops *dfops)
{
struct xfs_trans *tp;
int64_t freeblks;
uint resblks;
int error;
/*
* We're finishing the defer_ops that accumulated as a result of
* recovering unfinished intent items during log recovery. We
* reserve an itruncate transaction because it is the largest
* permanent transaction type. Since we're the only user of the fs
* right now, take 93% (15/16) of the available free blocks. Use
* weird math to avoid a 64-bit division.
*/
freeblks = percpu_counter_sum(&mp->m_fdblocks);
if (freeblks <= 0)
return -ENOSPC;
resblks = min_t(int64_t, UINT_MAX, freeblks);
resblks = (resblks * 15) >> 4;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
0, XFS_TRANS_RESERVE, &tp);
if (error)
return error;
error = xfs_defer_finish(&tp, dfops);
if (error)
goto out_cancel;
return xfs_trans_commit(tp);
out_cancel:
xfs_trans_cancel(tp);
return error;
}
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
......@@ -4825,10 +4868,12 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
int error = 0;
struct xfs_defer_ops dfops;
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
struct xfs_ail *ailp;
xfs_fsblock_t firstfsb;
int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
......@@ -4839,6 +4884,7 @@ xlog_recover_process_intents(
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
xfs_defer_init(&dfops, &firstfsb);
while (lip != NULL) {
/*
* We're done when we see something other than an intent.
......@@ -4859,6 +4905,12 @@ xlog_recover_process_intents(
*/
ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
/*
* NOTE: If your intent processing routine can create more
* deferred ops, you /must/ attach them to the dfops in this
* routine or else those subsequent intents will get
* replayed in the wrong order!
*/
switch (lip->li_type) {
case XFS_LI_EFI:
error = xlog_recover_process_efi(log->l_mp, ailp, lip);
......@@ -4867,10 +4919,12 @@ xlog_recover_process_intents(
error = xlog_recover_process_rui(log->l_mp, ailp, lip);
break;
case XFS_LI_CUI:
error = xlog_recover_process_cui(log->l_mp, ailp, lip);
error = xlog_recover_process_cui(log->l_mp, ailp, lip,
&dfops);
break;
case XFS_LI_BUI:
error = xlog_recover_process_bui(log->l_mp, ailp, lip);
error = xlog_recover_process_bui(log->l_mp, ailp, lip,
&dfops);
break;
}
if (error)
......@@ -4880,6 +4934,11 @@ xlog_recover_process_intents(
out:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
if (error)
xfs_defer_cancel(&dfops);
else
error = xlog_finish_defer_ops(log->l_mp, &dfops);
return error;
}
......
......@@ -393,7 +393,8 @@ xfs_cud_init(
int
xfs_cui_recover(
struct xfs_mount *mp,
struct xfs_cui_log_item *cuip)
struct xfs_cui_log_item *cuip,
struct xfs_defer_ops *dfops)
{
int i;
int error = 0;
......@@ -405,11 +406,9 @@ xfs_cui_recover(
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
enum xfs_refcount_intent_type type;
xfs_fsblock_t firstfsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
struct xfs_bmbt_irec irec;
struct xfs_defer_ops dfops;
bool requeue_only = false;
ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
......@@ -465,7 +464,6 @@ xfs_cui_recover(
return error;
cudp = xfs_trans_get_cud(tp, cuip);
xfs_defer_init(&dfops, &firstfsb);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
refc = &cuip->cui_format.cui_extents[i];
refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
......@@ -485,7 +483,7 @@ xfs_cui_recover(
new_len = refc->pe_len;
} else
error = xfs_trans_log_finish_refcount_update(tp, cudp,
&dfops, type, refc->pe_startblock, refc->pe_len,
dfops, type, refc->pe_startblock, refc->pe_len,
&new_fsb, &new_len, &rcur);
if (error)
goto abort_error;
......@@ -497,21 +495,21 @@ xfs_cui_recover(
switch (type) {
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_increase_extent(
tp->t_mountp, &dfops, &irec);
tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_decrease_extent(
tp->t_mountp, &dfops, &irec);
tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_ALLOC_COW:
error = xfs_refcount_alloc_cow_extent(
tp->t_mountp, &dfops,
tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
error = xfs_refcount_free_cow_extent(
tp->t_mountp, &dfops,
tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
......@@ -525,17 +523,12 @@ xfs_cui_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
error = xfs_defer_finish(&tp, &dfops);
if (error)
goto abort_defer;
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
error = xfs_trans_commit(tp);
return error;
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
abort_defer:
xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
return error;
}
......@@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip);
int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip,
struct xfs_defer_ops *dfops);
#endif /* __XFS_REFCOUNT_ITEM_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment