Commit abbf9e8a authored by Christoph Hellwig, committed by Darrick J. Wong

xfs: rewrite getbmap using the xfs_iext_* helpers

Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.

This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map.  This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplifies the code.

There are two intentional behavior changes compared to the old code:

 - the current code reports unwritten extents that don't directly border
   a written one as unwritten even when not passing the BMV_IF_PREALLOC
   option, contrary to the documentation.  The new code requires the
   BMV_IF_PREALLOC flag to report the unwritten extent bit.
 - The new code never merges consecutive extents, unlike the old
   code that sometimes does it based on the boundaries of the
   xfs_bmapi_read calls.  Note that the extent merging behavior was
   entirely undocumented.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 15f859ae
...@@ -405,125 +405,103 @@ xfs_bmap_count_blocks( ...@@ -405,125 +405,103 @@ xfs_bmap_count_blocks(
return 0; return 0;
} }
/* static int
* returns 1 for success, 0 if we failed to map the extent. xfs_getbmap_report_one(
*/ struct xfs_inode *ip,
STATIC int struct getbmapx *bmv,
xfs_getbmapx_fix_eof_hole( struct getbmapx *out,
xfs_inode_t *ip, /* xfs incore inode pointer */ int64_t bmv_end,
int whichfork, struct xfs_bmbt_irec *got)
struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
* preallocated data space */
int64_t end, /* last block requested */
xfs_fsblock_t startblock,
bool moretocome)
{ {
int64_t fixlen; struct getbmapx *p = out + bmv->bmv_entries;
xfs_mount_t *mp; /* file system mount point */ bool shared = false, trimmed = false;
xfs_ifork_t *ifp; /* inode fork pointer */ int error;
xfs_extnum_t lastx; /* last extent pointer */
xfs_fileoff_t fileblock;
if (startblock == HOLESTARTBLOCK) { error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
mp = ip->i_mount; if (error)
out->bmv_block = -1; return error;
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
fixlen -= out->bmv_offset; if (isnullstartblock(got->br_startblock) ||
if (prealloced && out->bmv_offset + out->bmv_length == end) { got->br_startblock == DELAYSTARTBLOCK) {
/* Came to hole at EOF. Trim it. */ /*
if (fixlen <= 0) * Delalloc extents that start beyond EOF can occur due to
return 0; * speculative EOF allocation when the delalloc extent is larger
out->bmv_length = fixlen; * than the largest freespace extent at conversion time. These
} * extents cannot be converted by data writeback, so can exist
* here even if we are not supposed to be finding delalloc
* extents.
*/
if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
p->bmv_oflags |= BMV_OF_DELALLOC;
p->bmv_block = -2;
} else { } else {
if (startblock == DELAYSTARTBLOCK) p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
out->bmv_block = -2;
else
out->bmv_block = xfs_fsb_to_db(ip, startblock);
fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
(lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
} }
return 1; if (got->br_state == XFS_EXT_UNWRITTEN &&
(bmv->bmv_iflags & BMV_IF_PREALLOC))
p->bmv_oflags |= BMV_OF_PREALLOC;
if (shared)
p->bmv_oflags |= BMV_OF_SHARED;
p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
bmv->bmv_offset = p->bmv_offset + p->bmv_length;
bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
bmv->bmv_entries++;
return 0;
} }
/* Adjust the reported bmap around shared/unshared extent transitions. */ static void
STATIC int xfs_getbmap_report_hole(
xfs_getbmap_adjust_shared(
struct xfs_inode *ip, struct xfs_inode *ip,
int whichfork, struct getbmapx *bmv,
struct xfs_bmbt_irec *map,
struct getbmapx *out, struct getbmapx *out,
struct xfs_bmbt_irec *next_map) int64_t bmv_end,
xfs_fileoff_t bno,
xfs_fileoff_t end)
{ {
struct xfs_mount *mp = ip->i_mount; struct getbmapx *p = out + bmv->bmv_entries;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_agblock_t ebno;
xfs_extlen_t elen;
xfs_extlen_t nlen;
int error;
next_map->br_startblock = NULLFSBLOCK; if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
next_map->br_startoff = NULLFILEOFF; return;
next_map->br_blockcount = 0;
/* Only written data blocks can be shared. */ p->bmv_block = -1;
if (!xfs_is_reflink_inode(ip) || p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
whichfork != XFS_DATA_FORK || p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
!xfs_bmap_is_real_extent(map))
return 0;
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); bmv->bmv_offset = p->bmv_offset + p->bmv_length;
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
error = xfs_reflink_find_shared(mp, NULL, agno, agbno, bmv->bmv_entries++;
map->br_blockcount, &ebno, &elen, true); }
if (error)
return error;
if (ebno == NULLAGBLOCK) { static inline bool
/* No shared blocks at all. */ xfs_getbmap_full(
return 0; struct getbmapx *bmv)
} else if (agbno == ebno) { {
/* return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
* Shared extent at (agbno, elen). Shrink the reported }
* extent length and prepare to move the start of map[i]
* to agbno+elen, with the aim of (re)formatting the new
* map[i] the next time through the inner loop.
*/
out->bmv_length = XFS_FSB_TO_BB(mp, elen);
out->bmv_oflags |= BMV_OF_SHARED;
if (elen != map->br_blockcount) {
*next_map = *map;
next_map->br_startblock += elen;
next_map->br_startoff += elen;
next_map->br_blockcount -= elen;
}
map->br_blockcount -= elen;
} else {
/*
* There's an unshared extent (agbno, ebno - agbno)
* followed by shared extent at (ebno, elen). Shrink
* the reported extent length to cover only the unshared
* extent and prepare to move up the start of map[i] to
* ebno, with the aim of (re)formatting the new map[i]
* the next time through the inner loop.
*/
*next_map = *map;
nlen = ebno - agbno;
out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
next_map->br_startblock += nlen;
next_map->br_startoff += nlen;
next_map->br_blockcount -= nlen;
map->br_blockcount -= nlen;
}
return 0; static bool
xfs_getbmap_next_rec(
struct xfs_bmbt_irec *rec,
xfs_fileoff_t total_end)
{
xfs_fileoff_t end = rec->br_startoff + rec->br_blockcount;
if (end == total_end)
return false;
rec->br_startoff += rec->br_blockcount;
if (!isnullstartblock(rec->br_startblock) &&
rec->br_startblock != DELAYSTARTBLOCK)
rec->br_startblock += rec->br_blockcount;
rec->br_blockcount = total_end - end;
return true;
} }
/* /*
...@@ -540,119 +518,72 @@ xfs_getbmap( ...@@ -540,119 +518,72 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */ xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */ void *arg) /* formatter arg */
{ {
int64_t bmvend; /* last block requested */ struct xfs_mount *mp = ip->i_mount;
int error = 0; /* return value */ int iflags = bmv->bmv_iflags;
int64_t fixlen; /* length for -1 case */ int whichfork, lock, i, error = 0;
int i; /* extent number */ int64_t bmv_end, max_len;
int lock; /* lock state */ xfs_fileoff_t bno, first_bno;
xfs_bmbt_irec_t *map; /* buffer for user's data */ struct xfs_ifork *ifp;
xfs_mount_t *mp; /* file system mount point */ struct getbmapx *out;
int nex; /* # of user extents can do */ struct xfs_bmbt_irec got, rec;
int subnex; /* # of bmapi's can do */ xfs_filblks_t len;
int nmap; /* number of map entries */ xfs_extnum_t idx;
struct getbmapx *out; /* output structure */
int whichfork; /* data or attr fork */
int prealloced; /* this is a file with
* preallocated data space */
int iflags; /* interface flags */
int bmapi_flags; /* flags for xfs_bmapi */
int cur_ext = 0;
struct xfs_bmbt_irec inject_map;
mp = ip->i_mount;
iflags = bmv->bmv_iflags;
#ifndef DEBUG #ifndef DEBUG
/* Only allow CoW fork queries if we're debugging. */ /* Only allow CoW fork queries if we're debugging. */
if (iflags & BMV_IF_COWFORK) if (iflags & BMV_IF_COWFORK)
return -EINVAL; return -EINVAL;
#endif #endif
if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
return -EINVAL; return -EINVAL;
if (bmv->bmv_count <= 1)
return -EINVAL;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
if (bmv->bmv_length < -1)
return -EINVAL;
bmv->bmv_entries = 0;
if (bmv->bmv_length == 0)
return 0;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
if (iflags & BMV_IF_ATTRFORK) if (iflags & BMV_IF_ATTRFORK)
whichfork = XFS_ATTR_FORK; whichfork = XFS_ATTR_FORK;
else if (iflags & BMV_IF_COWFORK) else if (iflags & BMV_IF_COWFORK)
whichfork = XFS_COW_FORK; whichfork = XFS_COW_FORK;
else else
whichfork = XFS_DATA_FORK; whichfork = XFS_DATA_FORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) { switch (whichfork) {
case XFS_ATTR_FORK: case XFS_ATTR_FORK:
if (XFS_IFORK_Q(ip)) { if (!XFS_IFORK_Q(ip))
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && goto out_unlock_iolock;
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
return -EFSCORRUPTED;
}
prealloced = 0; max_len = 1LL << 32;
fixlen = 1LL << 32; lock = xfs_ilock_attr_map_shared(ip);
break; break;
case XFS_COW_FORK: case XFS_COW_FORK:
if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) /* No CoW fork? Just return */
return -EINVAL; if (!ifp)
goto out_unlock_iolock;
if (xfs_get_cowextsz_hint(ip)) { if (xfs_get_cowextsz_hint(ip))
prealloced = 1; max_len = mp->m_super->s_maxbytes;
fixlen = mp->m_super->s_maxbytes; else
} else { max_len = XFS_ISIZE(ip);
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
break;
default:
/* Local format data forks report no extents. */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
bmv->bmv_entries = 0;
return 0;
}
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
return -EINVAL;
if (xfs_get_extsz_hint(ip) || lock = XFS_ILOCK_SHARED;
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ xfs_ilock(ip, lock);
prealloced = 1;
fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
break; break;
}
if (bmv->bmv_length == -1) {
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
bmv->bmv_length =
max_t(int64_t, fixlen - bmv->bmv_offset, 0);
} else if (bmv->bmv_length == 0) {
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) {
case XFS_DATA_FORK: case XFS_DATA_FORK:
if (!(iflags & BMV_IF_DELALLOC) && if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
...@@ -670,147 +601,107 @@ xfs_getbmap( ...@@ -670,147 +601,107 @@ xfs_getbmap(
*/ */
} }
if (xfs_get_extsz_hint(ip) ||
(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
lock = xfs_ilock_data_map_shared(ip); lock = xfs_ilock_data_map_shared(ip);
break; break;
case XFS_COW_FORK:
lock = XFS_ILOCK_SHARED;
xfs_ilock(ip, lock);
break;
case XFS_ATTR_FORK:
lock = xfs_ilock_attr_map_shared(ip);
break;
} }
/* switch (XFS_IFORK_FORMAT(ip, whichfork)) {
* Don't let nex be bigger than the number of extents case XFS_DINODE_FMT_EXTENTS:
* we can have assuming alternating holes and real extents. case XFS_DINODE_FMT_BTREE:
*/ break;
if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) case XFS_DINODE_FMT_LOCAL:
nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; /* Local format inode forks report no extents. */
bmapi_flags = xfs_bmapi_aflag(whichfork);
if (!(iflags & BMV_IF_PREALLOC))
bmapi_flags |= XFS_BMAPI_IGSTATE;
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
goto out_unlock_ilock; goto out_unlock_ilock;
default:
error = -EINVAL;
goto out_unlock_ilock;
}
bmv->bmv_entries = 0; if (bmv->bmv_length == -1) {
max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
(whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
error = 0;
goto out_free_map;
} }
do { bmv_end = bmv->bmv_offset + bmv->bmv_length;
nmap = (nex> subnex) ? subnex : nex;
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
XFS_BB_TO_FSB(mp, bmv->bmv_length), len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
map, &nmap, bmapi_flags);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, whichfork);
if (error) if (error)
goto out_free_map; goto out_unlock_ilock;
ASSERT(nmap <= subnex); }
for (i = 0; i < nmap && bmv->bmv_length &&
cur_ext < bmv->bmv_count - 1; i++) {
out[cur_ext].bmv_oflags = 0;
if (map[i].br_state == XFS_EXT_UNWRITTEN)
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
else if (map[i].br_startblock == DELAYSTARTBLOCK)
out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
out[cur_ext].bmv_offset =
XFS_FSB_TO_BB(mp, map[i].br_startoff);
out[cur_ext].bmv_length =
XFS_FSB_TO_BB(mp, map[i].br_blockcount);
out[cur_ext].bmv_unused1 = 0;
out[cur_ext].bmv_unused2 = 0;
if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
/* /*
* delayed allocation extents that start beyond EOF can * Report a whole-file hole if the delalloc flag is set to
* occur due to speculative EOF allocation when the * stay compatible with the old implementation.
* delalloc extent is larger than the largest freespace
* extent at conversion time. These extents cannot be
* converted by data writeback, so can exist here even
* if we are not supposed to be finding delalloc
* extents.
*/ */
if (map[i].br_startblock == DELAYSTARTBLOCK && if (iflags & BMV_IF_DELALLOC)
map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
ASSERT((iflags & BMV_IF_DELALLOC) != 0); XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
goto out_unlock_ilock;
if (map[i].br_startblock == HOLESTARTBLOCK &&
whichfork == XFS_ATTR_FORK) {
/* came to the end of attribute fork */
out[cur_ext].bmv_oflags |= BMV_OF_LAST;
goto out_free_map;
} }
/* Is this a shared block? */ while (!xfs_getbmap_full(bmv)) {
error = xfs_getbmap_adjust_shared(ip, whichfork, xfs_trim_extent(&got, first_bno, len);
&map[i], &out[cur_ext], &inject_map);
if (error)
goto out_free_map;
if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
&out[cur_ext], prealloced, bmvend,
map[i].br_startblock,
inject_map.br_startblock != NULLFSBLOCK))
goto out_free_map;
bmv->bmv_offset =
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
max_t(int64_t, 0, bmvend - bmv->bmv_offset);
/* /*
* In case we don't want to return the hole, * Report an entry for a hole if this extent doesn't directly
* don't increase cur_ext so that we can reuse * follow the previous one.
* it in the next loop.
*/ */
if ((iflags & BMV_IF_NO_HOLES) && if (got.br_startoff > bno) {
map[i].br_startblock == HOLESTARTBLOCK) { xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
memset(&out[cur_ext], 0, sizeof(out[cur_ext])); got.br_startoff);
continue; if (xfs_getbmap_full(bmv))
break;
} }
/* /*
* In order to report shared extents accurately, * In order to report shared extents accurately, we report each
* we report each distinct shared/unshared part * distinct shared / unshared part of a single bmbt record with
* of a single bmbt record using multiple bmap * an individual getbmapx record.
* extents. To make that happen, we iterate the
* same map array item multiple times, each
* time trimming out the subextent that we just
* reported.
*
* Because of this, we must check the out array
* index (cur_ext) directly against bmv_count-1
* to avoid overflows.
*/ */
if (inject_map.br_startblock != NULLFSBLOCK) { bno = got.br_startoff + got.br_blockcount;
map[i] = inject_map; rec = got;
i--; do {
error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
&rec);
if (error || xfs_getbmap_full(bmv))
goto out_unlock_ilock;
} while (xfs_getbmap_next_rec(&rec, bno));
if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {
xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
bno, end);
} }
bmv->bmv_entries++; break;
cur_ext++; }
if (bno >= first_bno + len)
break;
} }
} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
out_free_map: out_unlock_ilock:
kmem_free(map);
out_unlock_ilock:
xfs_iunlock(ip, lock); xfs_iunlock(ip, lock);
out_unlock_iolock: out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
for (i = 0; i < cur_ext; i++) { for (i = 0; i < bmv->bmv_entries; i++) {
/* format results & advance arg */ /* format results & advance arg */
error = formatter(&arg, &out[i]); error = formatter(&arg, &out[i]);
if (error) if (error)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment