Commit 60c7b4df authored by Linus Torvalds

Merge tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs

Pull xfs update from Ben Myers:
 "Several enhancements and cleanups:

   - make inode32 and inode64 remountable options
   - SEEK_HOLE/SEEK_DATA enhancements
   - cleanup struct declarations in xfs_mount.h"

* tag 'for-linus-v3.7-rc1' of git://oss.sgi.com/xfs/xfs:
  xfs: Make inode32 a remountable option
  xfs: add inode64->inode32 transition into xfs_set_inode32()
  xfs: Fix mp->m_maxagi update during inode64 remount
  xfs: reduce code duplication handling inode32/64 options
  xfs: make inode64 as the default allocation mode
  xfs: Fix m_agirotor reset during AG selection
  Make inode64 a remountable option
  xfs: stop the sync worker before xfs_unmountfs
  xfs: xfs_seek_hole() refinement with hole searching from page cache for unwritten extents
  xfs: xfs_seek_data() refinement with unwritten extents check up from page cache
  xfs: Introduce a helper routine to probe data or hole offset from page cache
  xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()
  xfs: fix race while discarding buffers [V4]
  xfs: check for possible overflow in xfs_ioc_trim
  xfs: unlock the AGI buffer when looping in xfs_dialloc
  xfs: kill struct declarations in xfs_mount.h
  xfs: fix uninitialised variable in xfs_rtbuf_get()
parents aab174f0 2ea03929
......@@ -36,6 +36,7 @@
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
static const struct vm_operations_struct xfs_file_vm_ops;
......@@ -959,17 +960,232 @@ xfs_vm_page_mkwrite(
return block_page_mkwrite(vma, vmf, xfs_get_blocks);
}
/*
 * Selects which kind of offset the page cache search helpers look for
 * on behalf of xfs_seek_data() or xfs_seek_hole().
 */
enum {
/* Search for the first buffer that is neither unwritten nor uptodate. */
HOLE_OFF = 0,
/* Search for the first buffer that is unwritten or uptodate. */
DATA_OFF,
};
/*
 * Walk the buffers attached to @page looking for the first one of the
 * requested kind (DATA_OFF or HOLE_OFF).
 *
 * A buffer counts as data when it is flagged unwritten (an unwritten
 * extent with page cache data over it) or uptodate; any other buffer
 * counts as a hole.  A single page may carry a mix of both.
 *
 * Returns true and stores the file offset of the matching buffer in
 * @offset.  Returns false, leaving @offset untouched, when no buffer on
 * the page matches.
 */
STATIC bool
xfs_lookup_buffer_offset(
	struct page		*page,
	loff_t			*offset,
	unsigned int		type)
{
	loff_t			pos = page_offset(page);
	struct buffer_head	*first = page_buffers(page);
	struct buffer_head	*cur = first;

	do {
		bool		has_data;
		unsigned int	kind;

		has_data = buffer_unwritten(cur) || buffer_uptodate(cur);
		kind = has_data ? DATA_OFF : HOLE_OFF;

		/* First buffer of the wanted kind wins. */
		if (kind == type) {
			*offset = pos;
			return true;
		}

		/* Advance the byte position past this buffer. */
		pos += cur->b_size;
		cur = cur->b_this_page;
	} while (cur != first);

	return false;
}
/*
 * Find a data or hole offset in the page cache that backs an unwritten
 * extent, on behalf of xfs_seek_data() or xfs_seek_hole().
 *
 * @inode:  inode whose page cache (inode->i_mapping) is searched
 * @map:    extent mapping; the search covers the pages from *@offset up
 *          to the last byte of the extent, both inclusive
 * @type:   DATA_OFF or HOLE_OFF
 * @offset: in: byte offset to start searching from;
 *          out: the offset that was found (only written on success)
 *
 * Return true if the desired type of offset was found, and @offset is
 * filled with that address.  Otherwise, return false and keep @offset
 * unchanged.
 */
STATIC bool
xfs_find_get_desired_pgoff(
	struct inode		*inode,
	struct xfs_bmbt_irec	*map,
	unsigned int		type,
	loff_t			*offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct pagevec		pvec;
	pgoff_t			index;
	pgoff_t			end;
	loff_t			endoff;
	loff_t			startoff = *offset;
	loff_t			lastoff = startoff;
	bool			found = false;

	pagevec_init(&pvec, 0);

	index = startoff >> PAGE_CACHE_SHIFT;
	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
	/*
	 * endoff is the first byte past the extent, so the last page that
	 * belongs to the search range is the one holding byte endoff - 1.
	 * Using endoff itself would index a page beyond the extent whenever
	 * endoff is page aligned.
	 */
	end = (endoff - 1) >> PAGE_CACHE_SHIFT;
	do {
		int		want;
		unsigned	nr_pages;
		unsigned int	i;

		/*
		 * [index, end] is inclusive on both sides, so there are
		 * end - index + 1 pages left to look at, and at least one
		 * page must always be requested.  Clamp before adding one so
		 * the request never exceeds PAGEVEC_SIZE.
		 */
		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  want);
		/*
		 * No page mapped into the given range.  If we are searching
		 * for holes and this is the first pass through the loop, the
		 * given offset landed in a hole, so return it.
		 *
		 * If we have already stepped through some buffers looking
		 * for holes but they all contained data, lastoff points to
		 * the end of the last mapped page.  If that has not reached
		 * the end of the search range, there is a hole between them.
		 */
		if (nr_pages == 0) {
			/* Data search found nothing */
			if (type == DATA_OFF)
				break;

			ASSERT(type == HOLE_OFF);
			if (lastoff == startoff || lastoff < endoff) {
				found = true;
				*offset = lastoff;
			}
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page	*page = pvec.pages[i];
			loff_t		b_offset;

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to tmpfs
			 * file mapping.  However, page->index will not change
			 * because we have a reference on the page.
			 *
			 * If this page starts beyond the point we have
			 * examined so far, nothing is cached in between and
			 * that gap is a hole.  Checking every page, not just
			 * the first one of the first batch, catches gaps
			 * between non-contiguous cached pages as well.
			 */
			if (type == HOLE_OFF && lastoff < endoff &&
			    lastoff < page_offset(page)) {
				found = true;
				*offset = lastoff;
				goto out;
			}

			/* Searching is done once the page is out of range. */
			if (page->index > end)
				goto out;

			lock_page(page);
			/*
			 * Page truncated or invalidated (page->mapping ==
			 * NULL).  We can freely skip it and proceed to check
			 * the next page.
			 */
			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			found = xfs_lookup_buffer_offset(page, &b_offset, type);
			if (found) {
				/*
				 * The found offset may be less than the start
				 * point of the search if this is the first
				 * page we look at.
				 */
				*offset = max_t(loff_t, startoff, b_offset);
				unlock_page(page);
				goto out;
			}

			/*
			 * We were either searching for data and found none,
			 * or searching for a hole and found only data.  In
			 * either case the next page may hold what we want,
			 * so move the last offset to the end of this page.
			 */
			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * Fewer pages were returned than requested, so the search is
		 * done.  Nothing was found for a data search, but for a hole
		 * search there is a hole behind the last offset.
		 */
		if (nr_pages < want) {
			if (type == HOLE_OFF) {
				*offset = lastoff;
				found = true;
			}
			break;
		}

		/* Resume the next lookup right after the last page seen. */
		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}
STATIC loff_t
xfs_seek_data(
struct file *file,
loff_t start,
u32 type)
loff_t start)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec map[2];
int nmap = 2;
loff_t uninitialized_var(offset);
xfs_fsize_t isize;
xfs_fileoff_t fsbno;
......@@ -985,36 +1201,74 @@ xfs_seek_data(
goto out_unlock;
}
fsbno = XFS_B_TO_FSBT(mp, start);
/*
* Try to read extents from the first block indicated
* by fsbno to the end block of the file.
*/
fsbno = XFS_B_TO_FSBT(mp, start);
end = XFS_B_TO_FSB(mp, isize);
for (;;) {
struct xfs_bmbt_irec map[2];
int nmap = 2;
unsigned int i;
error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
error = ENXIO;
goto out_unlock;
}
for (i = 0; i < nmap; i++) {
offset = max_t(loff_t, start,
XFS_FSB_TO_B(mp, map[i].br_startoff));
/* Landed in a data extent */
if (map[i].br_startblock == DELAYSTARTBLOCK ||
(map[i].br_state == XFS_EXT_NORM &&
!isnullstartblock(map[i].br_startblock)))
goto out;
/*
* Treat unwritten extent as data extent since it might
* contains dirty data in page cache.
* Landed in an unwritten extent, try to search data
* from page cache.
*/
if (map[i].br_state == XFS_EXT_UNWRITTEN) {
if (xfs_find_get_desired_pgoff(inode, &map[i],
DATA_OFF, &offset))
goto out;
}
}
/*
* map[0] is a hole, or it is an unwritten extent but
* without data in page cache. Probably means that
* we are reading after EOF if nothing in map[1].
*/
if (map[0].br_startblock != HOLESTARTBLOCK) {
offset = max_t(loff_t, start,
XFS_FSB_TO_B(mp, map[0].br_startoff));
} else {
if (nmap == 1) {
error = ENXIO;
goto out_unlock;
}
offset = max_t(loff_t, start,
XFS_FSB_TO_B(mp, map[1].br_startoff));
ASSERT(i > 1);
/*
* Nothing was found; proceed to the next round of search
* if the reading offset is not at or beyond EOF.
*/
fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
start = XFS_FSB_TO_B(mp, fsbno);
if (start >= isize) {
error = ENXIO;
goto out_unlock;
}
}
out:
if (offset != file->f_pos)
file->f_pos = offset;
......@@ -1029,16 +1283,15 @@ xfs_seek_data(
STATIC loff_t
xfs_seek_hole(
struct file *file,
loff_t start,
u32 type)
loff_t start)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
loff_t uninitialized_var(offset);
loff_t holeoff;
xfs_fsize_t isize;
xfs_fileoff_t fsbno;
xfs_filblks_t end;
uint lock;
int error;
......@@ -1054,21 +1307,77 @@ xfs_seek_hole(
}
fsbno = XFS_B_TO_FSBT(mp, start);
error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
end = XFS_B_TO_FSB(mp, isize);
for (;;) {
struct xfs_bmbt_irec map[2];
int nmap = 2;
unsigned int i;
error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
holeoff = XFS_FSB_TO_B(mp, fsbno);
if (holeoff <= start)
offset = start;
else {
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
error = ENXIO;
goto out_unlock;
}
for (i = 0; i < nmap; i++) {
offset = max_t(loff_t, start,
XFS_FSB_TO_B(mp, map[i].br_startoff));
/* Landed in a hole */
if (map[i].br_startblock == HOLESTARTBLOCK)
goto out;
/*
* xfs_bmap_first_unused() could return a value bigger than
* isize if there are no more holes past the supplied offset.
* Landed in an unwritten extent, try to search hole
* from page cache.
*/
offset = min_t(loff_t, holeoff, isize);
if (map[i].br_state == XFS_EXT_UNWRITTEN) {
if (xfs_find_get_desired_pgoff(inode, &map[i],
HOLE_OFF, &offset))
goto out;
}
}
/*
* map[0] contains data, or it is unwritten but contains
* data in page cache, probably means that we are
* reading after EOF. We should fix offset to point
* to the end of the file(i.e., there is an implicit
* hole at the end of any file).
*/
if (nmap == 1) {
offset = isize;
break;
}
ASSERT(i > 1);
/*
* Both mappings contain data; proceed to the next round of
* search if the current reading offset is not at or beyond EOF.
*/
fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
start = XFS_FSB_TO_B(mp, fsbno);
if (start >= isize) {
offset = isize;
break;
}
}
out:
/*
* At this point, we must have found a hole. However, the returned
* offset may be bigger than the file size as it may be aligned to
* page boundary for unwritten extents, we need to deal with this
* situation in particular.
*/
offset = min_t(loff_t, offset, isize);
if (offset != file->f_pos)
file->f_pos = offset;
......@@ -1092,9 +1401,9 @@ xfs_file_llseek(
case SEEK_SET:
return generic_file_llseek(file, offset, origin);
case SEEK_DATA:
return xfs_seek_data(file, offset, origin);
return xfs_seek_data(file, offset);
case SEEK_HOLE:
return xfs_seek_hole(file, offset, origin);
return xfs_seek_hole(file, offset);
default:
return -EINVAL;
}
......
......@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(
spin_lock(&mp->m_agirotor_lock);
agno = mp->m_agirotor;
if (++mp->m_agirotor == mp->m_maxagi)
if (++mp->m_agirotor >= mp->m_maxagi)
mp->m_agirotor = 0;
spin_unlock(&mp->m_agirotor_lock);
......
......@@ -440,7 +440,7 @@ xfs_initialize_perag(
xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi)
{
xfs_agnumber_t index, max_metadata;
xfs_agnumber_t index;
xfs_agnumber_t first_initialised = 0;
xfs_perag_t *pag;
xfs_agino_t agino;
......@@ -500,43 +500,10 @@ xfs_initialize_perag(
else
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
if (mp->m_flags & XFS_MOUNT_32BITINODES) {
/*
* Calculate how much should be reserved for inodes to meet
* the max inode percentage.
*/
if (mp->m_maxicount) {
__uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
do_div(icount, 100);
icount += sbp->sb_agblocks - 1;
do_div(icount, sbp->sb_agblocks);
max_metadata = icount;
} else {
max_metadata = agcount;
}
for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
if (ino > XFS_MAXINUMBER_32) {
index++;
break;
}
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
if (index < max_metadata)
pag->pagf_metadata = 1;
xfs_perag_put(pag);
}
} else {
for (index = 0; index < agcount; index++) {
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
xfs_perag_put(pag);
}
}
if (mp->m_flags & XFS_MOUNT_32BITINODES)
index = xfs_set_inode32(mp);
else
index = xfs_set_inode64(mp);
if (maxagi)
*maxagi = index;
......
......@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
#include "xfs_sync.h"
struct xlog;
struct xfs_mount_args;
struct xfs_inode;
struct xfs_bmbt_irec;
struct xfs_bmap_free;
struct xfs_extdelta;
struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
......
......@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
* unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
* XFS_MAXINUMBER_32 */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
......@@ -120,12 +122,18 @@ mempool_t *xfs_ioend_pool;
* in the future, too.
*/
enum {
Opt_barrier, Opt_nobarrier, Opt_err
Opt_barrier,
Opt_nobarrier,
Opt_inode64,
Opt_inode32,
Opt_err
};
/*
 * Table pairing each Opt_* token with the option string it stands for.
 * The final {Opt_err, NULL} entry has no pattern and closes the table.
 */
static const match_table_t tokens = {
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
{Opt_inode64, "inode64"},
{Opt_inode32, "inode32"},
{Opt_err, NULL}
};
......@@ -197,7 +205,9 @@ xfs_parseargs(
*/
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
#if !XFS_BIG_INUMS
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
#endif
/*
* These can be overridden by the mount option parsing.
......@@ -294,6 +304,8 @@ xfs_parseargs(
return EINVAL;
}
dswidth = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
......@@ -492,6 +504,7 @@ xfs_showargs(
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
......@@ -591,6 +604,80 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
/*
 * Switch the mount to inode32 allocation.
 *
 * Every allocation group whose highest possible inode number fits in
 * XFS_MAXINUMBER_32 is marked as usable for inode allocation; all other
 * groups have both pagi_inodeok and pagf_metadata cleared.  Usable groups
 * with an index below the count derived from the max inode percentage
 * are also flagged as metadata-preferred.
 *
 * Sets XFS_MOUNT_32BITINODES and XFS_MOUNT_SMALL_INUMS in mp->m_flags and
 * returns the number of allocation groups that may hold inodes.
 */
xfs_agnumber_t
xfs_set_inode32(struct xfs_mount *mp)
{
	xfs_sb_t	*sb = &mp->m_sb;
	/* Highest inode offset within any allocation group. */
	xfs_agino_t	last_agino =
			XFS_OFFBNO_TO_AGINO(mp, sb->sb_agblocks - 1, 0);
	xfs_agnumber_t	metadata_ags = sb->sb_agcount;
	xfs_agnumber_t	usable = 0;
	xfs_agnumber_t	agno;

	/*
	 * Work out how many allocation groups should be reserved for
	 * inodes to meet the max inode percentage.
	 */
	if (mp->m_maxicount) {
		__uint64_t	blocks;

		blocks = sb->sb_dblocks * sb->sb_imax_pct;
		do_div(blocks, 100);
		blocks += sb->sb_agblocks - 1;
		do_div(blocks, sb->sb_agblocks);
		metadata_ags = blocks;
	}

	for (agno = 0; agno < sb->sb_agcount; agno++) {
		xfs_ino_t	highest = XFS_AGINO_TO_INO(mp, agno, last_agino);
		xfs_perag_t	*pag = xfs_perag_get(mp, agno);

		if (highest > XFS_MAXINUMBER_32) {
			/* Inode numbers here would not fit in 32 bits. */
			pag->pagi_inodeok = 0;
			pag->pagf_metadata = 0;
		} else {
			pag->pagi_inodeok = 1;
			usable++;
			if (agno < metadata_ags)
				pag->pagf_metadata = 1;
		}
		xfs_perag_put(pag);
	}

	mp->m_flags |= (XFS_MOUNT_32BITINODES |
			XFS_MOUNT_SMALL_INUMS);

	return usable;
}
/*
 * Switch the mount to inode64 allocation.
 *
 * Every allocation group is marked as usable for inode allocation and
 * loses its metadata-preferred flag.  Clears XFS_MOUNT_32BITINODES and
 * XFS_MOUNT_SMALL_INUMS in mp->m_flags and returns the number of
 * allocation groups visited, i.e. mp->m_sb.sb_agcount.
 */
xfs_agnumber_t
xfs_set_inode64(struct xfs_mount *mp)
{
	xfs_agnumber_t	agno = 0;

	while (agno < mp->m_sb.sb_agcount) {
		struct xfs_perag	*pag = xfs_perag_get(mp, agno);

		pag->pagi_inodeok = 1;
		pag->pagf_metadata = 0;
		xfs_perag_put(pag);
		agno++;
	}

	/*
	 * m_flags is updated without extra locking.  Per the original
	 * author's note, the VFS superblock rw_semaphore is held across
	 * mount/umount/remount, which is enough to avoid concurrent
	 * updates of this field.
	 */
	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
			 XFS_MOUNT_SMALL_INUMS);

	return agno;
}
STATIC int
xfs_blkdev_get(
xfs_mount_t *mp,
......@@ -1056,6 +1143,12 @@ xfs_fs_remount(
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:
mp->m_maxagi = xfs_set_inode64(mp);
break;
case Opt_inode32:
mp->m_maxagi = xfs_set_inode32(mp);
break;
default:
/*
* Logically we would return an error here to prevent
......
......@@ -75,6 +75,8 @@ struct block_device;
extern __uint64_t xfs_max_file_offset(unsigned int);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
......
......@@ -37,6 +37,7 @@ struct xlog_recover;
struct xlog_recover_item;
struct xfs_buf_log_format;
struct xfs_inode_log_format;
struct xfs_bmbt_irec;
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment