Commit 2a82b8be authored by David Chinner, committed by Tim Shimmin

[XFS] Concurrent Multi-File Data Streams

In media spaces, video is often stored in a frame-per-file format. When
dealing with uncompressed realtime HD video streams in this format, it is
crucial that files do not get fragmented and that multiple files are placed
contiguously on disk.

When multiple streams are being ingested and played out at the same time,
it is critical that the filesystem does not cross the streams and
interleave them together as this creates seek and readahead cache miss
latency and prevents both ingest and playout from meeting frame rate
targets.

This patch set introduces a "stream of files" concept into the allocator to
place all the data from a single stream contiguously on disk so that RAID
array readahead can be used effectively. Each additional stream gets
placed in different allocation groups within the filesystem, thereby
ensuring that we don't cross any streams. When an AG fills up, we select a
new AG for the stream that is not in use.

The core of the functionality is the stream tracking - each inode that we
create in a directory needs to be associated with the directory's stream.
Hence every time we create a file, we look up the directory's stream
object and associate the new file with that object.

Once we have a stream object for a file, we use the AG that the stream
object points to for allocations. If we can't allocate in that AG (e.g. it
is full) we move the entire stream to another AG. Other inodes in the same
stream are moved to the new AG on their next allocation (i.e. lazy
update).

Stream objects are kept in a cache and hold a reference on the inode.
Hence the inode cannot be reclaimed while there is an outstanding stream
reference. This means that on unlink we need to remove the stream
association and we also need to flush all the associations on certain
events that want to reclaim all unreferenced inodes (e.g. filesystem
freeze).

SGI-PV: 964469
SGI-Modid: xfs-linux-melb:xfs-kern:29096a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
parent 0892ccd6
...@@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \ ...@@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \
xfs_dir2_sf.o \ xfs_dir2_sf.o \
xfs_error.o \ xfs_error.o \
xfs_extfree_item.o \ xfs_extfree_item.o \
xfs_filestream.o \
xfs_fsops.o \ xfs_fsops.o \
xfs_ialloc.o \ xfs_ialloc.o \
xfs_ialloc_btree.o \ xfs_ialloc_btree.o \
...@@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \ ...@@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \
xfs_log.o \ xfs_log.o \
xfs_log_recover.o \ xfs_log_recover.o \
xfs_mount.o \ xfs_mount.o \
xfs_mru_cache.o \
xfs_rename.o \ xfs_rename.o \
xfs_trans.o \ xfs_trans.o \
xfs_trans_ail.o \ xfs_trans_ail.o \
......
...@@ -46,6 +46,7 @@ xfs_param_t xfs_params = { ...@@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
.inherit_nosym = { 0, 0, 1 }, .inherit_nosym = { 0, 0, 1 },
.rotorstep = { 1, 1, 255 }, .rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 }, .inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 50, 3600*100},
}; };
/* /*
......
...@@ -123,6 +123,7 @@ ...@@ -123,6 +123,7 @@
#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
#define xfs_rotorstep xfs_params.rotorstep.val #define xfs_rotorstep xfs_params.rotorstep.val
#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
#define current_cpu() (raw_smp_processor_id()) #define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid) #define current_pid() (current->pid)
......
...@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = { ...@@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
.extra1 = &xfs_params.inherit_nodfrg.min, .extra1 = &xfs_params.inherit_nodfrg.min,
.extra2 = &xfs_params.inherit_nodfrg.max .extra2 = &xfs_params.inherit_nodfrg.max
}, },
{
.ctl_name = XFS_FILESTREAM_TIMER,
.procname = "filestream_centisecs",
.data = &xfs_params.fstrm_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &xfs_params.fstrm_timer.min,
.extra2 = &xfs_params.fstrm_timer.max,
},
/* please keep this the last entry */ /* please keep this the last entry */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
{ {
......
...@@ -47,6 +47,7 @@ typedef struct xfs_param { ...@@ -47,6 +47,7 @@ typedef struct xfs_param {
xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
} xfs_param_t; } xfs_param_t;
/* /*
...@@ -86,6 +87,7 @@ enum { ...@@ -86,6 +87,7 @@ enum {
XFS_INHERIT_NOSYM = 19, XFS_INHERIT_NOSYM = 19,
XFS_ROTORSTEP = 20, XFS_ROTORSTEP = 20,
XFS_INHERIT_NODFRG = 21, XFS_INHERIT_NODFRG = 21,
XFS_FILESTREAM_TIMER = 22,
}; };
extern xfs_param_t xfs_params; extern xfs_param_t xfs_params;
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#define XFS_RW_TRACE 1 #define XFS_RW_TRACE 1
#define XFS_BUF_TRACE 1 #define XFS_BUF_TRACE 1
#define XFS_VNODE_TRACE 1 #define XFS_VNODE_TRACE 1
#define XFS_FILESTREAMS_TRACE 1
#endif #endif
#include <linux-2.6/xfs_linux.h> #include <linux-2.6/xfs_linux.h>
......
...@@ -196,6 +196,7 @@ typedef struct xfs_perag ...@@ -196,6 +196,7 @@ typedef struct xfs_perag
lock_t pagb_lock; /* lock for pagb_list */ lock_t pagb_lock; /* lock for pagb_list */
#endif #endif
xfs_perag_busy_t *pagb_list; /* unstable blocks */ xfs_perag_busy_t *pagb_list; /* unstable blocks */
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
} xfs_perag_t; } xfs_perag_t;
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_trans_space.h" #include "xfs_trans_space.h"
#include "xfs_buf_item.h" #include "xfs_buf_item.h"
#include "xfs_filestream.h"
#ifdef DEBUG #ifdef DEBUG
...@@ -2725,9 +2726,15 @@ xfs_bmap_btalloc( ...@@ -2725,9 +2726,15 @@ xfs_bmap_btalloc(
} }
nullfb = ap->firstblock == NULLFSBLOCK; nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb) if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
else }
} else
ap->rval = ap->firstblock; ap->rval = ap->firstblock;
xfs_bmap_adjacent(ap); xfs_bmap_adjacent(ap);
...@@ -2751,13 +2758,22 @@ xfs_bmap_btalloc( ...@@ -2751,13 +2758,22 @@ xfs_bmap_btalloc(
args.firstblock = ap->firstblock; args.firstblock = ap->firstblock;
blen = 0; blen = 0;
if (nullfb) { if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_NEAR_BNO;
else
args.type = XFS_ALLOCTYPE_START_BNO; args.type = XFS_ALLOCTYPE_START_BNO;
args.total = ap->total; args.total = ap->total;
/* /*
* Find the longest available space. * Search for an allocation group with a single extent
* We're going to try for the whole allocation at once. * large enough for the request.
*
* If one isn't found, then adjust the minimum allocation
* size to the largest space found.
*/ */
startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
notinit = 0; notinit = 0;
down_read(&mp->m_peraglock); down_read(&mp->m_peraglock);
while (blen < ap->alen) { while (blen < ap->alen) {
...@@ -2783,6 +2799,35 @@ xfs_bmap_btalloc( ...@@ -2783,6 +2799,35 @@ xfs_bmap_btalloc(
blen = longest; blen = longest;
} else } else
notinit = 1; notinit = 1;
if (xfs_inode_is_filestream(ap->ip)) {
if (blen >= ap->alen)
break;
if (ap->userdata) {
/*
* If startag is an invalid AG, we've
* come here once before and
* xfs_filestream_new_ag picked the
* best currently available.
*
* Don't continue looping, since we
* could loop forever.
*/
if (startag == NULLAGNUMBER)
break;
error = xfs_filestream_new_ag(ap, &ag);
if (error) {
up_read(&mp->m_peraglock);
return error;
}
/* loop again to set 'blen'*/
startag = NULLAGNUMBER;
continue;
}
}
if (++ag == mp->m_sb.sb_agcount) if (++ag == mp->m_sb.sb_agcount)
ag = 0; ag = 0;
if (ag == startag) if (ag == startag)
...@@ -2807,7 +2852,17 @@ xfs_bmap_btalloc( ...@@ -2807,7 +2852,17 @@ xfs_bmap_btalloc(
*/ */
else else
args.minlen = ap->alen; args.minlen = ap->alen;
/*
* set the failure fallback case to look in the selected
* AG as the stream may have moved.
*/
if (xfs_inode_is_filestream(ap->ip))
ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
} else if (ap->low) { } else if (ap->low) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
else
args.type = XFS_ALLOCTYPE_START_BNO; args.type = XFS_ALLOCTYPE_START_BNO;
args.total = args.minlen = ap->minlen; args.total = args.minlen = ap->minlen;
} else { } else {
......
...@@ -99,5 +99,7 @@ struct xfs_mount_args { ...@@ -99,5 +99,7 @@ struct xfs_mount_args {
*/ */
#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred #define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
* I/O size in stat(2) */ * I/O size in stat(2) */
#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
* allocator */
#endif /* __XFS_CLNT_H__ */ #endif /* __XFS_CLNT_H__ */
...@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt ...@@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */ #define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
...@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt ...@@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT) #define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)
#define XFS_DIFLAG_ANY \ #define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG) XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
#endif /* __XFS_DINODE_H__ */ #endif /* __XFS_DINODE_H__ */
This diff is collapsed.
/*
* Copyright (c) 2006-2007 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_FILESTREAM_H__
#define __XFS_FILESTREAM_H__
#ifdef __KERNEL__
struct xfs_mount;
struct xfs_inode;
struct xfs_perag;
struct xfs_bmalloca;
#ifdef XFS_FILESTREAMS_TRACE
#define XFS_FSTRM_KTRACE_INFO 1
#define XFS_FSTRM_KTRACE_AGSCAN 2
#define XFS_FSTRM_KTRACE_AGPICK1 3
#define XFS_FSTRM_KTRACE_AGPICK2 4
#define XFS_FSTRM_KTRACE_UPDATE 5
#define XFS_FSTRM_KTRACE_FREE 6
#define XFS_FSTRM_KTRACE_ITEM_LOOKUP 7
#define XFS_FSTRM_KTRACE_ASSOCIATE 8
#define XFS_FSTRM_KTRACE_MOVEAG 9
#define XFS_FSTRM_KTRACE_ORPHAN 10
#define XFS_FSTRM_KTRACE_SIZE 16384
extern ktrace_t *xfs_filestreams_trace_buf;
#endif
/*
* Allocation group filestream associations are tracked with per-ag atomic
* counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
* particular AG already has active filestreams associated with it. The mount
* point's m_peraglock is used to protect these counters from per-ag array
* re-allocation during a growfs operation. When xfs_growfs_data_private() is
* about to reallocate the array, it calls xfs_filestream_flush() with the
* m_peraglock held in write mode.
*
* Since xfs_mru_cache_flush() guarantees that all the free functions for all
* the cache elements have finished executing before it returns, it's safe for
* the free functions to use the atomic counters without m_peraglock protection.
* This allows the implementation of xfs_fstrm_free_func() to be agnostic about
* whether it was called with the m_peraglock held in read mode, write mode or
* not held at all. The race condition this addresses is the following:
*
* - The work queue scheduler fires and pulls a filestream directory cache
* element off the LRU end of the cache for deletion, then gets pre-empted.
* - A growfs operation grabs the m_peraglock in write mode, flushes all the
* remaining items from the cache and reallocates the mount point's per-ag
* array, resetting all the counters to zero.
* - The work queue thread resumes and calls the free function for the element
* it started cleaning up earlier. In the process it decrements the
* filestreams counter for an AG that now has no references.
*
* With a shrinkfs feature, the above scenario could panic the system.
*
* All other uses of the following macros should be protected by either the
* m_peraglock held in read mode, or the cache's internal locking exposed by the
* interval between a call to xfs_mru_cache_lookup() and a call to
* xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
* when new elements are added to the cache.
*
* Combined, these locking rules ensure that no associations will ever exist in
* the cache that reference per-ag array elements that have since been
* reallocated.
*/
/*
 * Read, without modifying it, the filestream counter (pagf_fstrms) of
 * allocation group @agno on mount @mp and return its current value.
 *
 * @agno is used directly as an index into mp->m_perag[]; no range check is
 * performed here.
 */
STATIC_INLINE int
xfs_filestream_peek_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	int		nstreams;

	nstreams = atomic_read(&mp->m_perag[agno].pagf_fstrms);
	return nstreams;
}
/*
 * Atomically increment the filestream counter (pagf_fstrms) of allocation
 * group @agno on mount @mp and return the value produced by
 * atomic_inc_return().
 *
 * @agno is used directly as an index into mp->m_perag[]; no range check is
 * performed here.
 */
STATIC_INLINE int
xfs_filestream_get_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	int		nstreams;

	nstreams = atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
	return nstreams;
}
/*
 * Atomically decrement the filestream counter (pagf_fstrms) of allocation
 * group @agno on mount @mp and return the value produced by
 * atomic_dec_return().
 *
 * @agno is used directly as an index into mp->m_perag[]; no range check is
 * performed here.
 */
STATIC_INLINE int
xfs_filestream_put_ag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agno)
{
	int		nstreams;

	nstreams = atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
	return nstreams;
}
/*
 * Allocation selection flags.
 *
 * Each flag occupies its own bit, so the two values never overlap.
 */
typedef enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA = (1 << 0),
	XFS_PICK_LOWSPACE = (1 << 1),
} xfs_fstrm_alloc_t;
/*
 * Prototypes for xfs_filestream.c.
 *
 * The notes below describe only how the visible callers use each function;
 * confirm details against the implementation.
 */

/*
 * Global setup/teardown, called from xfs_init() and xfs_cleanup().
 * NOTE(review): xfs_init() currently ignores the return value of
 * xfs_filestream_init().
 */
int xfs_filestream_init(void);
void xfs_filestream_uninit(void);

/*
 * Per-mount setup/teardown, called from xfs_mount() and xfs_unmount().
 * A non-zero return from xfs_filestream_mount() makes xfs_mount() fail.
 */
int xfs_filestream_mount(struct xfs_mount *mp);
void xfs_filestream_unmount(struct xfs_mount *mp);

/*
 * Called before growfs reallocates the per-AG array, on a rw -> ro remount,
 * and from xfs_sync() when SYNC_IOWAIT is set.
 */
void xfs_filestream_flush(struct xfs_mount *mp);

/*
 * Returns the AG to use for @ip. Callers must handle NULLAGNUMBER;
 * xfs_bmap_btalloc() falls back to AG 0 in that case.
 */
xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);

/*
 * Associates the new inode @ip with the stream of its parent directory @dip.
 * xfs_ialloc() treats a negative return as an error (and returns it negated)
 * and sets XFS_IFILESTREAM on @ip only when the return value is zero.
 * NOTE(review): the meaning of a positive return is not visible here.
 */
int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);

/*
 * Called from xfs_remove() when the link count drops to zero and from
 * xfs_release() for an inode whose link count is already zero.
 */
void xfs_filestream_deassociate(struct xfs_inode *ip);

/*
 * Called from xfs_bmap_btalloc(); a non-zero return is propagated as an
 * error. Presumably stores the newly selected AG in *agp -- TODO confirm.
 */
int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
/*
 * Does the filestreams allocator apply to this inode?
 *
 * Returns 1 if any of the following is set, checked in this order, and 0
 * otherwise:
 *   - XFS_MOUNT_FILESTREAMS in the flags of the inode's mount,
 *   - the in-core XFS_IFILESTREAM inode flag,
 *   - XFS_DIFLAG_FILESTREAM in the inode's di_flags.
 */
STATIC_INLINE int
xfs_inode_is_filestream(
	struct xfs_inode	*ip)
{
	if (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS)
		return 1;

	if (xfs_iflags_test(ip, XFS_IFILESTREAM))
		return 1;

	return (ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) != 0;
}
#endif /* __KERNEL__ */
#endif /* __XFS_FILESTREAM_H__ */
...@@ -66,6 +66,7 @@ struct fsxattr { ...@@ -66,6 +66,7 @@ struct fsxattr {
#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ #define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* /*
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include "xfs_trans_space.h" #include "xfs_trans_space.h"
#include "xfs_rtalloc.h" #include "xfs_rtalloc.h"
#include "xfs_rw.h" #include "xfs_rw.h"
#include "xfs_filestream.h"
/* /*
* File system operations * File system operations
...@@ -165,6 +166,7 @@ xfs_growfs_data_private( ...@@ -165,6 +166,7 @@ xfs_growfs_data_private(
new = nb - mp->m_sb.sb_dblocks; new = nb - mp->m_sb.sb_dblocks;
oagcount = mp->m_sb.sb_agcount; oagcount = mp->m_sb.sb_agcount;
if (nagcount > oagcount) { if (nagcount > oagcount) {
xfs_filestream_flush(mp);
down_write(&mp->m_peraglock); down_write(&mp->m_peraglock);
mp->m_perag = kmem_realloc(mp->m_perag, mp->m_perag = kmem_realloc(mp->m_perag,
sizeof(xfs_perag_t) * nagcount, sizeof(xfs_perag_t) * nagcount,
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include "xfs_dir2_trace.h" #include "xfs_dir2_trace.h"
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_acl.h" #include "xfs_acl.h"
#include "xfs_filestream.h"
#include <linux/log2.h> #include <linux/log2.h>
...@@ -818,6 +819,8 @@ _xfs_dic2xflags( ...@@ -818,6 +819,8 @@ _xfs_dic2xflags(
flags |= XFS_XFLAG_EXTSZINHERIT; flags |= XFS_XFLAG_EXTSZINHERIT;
if (di_flags & XFS_DIFLAG_NODEFRAG) if (di_flags & XFS_DIFLAG_NODEFRAG)
flags |= XFS_XFLAG_NODEFRAG; flags |= XFS_XFLAG_NODEFRAG;
if (di_flags & XFS_DIFLAG_FILESTREAM)
flags |= XFS_XFLAG_FILESTREAM;
} }
return flags; return flags;
...@@ -1151,7 +1154,7 @@ xfs_ialloc( ...@@ -1151,7 +1154,7 @@ xfs_ialloc(
/* /*
* Project ids won't be stored on disk if we are using a version 1 inode. * Project ids won't be stored on disk if we are using a version 1 inode.
*/ */
if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1))
xfs_bump_ino_vers2(tp, ip); xfs_bump_ino_vers2(tp, ip);
if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { if (XFS_INHERIT_GID(pip, vp->v_vfsp)) {
...@@ -1196,8 +1199,16 @@ xfs_ialloc( ...@@ -1196,8 +1199,16 @@ xfs_ialloc(
flags |= XFS_ILOG_DEV; flags |= XFS_ILOG_DEV;
break; break;
case S_IFREG: case S_IFREG:
if (xfs_inode_is_filestream(pip)) {
error = xfs_filestream_associate(pip, ip);
if (error < 0)
return -error;
if (!error)
xfs_iflags_set(ip, XFS_IFILESTREAM);
}
/* fall through */
case S_IFDIR: case S_IFDIR:
if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { if (pip->i_d.di_flags & XFS_DIFLAG_ANY) {
uint di_flags = 0; uint di_flags = 0;
if ((mode & S_IFMT) == S_IFDIR) { if ((mode & S_IFMT) == S_IFDIR) {
...@@ -1234,6 +1245,8 @@ xfs_ialloc( ...@@ -1234,6 +1245,8 @@ xfs_ialloc(
if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
xfs_inherit_nodefrag) xfs_inherit_nodefrag)
di_flags |= XFS_DIFLAG_NODEFRAG; di_flags |= XFS_DIFLAG_NODEFRAG;
if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
ip->i_d.di_flags |= di_flags; ip->i_d.di_flags |= di_flags;
} }
/* FALLTHROUGH */ /* FALLTHROUGH */
......
...@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) ...@@ -379,6 +379,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
#define XFS_ISTALE 0x0010 /* inode has been staled */ #define XFS_ISTALE 0x0010 /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ #define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
#define XFS_INEW 0x0040 #define XFS_INEW 0x0040
#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */
/* /*
* Flags for inode locking. * Flags for inode locking.
......
...@@ -66,6 +66,7 @@ struct xfs_bmbt_irec; ...@@ -66,6 +66,7 @@ struct xfs_bmbt_irec;
struct xfs_bmap_free; struct xfs_bmap_free;
struct xfs_extdelta; struct xfs_extdelta;
struct xfs_swapext; struct xfs_swapext;
struct xfs_mru_cache;
extern struct bhv_vfsops xfs_vfsops; extern struct bhv_vfsops xfs_vfsops;
extern struct bhv_vnodeops xfs_vnodeops; extern struct bhv_vnodeops xfs_vnodeops;
...@@ -424,6 +425,7 @@ typedef struct xfs_mount { ...@@ -424,6 +425,7 @@ typedef struct xfs_mount {
struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
struct mutex m_icsb_mutex; /* balancer sync lock */ struct mutex m_icsb_mutex; /* balancer sync lock */
#endif #endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
} xfs_mount_t; } xfs_mount_t;
/* /*
...@@ -463,6 +465,8 @@ typedef struct xfs_mount { ...@@ -463,6 +465,8 @@ typedef struct xfs_mount {
* I/O size in stat() */ * I/O size in stat() */
#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
counters */ counters */
#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
allocator */
/* /*
......
This diff is collapsed.
/*
* Copyright (c) 2006-2007 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_MRU_CACHE_H__
#define __XFS_MRU_CACHE_H__
/*
 * Callback type used to free a client's data pointer.
 *
 * NOTE(review): the two arguments are presumably the key and value that were
 * passed to xfs_mru_cache_insert() -- confirm against xfs_mru_cache.c.
 */
typedef void (*xfs_mru_cache_free_func_t)(unsigned long key, void *value);
/*
 * State of one MRU cache instance. A pointer to this structure is handed back
 * through the first argument of xfs_mru_cache_create() and is passed to the
 * other xfs_mru_cache_*() calls.
 *
 * NOTE(review): the units of grp_time, time_zero and next_reap are not
 * visible from this header -- confirm against xfs_mru_cache.c.
 */
typedef struct xfs_mru_cache
{
struct radix_tree_root store; /* Core storage data structure. */
struct list_head *lists; /* Array of lists, one per grp. */
struct list_head reap_list; /* Elements overdue for reaping. */
spinlock_t lock; /* Lock to protect this struct. */
unsigned int grp_count; /* Number of discrete groups. */
unsigned int grp_time; /* Time period spanned by grps. */
unsigned int lru_grp; /* Group containing time zero. */
unsigned long time_zero; /* Time first element was added. */
unsigned long next_reap; /* Time that the reaper should
next do something. */
unsigned int reap_all; /* if set, reap all lists */
xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
struct delayed_work work; /* Workqueue data for reaping. */
} xfs_mru_cache_t;
/*
 * MRU cache interface. The implementation is not visible from this header;
 * the notes below are limited to what the signatures and visible callers
 * show, and anything else is marked for confirmation.
 */

/* Global setup/teardown, called from xfs_init() and xfs_cleanup(). */
int xfs_mru_cache_init(void);
void xfs_mru_cache_uninit(void);

/*
 * Create a cache and return it through *mrup. free_func is the callback used
 * to free a client's data pointer.
 * NOTE(review): lifetime_ms is presumably the element lifetime in
 * milliseconds, split across grp_count groups -- confirm.
 */
int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
unsigned int grp_count,
xfs_mru_cache_free_func_t free_func);

/*
 * Per the locking notes in xfs_filestream.h, the free functions for all
 * cache elements have finished executing by the time this returns.
 * NOTE(review): the meaning of 'restart' is not visible here.
 */
void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);

/* Add 'value' to the cache under 'key'. */
int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
void *value);

/*
 * NOTE(review): presumably remove() hands the stored value back to the
 * caller while delete() disposes of it -- confirm.
 */
void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key);

/*
 * Per the locking notes in xfs_filestream.h, the interval between a call to
 * xfs_mru_cache_lookup() and the matching xfs_mru_cache_done() is covered by
 * the cache's internal locking.
 * NOTE(review): whether xfs_mru_cache_peek() also requires a matching
 * xfs_mru_cache_done() is not visible here.
 */
void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);
void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key);
void xfs_mru_cache_done(struct xfs_mru_cache *mru);
#endif /* __XFS_MRU_CACHE_H__ */
...@@ -51,6 +51,8 @@ ...@@ -51,6 +51,8 @@
#include "xfs_acl.h" #include "xfs_acl.h"
#include "xfs_attr.h" #include "xfs_attr.h"
#include "xfs_clnt.h" #include "xfs_clnt.h"
#include "xfs_mru_cache.h"
#include "xfs_filestream.h"
#include "xfs_fsops.h" #include "xfs_fsops.h"
STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
...@@ -81,6 +83,8 @@ xfs_init(void) ...@@ -81,6 +83,8 @@ xfs_init(void)
xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
xfs_mru_cache_init();
xfs_filestream_init();
/* /*
* The size of the zone allocated buf log item is the maximum * The size of the zone allocated buf log item is the maximum
...@@ -164,6 +168,8 @@ xfs_cleanup(void) ...@@ -164,6 +168,8 @@ xfs_cleanup(void)
xfs_cleanup_procfs(); xfs_cleanup_procfs();
xfs_sysctl_unregister(); xfs_sysctl_unregister();
xfs_refcache_destroy(); xfs_refcache_destroy();
xfs_filestream_uninit();
xfs_mru_cache_uninit();
xfs_acl_zone_destroy(xfs_acl_zone); xfs_acl_zone_destroy(xfs_acl_zone);
#ifdef XFS_DIR2_TRACE #ifdef XFS_DIR2_TRACE
...@@ -320,6 +326,9 @@ xfs_start_flags( ...@@ -320,6 +326,9 @@ xfs_start_flags(
else else
mp->m_flags &= ~XFS_MOUNT_BARRIER; mp->m_flags &= ~XFS_MOUNT_BARRIER;
if (ap->flags2 & XFSMNT2_FILESTREAMS)
mp->m_flags |= XFS_MOUNT_FILESTREAMS;
return 0; return 0;
} }
...@@ -518,6 +527,9 @@ xfs_mount( ...@@ -518,6 +527,9 @@ xfs_mount(
if (mp->m_flags & XFS_MOUNT_BARRIER) if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_mountfs_check_barriers(mp); xfs_mountfs_check_barriers(mp);
if ((error = xfs_filestream_mount(mp)))
goto error2;
error = XFS_IOINIT(vfsp, args, flags); error = XFS_IOINIT(vfsp, args, flags);
if (error) if (error)
goto error2; goto error2;
...@@ -575,6 +587,13 @@ xfs_unmount( ...@@ -575,6 +587,13 @@ xfs_unmount(
*/ */
xfs_refcache_purge_mp(mp); xfs_refcache_purge_mp(mp);
/*
* Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive
* here.
*/
xfs_filestream_unmount(mp);
XFS_bflush(mp->m_ddev_targp); XFS_bflush(mp->m_ddev_targp);
error = xfs_unmount_flush(mp, 0); error = xfs_unmount_flush(mp, 0);
if (error) if (error)
...@@ -694,6 +713,7 @@ xfs_mntupdate( ...@@ -694,6 +713,7 @@ xfs_mntupdate(
mp->m_flags &= ~XFS_MOUNT_BARRIER; mp->m_flags &= ~XFS_MOUNT_BARRIER;
} }
} else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ } else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */
xfs_filestream_flush(mp);
bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL); bhv_vfs_sync(vfsp, SYNC_DATA_QUIESCE, NULL);
xfs_attr_quiesce(mp); xfs_attr_quiesce(mp);
vfsp->vfs_flag |= VFS_RDONLY; vfsp->vfs_flag |= VFS_RDONLY;
...@@ -909,6 +929,9 @@ xfs_sync( ...@@ -909,6 +929,9 @@ xfs_sync(
{ {
xfs_mount_t *mp = XFS_BHVTOM(bdp); xfs_mount_t *mp = XFS_BHVTOM(bdp);
if (flags & SYNC_IOWAIT)
xfs_filestream_flush(mp);
return xfs_syncsub(mp, flags, NULL); return xfs_syncsub(mp, flags, NULL);
} }
...@@ -1659,6 +1682,7 @@ xfs_vget( ...@@ -1659,6 +1682,7 @@ xfs_vget(
* in stat(). */ * in stat(). */
#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ #define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */
#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ #define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */
STATIC unsigned long STATIC unsigned long
suffix_strtoul(char *s, char **endp, unsigned int base) suffix_strtoul(char *s, char **endp, unsigned int base)
...@@ -1845,6 +1869,8 @@ xfs_parseargs( ...@@ -1845,6 +1869,8 @@ xfs_parseargs(
args->flags |= XFSMNT_ATTR2; args->flags |= XFSMNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) { } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
args->flags &= ~XFSMNT_ATTR2; args->flags &= ~XFSMNT_ATTR2;
} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
args->flags2 |= XFSMNT2_FILESTREAMS;
} else if (!strcmp(this_char, "osyncisdsync")) { } else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */ /* no-op, this is now the default */
cmn_err(CE_WARN, cmn_err(CE_WARN,
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include "xfs_refcache.h" #include "xfs_refcache.h"
#include "xfs_trans_space.h" #include "xfs_trans_space.h"
#include "xfs_log_priv.h" #include "xfs_log_priv.h"
#include "xfs_filestream.h"
STATIC int STATIC int
xfs_open( xfs_open(
...@@ -783,6 +784,8 @@ xfs_setattr( ...@@ -783,6 +784,8 @@ xfs_setattr(
di_flags |= XFS_DIFLAG_PROJINHERIT; di_flags |= XFS_DIFLAG_PROJINHERIT;
if (vap->va_xflags & XFS_XFLAG_NODEFRAG) if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
di_flags |= XFS_DIFLAG_NODEFRAG; di_flags |= XFS_DIFLAG_NODEFRAG;
if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
if (vap->va_xflags & XFS_XFLAG_RTINHERIT) if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT; di_flags |= XFS_DIFLAG_RTINHERIT;
...@@ -1536,7 +1539,17 @@ xfs_release( ...@@ -1536,7 +1539,17 @@ xfs_release(
if (vp->v_vfsp->vfs_flag & VFS_RDONLY) if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
return 0; return 0;
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { if (!XFS_FORCED_SHUTDOWN(mp)) {
/*
* If we are using filestreams, and we have an unlinked
* file that we are processing the last close on, then nothing
* will be able to reopen and write to this file. Purge this
* inode from the filestreams cache so that it doesn't delay
* teardown of the inode.
*/
if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
xfs_filestream_deassociate(ip);
/* /*
* If we previously truncated this file and removed old data * If we previously truncated this file and removed old data
* in the process, we want to initiate "early" writeout on * in the process, we want to initiate "early" writeout on
...@@ -1551,7 +1564,6 @@ xfs_release( ...@@ -1551,7 +1564,6 @@ xfs_release(
bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
} }
#ifdef HAVE_REFCACHE #ifdef HAVE_REFCACHE
/* If we are in the NFS reference cache then don't do this now */ /* If we are in the NFS reference cache then don't do this now */
if (ip->i_refcache) if (ip->i_refcache)
...@@ -2541,6 +2553,15 @@ xfs_remove( ...@@ -2541,6 +2553,15 @@ xfs_remove(
*/ */
xfs_refcache_purge_ip(ip); xfs_refcache_purge_ip(ip);
/*
* If we are using filestreams, kill the stream association.
* If the file is still open it may get a new one but that
* will get killed on last close in xfs_close() so we don't
* have to worry about that.
*/
if (link_zero && xfs_inode_is_filestream(ip))
xfs_filestream_deassociate(ip);
vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment