Commit 985dcc74 authored by Linus Torvalds

Merge bk://jfs.bkbits.net/linux-2.5

into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
parents 06acfb97 28db47b2
fs/jfs/file.c

@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include "jfs_incore.h"
+#include "jfs_dmap.h"
 #include "jfs_txnmgr.h"
 #include "jfs_xattr.h"
 #include "jfs_debug.h"
@@ -94,6 +95,47 @@ static void jfs_truncate(struct inode *ip)
     IWRITE_UNLOCK(ip);
 }
 
+static int jfs_open(struct inode *inode, struct file *file)
+{
+    int rc;
+
+    if ((rc = generic_file_open(inode, file)))
+        return rc;
+
+    /*
+     * We attempt to allow only one "active" file open per aggregate
+     * group.  Otherwise, appending to files in parallel can cause
+     * fragmentation within the files.
+     *
+     * If the file is empty, it was probably just created and is going
+     * to be written to.  If it has a size, we'll hold off until the
+     * file is actually grown.
+     */
+    if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE) &&
+        (inode->i_size == 0)) {
+        struct jfs_inode_info *ji = JFS_IP(inode);
+
+        if (ji->active_ag == -1) {
+            ji->active_ag = ji->agno;
+            atomic_inc(
+                &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
+        }
+    }
+
+    return 0;
+}
+
+static int jfs_release(struct inode *inode, struct file *file)
+{
+    struct jfs_inode_info *ji = JFS_IP(inode);
+
+    if (ji->active_ag != -1) {
+        struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
+        atomic_dec(&bmap->db_active[ji->active_ag]);
+        ji->active_ag = -1;
+    }
+
+    return 0;
+}
+
 struct inode_operations jfs_file_inode_operations = {
     .truncate   = jfs_truncate,
     .setxattr   = jfs_setxattr,
@@ -103,7 +145,7 @@ struct inode_operations jfs_file_inode_operations = {
 };
 
 struct file_operations jfs_file_operations = {
-    .open       = generic_file_open,
+    .open       = jfs_open,
     .llseek     = generic_file_llseek,
     .write      = generic_file_write,
     .read       = generic_file_read,
@@ -112,4 +154,5 @@ struct file_operations jfs_file_operations = {
     .writev     = generic_file_writev,
     .sendfile   = generic_file_sendfile,
     .fsync      = jfs_fsync,
+    .release    = jfs_release,
 };
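Taken together, the two new functions maintain a simple invariant: db_active[ag] counts the files currently being grown in allocation group ag, and each inode holds at most one such claim, remembered in active_ag. A minimal user-space model of that accounting, with plain ints standing in for atomic_t and all locking elided (the *_model names and MAXAG_MODEL are illustrative only):

    #define MAXAG_MODEL 128                 /* stand-in for JFS's MAXAG */

    struct bmap_model  { int db_active[MAXAG_MODEL]; };
    struct inode_model { int agno; int active_ag; long size; int is_reg; };

    /* jfs_open(): claim the inode's ag only for an empty regular file
     * opened for write; a file that already has data waits until it
     * actually grows (see extBalloc() later in this commit). */
    void model_open(struct bmap_model *bmap, struct inode_model *ip,
                    int for_write)
    {
        if (ip->is_reg && for_write && ip->size == 0 && ip->active_ag == -1) {
            ip->active_ag = ip->agno;
            bmap->db_active[ip->agno]++;
        }
    }

    /* jfs_release(): drop the claim, whichever ag the file migrated to. */
    void model_release(struct bmap_model *bmap, struct inode_model *ip)
    {
        if (ip->active_ag != -1) {
            bmap->db_active[ip->active_ag]--;
            ip->active_ag = -1;
        }
    }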
fs/jfs/jfs_dmap.c

@@ -241,6 +241,7 @@ int dbMount(struct inode *ipbmap)
     bmp->db_ipbmap = ipbmap;
     JFS_SBI(ipbmap->i_sb)->bmap = bmp;
+    memset(bmp->db_active, 0, sizeof(bmp->db_active));
     DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
 
     /*
@@ -271,6 +272,7 @@ int dbMount(struct inode *ipbmap)
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
     struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
+    int i;
 
     if (!(mounterror || isReadOnly(ipbmap)))
         dbSync(ipbmap);
@@ -280,6 +282,14 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
      */
     truncate_inode_pages(ipbmap->i_mapping, 0);
 
+    /*
+     * Sanity Check
+     */
+    for (i = 0; i < bmp->db_numag; i++)
+        if (atomic_read(&bmp->db_active[i]))
+            printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n",
+                   i, atomic_read(&bmp->db_active[i]));
+
     /* free the memory for the in-memory bmap. */
     kfree(bmp);
@@ -598,102 +608,77 @@ dbUpdatePMap(struct inode *ipbmap,
  *
  * FUNCTION:    find the preferred allocation group for new allocations.
  *
- *      we try to keep the trailing (rightmost) allocation groups
- *      free for large allocations.  we try to do this by targeting
- *      new inode allocations towards the leftmost or 'active'
- *      allocation groups while keeping the rightmost or 'inactive'
- *      allocation groups free.  once the active allocation groups
- *      have dropped to a certain percentage of free space, we add
- *      the leftmost inactive allocation group to the active set.
- *
- *      within the active allocation groups, we maintain a preferred
+ *      Within the allocation groups, we maintain a preferred
 *       allocation group which consists of a group with at least
- *      average free space over the active set.  it is the preferred
- *      group that we target new inode allocation towards.  the
- *      tie-in between inode allocation and block allocation occurs
- *      as we allocate the first (data) block of an inode and specify
- *      the inode (block) as the allocation hint for this block.
+ *      average free space.  It is the preferred group that we target
+ *      new inode allocation towards.  The tie-in between inode
+ *      allocation and block allocation occurs as we allocate the
+ *      first (data) block of an inode and specify the inode (block)
+ *      as the allocation hint for this block.
+ *
+ *      We try to avoid having more than one open file growing in
+ *      an allocation group, as this will lead to fragmentation.
+ *      This differs from the old OS/2 method of trying to keep
+ *      empty ags around for large allocations.
 *
 * PARAMETERS:
 *       ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
 *       the preferred allocation group number.
- *
- * note: only called by dbAlloc();
 */
 int dbNextAG(struct inode *ipbmap)
 {
-    s64 avgfree, inactfree, actfree, rem;
-    int actags, inactags, l2agsize;
+    s64 avgfree;
+    int agpref;
+    s64 hwm = 0;
+    int i;
+    int next_best = -1;
     struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
 
     BMAP_LOCK(bmp);
 
-    /* determine the number of active allocation groups (i.e.
-     * the number of allocation groups up to and including
-     * the rightmost allocation group with blocks allocated
-     * in it.
-     */
-    actags = bmp->db_maxag + 1;
-    assert(actags <= bmp->db_numag);
-
-    /* get the number of inactive allocation groups (i.e. the
-     * number of allocation groups following the rightmost group
-     * with allocation in it.
-     */
-    inactags = bmp->db_numag - actags;
-
-    /* determine how many blocks are in the inactive allocation
-     * groups.  in doing this, we must account for the fact that
-     * the rightmost group might be a partial group (i.e. file
-     * system size is not a multiple of the group size).
-     */
-    l2agsize = bmp->db_agl2size;
-    rem = bmp->db_mapsize & (bmp->db_agsize - 1);
-    inactfree = (inactags && rem) ?
-        ((inactags - 1) << l2agsize) + rem : inactags << l2agsize;
-
-    /* now determine how many free blocks are in the active
-     * allocation groups plus the average number of free blocks
-     * within the active ags.
-     */
-    actfree = bmp->db_nfree - inactfree;
-    avgfree = (u32) actfree / (u32) actags;
-
-    /* check if not all of the allocation groups are active.
-     */
-    if (actags < bmp->db_numag) {
-        /* not all of the allocation groups are active.  determine
-         * if we should extend the active set by 1 (i.e. add the
-         * group following the current active set).  we do so if
-         * the number of free blocks within the active set is less
-         * than the allocation group set and average free within
-         * the active set is less than 60%.  we activate a new group
-         * by setting the allocation group preference to the new
-         * group.
-         */
-        if (actfree < bmp->db_agsize &&
-            ((avgfree * 100) >> l2agsize) < 60)
-            bmp->db_agpref = actags;
-    } else {
-        /* all allocation groups are in the active set.  check if
-         * the preferred allocation group has average free space.
-         * if not, re-establish the preferred group as the leftmost
-         * group with average free space.
-         */
-        if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
-            for (bmp->db_agpref = 0; bmp->db_agpref < actags;
-                 bmp->db_agpref++) {
-                if (bmp->db_agfree[bmp->db_agpref] <= avgfree)
-                    break;
-            }
-            assert(bmp->db_agpref < bmp->db_numag);
-        }
-    }
+    /* determine the average number of free blocks within the ags. */
+    avgfree = (u32)bmp->db_nfree / bmp->db_numag;
+
+    /*
+     * if the current preferred ag does not have an active allocator
+     * and has at least average freespace, return it
+     */
+    agpref = bmp->db_agpref;
+    if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
+        (bmp->db_agfree[agpref] >= avgfree))
+        goto found;
+
+    /* From the last preferred ag, find the next one with at least
+     * average free space.
+     */
+    for (i = 0; i < bmp->db_numag; i++, agpref++) {
+        if (agpref == bmp->db_numag)
+            agpref = 0;
+
+        if (atomic_read(&bmp->db_active[agpref]))
+            /* open file is currently growing in this ag */
+            continue;
+        if (bmp->db_agfree[agpref] >= avgfree)
+            goto found;
+        else if (bmp->db_agfree[agpref] > hwm) {
+            hwm = bmp->db_agfree[agpref];
+            next_best = agpref;
+        }
+    }
+
+    /*
+     * If no inactive ag was found with average freespace, use the
+     * next best
+     */
+    if (next_best != -1)
+        agpref = next_best;
+    /* else agpref should be back to its original value */
+
+found:
+    bmp->db_agpref = agpref;
 
     BMAP_UNLOCK(bmp);
 
     /* return the preferred group.
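Restated outside the kernel, the new dbNextAG() policy is a round-robin scan with a best-effort fallback. A compact sketch, assuming plain ints for atomic_t and no locking (next_ag and its parameters are illustrative; the one deliberate difference is that it returns the caller's starting preference explicitly rather than letting the cursor wrap back to it):

    static int next_ag(const long *agfree, const int *active,
                       int numag, long nfree, int agpref)
    {
        long avgfree = nfree / numag;   /* average free blocks per ag */
        long hwm = 0;                   /* most free space seen so far */
        int i, start = agpref, next_best = -1;

        /* keep the current preference if it is idle and at least average */
        if (active[agpref] == 0 && agfree[agpref] >= avgfree)
            return agpref;

        /* round-robin from the old preference, skipping busy ags */
        for (i = 0; i < numag; i++, agpref++) {
            if (agpref == numag)
                agpref = 0;
            if (active[agpref])
                continue;               /* an open file is growing here */
            if (agfree[agpref] >= avgfree)
                return agpref;          /* idle and roomy: take it */
            if (agfree[agpref] > hwm) {
                hwm = agfree[agpref];   /* remember the best idle ag */
                next_best = agpref;
            }
        }

        /* no idle ag had average free space: fall back to the best idle
         * ag seen, else stay with the original preference */
        return (next_best != -1) ? next_best : start;
    }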
@@ -701,7 +686,6 @@ int dbNextAG(struct inode *ipbmap)
     return (bmp->db_agpref);
 }
 
-
 /*
  * NAME:    dbAlloc()
  *
@@ -750,6 +734,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
     struct dmap *dp;
     int l2nb;
     s64 mapSize;
+    int writers;
 
     /* assert that nblocks is valid */
     assert(nblocks > 0);
@@ -774,11 +759,10 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
     /* the hint should be within the map */
     assert(hint < mapSize);
 
-    /* if no hint was specified or the number of blocks to be
-     * allocated is greater than the allocation group size, try
-     * to allocate anywhere.
+    /* if the number of blocks to be allocated is greater than the
+     * allocation group size, try to allocate anywhere.
      */
-    if (hint == 0 || l2nb > bmp->db_agl2size) {
+    if (l2nb > bmp->db_agl2size) {
         IWRITE_LOCK(ipbmap);
 
         rc = dbAllocAny(bmp, nblocks, l2nb, results);
@@ -790,39 +774,34 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
         goto write_unlock;
     }
 
+    /*
+     * If no hint, let dbNextAG recommend an allocation group
+     */
+    if (hint == 0)
+        goto pref_ag;
+
     /* we would like to allocate close to the hint.  adjust the
      * hint to the block following the hint since the allocators
      * will start looking for free space starting at this point.
-     * if the hint was the last block of the file system, try to
-     * allocate in the same allocation group as the hint.
      */
     blkno = hint + 1;
-    if (blkno >= bmp->db_mapsize) {
-        blkno--;
-        goto tryag;
-    }
+
+    if (blkno >= bmp->db_mapsize)
+        goto pref_ag;
+
+    agno = blkno >> bmp->db_agl2size;
 
     /* check if blkno crosses over into a new allocation group.
      * if so, check if we should allow allocations within this
-     * allocation group.  we try to keep the trailing (rightmost)
-     * allocation groups of the file system free for large
-     * allocations and may want to prevent this allocation from
-     * spilling over into this space.
+     * allocation group.
      */
-    if ((blkno & (bmp->db_agsize - 1)) == 0) {
-        /* check if the AG is beyond the rightmost AG with
-         * allocations in it.  if so, call dbNextAG() to
-         * determine if the allocation should be allowed
-         * to proceed within this AG or should be targeted
-         * to another AG.
+    if ((blkno & (bmp->db_agsize - 1)) == 0)
+        /* check if the AG is currently being written to.
+         * if so, call dbNextAG() to find a non-busy
+         * AG with sufficient free space.
          */
-        agno = blkno >> bmp->db_agl2size;
-        if (agno > bmp->db_maxag) {
-            agno = dbNextAG(ipbmap);
-            blkno = (s64) agno << bmp->db_agl2size;
-            goto tryag;
-        }
-    }
+        if (atomic_read(&bmp->db_active[agno]))
+            goto pref_ag;
 
     /* check if the allocation request size can be satisfied from a
      * single dmap.  if so, try to allocate from the dmap containing
@@ -844,9 +823,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
         /* first, try to satisfy the allocation request with the
         * blocks beginning at the hint.
         */
-        if ((rc =
-             dbAllocNext(bmp, dp, blkno,
-                         (int) nblocks)) != ENOSPC) {
+        if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
+            != ENOSPC) {
            if (rc == 0) {
                *results = blkno;
                DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
@@ -858,12 +836,23 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
            goto read_unlock;
        }
 
+        writers = atomic_read(&bmp->db_active[agno]);
+        if ((writers > 1) ||
+            ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
+            /*
+             * Someone else is writing in this allocation
+             * group.  To avoid fragmenting, try another ag
+             */
+            release_metapage(mp);
+            IREAD_UNLOCK(ipbmap);
+            goto pref_ag;
+        }
+
        /* next, try to satisfy the allocation request with blocks
        * near the hint.
        */
        if ((rc =
-            dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb,
-                        results))
+            dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
            != ENOSPC) {
            if (rc == 0) {
                DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
@@ -876,10 +865,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        }
 
        /* try to satisfy the allocation request with blocks within
-        * the same allocation group as the hint.
+        * the same dmap as the hint.
        */
-        if ((rc =
-             dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
+        if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
            != ENOSPC) {
            if (rc == 0) {
                DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
@@ -895,14 +883,30 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        IREAD_UNLOCK(ipbmap);
     }
 
-  tryag:
+    /* try to satisfy the allocation request with blocks within
+     * the same allocation group as the hint.
+     */
+    IWRITE_LOCK(ipbmap);
+    if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results))
+        != ENOSPC) {
+        if (rc == 0)
+            DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
+                    *results, nblocks);
+        goto write_unlock;
+    }
+    IWRITE_UNLOCK(ipbmap);
+
+  pref_ag:
+    /*
+     * Let dbNextAG recommend a preferred allocation group
+     */
+    agno = dbNextAG(ipbmap);
     IWRITE_LOCK(ipbmap);
 
-    /* determine the allocation group number of the hint and try to
-     * allocate within this allocation group.  if that fails, try to
+    /* Try to allocate within this allocation group.  if that fails, try to
     * allocate anywhere in the map.
     */
-    agno = blkno >> bmp->db_agl2size;
     if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == ENOSPC)
        rc = dbAllocAny(bmp, nblocks, l2nb, results);
 
     if (rc == 0) {
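The heart of the new dbAlloc() behavior is the writers test in the hunk above: allocation may continue in an ag the calling inode already owns, but not in one where any other file is growing. Extracted as a pure function (ag_busy_for is a made-up name for illustration):

    #include <stdbool.h>

    /* "Someone else is writing here": we tolerate ourselves as the
     * single writer, anything more means another growing file. */
    static bool ag_busy_for(int writers_in_ag, int my_active_ag, int agno)
    {
        return writers_in_ag > 1 ||
               (writers_in_ag == 1 && my_active_ag != agno);
    }

    /* Examples:
     *   ag_busy_for(0, -1, 3) == false   nobody is writing
     *   ag_busy_for(1,  3, 3) == false   the single writer is us
     *   ag_busy_for(1, -1, 3) == true    one writer, and it isn't us
     *   ag_busy_for(2,  3, 3) == true    us plus somebody else
     */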
@@ -2314,11 +2318,9 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
     * if so, establish the new maximum allocation group number by
     * searching left for the first allocation group with allocation.
     */
-    if ((bmp->db_agfree[agno] == bmp->db_agsize
-         && agno == bmp->db_maxag) || (agno == bmp->db_numag - 1
-                                       && bmp->db_agfree[agno] ==
-                                       (bmp-> db_mapsize &
-                                        (BPERDMAP - 1)))) {
+    if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
+        (agno == bmp->db_numag - 1 &&
+         bmp->db_agfree[agno] == (bmp->db_mapsize & (BPERDMAP - 1)))) {
        while (bmp->db_maxag > 0) {
            bmp->db_maxag -= 1;
            if (bmp->db_agfree[bmp->db_maxag] !=
...
fs/jfs/jfs_dmap.h

@@ -227,6 +227,7 @@ struct bmap {
     struct dbmap db_bmap;           /* on-disk aggregate map descriptor */
     struct inode *db_ipbmap;        /* ptr to aggregate map incore inode */
     struct semaphore db_bmaplock;   /* aggregate map lock */
+    atomic_t db_active[MAXAG];      /* count of active, open files in AG */
     u32 *db_DBmap;
 };
...
fs/jfs/jfs_extent.c

@@ -514,9 +514,12 @@ int extFill(struct inode *ip, xad_t * xp)
 static int
 extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 {
+    struct jfs_inode_info *ji = JFS_IP(ip);
+    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
     s64 nb, nblks, daddr, max;
-    int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
-    struct bmap *mp = JFS_SBI(ip->i_sb)->bmap;
+    int rc, nbperpage = sbi->nbperpage;
+    struct bmap *bmp = sbi->bmap;
+    int ag;
 
     /* get the number of blocks to initially attempt to allocate.
      * we'll first try the number of blocks requested unless this
@@ -524,7 +527,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
      * blocks in the map.  in that case, we'll start off with the
      * maximum free.
      */
-    max = (s64) 1 << mp->db_maxfreebud;
+    max = (s64) 1 << bmp->db_maxfreebud;
     if (*nblocks >= max && *nblocks > nbperpage)
        nb = nblks = (max > nbperpage) ? max : nbperpage;
     else
@@ -549,6 +552,18 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
     *nblocks = nb;
     *blkno = daddr;
 
+    if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
+        ag = BLKTOAG(daddr, sbi);
+        if (ji->active_ag == -1) {
+            atomic_inc(&bmp->db_active[ag]);
+            ji->active_ag = ag;
+        } else if (ji->active_ag != ag) {
+            atomic_dec(&bmp->db_active[ji->active_ag]);
+            atomic_inc(&bmp->db_active[ag]);
+            ji->active_ag = ag;
+        }
+    }
+
     return (0);
 }
...
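extBalloc() is where an inode's claim can migrate: if the blocks just allocated landed in a different ag than the one recorded in active_ag, the old counter is dropped and the new one taken. The same step in isolation (illustrative; plain ints stand in for atomic_t, and move_claim is a made-up name):

    /* Three cases: first claim, same ag (no counter change), or a
     * move that releases the old ag before claiming the new one. */
    static void move_claim(int *db_active, int *active_ag, int new_ag)
    {
        if (*active_ag == -1)
            db_active[new_ag]++;            /* first claim */
        else if (*active_ag != new_ag) {
            db_active[*active_ag]--;        /* release old ag */
            db_active[new_ag]++;            /* claim new ag */
        }
        *active_ag = new_ag;
    }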
fs/jfs/jfs_imap.c

@@ -429,6 +429,7 @@ int diRead(struct inode *ip)
 
     /* set the ag for the inode */
     JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
+    JFS_IP(ip)->active_ag = -1;
 
     return (rc);
 }
@@ -1358,6 +1359,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
     DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
     jfs_ip->ixpxd = iagp->inoext[extno];
     jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+    jfs_ip->active_ag = -1;
 }
@@ -1413,6 +1415,21 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
     * moving backward on the disk.)  compute the hint within the
     * file system and the iag.
     */
+
+    /* get the ag number of this iag */
+    agno = JFS_IP(pip)->agno;
+
+    if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
+        /*
+         * There is an open file actively growing.  We want to
+         * allocate new inodes from a different ag to avoid
+         * fragmentation problems.
+         */
+        agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
+        AG_LOCK(imap, agno);
+        goto tryag;
+    }
+
     inum = pip->i_ino + 1;
     ino = inum & (INOSPERIAG - 1);
@@ -1420,9 +1437,6 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
     if (ino == 0)
        inum = pip->i_ino;
 
-    /* get the ag number of this iag */
-    agno = JFS_IP(pip)->agno;
-
     /* lock the AG inode map information */
     AG_LOCK(imap, agno);
...
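The diAlloc() hunk applies the same signal to inode placement: new inodes normally go into the parent's ag for locality, but a busy ag redirects them through dbNextAG(). Reusing the next_ag() sketch from earlier (pick_inode_ag is hypothetical):

    static int pick_inode_ag(int parent_agno, const int *active,
                             const long *agfree, int numag, long nfree)
    {
        if (active[parent_agno])    /* a file is growing here: go elsewhere */
            return next_ag(agfree, active, numag, nfree, parent_agno);
        return parent_agno;         /* default: keep locality with the parent */
    }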
fs/jfs/jfs_incore.h

@@ -49,7 +49,7 @@ struct jfs_inode_info {
     long    cflag;          /* commit flags */
     u16     bxflag;         /* xflag of pseudo buffer? */
     unchar  agno;           /* ag number */
-    unchar  pad;            /* pad */
+    signed char active_ag;  /* ag currently allocating from */
     lid_t   blid;           /* lid of pseudo buffer? */
     lid_t   atlhead;        /* anonymous tlock list head */
     lid_t   atltail;        /* anonymous tlock list tail */
...
fs/jfs/jfs_logmgr.c

@@ -66,6 +66,7 @@
 #include <linux/completion.h>
 #include <linux/buffer_head.h>     /* for sync_blockdev() */
 #include <linux/bio.h>
+#include <linux/suspend.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -2146,12 +2147,17 @@ int jfsIOWait(void *arg)
            lbmStartIO(bp);
            spin_lock_irq(&log_redrive_lock);
        }
-        add_wait_queue(&jfs_IO_thread_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        spin_unlock_irq(&log_redrive_lock);
-        schedule();
-        current->state = TASK_RUNNING;
-        remove_wait_queue(&jfs_IO_thread_wait, &wq);
+        if (current->flags & PF_FREEZE) {
+            spin_unlock_irq(&log_redrive_lock);
+            refrigerator(PF_IOTHREAD);
+        } else {
+            add_wait_queue(&jfs_IO_thread_wait, &wq);
+            set_current_state(TASK_INTERRUPTIBLE);
+            spin_unlock_irq(&log_redrive_lock);
+            schedule();
+            current->state = TASK_RUNNING;
+            remove_wait_queue(&jfs_IO_thread_wait, &wq);
+        }
     } while (!jfs_stop_threads);
 
     jFYI(1,("jfsIOWait being killed!\n"));
...
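The jfsIOWait() change, and the matching ones in jfs_lazycommit() and jfs_sync() below, follow the standard 2.5-era pattern for making kernel daemons software-suspend aware: before sleeping on its work queue, the thread checks PF_FREEZE and, if set, parks in refrigerator() until the system is thawed. A user-space analogue of the same control flow using pthreads (all names here are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;  /* new work */
    static pthread_cond_t  thaw = PTHREAD_COND_INITIALIZER;  /* resume */
    static bool freezing, stopping, have_work;

    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!stopping) {
            while (have_work) {
                have_work = false;
                pthread_mutex_unlock(&lock);
                /* ... process queued work without holding the lock ... */
                pthread_mutex_lock(&lock);
            }
            if (freezing) {                 /* mirrors the PF_FREEZE test */
                while (freezing)            /* mirrors refrigerator() */
                    pthread_cond_wait(&thaw, &lock);
            } else {
                pthread_cond_wait(&wake, &lock);
            }
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }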
fs/jfs/jfs_txnmgr.c

@@ -47,6 +47,7 @@
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
+#include <linux/suspend.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -2789,8 +2790,6 @@ int jfs_lazycommit(void *arg)
     complete(&jfsIOwait);
 
     do {
-        DECLARE_WAITQUEUE(wq, current);
-
        LAZY_LOCK(flags);
   restart:
        WorkDone = 0;
@@ -2825,12 +2824,19 @@ int jfs_lazycommit(void *arg)
        if (WorkDone)
            goto restart;
 
-        add_wait_queue(&jfs_commit_thread_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        LAZY_UNLOCK(flags);
-        schedule();
-        current->state = TASK_RUNNING;
-        remove_wait_queue(&jfs_commit_thread_wait, &wq);
+        if (current->flags & PF_FREEZE) {
+            LAZY_UNLOCK(flags);
+            refrigerator(PF_IOTHREAD);
+        } else {
+            DECLARE_WAITQUEUE(wq, current);
+
+            add_wait_queue(&jfs_commit_thread_wait, &wq);
+            set_current_state(TASK_INTERRUPTIBLE);
+            LAZY_UNLOCK(flags);
+            schedule();
+            current->state = TASK_RUNNING;
+            remove_wait_queue(&jfs_commit_thread_wait, &wq);
+        }
     } while (!jfs_stop_threads);
 
     if (TxAnchor.unlock_queue)
@@ -2981,7 +2987,6 @@ int jfs_sync(void *arg)
     complete(&jfsIOwait);
 
     do {
-        DECLARE_WAITQUEUE(wq, current);
        /*
        * write each inode on the anonymous inode list
        */
@@ -3030,12 +3035,20 @@ int jfs_sync(void *arg)
        }
        /* Add anon_list2 back to anon_list */
        list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
-        add_wait_queue(&jfs_sync_thread_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        TXN_UNLOCK();
-        schedule();
-        current->state = TASK_RUNNING;
-        remove_wait_queue(&jfs_sync_thread_wait, &wq);
+
+        if (current->flags & PF_FREEZE) {
+            TXN_UNLOCK();
+            refrigerator(PF_IOTHREAD);
+        } else {
+            DECLARE_WAITQUEUE(wq, current);
+
+            add_wait_queue(&jfs_sync_thread_wait, &wq);
+            set_current_state(TASK_INTERRUPTIBLE);
+            TXN_UNLOCK();
+            schedule();
+            current->state = TASK_RUNNING;
+            remove_wait_queue(&jfs_sync_thread_wait, &wq);
+        }
     } while (!jfs_stop_threads);
 
     jFYI(1, ("jfs_sync being killed\n"));
...
fs/jfs/super.c

@@ -406,6 +406,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
        init_rwsem(&jfs_ip->rdwrlock);
        init_MUTEX(&jfs_ip->commit_sem);
        jfs_ip->atlhead = 0;
+        jfs_ip->active_ag = -1;
        inode_init_once(&jfs_ip->vfs_inode);
     }
 }
...
fs/jfs/xattr.c

@@ -78,6 +78,68 @@ struct ea_buffer {
 #define EA_NEW      0x0004
 #define EA_MALLOC   0x0008
 
+/* Namespaces */
+#define XATTR_SYSTEM_PREFIX "system."
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)
+
+#define XATTR_USER_PREFIX "user."
+#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
+
+#define XATTR_OS2_PREFIX "os2."
+#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
+
+/*
+ * These three routines are used to recognize on-disk extended attributes
+ * that are in a recognized namespace.  If the attribute is not recognized,
+ * "os2." is prepended to the name
+ */
+static inline int is_os2_xattr(struct jfs_ea *ea)
+{
+    /*
+     * Check for "system."
+     */
+    if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
+        !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+        return FALSE;
+    /*
+     * Check for "user."
+     */
+    if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
+        !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+        return FALSE;
+    /*
+     * Add any other valid namespace prefixes here
+     */
+
+    /*
+     * We assume it's OS/2's flat namespace
+     */
+    return TRUE;
+}
+
+static inline int name_size(struct jfs_ea *ea)
+{
+    if (is_os2_xattr(ea))
+        return ea->namelen + XATTR_OS2_PREFIX_LEN;
+    else
+        return ea->namelen;
+}
+
+static inline int copy_name(char *buffer, struct jfs_ea *ea)
+{
+    int len = ea->namelen;
+
+    if (is_os2_xattr(ea)) {
+        memcpy(buffer, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN);
+        buffer += XATTR_OS2_PREFIX_LEN;
+        len += XATTR_OS2_PREFIX_LEN;
+    }
+    memcpy(buffer, ea->name, ea->namelen);
+    buffer[ea->namelen] = 0;
+
+    return len;
+}
+
 /* Forward references */
 static void ea_release(struct inode *inode, struct ea_buffer *ea_buf);
@@ -577,7 +639,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
     return rc;
 }
 
-static int can_set_xattr(struct inode *inode, const char *name)
+static int can_set_xattr(struct inode *inode, const char *name,
+                         void *value, size_t value_len)
 {
     if (IS_RDONLY(inode))
        return -EROFS;
@@ -585,6 +648,10 @@ static int can_set_xattr(struct inode *inode, const char *name)
     if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode))
        return -EPERM;
 
+    if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) &&
+       (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0))
+        return -EOPNOTSUPP;
+
     if (!S_ISREG(inode->i_mode) &&
        (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
        return -EPERM;
@@ -602,13 +669,24 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value,
     int xattr_size;
     int new_size;
     int namelen = strlen(name);
+    char *os2name = NULL;
     int found = 0;
     int rc;
     int length;
 
-    if ((rc = can_set_xattr(inode, name)))
+    if ((rc = can_set_xattr(inode, name, value, value_len)))
        return rc;
 
+    if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
+        os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
+                          GFP_KERNEL);
+        if (!os2name)
+            return -ENOMEM;
+        strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
+        name = os2name;
+        namelen -= XATTR_OS2_PREFIX_LEN;
+    }
+
     xattr_size = ea_get(inode, &ea_buf, 0);
     if (xattr_size < 0) {
        rc = xattr_size;
@@ -714,6 +792,9 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value,
   release:
     ea_release(inode, &ea_buf);
   out:
+    if (os2name)
+        kfree(os2name);
+
     return rc;
 }
@@ -728,7 +809,7 @@ int jfs_setxattr(struct dentry *dentry, const char *name, void *value,
     return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
 }
 
-static int can_get_xattr(struct inode *inode, const char *name)
+static inline int can_get_xattr(struct inode *inode, const char *name)
 {
     return permission(inode, MAY_READ);
 }
@@ -742,12 +823,23 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
     int xattr_size;
     ssize_t size;
     int namelen = strlen(name);
+    char *os2name = NULL;
     int rc;
     char *value;
 
     if ((rc = can_get_xattr(inode, name)))
        return rc;
 
+    if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
+        os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
+                          GFP_KERNEL);
+        if (!os2name)
+            return -ENOMEM;
+        strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
+        name = os2name;
+        namelen -= XATTR_OS2_PREFIX_LEN;
+    }
+
     xattr_size = ea_get(inode, &ea_buf, 0);
     if (xattr_size < 0) {
        size = xattr_size;
@@ -780,6 +872,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
   release:
     ea_release(inode, &ea_buf);
   out:
+    if (os2name)
+        kfree(os2name);
     return size;
 }
@@ -813,7 +907,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
 
     /* compute required size of list */
     for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea))
-        size += ea->namelen + 1;
+        size += name_size(ea) + 1;
 
     if (!data)
        goto release;
@@ -826,9 +920,8 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
     /* Copy attribute names to buffer */
     buffer = data;
     for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
-        memcpy(buffer, ea->name, ea->namelen);
-        buffer[ea->namelen] = 0;
-        buffer += ea->namelen + 1;
+        int namelen = copy_name(buffer, ea);
+        buffer += namelen + 1;
     }
 
   release:
...
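From user space, the effect of the namespace changes above is that JFS now accepts exactly two setxattr prefixes, "user." and "os2.": anything else fails the new check in can_set_xattr() with EOPNOTSUPP, an "os2."-prefixed name is stored on disk with the prefix stripped, and bare legacy OS/2 names come back from listxattr() with "os2." prepended. A small demonstration of that mapping (the mount point and attribute name are made up; the header is the modern <sys/xattr.h>, the 2.5-era equivalent was <attr/xattr.h>):

    #include <stdio.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(void)
    {
        const char *path = "/mnt/jfs/testfile"; /* assumed JFS mount */
        char list[1024];
        ssize_t len, i;

        /* stored on disk under the bare name "COMMENTS"; a prefix other
         * than "user." or "os2." would now fail with EOPNOTSUPP */
        if (setxattr(path, "os2.COMMENTS", "hello", 5, 0) != 0)
            perror("setxattr");

        len = listxattr(path, list, sizeof(list));
        for (i = 0; i < len; i += strlen(&list[i]) + 1)
            printf("%s\n", &list[i]);   /* prints "os2.COMMENTS" */
        return 0;
    }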