Commit 6abc05cc authored by Andrew Morton, committed by Linus Torvalds

[PATCH] xattr: fine-grained locking

From: Andreas Gruenbacher <agruen@suse.de>

This patch removes the dependency on i_sem in the getxattr and
listxattr iops of ext2 and ext3. In addition, the global ext[23]_xattr
semaphores go away. Instead of i_sem and the global semaphore, mutual
exclusion is now ensured by per-inode xattr semaphores, and by locking
the buffers before modifying them. The detailed locking strategy is
described in comments in fs/ext[23]/xattr.c.

Due to this change it is no longer necessary to take i_sem in
ext[23]_permission() for retrieving acls, so the
ext[23]_permission_locked() functions go away.

Additionally, the patch fixes a race condition in ext[23]_permission:
Accessing inode->i_acl was protected by the BKL in 2.4; in 2.5 there no
longer is such protection. Instead, inode->i_acl (and inode->i_default_acl)
are now accessed under inode->i_lock. (This could be replaced by RCU in
the future.)

In the ext3 extended attribute code, a new ugliness results from locking
at the buffer head level: The buffer lock must be held between testing
if an xattr block can be modified and the actual modification to prevent
races from happening. Before a block can be modified,
ext3_journal_get_write_access() must be called. But this requires an unlocked
buffer, so I call ext3_journal_get_write_access() before locking the
buffer. If it turns out that the buffer cannot be modified,
journal_release_buffer() is called. Calling ext3_journal_get_write_access
after the test but while the buffer is still locked would be much better.
parent 430cab6d
...@@ -124,14 +124,38 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) ...@@ -124,14 +124,38 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
/*
 * Look up the ACL cached in the slot *i_acl (either the access or the
 * default ACL slot of the inode).
 *
 * Returns EXT2_ACL_NOT_CACHED if the slot holds no cached value yet;
 * otherwise returns posix_acl_dup() of the cached ACL.  The slot is read
 * and duplicated while holding inode->i_lock.
 */
static inline struct posix_acl *
ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
{
	struct posix_acl *cached;

	spin_lock(&inode->i_lock);
	cached = *i_acl;
	if (cached != EXT2_ACL_NOT_CACHED)
		cached = posix_acl_dup(cached);
	spin_unlock(&inode->i_lock);

	return cached;
}
/*
 * Store posix_acl_dup(acl) in the cache slot *i_acl.
 *
 * If the slot already holds a cached ACL (anything other than
 * EXT2_ACL_NOT_CACHED), that value is handed to posix_acl_release()
 * before being overwritten.  The whole update runs under inode->i_lock.
 */
static inline void
ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
	      struct posix_acl *acl)
{
	struct posix_acl *previous;

	spin_lock(&inode->i_lock);
	previous = *i_acl;
	if (previous != EXT2_ACL_NOT_CACHED)
		posix_acl_release(previous);
	*i_acl = posix_acl_dup(acl);
	spin_unlock(&inode->i_lock);
}
/* /*
* inode->i_sem: down * inode->i_sem: don't care
*/ */
static struct posix_acl * static struct posix_acl *
ext2_get_acl(struct inode *inode, int type) ext2_get_acl(struct inode *inode, int type)
{ {
const size_t max_size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES); const size_t max_size = ext2_acl_size(EXT2_ACL_MAX_ENTRIES);
struct ext2_inode_inode *ei = EXT2_I(inode); struct ext2_inode_info *ei = EXT2_I(inode);
int name_index; int name_index;
char *value; char *value;
struct posix_acl *acl; struct posix_acl *acl;
...@@ -142,14 +166,16 @@ ext2_get_acl(struct inode *inode, int type) ...@@ -142,14 +166,16 @@ ext2_get_acl(struct inode *inode, int type)
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
if (ei->i_acl != EXT2_ACL_NOT_CACHED) acl = ext2_iget_acl(inode, &ei->i_acl);
return posix_acl_dup(ei->i_acl); if (acl != EXT2_ACL_NOT_CACHED)
return acl;
name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) acl = ext2_iget_acl(inode, &ei->i_default_acl);
return posix_acl_dup(ei->i_default_acl); if (acl != EXT2_ACL_NOT_CACHED)
return acl;
name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
break; break;
...@@ -171,11 +197,11 @@ ext2_get_acl(struct inode *inode, int type) ...@@ -171,11 +197,11 @@ ext2_get_acl(struct inode *inode, int type)
if (!IS_ERR(acl)) { if (!IS_ERR(acl)) {
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
ei->i_acl = posix_acl_dup(acl); ext2_iset_acl(inode, &ei->i_acl, acl);
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
ei->i_default_acl = posix_acl_dup(acl); ext2_iset_acl(inode, &ei->i_default_acl, acl);
break; break;
} }
} }
...@@ -240,23 +266,24 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) ...@@ -240,23 +266,24 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (!error) { if (!error) {
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
if (ei->i_acl != EXT2_ACL_NOT_CACHED) ext2_iset_acl(inode, &ei->i_acl, acl);
posix_acl_release(ei->i_acl);
ei->i_acl = posix_acl_dup(acl);
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
if (ei->i_default_acl != EXT2_ACL_NOT_CACHED) ext2_iset_acl(inode, &ei->i_default_acl, acl);
posix_acl_release(ei->i_default_acl);
ei->i_default_acl = posix_acl_dup(acl);
break; break;
} }
} }
return error; return error;
} }
static int /*
__ext2_permission(struct inode *inode, int mask, int lock) * Inode operation permission().
*
* inode->i_sem: don't care
*/
int
ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
{ {
int mode = inode->i_mode; int mode = inode->i_mode;
...@@ -270,30 +297,16 @@ __ext2_permission(struct inode *inode, int mask, int lock) ...@@ -270,30 +297,16 @@ __ext2_permission(struct inode *inode, int mask, int lock)
if (current->fsuid == inode->i_uid) { if (current->fsuid == inode->i_uid) {
mode >>= 6; mode >>= 6;
} else if (test_opt(inode->i_sb, POSIX_ACL)) { } else if (test_opt(inode->i_sb, POSIX_ACL)) {
struct ext2_inode_info *ei = EXT2_I(inode); struct posix_acl *acl;
/* The access ACL cannot grant access if the group class /* The access ACL cannot grant access if the group class
permission bits don't contain all requested permissions. */ permission bits don't contain all requested permissions. */
if (((mode >> 3) & mask & S_IRWXO) != mask) if (((mode >> 3) & mask & S_IRWXO) != mask)
goto check_groups; goto check_groups;
if (ei->i_acl == EXT2_ACL_NOT_CACHED) { acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
struct posix_acl *acl; if (acl) {
int error = posix_acl_permission(inode, acl, mask);
if (lock) {
down(&inode->i_sem);
acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
up(&inode->i_sem);
} else
acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
posix_acl_release(acl); posix_acl_release(acl);
if (ei->i_acl == EXT2_ACL_NOT_CACHED)
return -EIO;
}
if (ei->i_acl) {
int error = posix_acl_permission(inode, ei->i_acl,mask);
if (error == -EACCES) if (error == -EACCES)
goto check_capabilities; goto check_capabilities;
return error; return error;
...@@ -319,33 +332,11 @@ __ext2_permission(struct inode *inode, int mask, int lock) ...@@ -319,33 +332,11 @@ __ext2_permission(struct inode *inode, int mask, int lock)
return -EACCES; return -EACCES;
} }
/*
* Inode operation permission().
*
* inode->i_sem: up
* BKL held [before 2.5.x]
*/
int
ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
{
return __ext2_permission(inode, mask, 1);
}
/*
* Used internally if i_sem is already down.
*/
int
ext2_permission_locked(struct inode *inode, int mask)
{
return __ext2_permission(inode, mask, 0);
}
/* /*
* Initialize the ACLs of a new inode. Called from ext2_new_inode. * Initialize the ACLs of a new inode. Called from ext2_new_inode.
* *
* dir->i_sem: down * dir->i_sem: down
* inode->i_sem: up (access to inode is still exclusive) * inode->i_sem: up (access to inode is still exclusive)
* BKL held [before 2.5.x]
*/ */
int int
ext2_init_acl(struct inode *inode, struct inode *dir) ext2_init_acl(struct inode *inode, struct inode *dir)
...@@ -405,7 +396,6 @@ ext2_init_acl(struct inode *inode, struct inode *dir) ...@@ -405,7 +396,6 @@ ext2_init_acl(struct inode *inode, struct inode *dir)
* file mode. * file mode.
* *
* inode->i_sem: down * inode->i_sem: down
* BKL held [before 2.5.x]
*/ */
int int
ext2_acl_chmod(struct inode *inode) ext2_acl_chmod(struct inode *inode)
......
...@@ -60,7 +60,6 @@ static inline int ext2_acl_count(size_t size) ...@@ -60,7 +60,6 @@ static inline int ext2_acl_count(size_t size)
/* acl.c */ /* acl.c */
extern int ext2_permission (struct inode *, int, struct nameidata *); extern int ext2_permission (struct inode *, int, struct nameidata *);
extern int ext2_permission_locked (struct inode *, int);
extern int ext2_acl_chmod (struct inode *); extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *); extern int ext2_init_acl (struct inode *, struct inode *);
......
...@@ -41,6 +41,16 @@ struct ext2_inode_info { ...@@ -41,6 +41,16 @@ struct ext2_inode_info {
__u32 i_prealloc_block; __u32 i_prealloc_block;
__u32 i_prealloc_count; __u32 i_prealloc_count;
__u32 i_dir_start_lookup; __u32 i_dir_start_lookup;
#ifdef CONFIG_EXT2_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_sem even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
#endif
#ifdef CONFIG_EXT2_FS_POSIX_ACL #ifdef CONFIG_EXT2_FS_POSIX_ACL
struct posix_acl *i_acl; struct posix_acl *i_acl;
struct posix_acl *i_default_acl; struct posix_acl *i_default_acl;
......
...@@ -177,6 +177,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -177,6 +177,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) { SLAB_CTOR_CONSTRUCTOR) {
rwlock_init(&ei->i_meta_lock); rwlock_init(&ei->i_meta_lock);
#ifdef CONFIG_EXT2_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
} }
......
...@@ -42,13 +42,12 @@ ...@@ -42,13 +42,12 @@
* *
* Locking strategy * Locking strategy
* ---------------- * ----------------
* The VFS already holds the BKL and the inode->i_sem semaphore when any of * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
* the xattr inode operations are called, so we are guaranteed that only one * EA blocks are only changed if they are exclusive to an inode, so
* processes accesses extended attributes of an inode at any time. * holding xattr_sem also means that nothing but the EA block's reference
* * count will change. Multiple writers to an EA block are synchronized
* For writing we also grab the ext2_xattr_sem semaphore. This ensures that * by the bh lock. No more than a single bh lock is held at any time
* only a single process is modifying an extended attribute block, even * to avoid deadlocks.
* if the block is shared among inodes.
*/ */
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
...@@ -57,7 +56,7 @@ ...@@ -57,7 +56,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mbcache.h> #include <linux/mbcache.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <asm/semaphore.h> #include <linux/rwsem.h>
#include "ext2.h" #include "ext2.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
...@@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, ...@@ -105,15 +104,6 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *); struct ext2_xattr_entry *);
static struct mb_cache *ext2_xattr_cache; static struct mb_cache *ext2_xattr_cache;
/*
* If a file system does not share extended attributes among inodes,
* we should not need the ext2_xattr_sem semaphore. However, the
* filesystem may still contain shared blocks, so we always take
* the lock.
*/
static DECLARE_MUTEX(ext2_xattr_sem);
static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX];
static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED;
...@@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index) ...@@ -196,7 +186,7 @@ ext2_xattr_handler(int name_index)
/* /*
* Inode operation getxattr() * Inode operation getxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: don't care
*/ */
ssize_t ssize_t
ext2_getxattr(struct dentry *dentry, const char *name, ext2_getxattr(struct dentry *dentry, const char *name,
...@@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, const char *name, ...@@ -204,39 +194,28 @@ ext2_getxattr(struct dentry *dentry, const char *name,
{ {
struct ext2_xattr_handler *handler; struct ext2_xattr_handler *handler;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
ssize_t error;
handler = ext2_xattr_resolve_name(&name); handler = ext2_xattr_resolve_name(&name);
if (!handler) if (!handler)
return -EOPNOTSUPP; return -EOPNOTSUPP;
down(&inode->i_sem); return handler->get(inode, name, buffer, size);
error = handler->get(inode, name, buffer, size);
up(&inode->i_sem);
return error;
} }
/* /*
* Inode operation listxattr() * Inode operation listxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: don't care
*/ */
ssize_t ssize_t
ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
{ {
ssize_t error; return ext2_xattr_list(dentry->d_inode, buffer, size);
down(&dentry->d_inode->i_sem);
error = ext2_xattr_list(dentry->d_inode, buffer, size);
up(&dentry->d_inode->i_sem);
return error;
} }
/* /*
* Inode operation setxattr() * Inode operation setxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: down
*/ */
int int
ext2_setxattr(struct dentry *dentry, const char *name, ext2_setxattr(struct dentry *dentry, const char *name,
...@@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, const char *name, ...@@ -256,7 +235,7 @@ ext2_setxattr(struct dentry *dentry, const char *name,
/* /*
* Inode operation removexattr() * Inode operation removexattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: down
*/ */
int int
ext2_removexattr(struct dentry *dentry, const char *name) ext2_removexattr(struct dentry *dentry, const char *name)
...@@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, ...@@ -295,12 +274,15 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
if (name == NULL) if (name == NULL)
return -EINVAL; return -EINVAL;
down_read(&EXT2_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT2_I(inode)->i_file_acl) if (!EXT2_I(inode)->i_file_acl)
return -ENODATA; goto cleanup;
ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
error = -EIO;
if (!bh) if (!bh)
return -EIO; goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size; end = bh->b_data + bh->b_size;
...@@ -365,6 +347,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", ...@@ -365,6 +347,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
cleanup: cleanup:
brelse(bh); brelse(bh);
up_read(&EXT2_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ...@@ -391,12 +374,15 @@ ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
ea_idebug(inode, "buffer=%p, buffer_size=%ld", ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size); buffer, (long)buffer_size);
down_read(&EXT2_I(inode)->xattr_sem);
error = 0;
if (!EXT2_I(inode)->i_file_acl) if (!EXT2_I(inode)->i_file_acl)
return 0; goto cleanup;
ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl); ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
error = -EIO;
if (!bh) if (!bh)
return -EIO; goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size; end = bh->b_data + bh->b_size;
...@@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", ...@@ -449,6 +435,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
cleanup: cleanup:
brelse(bh); brelse(bh);
up_read(&EXT2_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, ...@@ -520,8 +507,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
name_len = strlen(name); name_len = strlen(name);
if (name_len > 255 || value_len > sb->s_blocksize) if (name_len > 255 || value_len > sb->s_blocksize)
return -ERANGE; return -ERANGE;
down(&ext2_xattr_sem); down_write(&EXT2_I(inode)->xattr_sem);
if (EXT2_I(inode)->i_file_acl) { if (EXT2_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */ /* The inode already has an extended attribute block. */
bh = sb_bread(sb, EXT2_I(inode)->i_file_acl); bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
...@@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_set", ...@@ -614,12 +600,16 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */ /* Here we know that we can set the new attribute. */
if (header) { if (header) {
/* assert(header == HDR(bh)); */
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) { if (header->h_refcount == cpu_to_le32(1)) {
ea_bdebug(bh, "modifying in-place"); ea_bdebug(bh, "modifying in-place");
ext2_xattr_cache_remove(bh); ext2_xattr_cache_remove(bh);
/* keep the buffer locked while modifying it. */
} else { } else {
int offset; int offset;
unlock_buffer(bh);
ea_bdebug(bh, "cloning"); ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL); header = kmalloc(bh->b_size, GFP_KERNEL);
error = -ENOMEM; error = -ENOMEM;
...@@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set", ...@@ -644,6 +634,8 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
last = here = ENTRY(header+1); last = here = ENTRY(header+1);
} }
/* Iff we are modifying the block in-place, bh is locked here. */
if (not_found) { if (not_found) {
/* Insert the new name. */ /* Insert the new name. */
size_t size = EXT2_XATTR_LEN(name_len); size_t size = EXT2_XATTR_LEN(name_len);
...@@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_set", ...@@ -714,9 +706,13 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
skip_replace: skip_replace:
if (IS_LAST_ENTRY(ENTRY(header+1))) { if (IS_LAST_ENTRY(ENTRY(header+1))) {
/* This block is now empty. */ /* This block is now empty. */
if (bh && header == HDR(bh))
unlock_buffer(bh); /* we were modifying in-place. */
error = ext2_xattr_set2(inode, bh, NULL); error = ext2_xattr_set2(inode, bh, NULL);
} else { } else {
ext2_xattr_rehash(header, here); ext2_xattr_rehash(header, here);
if (bh && header == HDR(bh))
unlock_buffer(bh); /* we were modifying in-place. */
error = ext2_xattr_set2(inode, bh, header); error = ext2_xattr_set2(inode, bh, header);
} }
...@@ -724,7 +720,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", ...@@ -724,7 +720,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
brelse(bh); brelse(bh);
if (!(bh && header == HDR(bh))) if (!(bh && header == HDR(bh)))
kfree(header); kfree(header);
up(&ext2_xattr_sem); up_write(&EXT2_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ...@@ -744,24 +740,28 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
new_bh = ext2_xattr_cache_find(inode, header); new_bh = ext2_xattr_cache_find(inode, header);
if (new_bh) { if (new_bh) {
/* /*
* We found an identical block in the cache. * We found an identical block in the cache. The
* The old block will be released after updating * block returned is locked. The old block will
* the inode. * be released after updating the inode.
*/ */
ea_bdebug(new_bh, "%s block %lu", ea_bdebug(new_bh, "%s block %lu",
(old_bh == new_bh) ? "keeping" : "reusing", (old_bh == new_bh) ? "keeping" : "reusing",
(unsigned long) new_bh->b_blocknr); (unsigned long) new_bh->b_blocknr);
error = -EDQUOT; error = -EDQUOT;
if (DQUOT_ALLOC_BLOCK(inode, 1)) if (DQUOT_ALLOC_BLOCK(inode, 1)) {
unlock_buffer(new_bh);
goto cleanup; goto cleanup;
}
HDR(new_bh)->h_refcount = cpu_to_le32( HDR(new_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(new_bh)->h_refcount) + 1); le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
ea_bdebug(new_bh, "refcount now=%d", ea_bdebug(new_bh, "refcount now=%d",
le32_to_cpu(HDR(new_bh)->h_refcount)); le32_to_cpu(HDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
} else if (old_bh && header == HDR(old_bh)) { } else if (old_bh && header == HDR(old_bh)) {
/* Keep this block. */ /* Keep this block. No need to lock the block as we
don't need to change the reference count. */
new_bh = old_bh; new_bh = old_bh;
get_bh(new_bh); get_bh(new_bh);
ext2_xattr_cache_insert(new_bh); ext2_xattr_cache_insert(new_bh);
...@@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ...@@ -812,12 +812,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0; error = 0;
if (old_bh && old_bh != new_bh) { if (old_bh && old_bh != new_bh) {
/* /*
* If there was an old block, and we are not still using it, * If there was an old block and we are no longer using it,
* we now release the old block. * release the old block.
*/ */
unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
if (refcount == 1) {
/* Free the old block. */ /* Free the old block. */
ea_bdebug(old_bh, "freeing"); ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1); ext2_free_blocks(inode, old_bh->b_blocknr, 1);
...@@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ...@@ -827,12 +826,14 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
bforget(old_bh); bforget(old_bh);
} else { } else {
/* Decrement the refcount only. */ /* Decrement the refcount only. */
refcount--; HDR(old_bh)->h_refcount = cpu_to_le32(
HDR(old_bh)->h_refcount = cpu_to_le32(refcount); le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
DQUOT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1);
mark_buffer_dirty(old_bh); mark_buffer_dirty(old_bh);
ea_bdebug(old_bh, "refcount now=%d", refcount); ea_bdebug(old_bh, "refcount now=%d",
le32_to_cpu(HDR(old_bh)->h_refcount));
} }
unlock_buffer(old_bh);
} }
cleanup: cleanup:
...@@ -850,12 +851,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ...@@ -850,12 +851,11 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
void void
ext2_xattr_delete_inode(struct inode *inode) ext2_xattr_delete_inode(struct inode *inode)
{ {
struct buffer_head *bh; struct buffer_head *bh = NULL;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl) if (!EXT2_I(inode)->i_file_acl)
return; goto cleanup;
down(&ext2_xattr_sem);
bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
if (!bh) { if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_delete_inode", ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
...@@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *inode) ...@@ -871,7 +871,7 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl); EXT2_I(inode)->i_file_acl);
goto cleanup; goto cleanup;
} }
ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) { if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
ext2_xattr_cache_remove(bh); ext2_xattr_cache_remove(bh);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
...@@ -885,11 +885,13 @@ ext2_xattr_delete_inode(struct inode *inode) ...@@ -885,11 +885,13 @@ ext2_xattr_delete_inode(struct inode *inode)
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
DQUOT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1);
} }
ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
unlock_buffer(bh);
EXT2_I(inode)->i_file_acl = 0; EXT2_I(inode)->i_file_acl = 0;
cleanup: cleanup:
brelse(bh); brelse(bh);
up(&ext2_xattr_sem); up_write(&EXT2_I(inode)->xattr_sem);
} }
/* /*
...@@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header *header1, ...@@ -982,8 +984,8 @@ ext2_xattr_cmp(struct ext2_xattr_header *header1,
* *
* Find an identical extended attribute block. * Find an identical extended attribute block.
* *
* Returns a pointer to the block found, or NULL if such a block was * Returns a locked buffer head to the block found, or NULL if such
* not found or an error occurred. * a block was not found or an error occurred.
*/ */
static struct buffer_head * static struct buffer_head *
ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
...@@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) ...@@ -1003,18 +1005,23 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
ext2_error(inode->i_sb, "ext2_xattr_cache_find", ext2_error(inode->i_sb, "ext2_xattr_cache_find",
"inode %ld: block %ld read error", "inode %ld: block %ld read error",
inode->i_ino, (unsigned long) ce->e_block); inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(HDR(bh)->h_refcount) > } else {
EXT2_XATTR_REFCOUNT_MAX) { lock_buffer(bh);
ea_idebug(inode, "block %ld refcount %d>%d", if (le32_to_cpu(HDR(bh)->h_refcount) >
(unsigned long) ce->e_block, EXT2_XATTR_REFCOUNT_MAX) {
le32_to_cpu(HDR(bh)->h_refcount), ea_idebug(inode, "block %ld refcount %d>%d",
EXT2_XATTR_REFCOUNT_MAX); (unsigned long) ce->e_block,
} else if (!ext2_xattr_cmp(header, HDR(bh))) { le32_to_cpu(HDR(bh)->h_refcount),
ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); EXT2_XATTR_REFCOUNT_MAX);
mb_cache_entry_release(ce); } else if (!ext2_xattr_cmp(header, HDR(bh))) {
return bh; ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
mb_cache_entry_release(ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
} }
brelse(bh);
ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
} }
return NULL; return NULL;
......
...@@ -11,10 +11,6 @@ ...@@ -11,10 +11,6 @@
#include "ext2.h" #include "ext2.h"
#include "xattr.h" #include "xattr.h"
#ifdef CONFIG_EXT2_FS_POSIX_ACL
# include "acl.h"
#endif
#define XATTR_USER_PREFIX "user." #define XATTR_USER_PREFIX "user."
static size_t static size_t
...@@ -44,11 +40,7 @@ ext2_xattr_user_get(struct inode *inode, const char *name, ...@@ -44,11 +40,7 @@ ext2_xattr_user_get(struct inode *inode, const char *name,
return -EINVAL; return -EINVAL;
if (!test_opt(inode->i_sb, XATTR_USER)) if (!test_opt(inode->i_sb, XATTR_USER))
return -EOPNOTSUPP; return -EOPNOTSUPP;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
error = ext2_permission_locked(inode, MAY_READ);
#else
error = permission(inode, MAY_READ, NULL); error = permission(inode, MAY_READ, NULL);
#endif
if (error) if (error)
return error; return error;
...@@ -68,11 +60,7 @@ ext2_xattr_user_set(struct inode *inode, const char *name, ...@@ -68,11 +60,7 @@ ext2_xattr_user_set(struct inode *inode, const char *name,
if ( !S_ISREG(inode->i_mode) && if ( !S_ISREG(inode->i_mode) &&
(!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
return -EPERM; return -EPERM;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
error = ext2_permission_locked(inode, MAY_WRITE);
#else
error = permission(inode, MAY_WRITE, NULL); error = permission(inode, MAY_WRITE, NULL);
#endif
if (error) if (error)
return error; return error;
......
...@@ -125,10 +125,34 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) ...@@ -125,10 +125,34 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
/*
 * Look up the ACL cached in the slot *i_acl (either the access or the
 * default ACL slot of the inode).
 *
 * Returns EXT3_ACL_NOT_CACHED if the slot holds no cached value yet;
 * otherwise returns posix_acl_dup() of the cached ACL.  The slot is read
 * and duplicated while holding inode->i_lock.
 */
static inline struct posix_acl *
ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
{
	struct posix_acl *cached;

	spin_lock(&inode->i_lock);
	cached = *i_acl;
	if (cached != EXT3_ACL_NOT_CACHED)
		cached = posix_acl_dup(cached);
	spin_unlock(&inode->i_lock);

	return cached;
}
/*
 * Store posix_acl_dup(acl) in the cache slot *i_acl.
 *
 * If the slot already holds a cached ACL (anything other than
 * EXT3_ACL_NOT_CACHED), that value is handed to posix_acl_release()
 * before being overwritten.  The whole update runs under inode->i_lock.
 */
static inline void
ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
	      struct posix_acl *acl)
{
	struct posix_acl *previous;

	spin_lock(&inode->i_lock);
	previous = *i_acl;
	if (previous != EXT3_ACL_NOT_CACHED)
		posix_acl_release(previous);
	*i_acl = posix_acl_dup(acl);
	spin_unlock(&inode->i_lock);
}
/* /*
* Inode operation get_posix_acl(). * Inode operation get_posix_acl().
* *
* inode->i_sem: down * inode->i_sem: don't care
*/ */
static struct posix_acl * static struct posix_acl *
ext3_get_acl(struct inode *inode, int type) ext3_get_acl(struct inode *inode, int type)
...@@ -145,14 +169,16 @@ ext3_get_acl(struct inode *inode, int type) ...@@ -145,14 +169,16 @@ ext3_get_acl(struct inode *inode, int type)
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
if (ei->i_acl != EXT3_ACL_NOT_CACHED) acl = ext3_iget_acl(inode, &ei->i_acl);
return posix_acl_dup(ei->i_acl); if (acl != EXT3_ACL_NOT_CACHED)
return acl;
name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) acl = ext3_iget_acl(inode, &ei->i_default_acl);
return posix_acl_dup(ei->i_default_acl); if (acl != EXT3_ACL_NOT_CACHED)
return acl;
name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
break; break;
...@@ -174,11 +200,11 @@ ext3_get_acl(struct inode *inode, int type) ...@@ -174,11 +200,11 @@ ext3_get_acl(struct inode *inode, int type)
if (!IS_ERR(acl)) { if (!IS_ERR(acl)) {
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
ei->i_acl = posix_acl_dup(acl); ext3_iset_acl(inode, &ei->i_acl, acl);
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
ei->i_default_acl = posix_acl_dup(acl); ext3_iset_acl(inode, &ei->i_default_acl, acl);
break; break;
} }
} }
...@@ -245,23 +271,24 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, ...@@ -245,23 +271,24 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
if (!error) { if (!error) {
switch(type) { switch(type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
if (ei->i_acl != EXT3_ACL_NOT_CACHED) ext3_iset_acl(inode, &ei->i_acl, acl);
posix_acl_release(ei->i_acl);
ei->i_acl = posix_acl_dup(acl);
break; break;
case ACL_TYPE_DEFAULT: case ACL_TYPE_DEFAULT:
if (ei->i_default_acl != EXT3_ACL_NOT_CACHED) ext3_iset_acl(inode, &ei->i_default_acl, acl);
posix_acl_release(ei->i_default_acl);
ei->i_default_acl = posix_acl_dup(acl);
break; break;
} }
} }
return error; return error;
} }
static int /*
__ext3_permission(struct inode *inode, int mask, int lock) * Inode operation permission().
*
* inode->i_sem: don't care
*/
int
ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
{ {
int mode = inode->i_mode; int mode = inode->i_mode;
...@@ -275,30 +302,16 @@ __ext3_permission(struct inode *inode, int mask, int lock) ...@@ -275,30 +302,16 @@ __ext3_permission(struct inode *inode, int mask, int lock)
if (current->fsuid == inode->i_uid) { if (current->fsuid == inode->i_uid) {
mode >>= 6; mode >>= 6;
} else if (test_opt(inode->i_sb, POSIX_ACL)) { } else if (test_opt(inode->i_sb, POSIX_ACL)) {
struct ext3_inode_info *ei = EXT3_I(inode); struct posix_acl *acl;
/* The access ACL cannot grant access if the group class /* The access ACL cannot grant access if the group class
permission bits don't contain all requested permissions. */ permission bits don't contain all requested permissions. */
if (((mode >> 3) & mask & S_IRWXO) != mask) if (((mode >> 3) & mask & S_IRWXO) != mask)
goto check_groups; goto check_groups;
if (ei->i_acl == EXT3_ACL_NOT_CACHED) { acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
struct posix_acl *acl; if (acl) {
int error = posix_acl_permission(inode, acl, mask);
if (lock) {
down(&inode->i_sem);
acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
up(&inode->i_sem);
} else
acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
posix_acl_release(acl); posix_acl_release(acl);
if (ei->i_acl == EXT3_ACL_NOT_CACHED)
return -EIO;
}
if (ei->i_acl) {
int error = posix_acl_permission(inode, ei->i_acl,mask);
if (error == -EACCES) if (error == -EACCES)
goto check_capabilities; goto check_capabilities;
return error; return error;
...@@ -324,26 +337,6 @@ __ext3_permission(struct inode *inode, int mask, int lock) ...@@ -324,26 +337,6 @@ __ext3_permission(struct inode *inode, int mask, int lock)
return -EACCES; return -EACCES;
} }
/*
* Inode operation permission().
*
* inode->i_sem: up
*/
int
ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
{
return __ext3_permission(inode, mask, 1);
}
/*
* Used internally if i_sem is already down.
*/
int
ext3_permission_locked(struct inode *inode, int mask)
{
return __ext3_permission(inode, mask, 0);
}
/* /*
* Initialize the ACLs of a new inode. Called from ext3_new_inode. * Initialize the ACLs of a new inode. Called from ext3_new_inode.
* *
......
...@@ -60,7 +60,6 @@ static inline int ext3_acl_count(size_t size) ...@@ -60,7 +60,6 @@ static inline int ext3_acl_count(size_t size)
/* acl.c */ /* acl.c */
extern int ext3_permission (struct inode *, int, struct nameidata *); extern int ext3_permission (struct inode *, int, struct nameidata *);
extern int ext3_permission_locked (struct inode *, int);
extern int ext3_acl_chmod (struct inode *); extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
......
...@@ -519,6 +519,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -519,6 +519,9 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) { SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->i_orphan);
#ifdef CONFIG_EXT3_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
init_rwsem(&ei->truncate_sem); init_rwsem(&ei->truncate_sem);
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
......
...@@ -43,13 +43,12 @@ ...@@ -43,13 +43,12 @@
* *
* Locking strategy * Locking strategy
* ---------------- * ----------------
* The VFS holdsinode->i_sem semaphore when any of the xattr inode * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
* operations are called, so we are guaranteed that only one * EA blocks are only changed if they are exclusive to an inode, so
* processes accesses extended attributes of an inode at any time. * holding xattr_sem also means that nothing but the EA block's reference
* * count will change. Multiple writers to an EA block are synchronized
* For writing we also grab the ext3_xattr_sem semaphore. This ensures that * by the bh lock. No more than a single bh lock is held at any time
* only a single process is modifying an extended attribute block, even * to avoid deadlocks.
* if the block is shared among inodes.
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -59,7 +58,7 @@ ...@@ -59,7 +58,7 @@
#include <linux/ext3_fs.h> #include <linux/ext3_fs.h>
#include <linux/mbcache.h> #include <linux/mbcache.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <asm/semaphore.h> #include <linux/rwsem.h>
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
...@@ -93,22 +92,14 @@ static int ext3_xattr_set_handle2(handle_t *, struct inode *, ...@@ -93,22 +92,14 @@ static int ext3_xattr_set_handle2(handle_t *, struct inode *,
struct ext3_xattr_header *); struct ext3_xattr_header *);
static int ext3_xattr_cache_insert(struct buffer_head *); static int ext3_xattr_cache_insert(struct buffer_head *);
static struct buffer_head *ext3_xattr_cache_find(struct inode *, static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
struct ext3_xattr_header *); struct ext3_xattr_header *,
int *);
static void ext3_xattr_cache_remove(struct buffer_head *); static void ext3_xattr_cache_remove(struct buffer_head *);
static void ext3_xattr_rehash(struct ext3_xattr_header *, static void ext3_xattr_rehash(struct ext3_xattr_header *,
struct ext3_xattr_entry *); struct ext3_xattr_entry *);
static struct mb_cache *ext3_xattr_cache; static struct mb_cache *ext3_xattr_cache;
/*
* If a file system does not share extended attributes among inodes,
* we should not need the ext3_xattr_sem semaphore. However, the
* filesystem may still contain shared blocks, so we always take
* the lock.
*/
static DECLARE_MUTEX(ext3_xattr_sem);
static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
...@@ -191,7 +182,7 @@ ext3_xattr_handler(int name_index) ...@@ -191,7 +182,7 @@ ext3_xattr_handler(int name_index)
/* /*
* Inode operation getxattr() * Inode operation getxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: don't care
*/ */
ssize_t ssize_t
ext3_getxattr(struct dentry *dentry, const char *name, ext3_getxattr(struct dentry *dentry, const char *name,
...@@ -199,39 +190,28 @@ ext3_getxattr(struct dentry *dentry, const char *name, ...@@ -199,39 +190,28 @@ ext3_getxattr(struct dentry *dentry, const char *name,
{ {
struct ext3_xattr_handler *handler; struct ext3_xattr_handler *handler;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
ssize_t error;
handler = ext3_xattr_resolve_name(&name); handler = ext3_xattr_resolve_name(&name);
if (!handler) if (!handler)
return -EOPNOTSUPP; return -EOPNOTSUPP;
down(&inode->i_sem); return handler->get(inode, name, buffer, size);
error = handler->get(inode, name, buffer, size);
up(&inode->i_sem);
return error;
} }
/* /*
* Inode operation listxattr() * Inode operation listxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: don't care
*/ */
ssize_t ssize_t
ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
{ {
ssize_t error; return ext3_xattr_list(dentry->d_inode, buffer, size);
down(&dentry->d_inode->i_sem);
error = ext3_xattr_list(dentry->d_inode, buffer, size);
up(&dentry->d_inode->i_sem);
return error;
} }
/* /*
* Inode operation setxattr() * Inode operation setxattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: down
*/ */
int int
ext3_setxattr(struct dentry *dentry, const char *name, ext3_setxattr(struct dentry *dentry, const char *name,
...@@ -251,7 +231,7 @@ ext3_setxattr(struct dentry *dentry, const char *name, ...@@ -251,7 +231,7 @@ ext3_setxattr(struct dentry *dentry, const char *name,
/* /*
* Inode operation removexattr() * Inode operation removexattr()
* *
* dentry->d_inode->i_sem down * dentry->d_inode->i_sem: down
*/ */
int int
ext3_removexattr(struct dentry *dentry, const char *name) ext3_removexattr(struct dentry *dentry, const char *name)
...@@ -290,12 +270,15 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name, ...@@ -290,12 +270,15 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
if (name == NULL) if (name == NULL)
return -EINVAL; return -EINVAL;
down_read(&EXT3_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT3_I(inode)->i_file_acl) if (!EXT3_I(inode)->i_file_acl)
return -ENODATA; goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
if (!bh) if (!bh)
return -EIO; goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size; end = bh->b_data + bh->b_size;
...@@ -360,6 +343,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", ...@@ -360,6 +343,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_get",
cleanup: cleanup:
brelse(bh); brelse(bh);
up_read(&EXT3_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -386,12 +370,15 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ...@@ -386,12 +370,15 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
ea_idebug(inode, "buffer=%p, buffer_size=%ld", ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size); buffer, (long)buffer_size);
down_read(&EXT3_I(inode)->xattr_sem);
error = 0;
if (!EXT3_I(inode)->i_file_acl) if (!EXT3_I(inode)->i_file_acl)
return 0; goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl); ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
if (!bh) if (!bh)
return -EIO; goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size; end = bh->b_data + bh->b_size;
...@@ -444,6 +431,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", ...@@ -444,6 +431,7 @@ bad_block: ext3_error(inode->i_sb, "ext3_xattr_list",
cleanup: cleanup:
brelse(bh); brelse(bh);
up_read(&EXT3_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -459,11 +447,12 @@ static void ext3_xattr_update_super_block(handle_t *handle, ...@@ -459,11 +447,12 @@ static void ext3_xattr_update_super_block(handle_t *handle,
return; return;
lock_super(sb); lock_super(sb);
ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
EXT3_SB(sb)->s_es->s_feature_compat |= EXT3_SB(sb)->s_es->s_feature_compat |=
cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
sb->s_dirt = 1; sb->s_dirt = 1;
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
}
unlock_super(sb); unlock_super(sb);
} }
...@@ -518,8 +507,7 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ...@@ -518,8 +507,7 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
name_len = strlen(name); name_len = strlen(name);
if (name_len > 255 || value_len > sb->s_blocksize) if (name_len > 255 || value_len > sb->s_blocksize)
return -ERANGE; return -ERANGE;
down(&ext3_xattr_sem); down_write(&EXT3_I(inode)->xattr_sem);
if (EXT3_I(inode)->i_file_acl) { if (EXT3_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */ /* The inode already has an extended attribute block. */
bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
...@@ -612,15 +600,28 @@ bad_block: ext3_error(sb, "ext3_xattr_set", ...@@ -612,15 +600,28 @@ bad_block: ext3_error(sb, "ext3_xattr_set",
/* Here we know that we can set the new attribute. */ /* Here we know that we can set the new attribute. */
if (header) { if (header) {
int credits = 0;
/* assert(header == HDR(bh)); */
if (header->h_refcount != cpu_to_le32(1))
goto skip_get_write_access;
/* ext3_journal_get_write_access() requires an unlocked bh,
which complicates things here. */
error = ext3_journal_get_write_access_credits(handle, bh,
&credits);
if (error)
goto cleanup;
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) { if (header->h_refcount == cpu_to_le32(1)) {
ea_bdebug(bh, "modifying in-place"); ea_bdebug(bh, "modifying in-place");
ext3_xattr_cache_remove(bh); ext3_xattr_cache_remove(bh);
error = ext3_journal_get_write_access(handle, bh); /* keep the buffer locked while modifying it. */
if (error)
goto cleanup;
} else { } else {
int offset; int offset;
unlock_buffer(bh);
journal_release_buffer(handle, bh, credits);
skip_get_write_access:
ea_bdebug(bh, "cloning"); ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL); header = kmalloc(bh->b_size, GFP_KERNEL);
error = -ENOMEM; error = -ENOMEM;
...@@ -645,6 +646,8 @@ bad_block: ext3_error(sb, "ext3_xattr_set", ...@@ -645,6 +646,8 @@ bad_block: ext3_error(sb, "ext3_xattr_set",
last = here = ENTRY(header+1); last = here = ENTRY(header+1);
} }
/* Iff we are modifying the block in-place, bh is locked here. */
if (not_found) { if (not_found) {
/* Insert the new name. */ /* Insert the new name. */
size_t size = EXT3_XATTR_LEN(name_len); size_t size = EXT3_XATTR_LEN(name_len);
...@@ -715,9 +718,13 @@ bad_block: ext3_error(sb, "ext3_xattr_set", ...@@ -715,9 +718,13 @@ bad_block: ext3_error(sb, "ext3_xattr_set",
skip_replace: skip_replace:
if (IS_LAST_ENTRY(ENTRY(header+1))) { if (IS_LAST_ENTRY(ENTRY(header+1))) {
/* This block is now empty. */ /* This block is now empty. */
if (bh && header == HDR(bh))
unlock_buffer(bh); /* we were modifying in-place. */
error = ext3_xattr_set_handle2(handle, inode, bh, NULL); error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
} else { } else {
ext3_xattr_rehash(header, here); ext3_xattr_rehash(header, here);
if (bh && header == HDR(bh))
unlock_buffer(bh); /* we were modifying in-place. */
error = ext3_xattr_set_handle2(handle, inode, bh, header); error = ext3_xattr_set_handle2(handle, inode, bh, header);
} }
...@@ -725,7 +732,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set", ...@@ -725,7 +732,7 @@ bad_block: ext3_error(sb, "ext3_xattr_set",
brelse(bh); brelse(bh);
if (!(bh && header == HDR(bh))) if (!(bh && header == HDR(bh)))
kfree(header); kfree(header);
up(&ext3_xattr_sem); up_write(&EXT3_I(inode)->xattr_sem);
return error; return error;
} }
...@@ -740,33 +747,34 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, ...@@ -740,33 +747,34 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL; struct buffer_head *new_bh = NULL;
int error; int credits = 0, error;
if (header) { if (header) {
new_bh = ext3_xattr_cache_find(inode, header); new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
if (new_bh) { if (new_bh) {
/* /*
* We found an identical block in the cache. * We found an identical block in the cache. The
* The old block will be released after updating * block returned is locked. The old block will
* the inode. * be released after updating the inode.
*/ */
ea_bdebug(new_bh, "%s block %lu", ea_bdebug(new_bh, "%s block %lu",
(old_bh == new_bh) ? "keeping" : "reusing", (old_bh == new_bh) ? "keeping" : "reusing",
(unsigned long) new_bh->b_blocknr); (unsigned long) new_bh->b_blocknr);
error = -EDQUOT; error = -EDQUOT;
if (DQUOT_ALLOC_BLOCK(inode, 1)) if (DQUOT_ALLOC_BLOCK(inode, 1)) {
goto cleanup; unlock_buffer(new_bh);
journal_release_buffer(handle, new_bh, credits);
error = ext3_journal_get_write_access(handle, new_bh);
if (error)
goto cleanup; goto cleanup;
}
HDR(new_bh)->h_refcount = cpu_to_le32( HDR(new_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(new_bh)->h_refcount) + 1); le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
ea_bdebug(new_bh, "refcount now=%d", ea_bdebug(new_bh, "refcount now=%d",
le32_to_cpu(HDR(new_bh)->h_refcount)); le32_to_cpu(HDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
} else if (old_bh && header == HDR(old_bh)) { } else if (old_bh && header == HDR(old_bh)) {
/* Keep this block. */ /* Keep this block. No need to lock the block as we
* don't need to change the reference count. */
new_bh = old_bh; new_bh = old_bh;
get_bh(new_bh); get_bh(new_bh);
ext3_xattr_cache_insert(new_bh); ext3_xattr_cache_insert(new_bh);
...@@ -817,15 +825,14 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, ...@@ -817,15 +825,14 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
error = 0; error = 0;
if (old_bh && old_bh != new_bh) { if (old_bh && old_bh != new_bh) {
/* /*
* If there was an old block, and we are not still using it, * If there was an old block, and we are no longer using it,
* we now release the old block. * release the old block.
*/ */
unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
error = ext3_journal_get_write_access(handle, old_bh); error = ext3_journal_get_write_access(handle, old_bh);
if (error) if (error)
goto cleanup; goto cleanup;
if (refcount == 1) { lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */ /* Free the old block. */
ea_bdebug(old_bh, "freeing"); ea_bdebug(old_bh, "freeing");
ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
...@@ -837,12 +844,14 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode, ...@@ -837,12 +844,14 @@ ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
} else { } else {
/* Decrement the refcount only. */ /* Decrement the refcount only. */
refcount--; HDR(old_bh)->h_refcount = cpu_to_le32(
HDR(old_bh)->h_refcount = cpu_to_le32(refcount); le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
DQUOT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1);
ext3_journal_dirty_metadata(handle, old_bh); ext3_journal_dirty_metadata(handle, old_bh);
ea_bdebug(old_bh, "refcount now=%d", refcount); ea_bdebug(old_bh, "refcount now=%d",
le32_to_cpu(HDR(old_bh)->h_refcount));
} }
unlock_buffer(old_bh);
} }
cleanup: cleanup:
...@@ -886,12 +895,11 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name, ...@@ -886,12 +895,11 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
void void
ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
{ {
struct buffer_head *bh; struct buffer_head *bh = NULL;
down_write(&EXT3_I(inode)->xattr_sem);
if (!EXT3_I(inode)->i_file_acl) if (!EXT3_I(inode)->i_file_acl)
return; goto cleanup;
down(&ext3_xattr_sem);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh) { if (!bh) {
ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
...@@ -899,7 +907,6 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ...@@ -899,7 +907,6 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
EXT3_I(inode)->i_file_acl); EXT3_I(inode)->i_file_acl);
goto cleanup; goto cleanup;
} }
ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
HDR(bh)->h_blocks != cpu_to_le32(1)) { HDR(bh)->h_blocks != cpu_to_le32(1)) {
ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
...@@ -907,8 +914,9 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ...@@ -907,8 +914,9 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
EXT3_I(inode)->i_file_acl); EXT3_I(inode)->i_file_acl);
goto cleanup; goto cleanup;
} }
ext3_journal_get_write_access(handle, bh); if (ext3_journal_get_write_access(handle, bh) != 0)
ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); goto cleanup;
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) { if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
ext3_xattr_cache_remove(bh); ext3_xattr_cache_remove(bh);
ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
...@@ -922,11 +930,13 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ...@@ -922,11 +930,13 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
handle->h_sync = 1; handle->h_sync = 1;
DQUOT_FREE_BLOCK(inode, 1); DQUOT_FREE_BLOCK(inode, 1);
} }
ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
unlock_buffer(bh);
EXT3_I(inode)->i_file_acl = 0; EXT3_I(inode)->i_file_acl = 0;
cleanup: cleanup:
brelse(bh); brelse(bh);
up(&ext3_xattr_sem); up_write(&EXT3_I(inode)->xattr_sem);
} }
/* /*
...@@ -1022,7 +1032,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, ...@@ -1022,7 +1032,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
* not found or an error occurred. * not found or an error occurred.
*/ */
static struct buffer_head * static struct buffer_head *
ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
struct ext3_xattr_header *header, int *credits)
{ {
__u32 hash = le32_to_cpu(header->h_hash); __u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce; struct mb_cache_entry *ce;
...@@ -1039,18 +1050,28 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ...@@ -1039,18 +1050,28 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header)
ext3_error(inode->i_sb, "ext3_xattr_cache_find", ext3_error(inode->i_sb, "ext3_xattr_cache_find",
"inode %ld: block %ld read error", "inode %ld: block %ld read error",
inode->i_ino, (unsigned long) ce->e_block); inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(HDR(bh)->h_refcount) > } else {
EXT3_XATTR_REFCOUNT_MAX) { /* ext3_journal_get_write_access() requires an unlocked
ea_idebug(inode, "block %ld refcount %d>%d", * bh, which complicates things here. */
(unsigned long) ce->e_block, if (ext3_journal_get_write_access_credits(handle, bh,
le32_to_cpu(HDR(bh)->h_refcount), credits) != 0)
EXT3_XATTR_REFCOUNT_MAX); return NULL;
} else if (!ext3_xattr_cmp(header, HDR(bh))) { lock_buffer(bh);
ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); if (le32_to_cpu(HDR(bh)->h_refcount) >
mb_cache_entry_release(ce); EXT3_XATTR_REFCOUNT_MAX) {
return bh; ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_block,
le32_to_cpu(HDR(bh)->h_refcount),
EXT3_XATTR_REFCOUNT_MAX);
} else if (!ext3_xattr_cmp(header, HDR(bh))) {
mb_cache_entry_release(ce);
/* buffer will be unlocked by caller */
return bh;
}
unlock_buffer(bh);
journal_release_buffer(handle, bh, *credits);
brelse(bh);
} }
brelse(bh);
ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
} }
return NULL; return NULL;
......
...@@ -13,10 +13,6 @@ ...@@ -13,10 +13,6 @@
#include <linux/ext3_fs.h> #include <linux/ext3_fs.h>
#include "xattr.h" #include "xattr.h"
#ifdef CONFIG_EXT3_FS_POSIX_ACL
# include "acl.h"
#endif
#define XATTR_USER_PREFIX "user." #define XATTR_USER_PREFIX "user."
static size_t static size_t
...@@ -46,11 +42,7 @@ ext3_xattr_user_get(struct inode *inode, const char *name, ...@@ -46,11 +42,7 @@ ext3_xattr_user_get(struct inode *inode, const char *name,
return -EINVAL; return -EINVAL;
if (!test_opt(inode->i_sb, XATTR_USER)) if (!test_opt(inode->i_sb, XATTR_USER))
return -EOPNOTSUPP; return -EOPNOTSUPP;
#ifdef CONFIG_EXT3_FS_POSIX_ACL
error = ext3_permission_locked(inode, MAY_READ);
#else
error = permission(inode, MAY_READ, NULL); error = permission(inode, MAY_READ, NULL);
#endif
if (error) if (error)
return error; return error;
...@@ -70,11 +62,7 @@ ext3_xattr_user_set(struct inode *inode, const char *name, ...@@ -70,11 +62,7 @@ ext3_xattr_user_set(struct inode *inode, const char *name,
if ( !S_ISREG(inode->i_mode) && if ( !S_ISREG(inode->i_mode) &&
(!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
return -EPERM; return -EPERM;
#ifdef CONFIG_EXT3_FS_POSIX_ACL
error = ext3_permission_locked(inode, MAY_WRITE);
#else
error = permission(inode, MAY_WRITE, NULL); error = permission(inode, MAY_WRITE, NULL);
#endif
if (error) if (error)
return error; return error;
......
...@@ -62,6 +62,16 @@ struct ext3_inode_info { ...@@ -62,6 +62,16 @@ struct ext3_inode_info {
__u32 i_prealloc_count; __u32 i_prealloc_count;
#endif #endif
__u32 i_dir_start_lookup; __u32 i_dir_start_lookup;
#ifdef CONFIG_EXT3_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_sem even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL #ifdef CONFIG_EXT3_FS_POSIX_ACL
struct posix_acl *i_acl; struct posix_acl *i_acl;
struct posix_acl *i_default_acl; struct posix_acl *i_default_acl;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment