Commit b0de9c76 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] quota locking update

I've been carrying this since Jan sent it out a month or two ago.
I don't know if anyone has tested it though.  The sort of people
who use quotas tend to like nice stable kernels.  I read through it,
but can't say that I know enough about quotas to know if it makes sense.
The wait_on_dquot() synchronisation is a bit odd.

I do need to do a round of stability testing with this and ext3 - the
interaction between quotas and ext3 is an area where we've had deadlocks
in the past.

But the quota locking is definitely looking crufty, and I'd suggest that
we run with this..


Patch from Jan Kara <jack@suse.cz>

"I'm resending you the patch with new quota SMP locking.  The patch removes
 BKL and replaces it with two spinlocks protecting quota lists and data
 stored in dquot structures.  Also non-SMP locking was changed a bit to make SMP
 locking easier (e.g.  we got rid of the not very nice dq_dup_ref counters).  The
 patch is against 2.5.48 but applies well also to 2.5.49.  Would you please
 apply the patch?"


- Change dqoff_sem from a semaphore to an rwsem.

- Convert dqi_flags from an int to a ulong and use test_bit/set_bit rather
  than &/|

- The various exported quota operations now run without lock_kernel().
  This means that things like DQUOT_ALLOC_SPACE no longer take lock_kernel()
  in our high-performance filesystems.  Nice.

- Replace lock_kernel() in the quota code with two quota-private global
  locks.

- Replace all the open-coded waitqueue management with a semaphore
  (wait_on_dquot())
parent f5254cb3
This diff is collapsed.
......@@ -1133,9 +1133,8 @@ void remove_dquot_ref(struct super_block *sb, int type)
if (!sb->dq_op)
return; /* nothing to do */
/* We have to be protected against other CPUs */
lock_kernel(); /* This lock is for quota code */
spin_lock(&inode_lock); /* This lock is for inodes code */
/* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... */
list_for_each(act_head, &inode_in_use) {
inode = list_entry(act_head, struct inode, i_list);
......@@ -1158,7 +1157,6 @@ void remove_dquot_ref(struct super_block *sb, int type)
remove_inode_dquot_ref(inode, type, &tofree_head);
}
spin_unlock(&inode_lock);
unlock_kernel();
put_dquot_list(&tofree_head);
}
......
......@@ -84,6 +84,7 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
case Q_SETINFO:
case Q_SETQUOTA:
case Q_GETQUOTA:
/* This is just informative test so we are satisfied without a lock */
if (!sb_has_quota_enabled(sb, type))
return -ESRCH;
}
......@@ -151,7 +152,13 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, cadd
case Q_GETFMT: {
__u32 fmt;
down_read(&sb_dqopt(sb)->dqoff_sem);
if (!sb_has_quota_enabled(sb, type)) {
up_read(&sb_dqopt(sb)->dqoff_sem);
return -ESRCH;
}
fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
up_read(&sb_dqopt(sb)->dqoff_sem);
if (copy_to_user(addr, &fmt, sizeof(fmt)))
return -EFAULT;
return 0;
......@@ -244,7 +251,6 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca
struct super_block *sb = NULL;
int ret = -EINVAL;
lock_kernel();
cmds = cmd >> SUBCMDSHIFT;
type = cmd & SUBCMDMASK;
......@@ -259,6 +265,5 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char *special, qid_t id, ca
out:
if (sb)
drop_super(sb);
unlock_kernel();
return ret;
}
......@@ -71,7 +71,7 @@ static struct super_block *alloc_super(void)
atomic_set(&s->s_active, 1);
sema_init(&s->s_vfs_rename_sem,1);
sema_init(&s->s_dquot.dqio_sem, 1);
sema_init(&s->s_dquot.dqoff_sem, 1);
init_rwsem(&s->s_dquot.dqoff_sem);
s->s_maxbytes = MAX_NON_LFS;
s->dq_op = sb_dquot_ops;
s->s_qcop = sb_quotactl_ops;
......
......@@ -37,6 +37,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#define __DQUOT_VERSION__ "dquot_6.5.1"
#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
......@@ -44,6 +45,9 @@
typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
typedef __u64 qsize_t; /* Type in which we store sizes */
extern spinlock_t dq_list_lock;
extern spinlock_t dq_data_lock;
/* Size of blocks in which are counted size limits */
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
......@@ -155,7 +159,7 @@ struct quota_format_type;
struct mem_dqinfo {
struct quota_format_type *dqi_format;
int dqi_flags;
unsigned long dqi_flags;
unsigned int dqi_bgrace;
unsigned int dqi_igrace;
union {
......@@ -165,18 +169,19 @@ struct mem_dqinfo {
};
#define DQF_MASK 0xffff /* Mask for format specific flags */
#define DQF_INFO_DIRTY 0x10000 /* Is info dirty? */
#define DQF_ANY_DQUOT_DIRTY 0x20000 /* Is any dquot dirty? */
/*
 * Bit numbers (the *_B names) and the corresponding masks for the generic
 * flags kept in mem_dqinfo.dqi_flags.  The bit numbers are what
 * set_bit()/test_bit() take; the masks keep the values 0x10000 and 0x20000,
 * above the low 16 bits covered by DQF_MASK.
 */
#define DQF_INFO_DIRTY_B 16
#define DQF_ANY_DQUOT_DIRTY_B 17
#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
#define DQF_ANY_DQUOT_DIRTY (1 << DQF_ANY_DQUOT_DIRTY_B) /* Is any dquot dirty? */
extern inline void mark_info_dirty(struct mem_dqinfo *info)
{
info->dqi_flags |= DQF_INFO_DIRTY;
set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
#define info_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY)
#define info_any_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY ||\
(info)->dqi_flags & DQF_ANY_DQUOT_DIRTY)
#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
#define info_any_dquot_dirty(info) test_bit(DQF_ANY_DQUOT_DIRTY_B, &(info)->dqi_flags)
#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
#define sb_dqopt(sb) (&(sb)->s_dquot)
......@@ -195,30 +200,29 @@ extern struct dqstats dqstats;
#define NR_DQHASH 43 /* Just an arbitrary number */
#define DQ_LOCKED 0x01 /* dquot under IO */
#define DQ_MOD 0x02 /* dquot modified since read */
#define DQ_BLKS 0x10 /* uid/gid has been warned about blk limit */
#define DQ_INODES 0x20 /* uid/gid has been warned about inode limit */
#define DQ_FAKE 0x40 /* no limits only usage */
#define DQ_INVAL 0x80 /* dquot is going to be invalidated */
#define DQ_MOD_B 0
#define DQ_BLKS_B 1
#define DQ_INODES_B 2
#define DQ_FAKE_B 3
#define DQ_MOD (1 << DQ_MOD_B) /* dquot modified since read */
#define DQ_BLKS (1 << DQ_BLKS_B) /* uid/gid has been warned about blk limit */
#define DQ_INODES (1 << DQ_INODES_B) /* uid/gid has been warned about inode limit */
#define DQ_FAKE (1 << DQ_FAKE_B) /* no limits only usage */
struct dquot {
struct list_head dq_hash; /* Hash list in memory */
struct list_head dq_inuse; /* List of all quotas */
struct list_head dq_free; /* Free list element */
wait_queue_head_t dq_wait_lock; /* Pointer to waitqueue on dquot lock */
wait_queue_head_t dq_wait_free; /* Pointer to waitqueue for quota to be unused */
int dq_count; /* Use count */
int dq_dup_ref; /* Number of duplicated refences */
struct semaphore dq_lock; /* dquot IO lock */
atomic_t dq_count; /* Use count */
/* fields after this point are cleared when invalidating */
struct super_block *dq_sb; /* superblock this applies to */
unsigned int dq_id; /* ID this applies to (uid, gid) */
loff_t dq_off; /* Offset of dquot on disk */
unsigned long dq_flags; /* See DQ_* */
short dq_type; /* Type of quota */
short dq_flags; /* See DQ_* */
unsigned long dq_referenced; /* Number of times this dquot was
referenced during its lifetime */
struct mem_dqblk dq_dqb; /* Diskquota usage */
};
......@@ -276,7 +280,7 @@ struct quota_format_type {
struct quota_info {
unsigned int flags; /* Flags for diskquotas on this device */
struct semaphore dqio_sem; /* lock device while I/O in progress */
struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */
struct rw_semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device and ops using quota_info struct, pointers from inode to dquots */
struct file *files[MAXQUOTAS]; /* fp's to quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
......@@ -284,26 +288,17 @@ struct quota_info {
/* Inline would be better but we need to dereference super_block which is not defined yet */
#define mark_dquot_dirty(dquot) do {\
dquot->dq_flags |= DQ_MOD;\
sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_flags |= DQF_ANY_DQUOT_DIRTY;\
set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)->info[(dquot)->dq_type].dqi_flags));\
set_bit(DQ_MOD_B, &(dquot)->dq_flags);\
} while (0)
#define dquot_dirty(dquot) ((dquot)->dq_flags & DQ_MOD)
static inline int is_enabled(struct quota_info *dqopt, int type)
{
switch (type) {
case USRQUOTA:
return dqopt->flags & DQUOT_USR_ENABLED;
case GRPQUOTA:
return dqopt->flags & DQUOT_GRP_ENABLED;
}
return 0;
}
#define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags)
#define sb_any_quota_enabled(sb) (is_enabled(sb_dqopt(sb), USRQUOTA) | is_enabled(sb_dqopt(sb), GRPQUOTA))
#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
#define sb_has_quota_enabled(sb, type) (is_enabled(sb_dqopt(sb), type))
#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
sb_has_quota_enabled(sb, GRPQUOTA))
int register_quota_format(struct quota_format_type *fmt);
void unregister_quota_format(struct quota_format_type *fmt);
......
......@@ -46,36 +46,31 @@ static __inline__ void DQUOT_INIT(struct inode *inode)
{
if (!inode->i_sb)
BUG();
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
inode->i_sb->dq_op->initialize(inode, -1);
unlock_kernel();
}
static __inline__ void DQUOT_DROP(struct inode *inode)
{
lock_kernel();
if (IS_QUOTAINIT(inode)) {
if (!inode->i_sb)
BUG();
inode->i_sb->dq_op->drop(inode); /* Ops must be set when there's any quota... */
}
unlock_kernel();
}
static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) {
unlock_kernel();
if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
return 1;
}
}
else
else {
spin_lock(&dq_data_lock);
inode_add_bytes(inode, nr);
unlock_kernel();
spin_unlock(&dq_data_lock);
}
return 0;
}
......@@ -89,17 +84,16 @@ static __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) {
unlock_kernel();
if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
return 1;
}
}
else
else {
spin_lock(&dq_data_lock);
inode_add_bytes(inode, nr);
unlock_kernel();
spin_unlock(&dq_data_lock);
}
return 0;
}
......@@ -113,26 +107,23 @@ static __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb)) {
DQUOT_INIT(inode);
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
unlock_kernel();
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
return 1;
}
}
unlock_kernel();
return 0;
}
static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_space(inode, nr);
else
else {
spin_lock(&dq_data_lock);
inode_sub_bytes(inode, nr);
unlock_kernel();
spin_unlock(&dq_data_lock);
}
}
static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
......@@ -143,23 +134,17 @@ static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
static __inline__ void DQUOT_FREE_INODE(struct inode *inode)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_inode(inode, 1);
unlock_kernel();
}
static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
{
lock_kernel();
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
DQUOT_INIT(inode);
if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) {
unlock_kernel();
if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
return 1;
}
}
unlock_kernel();
return 0;
}
......@@ -169,10 +154,8 @@ static __inline__ int DQUOT_OFF(struct super_block *sb)
{
int ret = -ENOSYS;
lock_kernel();
if (sb->s_qcop && sb->s_qcop->quota_off)
ret = sb->s_qcop->quota_off(sb, -1);
unlock_kernel();
return ret;
}
......@@ -192,9 +175,7 @@ static __inline__ int DQUOT_OFF(struct super_block *sb)
#define DQUOT_TRANSFER(inode, iattr) (0)
extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
inode_add_bytes(inode, nr);
unlock_kernel();
return 0;
}
......@@ -207,9 +188,7 @@ extern __inline__ int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
extern __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
inode_add_bytes(inode, nr);
unlock_kernel();
return 0;
}
......@@ -222,9 +201,7 @@ extern __inline__ int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
extern __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
{
lock_kernel();
inode_sub_bytes(inode, nr);
unlock_kernel();
}
extern __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment