Commit c435ee34 authored by David Howells

afs: Overhaul the callback handling

Overhaul the AFS callback handling by the following means:

 (1) Don't give up callback promises on vnodes that we are no longer using,
     rather let them just expire on the server or let the server break
     them.  This is actually more efficient for the server as the callback
     lookup is expensive if there are lots of extant callbacks.

 (2) Only give up the callback promises we have from a server when the
     server record is destroyed.  Then we can just give up *all* the
     callback promises on it in one go.

 (3) Servers can end up being shared between cells if cells are aliased, so
     don't add all the vnodes being backed by a particular server into a
     big FID-indexed tree on that server as there may be duplicates.

     Instead have each volume instance (~= superblock) register an interest
     in a server as it starts to make use of it and use this to allow the
     processor for callbacks from the server to find the superblock and
     thence the inode corresponding to the FID being broken by means of
     ilookup_nowait().

 (4) Rather than iterating over the entire callback list when a mass-break
     comes in from the server, maintain a counter of mass-breaks in
     afs_server (cb_seq) and make afs_validate() check it against the copy
     in afs_vnode.

     It would be nice not to have to take a read_lock whilst doing this,
     but that's tricky without using RCU.

 (5) Save a ref on the fileserver we're using for a call in the afs_call
     struct so that we can access its cb_s_break during call decoding.

 (6) Write-lock around callback and status storage in a vnode and read-lock
     around getattr so that we don't see the status mid-update.

This has the following consequences:

 (1) Data invalidation isn't seen until someone calls afs_validate() on a
     vnode.  Unfortunately, we need to use a key to query the server, but
     getting one from a background thread is tricky without caching loads
     of keys all over the place.

 (2) Mass invalidation isn't seen until someone calls afs_validate().

 (3) Callback breaking is going to hit the inode_hash_lock quite a bit.
     Could this be replaced with rcu_read_lock(), since inodes are destroyed
     under RCU conditions?
Signed-off-by: David Howells <dhowells@redhat.com>
parent d0676a16
......@@ -37,6 +37,7 @@ enum AFS_FS_Operations {
FSLOOKUP = 161, /* AFS lookup file in directory */
FSFETCHDATA64 = 65537, /* AFS Fetch file data */
FSSTOREDATA64 = 65538, /* AFS Store file data */
FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */
};
enum AFS_FS_Errors {
......
This diff is collapsed.
......@@ -153,7 +153,7 @@ static void afs_cm_destructor(struct afs_call *call)
}
/*
* allow the fileserver to see if the cache manager is still alive
* The server supplied a list of callbacks that it wanted to break.
*/
static void SRXAFSCB_CallBack(struct work_struct *work)
{
......
......@@ -384,7 +384,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
*/
static int afs_readdir(struct file *file, struct dir_context *ctx)
{
return afs_dir_iterate(file_inode(file),
return afs_dir_iterate(file_inode(file),
ctx, file->private_data);
}
......@@ -581,6 +581,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
struct afs_vnode *vnode, *dir;
struct afs_fid uninitialized_var(fid);
struct dentry *parent;
struct inode *inode;
struct key *key;
void *dir_version;
int ret;
......@@ -588,30 +589,39 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
vnode = AFS_FS_I(d_inode(dentry));
if (d_really_is_positive(dentry))
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
_enter("{v={%x:%u} n=%pd fl=%lx},",
vnode->fid.vid, vnode->fid.vnode, dentry,
vnode->flags);
else
} else {
_enter("{neg n=%pd}", dentry);
}
key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
if (IS_ERR(key))
key = NULL;
if (d_really_is_positive(dentry)) {
inode = d_inode(dentry);
if (inode) {
vnode = AFS_FS_I(inode);
afs_validate(vnode, key);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
goto out_bad;
}
}
/* lock down the parent dentry so we can peer at it */
parent = dget_parent(dentry);
dir = AFS_FS_I(d_inode(parent));
/* validate the parent directory */
if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
afs_validate(dir, key);
afs_validate(dir, key);
if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
_debug("%pd: parent dir deleted", dentry);
goto out_bad;
goto out_bad_parent;
}
dir_version = (void *) (unsigned long) dir->status.data_version;
......@@ -626,13 +636,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
case 0:
/* the filename maps to something */
if (d_really_is_negative(dentry))
goto out_bad;
if (is_bad_inode(d_inode(dentry))) {
goto out_bad_parent;
inode = d_inode(dentry);
if (is_bad_inode(inode)) {
printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
dentry);
goto out_bad;
goto out_bad_parent;
}
vnode = AFS_FS_I(inode);
/* if the vnode ID has changed, then the dirent points to a
* different file */
if (fid.vnode != vnode->fid.vnode) {
......@@ -649,10 +662,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
_debug("%pd: file deleted (uq %u -> %u I:%u)",
dentry, fid.unique,
vnode->fid.unique,
d_inode(dentry)->i_generation);
spin_lock(&vnode->lock);
vnode->vfs_inode.i_generation);
write_seqlock(&vnode->cb_lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
spin_unlock(&vnode->lock);
write_sequnlock(&vnode->cb_lock);
goto not_found;
}
goto out_valid;
......@@ -667,7 +680,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
default:
_debug("failed to iterate dir %pd: %d",
parent, ret);
goto out_bad;
goto out_bad_parent;
}
out_valid:
......@@ -683,9 +696,10 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
out_bad:
out_bad_parent:
_debug("dropping dentry %pd2", dentry);
dput(parent);
out_bad:
key_put(key);
_leave(" = 0 [bad]");
......@@ -820,7 +834,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
vnode = AFS_FS_I(d_inode(dentry));
clear_nlink(&vnode->vfs_inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
afs_discard_callback_on_delete(vnode);
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
key_put(key);
......@@ -884,9 +898,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
vnode = AFS_FS_I(d_inode(dentry));
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
_debug("AFS_VNODE_DELETED");
if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
_debug("AFS_VNODE_CB_BROKEN");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
ret = afs_validate(vnode, key);
_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
}
......
......@@ -243,7 +243,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
/* make sure we've got a callback on this file and that our view of the
* data version is up to date */
ret = afs_vnode_fetch_status(vnode, NULL, key);
ret = afs_validate(vnode, key);
if (ret < 0)
goto error;
......@@ -383,7 +383,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
/* again, make sure we've got a callback on this file and, again, make
* sure that our view of the data version is up to date (we ignore
* errors incurred here and deal with the consequences elsewhere) */
afs_vnode_fetch_status(vnode, NULL, key);
afs_vnode_fetch_status(vnode, NULL, key, false);
error:
spin_unlock(&inode->i_lock);
......@@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
ret = afs_vnode_fetch_status(vnode, NULL, key);
ret = afs_vnode_fetch_status(vnode, NULL, key, true);
if (ret < 0)
goto error;
lock_count = vnode->status.lock_count;
......
This diff is collapsed.
......@@ -23,11 +23,6 @@
#include <linux/namei.h>
#include "internal.h"
struct afs_iget_data {
struct afs_fid fid;
struct afs_volume *volume; /* volume on which resides */
};
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
.listxattr = afs_listxattr,
......@@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = {
static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
{
struct inode *inode = AFS_VNODE_TO_I(vnode);
bool changed;
_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
vnode->status.type,
......@@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
vnode->status.data_version,
vnode->status.mode);
read_seqlock_excl(&vnode->cb_lock);
switch (vnode->status.type) {
case AFS_FTYPE_FILE:
inode->i_mode = S_IFREG | vnode->status.mode;
......@@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
if ((vnode->status.mode & 0777) == 0644) {
inode->i_flags |= S_AUTOMOUNT;
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
spin_unlock(&vnode->lock);
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
......@@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
read_sequnlock_excl(&vnode->cb_lock);
return -EBADMSG;
}
#ifdef CONFIG_AFS_FSCACHE
if (vnode->status.size != inode->i_size)
fscache_attr_changed(vnode->cache);
#endif
changed = (vnode->status.size != inode->i_size);
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
......@@ -97,13 +91,20 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
read_sequnlock_excl(&vnode->cb_lock);
#ifdef CONFIG_AFS_FSCACHE
if (changed)
fscache_attr_changed(vnode->cache);
#endif
return 0;
}
/*
* iget5() comparator
*/
static int afs_iget5_test(struct inode *inode, void *opaque)
int afs_iget5_test(struct inode *inode, void *opaque)
{
struct afs_iget_data *data = opaque;
......@@ -237,8 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
if (!status) {
/* it's a remotely extant inode */
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
ret = afs_vnode_fetch_status(vnode, NULL, key);
ret = afs_vnode_fetch_status(vnode, NULL, key, true);
if (ret < 0)
goto bad_inode;
} else {
......@@ -249,16 +249,16 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
vnode->cb_version = 0;
vnode->cb_expiry = 0;
vnode->cb_type = 0;
vnode->cb_expires = ktime_get_real_seconds();
vnode->cb_expires_at = 0;
} else {
vnode->cb_version = cb->version;
vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
vnode->cb_expires = vnode->cb_expiry +
ktime_get_real_seconds();
vnode->cb_expires_at = cb->expiry;
set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
vnode->cb_expires_at += ktime_get_real_seconds();
}
/* set up caching before mapping the status, as map-status reads the
......@@ -320,25 +320,34 @@ void afs_zap_data(struct afs_vnode *vnode)
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
time64_t now = ktime_get_real_seconds();
bool valid = false;
int ret;
_enter("{v={%x:%u} fl=%lx},%x",
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
if (vnode->cb_promised &&
!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
!test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
_debug("callback expired");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
} else {
goto valid;
/* Quickly check the callback state. Ideally, we'd use read_seqbegin
* here, but we have no way to pass the net namespace to the RCU
* cleanup for the server record.
*/
read_seqlock_excl(&vnode->cb_lock);
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
} else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
vnode->cb_expires_at - 10 > now) {
valid = true;
}
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
}
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
read_sequnlock_excl(&vnode->cb_lock);
if (valid)
goto valid;
mutex_lock(&vnode->validate_lock);
......@@ -347,12 +356,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* a new promise - note that if the (parent) directory's metadata was
* changed then the security may be different and we may no longer have
* access */
if (!vnode->cb_promised ||
test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
ret = afs_vnode_fetch_status(vnode, NULL, key);
if (ret < 0)
ret = afs_vnode_fetch_status(vnode, NULL, key, false);
if (ret < 0) {
if (ret == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
ret = -ESTALE;
}
goto error_unlock;
}
_debug("new promise [fl=%lx]", vnode->flags);
}
......@@ -367,7 +380,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
mutex_unlock(&vnode->validate_lock);
valid:
_leave(" = 0");
......@@ -386,10 +399,17 @@ int afs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct afs_vnode *vnode = AFS_FS_I(inode);
int seq = 0;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
generic_fillattr(inode, stat);
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
generic_fillattr(inode, stat);
} while (need_seqretry(&vnode->cb_lock, seq));
done_seqretry(&vnode->cb_lock, seq);
return 0;
}
......@@ -416,13 +436,10 @@ void afs_evict_inode(struct inode *inode)
vnode = AFS_FS_I(inode);
_enter("{%x:%u.%d} v=%u x=%u t=%u }",
_enter("{%x:%u.%d}",
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
vnode->cb_version,
vnode->cb_expiry,
vnode->cb_type);
vnode->fid.unique);
_debug("CLEAR INODE %p", inode);
......@@ -431,18 +448,12 @@ void afs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
afs_give_up_callback(vnode);
if (vnode->server) {
spin_lock(&vnode->server->fs_lock);
rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
spin_unlock(&vnode->server->fs_lock);
afs_put_server(afs_i2net(inode), vnode->server);
vnode->server = NULL;
if (vnode->cb_interest) {
afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
vnode->cb_interest = NULL;
}
ASSERT(list_empty(&vnode->writebacks));
ASSERT(!vnode->cb_promised);
#ifdef CONFIG_AFS_FSCACHE
fscache_relinquish_cookie(vnode->cache, 0);
......
......@@ -55,6 +55,11 @@ struct afs_mount_params {
struct key *key; /* key to use for secure mounting */
};
/*
 * FID and volume pair, presumably identifying the vnode being looked up.
 * The iget5() comparator, afs_iget5_test(), interprets its opaque argument
 * as a pointer to one of these.
 */
struct afs_iget_data {
struct afs_fid fid;
struct afs_volume *volume; /* volume on which resides */
};
enum afs_call_state {
AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
......@@ -77,6 +82,7 @@ struct afs_call {
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
struct afs_server *cm_server; /* Server affected by incoming CM call */
struct afs_server *server; /* Server used by client call */
void *request; /* request data (first part) */
struct address_space *mapping; /* page set */
struct afs_writeback *wb; /* writeback being performed */
......@@ -92,6 +98,7 @@ struct afs_call {
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned int cb_break; /* cb_break + cb_s_break before the call */
union {
unsigned last_to; /* amount of mapping[last] */
unsigned count2; /* count used in unmarshalling */
......@@ -314,26 +321,31 @@ struct afs_server {
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
struct list_head grave; /* link in master graveyard list */
struct rb_node master_rb; /* link in master by-addr tree */
struct rw_semaphore sem; /* access lock */
unsigned long flags;
#define AFS_SERVER_NEW 0 /* New server, don't inc cb_s_break */
/* file service access */
struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */
unsigned long fs_act_jif; /* time at which last activity occurred */
unsigned long fs_dead_jif; /* time at which no longer to be considered dead */
spinlock_t fs_lock; /* access lock */
int fs_state; /* 0 or reason FS currently marked dead (-errno) */
spinlock_t fs_lock; /* access lock */
/* callback promise management */
struct rb_root cb_promises; /* vnode expiration list (ordered earliest first) */
struct delayed_work cb_updater; /* callback updater */
struct delayed_work cb_break_work; /* collected break dispatcher */
wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */
spinlock_t cb_lock; /* access lock */
struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */
atomic_t cb_break_n; /* number of pending breaks */
u8 cb_break_head; /* head of callback breaking ring */
u8 cb_break_tail; /* tail of callback breaking ring */
struct list_head cb_interests; /* List of superblocks using this server */
unsigned cb_s_break; /* Break-everything counter. */
rwlock_t cb_break_lock; /* Volume finding lock */
};
/*
 * Interest by a superblock on a server.
 *
 * Each record ties a server to a superblock and the volume ID to match, and
 * is linked into that server's cb_interests list through ->cb_link.  Records
 * are reference counted through ->usage; afs_get_cb_interest() takes an
 * extra reference.
 */
struct afs_cb_interest {
struct list_head cb_link; /* Link in server->cb_interests */
struct afs_server *server; /* Server on which this interest resides */
struct super_block *sb; /* Superblock on which inodes reside */
afs_volid_t vid; /* Volume ID to match */
refcount_t usage; /* Reference count on this record */
};
/*
......@@ -352,6 +364,7 @@ struct afs_volume {
unsigned short nservers; /* number of server slots filled */
unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
struct afs_cb_interest *cb_interests[8]; /* Interests on servers for callbacks */
struct rw_semaphore server_sem; /* lock for accessing current server */
};
......@@ -371,7 +384,6 @@ struct afs_vnode {
struct inode vfs_inode; /* the VFS's inode record */
struct afs_volume *volume; /* volume on which vnode resides */
struct afs_server *server; /* server currently supplying this file */
struct afs_fid fid; /* the file identifier for this inode */
struct afs_file_status status; /* AFS status info for this file */
#ifdef CONFIG_AFS_FSCACHE
......@@ -386,9 +398,9 @@ struct afs_vnode {
spinlock_t writeback_lock; /* lock for writebacks */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
#define AFS_VNODE_CB_PROMISED 0 /* Set if vnode has a callback promise */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */
#define AFS_VNODE_DIR_MODIFIED 2 /* set if dir vnode's data modified */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
......@@ -408,15 +420,14 @@ struct afs_vnode {
struct key *unlock_key; /* key to be used in unlocking */
/* outstanding callback notification on this file */
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
struct work_struct cb_broken_work; /* work to be done on callback break */
time64_t cb_expires; /* time at which callback expires */
time64_t cb_expires_at; /* time used to order cb_promise */
struct afs_cb_interest *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
time64_t cb_expires_at; /* time at which callback expires */
unsigned cb_version; /* callback version */
unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
bool cb_promised; /* true if promise still holds */
};
/*
......@@ -463,16 +474,20 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
/*
* callback.c
*/
extern struct workqueue_struct *afs_callback_update_worker;
extern void afs_init_callback_state(struct afs_server *);
extern void afs_broken_callback_work(struct work_struct *);
extern void afs_break_callbacks(struct afs_server *, size_t,
struct afs_callback[]);
extern void afs_discard_callback_on_delete(struct afs_vnode *);
extern void afs_give_up_callback(struct afs_vnode *);
extern void afs_dispatch_give_up_callbacks(struct work_struct *);
extern void afs_flush_callback_breaks(struct afs_server *);
extern void afs_break_callback(struct afs_vnode *);
extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **,
struct afs_server *);
extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *);
/*
 * Take an extra reference on a callback interest record.
 *
 * Increments cbi->usage and returns cbi so that the call can be used
 * directly in an assignment.  cbi is dereferenced unconditionally, so it
 * must not be NULL.
 */
static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi)
{
refcount_inc(&cbi->usage);
return cbi;
}
/*
* cell.c
......@@ -560,10 +575,12 @@ extern int afs_fs_extend_lock(struct afs_server *, struct key *,
struct afs_vnode *, bool);
extern int afs_fs_release_lock(struct afs_server *, struct key *,
struct afs_vnode *, bool);
extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool);
/*
* inode.c
*/
extern int afs_iget5_test(struct inode *, void *);
extern struct inode *afs_iget_autocell(struct inode *, const char *, int,
struct key *);
extern struct inode *afs_iget(struct super_block *, struct key *,
......@@ -676,11 +693,11 @@ extern int afs_permission(struct inode *, int);
*/
extern spinlock_t afs_server_peer_lock;
#define afs_get_server(S) \
do { \
_debug("GET SERVER %d", atomic_read(&(S)->usage)); \
atomic_inc(&(S)->usage); \
} while(0)
/*
 * Take an extra reference on a server record.
 *
 * Atomically increments server->usage and returns the same pointer so that
 * the call can be used in an expression.  server is dereferenced
 * unconditionally, so it must not be NULL.
 */
static inline struct afs_server *afs_get_server(struct afs_server *server)
{
atomic_inc(&server->usage);
return server;
}
extern void afs_server_timer(struct timer_list *);
extern struct afs_server *afs_lookup_server(struct afs_cell *,
......@@ -741,7 +758,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
extern void afs_vnode_finalise_status_update(struct afs_vnode *,
struct afs_server *);
extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
struct key *);
struct key *, bool);
extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
struct afs_read *);
extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
......
......@@ -123,10 +123,6 @@ static int __init afs_init(void)
alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0);
if (!afs_vlocation_update_worker)
goto error_vl_up;
afs_callback_update_worker =
alloc_ordered_workqueue("kafs_callbackd", WQ_MEM_RECLAIM);
if (!afs_callback_update_worker)
goto error_callback;
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
if (!afs_lock_manager)
goto error_lockmgr;
......@@ -158,8 +154,6 @@ static int __init afs_init(void)
#endif
destroy_workqueue(afs_lock_manager);
error_lockmgr:
destroy_workqueue(afs_callback_update_worker);
error_callback:
destroy_workqueue(afs_vlocation_update_worker);
error_vl_up:
destroy_workqueue(afs_async_calls);
......@@ -189,7 +183,6 @@ static void __exit afs_exit(void)
fscache_unregister_netfs(&afs_cache_netfs);
#endif
destroy_workqueue(afs_lock_manager);
destroy_workqueue(afs_callback_update_worker);
destroy_workqueue(afs_vlocation_update_worker);
destroy_workqueue(afs_async_calls);
destroy_workqueue(afs_wq);
......
......@@ -115,6 +115,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
RCU_INIT_POINTER(vnode->permits, NULL);
vnode->cb_break++;
mutex_unlock(&vnode->permits_lock);
if (permits)
......@@ -264,8 +265,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* (the post-processing will cache the result on auth_vnode) */
_debug("no valid permit");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true);
if (ret < 0) {
iput(&auth_vnode->vfs_inode);
*_access = 0;
......@@ -304,14 +304,9 @@ int afs_permission(struct inode *inode, int mask)
return PTR_ERR(key);
}
/* if the promise has expired, we need to check the server again */
if (!vnode->cb_promised) {
_debug("not promised");
ret = afs_vnode_fetch_status(vnode, NULL, key);
if (ret < 0)
goto error;
_debug("new promise [fl=%lx]", vnode->flags);
}
ret = afs_validate(vnode, key);
if (ret < 0)
goto error;
/* check the permits to see if we've got one yet */
ret = afs_check_permit(vnode, key, &access);
......
......@@ -94,12 +94,8 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
INIT_LIST_HEAD(&server->grave);
init_rwsem(&server->sem);
spin_lock_init(&server->fs_lock);
server->fs_vnodes = RB_ROOT;
server->cb_promises = RB_ROOT;
spin_lock_init(&server->cb_lock);
init_waitqueue_head(&server->cb_break_waitq);
INIT_DELAYED_WORK(&server->cb_break_work,
afs_dispatch_give_up_callbacks);
INIT_LIST_HEAD(&server->cb_interests);
rwlock_init(&server->cb_break_lock);
server->addr = *addr;
afs_inc_servers_outstanding(cell->net);
......@@ -258,8 +254,6 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
return;
}
afs_flush_callback_breaks(server);
spin_lock(&net->server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &net->server_graveyard);
......@@ -277,15 +271,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
_enter("%p", server);
ASSERTIF(server->cb_break_head != server->cb_break_tail,
delayed_work_pending(&server->cb_break_work));
ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
afs_put_cell(server->net, server->cell);
afs_fs_give_up_all_callbacks(server, NULL, false);
afs_put_cell(net, server->cell);
kfree(server);
afs_dec_servers_outstanding(net);
}
......
......@@ -512,8 +512,12 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
static void afs_kill_super(struct super_block *sb)
{
struct afs_super_info *as = sb->s_fs_info;
struct afs_super_info *as = AFS_FS_S(sb);
/* Clear the callback interests (which will do ilookup5) before
* deactivating the superblock.
*/
afs_clear_callback_interests(as->net, as->volume);
kill_anon_super(sb);
afs_destroy_sbi(as);
}
......@@ -536,7 +540,7 @@ static void afs_i_init_once(void *_vnode)
INIT_LIST_HEAD(&vnode->pending_locks);
INIT_LIST_HEAD(&vnode->granted_locks);
INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
seqlock_init(&vnode->cb_lock);
}
/*
......@@ -558,7 +562,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->update_cnt = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
vnode->cb_promised = false;
_leave(" = %p", &vnode->vfs_inode);
return &vnode->vfs_inode;
......@@ -582,7 +585,7 @@ static void afs_destroy_inode(struct inode *inode)
_debug("DESTROY INODE %p", inode);
ASSERTCMP(vnode->server, ==, NULL);
ASSERTCMP(vnode->cb_interest, ==, NULL);
call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
......
......@@ -16,189 +16,20 @@
#include <linux/sched.h>
#include "internal.h"
#if 0
static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
int depth, char lr)
{
struct afs_vnode *vnode;
bool bad = false;
if (!node)
return false;
if (node->rb_left)
bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
vnode = rb_entry(node, struct afs_vnode, cb_promise);
_debug("%c %*.*s%c%p {%d}",
rb_is_red(node) ? 'R' : 'B',
depth, depth, "", lr,
vnode, vnode->cb_expires_at);
if (rb_parent(node) != parent) {
printk("BAD: %p != %p\n", rb_parent(node), parent);
bad = true;
}
if (node->rb_right)
bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
return bad;
}
static noinline void dump_tree(const char *name, struct afs_server *server)
{
_enter("%s", name);
if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
BUG();
}
#endif
/*
* insert a vnode into the backing server's vnode tree
*/
static void afs_install_vnode(struct afs_vnode *vnode,
struct afs_server *server)
{
struct afs_server *old_server = vnode->server;
struct afs_vnode *xvnode;
struct rb_node *parent, **p;
_enter("%p,%p", vnode, server);
if (old_server) {
spin_lock(&old_server->fs_lock);
rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
spin_unlock(&old_server->fs_lock);
}
afs_get_server(server);
vnode->server = server;
afs_put_server(afs_v2net(vnode), old_server);
/* insert into the server's vnode tree in FID order */
spin_lock(&server->fs_lock);
parent = NULL;
p = &server->fs_vnodes.rb_node;
while (*p) {
parent = *p;
xvnode = rb_entry(parent, struct afs_vnode, server_rb);
if (vnode->fid.vid < xvnode->fid.vid)
p = &(*p)->rb_left;
else if (vnode->fid.vid > xvnode->fid.vid)
p = &(*p)->rb_right;
else if (vnode->fid.vnode < xvnode->fid.vnode)
p = &(*p)->rb_left;
else if (vnode->fid.vnode > xvnode->fid.vnode)
p = &(*p)->rb_right;
else if (vnode->fid.unique < xvnode->fid.unique)
p = &(*p)->rb_left;
else if (vnode->fid.unique > xvnode->fid.unique)
p = &(*p)->rb_right;
else
BUG(); /* can't happen unless afs_iget() malfunctions */
}
rb_link_node(&vnode->server_rb, parent, p);
rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
spin_unlock(&server->fs_lock);
_leave("");
}
/*
* insert a vnode into the promising server's update/expiration tree
* - caller must hold vnode->lock
*/
static void afs_vnode_note_promise(struct afs_vnode *vnode,
struct afs_server *server)
{
struct afs_server *old_server;
struct afs_vnode *xvnode;
struct rb_node *parent, **p;
_enter("%p,%p", vnode, server);
ASSERT(server != NULL);
old_server = vnode->server;
if (vnode->cb_promised) {
if (server == old_server &&
vnode->cb_expires == vnode->cb_expires_at) {
_leave(" [no change]");
return;
}
spin_lock(&old_server->cb_lock);
if (vnode->cb_promised) {
_debug("delete");
rb_erase(&vnode->cb_promise, &old_server->cb_promises);
vnode->cb_promised = false;
}
spin_unlock(&old_server->cb_lock);
}
if (vnode->server != server)
afs_install_vnode(vnode, server);
vnode->cb_expires_at = vnode->cb_expires;
_debug("PROMISE on %p {%lu}",
vnode, (unsigned long) vnode->cb_expires_at);
/* abuse an RB-tree to hold the expiration order (we may have multiple
* items with the same expiration time) */
spin_lock(&server->cb_lock);
parent = NULL;
p = &server->cb_promises.rb_node;
while (*p) {
parent = *p;
xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
if (vnode->cb_expires_at < xvnode->cb_expires_at)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
rb_link_node(&vnode->cb_promise, parent, p);
rb_insert_color(&vnode->cb_promise, &server->cb_promises);
vnode->cb_promised = true;
spin_unlock(&server->cb_lock);
_leave("");
}
/*
* handle remote file deletion by discarding the callback promise
* Handle remote file deletion.
*/
static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
{
struct afs_server *server;
struct afs_cb_interest *cbi = vnode->cb_interest;
_enter("{%p}", vnode->server);
_enter("{%p}", cbi);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
server = vnode->server;
if (server) {
if (vnode->cb_promised) {
spin_lock(&server->cb_lock);
if (vnode->cb_promised) {
rb_erase(&vnode->cb_promise,
&server->cb_promises);
vnode->cb_promised = false;
}
spin_unlock(&server->cb_lock);
}
spin_lock(&server->fs_lock);
rb_erase(&vnode->server_rb, &server->fs_vnodes);
spin_unlock(&server->fs_lock);
vnode->server = NULL;
afs_put_server(afs_v2net(vnode), server);
} else {
ASSERT(!vnode->cb_promised);
if (cbi) {
vnode->cb_interest = NULL;
afs_put_cb_interest(afs_v2net(vnode), cbi);
}
_leave("");
......@@ -218,8 +49,6 @@ void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
_enter("%p,%p", vnode, server);
spin_lock(&vnode->lock);
clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
afs_vnode_note_promise(vnode, server);
vnode->update_cnt--;
ASSERTCMP(vnode->update_cnt, >=, 0);
spin_unlock(&vnode->lock);
......@@ -238,8 +67,6 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
spin_lock(&vnode->lock);
clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
if (ret == -ENOENT) {
/* the file was deleted on the server */
_debug("got NOENT from server - marking file deleted");
......@@ -261,8 +88,8 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
* - there are any outstanding ops that will fetch the status
* - TODO implement local caching
*/
int afs_vnode_fetch_status(struct afs_vnode *vnode,
struct afs_vnode *auth_vnode, struct key *key)
int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode,
struct key *key, bool force)
{
struct afs_server *server;
unsigned long acl_order;
......@@ -270,12 +97,13 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
DECLARE_WAITQUEUE(myself, current);
_enter("%s,{%x:%u.%u}",
_enter("%s,{%x:%u.%u,S=%lx},%u",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
vnode->flags,
force);
if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
vnode->cb_promised) {
if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_leave(" [unchanged]");
return 0;
}
......@@ -291,8 +119,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
spin_lock(&vnode->lock);
if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
vnode->cb_promised) {
if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
spin_unlock(&vnode->lock);
_leave(" [unchanged]");
return 0;
......@@ -310,7 +137,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
/* wait for the status to be updated */
for (;;) {
if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
break;
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
break;
......
......@@ -153,8 +153,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
error_discard:
up_write(&params->cell->vl_sem);
for (loop = volume->nservers - 1; loop >= 0; loop--)
for (loop = volume->nservers - 1; loop >= 0; loop--) {
afs_put_cb_interest(params->net, volume->cb_interests[loop]);
afs_put_server(params->net, volume->servers[loop]);
}
kfree(volume);
goto error;
......@@ -197,8 +199,10 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
#endif
afs_put_vlocation(cell->net, vlocation);
for (loop = volume->nservers - 1; loop >= 0; loop--)
for (loop = volume->nservers - 1; loop >= 0; loop--) {
afs_put_cb_interest(cell->net, volume->cb_interests[loop]);
afs_put_server(cell->net, volume->servers[loop]);
}
kfree(volume);
......@@ -218,10 +222,10 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
_enter("%s", volume->vlocation->vldb.name);
/* stick with the server we're already using if we can */
if (vnode->server && vnode->server->fs_state == 0) {
afs_get_server(vnode->server);
_leave(" = %p [current]", vnode->server);
return vnode->server;
if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
afs_get_server(vnode->cb_interest->server);
_leave(" = %p [current]", vnode->cb_interest->server);
return vnode->cb_interest->server;
}
down_read(&volume->server_sem);
......@@ -244,13 +248,8 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
_debug("consider %d [%d]", loop, state);
switch (state) {
/* found an apparently healthy server */
case 0:
afs_get_server(server);
up_read(&volume->server_sem);
_leave(" = %p (picked %pIS)",
server, &server->addr.transport);
return server;
goto picked_server;
case -ENETUNREACH:
if (ret == 0)
......@@ -284,9 +283,25 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
/* no available servers
* - TODO: handle the no active servers case better
*/
error:
up_read(&volume->server_sem);
_leave(" = %d", ret);
return ERR_PTR(ret);
picked_server:
/* Found an apparently healthy server. We need to register an interest
* in receiving callbacks before we talk to it.
*/
ret = afs_register_server_cb_interest(vnode,
&volume->cb_interests[loop], server);
if (ret < 0)
goto error;
afs_get_server(server);
up_read(&volume->server_sem);
_leave(" = %p (picked %pIS)",
server, &server->addr.transport);
return server;
}
/*
......@@ -309,14 +324,12 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
switch (result) {
/* success */
case 0:
server->fs_act_jif = jiffies;
server->fs_state = 0;
_leave("");
return 1;
/* the fileserver denied all knowledge of the volume */
case -ENOMEDIUM:
server->fs_act_jif = jiffies;
down_write(&volume->server_sem);
/* firstly, find where the server is in the active list (if it
......@@ -365,7 +378,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
*/
spin_lock(&server->fs_lock);
if (!server->fs_state) {
server->fs_dead_jif = jiffies + HZ * 10;
server->fs_state = result;
printk("kAFS: SERVER DEAD state=%d\n", result);
}
......@@ -374,7 +386,6 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
/* miscellaneous error */
default:
server->fs_act_jif = jiffies;
case -ENOMEM:
case -ENONET:
/* tell the caller to accept the result */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment