Commit dce9f3bf authored by Trond Myklebust

NFSv4: Basic code for recovering file OPEN state after a server reboot.
parent e85c40cd
......@@ -1448,6 +1448,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0);
memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
}
if (list_empty(&clp->cl_superblocks))
clear_bit(NFS4CLNT_OK, &clp->cl_state);
list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
clnt = rpc_clone_client(clp->cl_rpcclient);
server->nfs4_state = clp;
......
......@@ -48,9 +48,12 @@
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_POLL_RETRY_TIME (15*HZ)
#define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name
#define OPNUM(cp) cp->ops[cp->req_nops].opnum
static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
extern struct rpc_procinfo nfs4_procedures[];
......@@ -532,7 +535,6 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
struct nfs_openargs o_arg = {
.fh = NFS_FH(dir),
.share_access = flags & (FMODE_READ|FMODE_WRITE),
.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid,
.opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE,
.createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED,
.name = name,
......@@ -553,6 +555,7 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
.rpc_cred = cred,
};
retry:
status = -ENOMEM;
if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) {
dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
......@@ -569,6 +572,7 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
down(&sp->so_sema);
o_arg.seqid = sp->so_seqid;
o_arg.id = sp->so_id;
o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid,
status = rpc_call_sync(server->client, &msg, 0);
nfs4_increment_seqid(status, sp);
......@@ -623,6 +627,9 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
nfs4_put_open_state(state);
if (inode)
iput(inode);
status = nfs4_handle_error(server, status);
if (!status)
goto retry;
out:
return ERR_PTR(status);
}
......@@ -651,7 +658,9 @@ nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
.rpc_argp = &arg,
.rpc_resp = &res,
};
int status;
retry:
fattr->valid = 0;
if (state)
......@@ -659,7 +668,13 @@ nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
else
memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
return(rpc_call_sync(server->client, &msg, 0));
status = rpc_call_sync(server->client, &msg, 0);
if (status) {
status = nfs4_handle_error(server, status);
if (!status)
goto retry;
}
return status;
}
/*
......@@ -707,48 +722,12 @@ static int
nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
{
struct nfs4_client *clp;
struct nfs4_compound compound;
struct nfs4_op ops[4];
unsigned char * p;
struct qstr q;
int status;
clp = server->nfs4_state;
down_write(&clp->cl_sem);
/* Has the clientid already been initialized? */
if (clp->cl_state != NFS4CLNT_NEW)
/* Yep, so just read the root attributes and the lease time. */
goto no_setclientid;
/*
* SETCLIENTID.
* Until delegations are imported, we don't bother setting the program
* number and port to anything meaningful.
*/
if ((status = nfs4_proc_setclientid(clp, 0, 0)))
goto out_unlock;
/*
* SETCLIENTID_CONFIRM, plus root filehandle.
* We also get the lease time here.
*/
if ((status = nfs4_proc_setclientid_confirm(clp)))
goto out_unlock;
/*
* Now that we have instantiated the clientid and determined
* the lease time, we can initialize the renew daemon for this
* server.
* FIXME: we only need one renewd daemon per server.
*/
nfs4_schedule_state_renewal(clp);
clp->cl_state = NFS4CLNT_OK;
no_setclientid:
up_write(&clp->cl_sem);
/*
* Now we do a separate LOOKUP for each component of the mount path.
* The LOOKUPs are done separately so that we can conveniently
......@@ -787,9 +766,6 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
}
break;
}
return status;
out_unlock:
up_write(&clp->cl_sem);
out:
return status;
}
......@@ -1410,6 +1386,20 @@ nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_call_compound(&compound, NULL, 0);
}
/*
 * tk_action handler used to re-issue a READ.
 *
 * nfs4_read_done() installs this when nfs4_async_handle_error() reports
 * -EAGAIN. The call is restarted and the stateid in the request arguments
 * is reloaded from the first page request on data->pages, falling back to
 * the zero stateid when that request carries no open state.
 */
static void
nfs4_restart_read(struct rpc_task *task)
{
	struct nfs_read_data *rdata = (struct nfs_read_data *)task->tk_calldata;
	struct nfs_page *first;
	const void *src = &zero_stateid;

	rpc_restart_call(task);

	first = nfs_list_entry(rdata->pages.next);
	if (first->wb_state)
		src = &first->wb_state->stateid;

	memcpy(&rdata->args.stateid, src, sizeof(rdata->args.stateid));
}
static void
nfs4_read_done(struct rpc_task *task)
{
......@@ -1417,6 +1407,10 @@ nfs4_read_done(struct rpc_task *task)
struct inode *inode = data->inode;
struct nfs_fattr *fattr = data->res.fattr;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
task->tk_action = nfs4_restart_read;
return;
}
if (task->tk_status > 0)
renew_lease(NFS_SERVER(inode), data->timestamp);
/* Check cache consistency */
......@@ -1484,12 +1478,30 @@ nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_mtime = fattr->mtime;
}
/*
 * tk_action handler used to re-issue a WRITE (also installed by the
 * COMMIT completion path in this file).
 *
 * Restarts the RPC call, then refreshes the stateid in the arguments:
 * taken from the open state attached to the first request on data->pages
 * if there is one, otherwise the zero stateid.
 */
static void
nfs4_restart_write(struct rpc_task *task)
{
	struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
	struct nfs_page *head_req;

	rpc_restart_call(task);

	head_req = nfs_list_entry(wdata->pages.next);
	if (!head_req->wb_state) {
		memcpy(&wdata->args.stateid, &zero_stateid,
		       sizeof(wdata->args.stateid));
		return;
	}
	memcpy(&wdata->args.stateid, &head_req->wb_state->stateid,
	       sizeof(wdata->args.stateid));
}
static void
nfs4_write_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
task->tk_action = nfs4_restart_write;
return;
}
if (task->tk_status >= 0)
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs4_write_refresh_inode(inode, data->res.fattr);
......@@ -1552,8 +1564,13 @@ static void
nfs4_commit_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
struct inode *inode = data->inode;
nfs4_write_refresh_inode(data->inode, data->res.fattr);
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
task->tk_action = nfs4_restart_write;
return;
}
nfs4_write_refresh_inode(inode, data->res.fattr);
/* Call back common NFS writeback processing */
nfs_commit_done(task);
}
......@@ -1599,6 +1616,14 @@ renew_done(struct rpc_task *task)
{
struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
unsigned long timestamp = (unsigned long)task->tk_calldata;
if (task->tk_status < 0) {
switch (task->tk_status) {
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_state_recovery(clp);
return;
}
}
spin_lock(&clp->cl_lock);
if (time_before(clp->cl_last_renewal,timestamp))
clp->cl_last_renewal = timestamp;
......@@ -1617,6 +1642,25 @@ nfs4_proc_async_renew(struct nfs4_client *clp)
return rpc_call_async(clp->cl_rpcclient, &msg, 0, renew_done, (void *)jiffies);
}
/*
 * Synchronously send a RENEW for this client.
 *
 * @clp: client whose lease is to be renewed; passed as the RPC argument
 *       and used for the credential and RPC client.
 *
 * Returns the status of rpc_call_sync(). On a negative status the
 * function returns immediately and leaves cl_last_renewal untouched: a
 * failed RENEW must not be recorded as a successful lease renewal.
 * Otherwise cl_last_renewal is advanced (under cl_lock) to the time
 * sampled just before the call was sent, unless it is already newer.
 */
int
nfs4_proc_renew(struct nfs4_client *clp)
{
	struct rpc_message msg = {
		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
		.rpc_argp	= clp,
		.rpc_cred	= clp->cl_cred,
	};
	/* Sample the time before the call so the estimate is conservative. */
	unsigned long now = jiffies;
	int status;

	status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
	if (status < 0)
		return status;

	spin_lock(&clp->cl_lock);
	if (time_before(clp->cl_last_renewal, now))
		clp->cl_last_renewal = now;
	spin_unlock(&clp->cl_lock);
	return status;
}
/*
* We will need to arrange for the VFS layer to provide an atomic open.
* Until then, this open method is prone to inefficiency and race conditions
......@@ -1697,6 +1741,113 @@ nfs4_request_init(struct nfs_page *req, struct file *filp)
req->wb_cred = get_rpccred(state->owner->so_cred);
}
/*
 * Error handling for asynchronous RPC tasks.
 *
 * Inspects task->tk_status:
 *  - STALE_CLIENTID / STALE_STATEID / EXPIRED: the task is put to sleep
 *    on the client's cl_rpcwaitq and state recovery is scheduled;
 *  - GRACE / DELAY: the task is delayed by NFS4_POLL_RETRY_TIME.
 * In both cases tk_status is reset to 0 and -EAGAIN is returned so the
 * caller can arrange for the call to be restarted.
 *
 * Returns 0 (task untouched) for any other status, or when the server
 * has no nfs4_state attached.
 */
static int
nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
{
	struct nfs4_client *client = server->nfs4_state;
	int err;

	if (client == NULL)
		return 0;

	err = task->tk_status;
	if (err == -NFS4ERR_STALE_CLIENTID ||
	    err == -NFS4ERR_STALE_STATEID ||
	    err == -NFS4ERR_EXPIRED) {
		/* Queue the task first, then kick off recovery. */
		rpc_sleep_on(&client->cl_rpcwaitq, task, NULL, NULL);
		nfs4_schedule_state_recovery(client);
	} else if (err == -NFS4ERR_GRACE || err == -NFS4ERR_DELAY) {
		rpc_delay(task, NFS4_POLL_RETRY_TIME);
	} else {
		return 0;
	}

	task->tk_status = 0;
	return -EAGAIN;
}
/*
 * Wait (in process context) for state recovery on @clp to finish.
 *
 * @clnt: RPC client; its cl_intr flag selects an interruptible sleep and
 *        it is handed to rpc_clnt_sigmask()/rpc_clnt_sigunmask().
 * @clp:  NFSv4 client whose cl_waitq is slept on and whose cl_state bits
 *        are tested.
 *
 * Returns 0 once the wait loop terminates normally, or -ERESTARTSYS if
 * cl_intr is set and a signal is pending.
 */
int
nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
{
DEFINE_WAIT(wait);
sigset_t oldset;
int interruptible, res;
might_sleep();
rpc_clnt_sigmask(clnt, &oldset);
/* Sleep uninterruptibly unless the RPC client has cl_intr set. */
interruptible = TASK_UNINTERRUPTIBLE;
if (clnt->cl_intr)
interruptible = TASK_INTERRUPTIBLE;
do {
res = 0;
/* Get on the wait queue before testing state so a wake-up is not lost. */
prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
/*
 * NOTE(review): nfs4_schedule_state_recovery() clears NFS4CLNT_OK
 * before queueing the recovery work, so the test below can only
 * succeed if recovery completed in between - confirm this is intended.
 */
nfs4_schedule_state_recovery(clp);
/* Done when the client is OK and no recovery pass is still in progress. */
if (test_bit(NFS4CLNT_OK, &clp->cl_state) &&
!test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state))
break;
if (clnt->cl_intr && signalled()) {
res = -ERESTARTSYS;
break;
}
schedule();
} while(!test_bit(NFS4CLNT_OK, &clp->cl_state));
finish_wait(&clp->cl_waitq, &wait);
rpc_clnt_sigunmask(clnt, &oldset);
return res;
}
/*
 * Sleep for NFS4_POLL_RETRY_TIME before a request is retried.
 *
 * The sleep is interruptible only when the RPC client has cl_intr set;
 * in that case a pending signal after the sleep yields -ERESTARTSYS.
 * Otherwise the function returns 0.
 */
static int
nfs4_delay(struct rpc_clnt *clnt)
{
	sigset_t saved_mask;
	int intr;
	int err = 0;

	might_sleep();

	rpc_clnt_sigmask(clnt, &saved_mask);
	intr = clnt->cl_intr ? 1 : 0;
	set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
	schedule_timeout(NFS4_POLL_RETRY_TIME);
	if (intr && signalled())
		err = -ERESTARTSYS;
	rpc_clnt_sigunmask(clnt, &saved_mask);

	return err;
}
/*
 * Error handling for callers that are allowed to sleep.
 *
 * @server:    server the failed request was sent to
 * @errorcode: negative status returned by the request
 *
 * STALE_CLIENTID / STALE_STATEID / EXPIRED: wait for client state
 * recovery and return the result of that wait.
 * GRACE / DELAY: sleep for the poll-retry interval and return the result
 * of that sleep.
 * Any other code at or below -1000 is logged and mapped to -EIO;
 * everything else is handed back unchanged.
 *
 * Callers in this file retry the operation when 0 is returned.
 */
int
nfs4_handle_error(struct nfs_server *server, int errorcode)
{
	struct nfs4_client *client = server->nfs4_state;

	switch (errorcode) {
	case -NFS4ERR_STALE_CLIENTID:
	case -NFS4ERR_STALE_STATEID:
	case -NFS4ERR_EXPIRED:
		return nfs4_wait_clnt_recover(server->client, client);
	case -NFS4ERR_GRACE:
	case -NFS4ERR_DELAY:
		return nfs4_delay(server->client);
	default:
		break;
	}

	/* Not something we know how to recover from. */
	if (errorcode > -1000)
		return errorcode;

	printk(KERN_WARNING "%s could not handle NFSv4 error %d\n",
			__FUNCTION__, -errorcode);
	return -EIO;
}
static int
nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *page)
......
......@@ -56,6 +56,7 @@ nfs4_stateid one_stateid =
static LIST_HEAD(nfs4_clientid_list);
static void nfs4_recover_state(void *);
extern void nfs4_renew_state(void *);
void
......@@ -98,9 +99,12 @@ nfs4_alloc_client(struct in_addr *addr)
INIT_LIST_HEAD(&clp->cl_unused);
spin_lock_init(&clp->cl_lock);
atomic_set(&clp->cl_count, 1);
INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_state = NFS4CLNT_NEW;
init_waitqueue_head(&clp->cl_waitq);
INIT_RPC_WAITQ(&clp->cl_rpcwaitq, "NFS4 client");
clp->cl_state = 1 << NFS4CLNT_NEW;
}
return clp;
}
......@@ -155,6 +159,9 @@ nfs4_put_client(struct nfs4_client *clp)
return;
list_del(&clp->cl_servers);
spin_unlock(&state_spinlock);
BUG_ON(!list_empty(&clp->cl_superblocks));
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
nfs4_kill_renewd(clp);
nfs4_free_client(clp);
}
......@@ -175,6 +182,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
atomic_inc(&sp->so_count);
sp->so_cred = cred;
list_move(&sp->so_list, &clp->cl_state_owners);
sp->so_generation = clp->cl_generation;
clp->cl_nunused--;
}
return sp;
......@@ -215,13 +223,17 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
new->so_client = clp;
new->so_id = nfs4_alloc_lockowner_id(clp);
new->so_cred = cred;
new->so_generation = clp->cl_generation;
sp = new;
new = NULL;
}
spin_unlock(&clp->cl_lock);
if (new)
kfree(new);
if (!sp)
if (sp) {
if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
nfs4_wait_clnt_recover(server->client, clp);
} else
put_rpccred(cred);
return sp;
}
......@@ -353,6 +365,7 @@ nfs4_put_open_state(struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
int status = 0;
if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
return;
......@@ -360,8 +373,16 @@ nfs4_put_open_state(struct nfs4_state *state)
spin_unlock(&inode->i_lock);
down(&owner->so_sema);
list_del(&state->open_states);
if (state->state != 0)
nfs4_do_close(inode, state);
if (state->state != 0) {
do {
status = nfs4_do_close(inode, state);
if (!status)
break;
up(&owner->so_sema);
status = nfs4_handle_error(NFS_SERVER(inode), status);
down(&owner->so_sema);
} while (!status);
}
up(&owner->so_sema);
iput(inode);
nfs4_free_open_state(state);
......@@ -392,31 +413,65 @@ struct reclaimer_args {
* State recovery routine
*/
void
nfs4_recover_state(struct nfs4_client *clp)
nfs4_recover_state(void *data)
{
struct nfs4_client *clp = (struct nfs4_client *)data;
struct reclaimer_args args = {
.clp = clp,
};
might_sleep();
init_completion(&args.complete);
down_read(&clp->cl_sem);
if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state))
goto out_failed;
if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
goto out_failed_clear;
wait_for_completion(&args.complete);
return;
out_failed_clear:
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state);
smp_mb__after_clear_bit();
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
out_failed:
up_read(&clp->cl_sem);
}
static void
/*
 * Schedule a state recovery attempt.
 *
 * @clp: client to recover; a NULL pointer is silently ignored.
 *
 * Clears NFS4CLNT_OK in clp->cl_state (bracketed by the clear_bit memory
 * barriers) and then queues the client's cl_recoverd work item, which
 * nfs4_alloc_client() binds to nfs4_recover_state().
 */
void
nfs4_schedule_state_recovery(struct nfs4_client *clp)
{
if (!clp)
return;
/* Mark the client as not-OK before the recovery work is queued. */
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_OK, &clp->cl_state);
smp_mb__after_clear_bit();
schedule_work(&clp->cl_recoverd);
}
static int
nfs4_reclaim_open_state(struct nfs4_state_owner *sp)
{
struct nfs4_state *state;
int status;
int status = 0;
list_for_each_entry(state, &sp->so_states, open_states) {
status = nfs4_open_reclaim(sp, state);
if (status) {
if (status >= 0)
continue;
switch (status) {
default:
printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
__FUNCTION__, status);
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:
/*
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
......@@ -425,8 +480,14 @@ nfs4_reclaim_open_state(struct nfs4_state_owner *sp)
sizeof(state->stateid.data));
/* Mark the file as being 'closed' */
state->state = 0;
break;
case -NFS4ERR_STALE_CLIENTID:
goto out_err;
}
}
return 0;
out_err:
return status;
}
static int
......@@ -435,6 +496,7 @@ reclaimer(void *ptr)
struct reclaimer_args *args = (struct reclaimer_args *)ptr;
struct nfs4_client *clp = args->clp;
struct nfs4_state_owner *sp;
int generation;
int status;
daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
......@@ -445,29 +507,58 @@ reclaimer(void *ptr)
/* Are there any NFS mounts out there? */
if (list_empty(&clp->cl_superblocks))
goto out;
if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) {
status = nfs4_proc_renew(clp);
if (status == 0) {
set_bit(NFS4CLNT_OK, &clp->cl_state);
goto out;
}
}
status = nfs4_proc_setclientid(clp, 0, 0);
if (status)
goto out_error;
status = nfs4_proc_setclientid_confirm(clp);
if (status)
goto out_error;
generation = ++(clp->cl_generation);
clear_bit(NFS4CLNT_NEW, &clp->cl_state);
set_bit(NFS4CLNT_OK, &clp->cl_state);
up_read(&clp->cl_sem);
nfs4_schedule_state_renewal(clp);
restart_loop:
spin_lock(&clp->cl_lock);
list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
if (sp->so_generation - generation <= 0)
continue;
atomic_inc(&sp->so_count);
spin_unlock(&clp->cl_lock);
down(&sp->so_sema);
nfs4_reclaim_open_state(sp);
if (sp->so_generation - generation < 0) {
smp_rmb();
sp->so_generation = clp->cl_generation;
status = nfs4_reclaim_open_state(sp);
}
up(&sp->so_sema);
nfs4_put_state_owner(sp);
spin_lock(&clp->cl_lock);
if (status < 0) {
if (status == -NFS4ERR_STALE_CLIENTID)
nfs4_schedule_state_recovery(clp);
goto out;
}
goto restart_loop;
}
spin_unlock(&clp->cl_lock);
out:
up_read(&clp->cl_sem);
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state);
smp_mb__after_clear_bit();
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
return 0;
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n",
NIPQUAD(clp->cl_addr.s_addr));
up_read(&clp->cl_sem);
goto out;
}
......
......@@ -465,6 +465,7 @@ extern void * nfs_root_data(void);
/*
 * Bit numbers within nfs4_client.cl_state. The field is manipulated with
 * set_bit()/clear_bit()/test_bit(), so these are bit positions rather
 * than mutually exclusive state values.
 */
enum nfs4_client_state {
/* Set by the reclaimer after a successful RENEW or SETCLIENTID_CONFIRM;
 * cleared when state recovery is scheduled. */
NFS4CLNT_OK = 0,
/* Initial state of a freshly allocated client; cleared by the reclaimer
 * once SETCLIENTID_CONFIRM has succeeded. */
NFS4CLNT_NEW,
/* Set by nfs4_recover_state() while a recovery pass is running; cleared
 * when the reclaimer finishes or could not be started. */
NFS4CLNT_SETUP_STATE,
};
/*
......@@ -475,7 +476,8 @@ struct nfs4_client {
struct in_addr cl_addr; /* Server identifier */
u64 cl_clientid; /* constant */
nfs4_verifier cl_confirm;
enum nfs4_client_state cl_state;
unsigned long cl_state;
long cl_generation;
u32 cl_lockowner_id;
......@@ -499,6 +501,10 @@ struct nfs4_client {
unsigned long cl_lease_time;
unsigned long cl_last_renewal;
struct work_struct cl_renewd;
struct work_struct cl_recoverd;
wait_queue_head_t cl_waitq;
struct rpc_wait_queue cl_rpcwaitq;
/* Our own IP address, as a null-terminated string.
* This is used to generate the clientid, and the callback address.
......@@ -523,6 +529,7 @@ struct nfs4_state_owner {
u32 so_seqid; /* protected by so_sema */
unsigned int so_flags; /* protected by so_sema */
atomic_t so_count;
long so_generation;
struct rpc_cred *so_cred; /* Associated cred */
struct list_head so_states;
......@@ -556,7 +563,9 @@ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs4_client *);
......@@ -573,7 +582,8 @@ extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
extern void nfs4_recover_state(struct nfs4_client *);
extern int nfs4_handle_error(struct nfs_server *, int);
extern void nfs4_schedule_state_recovery(struct nfs4_client *);
struct nfs4_mount_data;
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment