Commit 8688d954 authored by Linus Torvalds

Merge tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable patches:
   - Fix a crash in the NFSv4 file locking code.
   - Fix an fsync() regression, where we were failing to retry I/O in
     some circumstances.
   - Fix an infinite loop in NFSv4.0 OPEN stateid recovery
   - Fix a memory leak when an attempted pnfs fails.
   - Fix a memory leak in the backchannel code
   - Large hostnames were not supported correctly in NFSv4.1
   - Fix a pNFS/flexfiles bug that was impeding error reporting on I/O.
   - Fix a couple of credential issues in pNFS/flexfiles

  Bugfixes + cleanups:
   - Open flag sanity checks in the NFSv4 atomic open codepath
   - More NFSv4 delegation related bugfixes
   - Various NFSv4.1 backchannel bugfixes and cleanups
   - Fix the NFS swap socket code
   - Various cleanups of the NFSv4 SETCLIENTID and EXCHANGE_ID code
   - Fix a UDP transport deadlock issue

  Features:
   - More RDMA client transport improvements
   - NFSv4.2 LAYOUTSTATS functionality for pnfs flexfiles"

* tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (87 commits)
  nfs: Remove invalid tk_pid from debug message
  nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh
  nfs: Drop bad comment in nfs41_walk_client_list()
  nfs: Remove unneeded micro checking of CONFIG_PROC_FS
  nfs: Don't setting FILE_CREATED flags always
  nfs: Use remove_proc_subtree() instead remove_proc_entry()
  nfs: Remove unused argument in nfs_server_set_fsinfo()
  nfs: Fix a memory leak when meeting an unsupported state protect
  nfs: take extra reference to fl->fl_file when running a LOCKU operation
  NFSv4: When returning a delegation, don't reclaim an incompatible open mode.
  NFSv4.2: LAYOUTSTATS is optional to implement
  NFSv4.2: Fix up a decoding error in layoutstats
  pNFS/flexfiles: Fix the reset of struct pgio_header when resending
  pNFS/flexfiles: Turn off layoutcommit for servers that don't need it
  pnfs/flexfiles: protect ktime manipulation with mirror lock
  nfs: provide pnfs_report_layoutstat when NFS42 is disabled
  nfs: verify open flags before allowing open
  nfs: always update creds in mirror, even when we have an already connected ds
  nfs: fix potential credential leak in ff_layout_update_mirror_cred
  pnfs/flexfiles: report layoutstat regularly
  ...
parents 320cd413 b4839ebe
@@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* pg_authenticate method for nfsv4 callback threads.
*
* The authflavor has been negotiated, so an incorrect flavor is a server
* bug. Drop packets with incorrect authflavor.
* bug. Deny packets with incorrect authflavor.
*
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
@@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
return SVC_DROP;
return SVC_DENIED;
break;
case RPC_AUTH_GSS:
/* No RPC_AUTH_GSS support yet in NFSv4.1 */
if (svc_is_backchannel(rqstp))
return SVC_DROP;
return SVC_DENIED;
}
return SVC_OK;
}
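Reviewer note: the behavioural difference that motivates these SVC_DROP to SVC_DENIED conversions, summarized as a sketch (my reading of the sunrpc server semantics, not verbatim kernel documentation):

/*
 * SVC_DROP   - discard the request silently; no reply is sent, so the
 *              sender keeps retransmitting the callback until it
 *              gives up.
 * SVC_DENIED - the sunrpc layer sends an RPC-level authentication
 *              error reply, so a server-side flavor bug fails fast
 *              instead of hanging the callback channel.
 */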
@@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
/* Normal */
if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
slot->seq_nr++;
if (likely(args->csa_sequenceid == slot->seq_nr + 1))
goto out_ok;
}
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
@@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_process_state *cps)
{
struct nfs4_slot_table *tbl;
struct nfs4_slot *slot;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
@@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
goto out;
tbl = &clp->cl_session->bc_slot_table;
slot = tbl->slots + args->csa_slotid;
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
spin_unlock(&tbl->slot_tbl_lock);
status = htonl(NFS4ERR_DELAY);
/* Return NFS4ERR_BADSESSION if we're draining the session
* in order to reset it.
*/
if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
status = htonl(NFS4ERR_BADSESSION);
goto out;
goto out_unlock;
}
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
spin_unlock(&tbl->slot_tbl_lock);
memcpy(&res->csr_sessionid, &args->csa_sessionid,
sizeof(res->csr_sessionid));
res->csr_sequenceid = args->csa_sequenceid;
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
status = validate_seqid(tbl, args);
if (status)
goto out;
goto out_unlock;
cps->slotid = args->csa_slotid;
@@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
goto out;
goto out_unlock;
}
memcpy(&res->csr_sessionid, &args->csa_sessionid,
sizeof(res->csr_sessionid));
res->csr_sequenceid = args->csa_sequenceid;
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
/*
* RFC5661 20.9.3
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
slot->seq_nr++;
out_unlock:
spin_unlock(&tbl->slot_tbl_lock);
out:
cps->clp = clp; /* put in nfs4_callback_compound */
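Reviewer note: taken together, the callback hunks above make the backchannel slot follow the RFC 5661 sequencing rules. A compact restatement (a paraphrase of validate_seqid() and section 20.9.3, not verbatim kernel code):

/*
 * csa_sequenceid == slot->seq_nr + 1: new request.  The slot's seq_nr
 *     is now bumped by the caller only after the operation succeeds,
 *     so a failed CB_SEQUENCE leaves the slot state (sequence ID,
 *     cached reply) unchanged, as 20.9.3 requires.
 * csa_sequenceid == slot->seq_nr:     retransmission; replay handling.
 * anything else:                      sequencing error.
 */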
@@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
if (status == __constant_htonl(NFS4ERR_RESOURCE))
if (status == htonl(NFS4ERR_RESOURCE))
return rpc_garbage_args;
if (hdr_arg.minorversion == 0) {
@@ -825,7 +825,6 @@ static int nfs_init_server(struct nfs_server *server,
* Load up the server record from information gained in an fsinfo record
*/
static void nfs_server_set_fsinfo(struct nfs_server *server,
struct nfs_fh *mntfh,
struct nfs_fsinfo *fsinfo)
{
unsigned long max_rpc_payload;
@@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
if (error < 0)
goto out_error;
nfs_server_set_fsinfo(server, mntfh, &fsinfo);
nfs_server_set_fsinfo(server, &fsinfo);
/* Get some general file system info */
if (server->namelen == 0) {
@@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *proc_fs_nfs;
static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
@@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
{
struct nfs_server *server;
struct nfs_client *clp;
char dev[8], fsid[17];
char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'
char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0'
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_volume_list) {
seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
seq_puts(m, "NV SERVER PORT DEV FSID"
" FSC\n");
return 0;
}
/* display one transport per line on subsequent lines */
server = list_entry(v, struct nfs_server, master_link);
clp = server->nfs_client;
snprintf(dev, 8, "%u:%u",
snprintf(dev, sizeof(dev), "%u:%u",
MAJOR(server->s_dev), MINOR(server->s_dev));
snprintf(fsid, 17, "%llx:%llx",
snprintf(fsid, sizeof(fsid), "%llx:%llx",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
rcu_read_lock();
seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
seq_printf(m, "v%u %s %s %-12s %-33s %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
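Aside: a small userspace demonstration of the truncation the old buffer sizes allowed (the device numbers here are invented for the example):

#include <stdio.h>

int main(void)
{
	char small[8], roomy[13];
	unsigned int major = 259, minor = 1048575;

	snprintf(small, sizeof(small), "%u:%u", major, minor); /* "259:104" */
	snprintf(roomy, sizeof(roomy), "%u:%u", major, minor); /* "259:1048575" */
	printf("old \"%s\" vs new \"%s\"\n", small, roomy);
	return 0;
}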
@@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net)
*/
int __init nfs_fs_proc_init(void)
{
struct proc_dir_entry *p;
proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
if (!proc_fs_nfs)
if (!proc_mkdir("fs/nfsfs", NULL))
goto error_0;
/* a file of servers with which we're dealing */
p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
if (!p)
if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers"))
goto error_1;
/* a file of volumes that we have mounted */
p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
if (!p)
goto error_2;
return 0;
if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes"))
goto error_1;
error_2:
remove_proc_entry("servers", proc_fs_nfs);
return 0;
error_1:
remove_proc_entry("fs/nfsfs", NULL);
remove_proc_subtree("fs/nfsfs", NULL);
error_0:
return -ENOMEM;
}
@@ -1464,9 +1456,7 @@ int __init nfs_fs_proc_init(void)
*/
void nfs_fs_proc_exit(void)
{
remove_proc_entry("volumes", proc_fs_nfs);
remove_proc_entry("servers", proc_fs_nfs);
remove_proc_entry("fs/nfsfs", NULL);
remove_proc_subtree("fs/nfsfs", NULL);
}
#endif /* CONFIG_PROC_FS */
@@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
{
int err;
if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
*opened |= FILE_CREATED;
err = finish_open(file, dentry, do_open, opened);
if (err)
goto out;
@@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page)
return nfs_wb_page(inode, page);
}
#ifdef CONFIG_NFS_SWAP
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
int ret;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
*span = sis->pages;
rcu_read_lock();
ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
rcu_read_unlock();
return ret;
return rpc_clnt_swap_activate(clnt);
}
static void nfs_swap_deactivate(struct file *file)
{
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
rcu_read_lock();
xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
rcu_read_unlock();
rpc_clnt_swap_deactivate(clnt);
}
#endif
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
@@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = {
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
#endif
};
/*
@@ -9,12 +9,17 @@
#ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
#define FS_NFS_NFS4FLEXFILELAYOUT_H
#define FF_FLAGS_NO_LAYOUTCOMMIT 1
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
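Note: this interval rate-limits LAYOUTSTATS reports (see "pnfs/flexfiles: report layoutstat regularly" in the shortlog). A minimal sketch of the gate; the function name is hypothetical, and the real check is wired into the flexfiles I/O paths:

static bool ff_layoutstats_due(struct nfs4_ff_layout_mirror *mirror,
			       ktime_t now)
{
	s64 ms = ktime_to_ms(ktime_sub(now, mirror->last_report_time));

	return ms >= FF_LAYOUTSTATS_REPORT_INTERVAL;
}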
struct nfs4_ff_ds_version {
u32 version;
u32 minor_version;
@@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err {
struct nfs4_deviceid deviceid;
};
struct nfs4_ff_io_stat {
__u64 ops_requested;
__u64 bytes_requested;
__u64 ops_completed;
__u64 bytes_completed;
__u64 bytes_not_delivered;
ktime_t total_busy_time;
ktime_t aggregate_completion_time;
};
struct nfs4_ff_busy_timer {
ktime_t start_time;
atomic_t n_ops;
};
struct nfs4_ff_layoutstat {
struct nfs4_ff_io_stat io_stat;
struct nfs4_ff_busy_timer busy_timer;
};
struct nfs4_ff_layout_mirror {
struct pnfs_layout_segment *lseg; /* back pointer */
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
struct nfs4_string user_name;
struct nfs4_string group_name;
u32 uid;
u32 gid;
struct rpc_cred *cred;
spinlock_t lock;
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
ktime_t last_report_time;
};
struct nfs4_ff_layout_segment {
struct pnfs_layout_segment generic_hdr;
u64 stripe_unit;
u32 flags;
u32 mirror_array_cnt;
struct nfs4_ff_layout_mirror **mirror_array;
};
@@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
__func__, PTR_ERR(cred));
return PTR_ERR(cred);
} else {
mirror->cred = cred;
if (cmpxchg(&mirror->cred, NULL, cred))
put_rpccred(cred);
}
}
return 0;
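Note: the cmpxchg() above is the usual once-only pointer install; spelled out (illustrative, not the exact kernel code):

struct rpc_cred *old = cmpxchg(&mirror->cred, NULL, cred);

if (old != NULL)		/* another task already installed a cred */
	put_rpccred(cred);	/* drop ours instead of leaking it */

As I read this hunk, the previous unconditional mirror->cred = cred could overwrite, and therefore leak, a credential installed by a racing task; that is the leak named in the shortlog.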
@@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
if (ds->ds_clp)
goto out;
goto out_update_creds;
flavor = nfs4_ff_layout_choose_authflavor(mirror);
@@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
}
}
}
out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
ds = NULL;
out:
@@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
}
out:
trace_nfs_getattr_exit(inode, err);
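Note: the dtsize change above is observable from userspace; a hedged demo (the mount path is an example):

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;

	/* For a directory on an NFS mount, st_blksize now reflects the
	 * server's preferred readdir transfer size (dtsize), a useful
	 * buffer-size hint for getdents()-based tools. */
	if (stat("/mnt/nfs/some-dir", &st) == 0)
		printf("st_blksize = %ld\n", (long)st.st_blksize);
	return 0;
}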
@@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
#ifdef CONFIG_PROC_FS
rpc_proc_register(&init_net, &nfs_rpcstat);
#endif
if ((err = register_nfs_fs()) != 0)
err = register_nfs_fs();
if (err)
goto out0;
return 0;
out0:
#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
#endif
nfs_destroy_directcache();
out1:
nfs_destroy_writepagecache();
@@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
#endif
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
@@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
if (args->npages != 0)
xdr_write_pages(xdr, args->pages, 0, args->len);
else
xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
xdr_reserve_space(xdr, args->len);
error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
@@ -5,11 +5,18 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
/*
* FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
* more? Need to consider not to pre-alloc too much for a compound.
*/
#define PNFS_LAYOUTSTATS_MAXDEV (4)
/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];
@@ -10,6 +10,11 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
#include "iostat.h"
#include "pnfs.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
@@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
static void
nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
struct nfs_server *server = NFS_SERVER(data->args.inode);
nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
&data->res.seq_res, task);
}
static void
nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
switch (task->tk_status) {
case 0:
break;
case -ENOTSUPP:
case -EOPNOTSUPP:
NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
default:
dprintk("%s server returns %d\n", __func__, task->tk_status);
}
}
static void
nfs42_layoutstat_release(void *calldata)
{
struct nfs42_layoutstat_data *data = calldata;
struct nfs_server *nfss = NFS_SERVER(data->args.inode);
if (nfss->pnfs_curr_ld->cleanup_layoutstats)
nfss->pnfs_curr_ld->cleanup_layoutstats(data);
pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
smp_mb__before_atomic();
clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
smp_mb__after_atomic();
nfs_iput_and_deactive(data->inode);
kfree(data->args.devinfo);
kfree(data);
}
static const struct rpc_call_ops nfs42_layoutstat_ops = {
.rpc_call_prepare = nfs42_layoutstat_prepare,
.rpc_call_done = nfs42_layoutstat_done,
.rpc_release = nfs42_layoutstat_release,
};
int nfs42_proc_layoutstats_generic(struct nfs_server *server,
struct nfs42_layoutstat_data *data)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
};
struct rpc_task_setup task_setup = {
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs42_layoutstat_ops,
.callback_data = data,
.flags = RPC_TASK_ASYNC,
};
struct rpc_task *task;
data->inode = nfs_igrab_and_active(data->args.inode);
if (!data->inode) {
nfs42_layoutstat_release(data);
return -EAGAIN;
}
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
task = rpc_run_task(&task_setup);
if (IS_ERR(task))
return PTR_ERR(task);
return 0;
}
@@ -4,6 +4,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
#include "nfs42.h"
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@@ -22,6 +24,16 @@
1 /* whence */ + \
2 /* offset */ + \
2 /* length */)
#define encode_io_info_maxsz 4
#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
encode_stateid_maxsz + \
encode_io_info_maxsz + \
encode_io_info_maxsz + \
1 /* opaque devaddr4 length */ + \
XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -45,6 +57,14 @@
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
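For scale, a rough expansion of these bounds; the helper macro values below are assumptions taken from the usual nfs4xdr.h conventions (op_decode_hdr_maxsz = 2, encode_stateid_maxsz = 4 XDR words for a 16-byte stateid), stated only for illustration:

/*
 * encode_layoutstats_maxsz ~= 2 (op hdr) + 2 (offset) + 2 (length)
 *	+ 4 (stateid) + 4 + 4 (two io_infos) + 1 (devaddr4 length)
 *	+ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE) = 64
 *	= 83 four-byte words, roughly 332 bytes per op,
 *
 * so PNFS_LAYOUTSTATS_MAXDEV (4) ops per compound fit comfortably
 * inside a single page of send buffer.
 */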
static void encode_fallocate(struct xdr_stream *xdr,
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
encode_uint32(xdr, args->sa_what);
}
static void encode_layoutstats(struct xdr_stream *xdr,
struct nfs42_layoutstat_args *args,
struct nfs42_layoutstat_devinfo *devinfo,
struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);
p = reserve_space(xdr, 8 + 8);
p = xdr_encode_hyper(p, devinfo->offset);
p = xdr_encode_hyper(p, devinfo->length);
encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
p = xdr_encode_hyper(p, devinfo->read_count);
p = xdr_encode_hyper(p, devinfo->read_bytes);
p = xdr_encode_hyper(p, devinfo->write_count);
p = xdr_encode_hyper(p, devinfo->write_bytes);
p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
NFS4_DEVICEID4_SIZE);
/* Encode layoutupdate4 */
*p++ = cpu_to_be32(devinfo->layout_type);
if (devinfo->layoutstats_encode != NULL)
devinfo->layoutstats_encode(xdr, args, devinfo);
else
encode_uint32(xdr, 0);
}
/*
* Encode ALLOCATE request
*/
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
encode_nops(&hdr);
}
/*
* Encode LAYOUTSTATS request
*/
static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs42_layoutstat_args *args)
{
int i;
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
for (i = 0; i < args->num_dev; i++)
encode_layoutstats(xdr, args, &args->devinfo[i], &hdr);
encode_nops(&hdr);
}
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -169,6 +238,12 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
return -EIO;
}
static int decode_layoutstats(struct xdr_stream *xdr,
struct nfs42_layoutstat_res *res)
{
return decode_op_hdr(xdr, OP_LAYOUTSTATS);
}
/*
* Decode ALLOCATE request
*/
@@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
out:
return status;
}
/*
* Decode LAYOUTSTATS request
*/
static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct nfs42_layoutstat_res *res)
{
struct compound_hdr hdr;
int status, i;
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
for (i = 0; i < res->num_dev; i++) {
status = decode_layoutstats(xdr, res);
if (status)
goto out;
}
out:
res->rpc_status = status;
return status;
}
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
@@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
@@ -676,7 +676,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
break;
}
/* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
dprintk("NFS: <-- %s status = %d\n", __func__, status);
nfs_put_client(prev);
@@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp)
dprintk("NFS: open file(%pd2)\n", dentry);
err = nfs_check_flags(openflags);
if (err)
return err;
if ((openflags & O_ACCMODE) == 3)
openflags--;
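For context: the magic value 3 is Linux's nonstandard open mode (numerically O_WRONLY | O_RDWR), meaning "check read and write permission, but the descriptor is usable only for ioctl(2)". NFSv4 has no such mode, so the client downgrades it to plain O_RDWR before the OPEN; the decrement is equivalent to this illustrative form:

if ((openflags & O_ACCMODE) == 3)	/* ioctl-only open mode */
	openflags = (openflags & ~O_ACCMODE) | O_RDWR;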
@@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
goto out;
}
if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot obtained referral\n");
ret = -EREMOTE;
goto out;
}
memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
out:
nfs_free_fattr(fsinfo.fattr);
@@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp)
int nfs_idmap_init(void)
{
int ret;
ret = nfs_idmap_init_keyring();
if (ret != 0)
goto out;
out:
return ret;
return nfs_idmap_init_keyring();
}
void nfs_idmap_quit(void)
@@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
goto do_confirm;
nfs4_begin_drain_session(clp);
status = nfs4_proc_exchange_id(clp, cred);
if (status != 0)
goto out;
@@ -1482,6 +1481,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
spin_unlock(&state->state_lock);
}
nfs4_put_open_state(state);
clear_bit(NFS4CLNT_RECLAIM_NOGRACE,
&state->flags);
spin_lock(&sp->so_lock);
goto restart;
}
@@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
clp->cl_mvops->reboot_recovery_ops;
int status;
nfs4_begin_drain_session(clp);
cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
return -ENOENT;
@@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int);
#define encode_setclientid_maxsz \
(op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
/* client name */ \
1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* sc_prog */ + \
1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
@@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int);
#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
encode_verifier_maxsz + \
1 /* co_ownerid.len */ + \
XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
/* eia_clientowner */ \
1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* flags */ + \
1 /* spa_how */ + \
/* max is SP4_MACH_CRED (for now) */ + \
@@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
encode_nfs4_verifier(xdr, setclientid->sc_verifier);
encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id),
setclientid->sc_clnt->cl_owner_id);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(setclientid->sc_prog);
encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(setclientid->sc_cb_ident);
*p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident);
}
static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
@@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr,
encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
encode_nfs4_verifier(xdr, args->verifier);
encode_string(xdr, args->id_len, args->id);
encode_string(xdr, strlen(args->client->cl_owner_id),
args->client->cl_owner_id);
encode_uint32(xdr, args->flags);
encode_uint32(xdr, args->state_protect.how);
@@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(SEEK, enc_seek, dec_seek),
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
#endif /* CONFIG_NFS_V4_2 */
};
@@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
dprintk("NFS: %5u initiated pgio call "
dprintk("NFS: initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
hdr->args.count,
@@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
static void nfs_pgio_release(void *calldata)
{
struct nfs_pgio_header *hdr = calldata;
if (hdr->rw_ops->rw_release)
hdr->rw_ops->rw_release(hdr);
nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
@@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror,
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
* @inode: pointer to inode
* @doio: pointer to io function
* @pg_ops: pointer to pageio operations
* @compl_ops: pointer to pageio completion operations
* @rw_ops: pointer to nfs read/write operations
* @bsize: io block size
* @io_flags: extra parameters for the io function
*/
@@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
* nfs_pageio_complete_mirror - Complete I/O on the current mirror of an
* nfs_pageio_descriptor
* @desc: pointer to io descriptor
* @mirror_idx: pointer to mirror index
*/
static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
u32 mirror_idx)
@@ -35,6 +35,7 @@
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
@@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
static enum pnfs_try_status
@@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
/*
@@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
}
return thp;
}
#if IS_ENABLED(CONFIG_NFS_V4_2)
int
pnfs_report_layoutstat(struct inode *inode)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs42_layoutstat_data *data;
struct pnfs_layout_hdr *hdr;
int status = 0;
if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
goto out;
if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
goto out;
if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
goto out;
spin_lock(&inode->i_lock);
if (!NFS_I(inode)->layout) {
spin_unlock(&inode->i_lock);
goto out;
}
hdr = NFS_I(inode)->layout;
pnfs_get_layout_hdr(hdr);
spin_unlock(&inode->i_lock);
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) {
status = -ENOMEM;
goto out_put;
}
data->args.fh = NFS_FH(inode);
data->args.inode = inode;
nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
status = ld->prepare_layoutstats(&data->args);
if (status)
goto out_free;
status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
out:
dprintk("%s returns %d\n", __func__, status);
return status;
out_free:
kfree(data);
out_put:
pnfs_put_layout_hdr(hdr);
smp_mb__before_atomic();
clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
smp_mb__after_atomic();
goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif
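Reviewer note on concurrency, as I read pnfs_report_layoutstat() above: NFS_INO_LAYOUTSTATS acts as a single-flight gate, so at most one LAYOUTSTATS RPC is in flight per inode. The bit is set here with test_and_set_bit() and cleared in nfs42_layoutstat_release(), with the smp_mb__before_atomic()/smp_mb__after_atomic() pair ordering the clear against the release-side stores; a caller that loses the test_and_set_bit() race simply skips that reporting round and returns 0.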
@@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
};
struct pnfs_layout_hdr {
@@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
@@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
#endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2)
int pnfs_report_layoutstat(struct inode *inode);
#else
static inline int
pnfs_report_layoutstat(struct inode *inode)
{
return 0;
}
#endif
#endif /* FS_NFS_PNFS_H */
@@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
nfs_unlock_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
@@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
{
/* do nothing! */
}
/*
* Special version of should_remove_suid() that ignores capabilities.
*/
@@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
/* Set up the initial task struct. */
nfs_ops->commit_setup(data, &msg);
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
dprintk("NFS: initiated commit call\n");
nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
@@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
.rw_mode = FMODE_WRITE,
.rw_alloc_header = nfs_writehdr_alloc,
.rw_free_header = nfs_writehdr_free,
.rw_release = nfs_writeback_release_common,
.rw_done = nfs_writeback_done,
.rw_result = nfs_writeback_result,
.rw_initiate = nfs_initiate_write,
......
@@ -500,6 +500,7 @@ enum {
NFSPROC4_CLNT_SEEK,
NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
};
/* nfs41 types */
@@ -219,6 +219,7 @@ struct nfs_inode {
#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
static inline struct nfs_inode *NFS_I(const struct inode *inode)
{
@@ -237,5 +237,6 @@ struct nfs_server {
#define NFS_CAP_SEEK (1U << 19)
#define NFS_CAP_ALLOCATE (1U << 20)
#define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22)
#endif
@@ -67,7 +67,6 @@ struct nfs_rw_ops {
const fmode_t rw_mode;
struct nfs_pgio_header *(*rw_alloc_header)(void);
void (*rw_free_header)(struct nfs_pgio_header *);
void (*rw_release)(struct nfs_pgio_header *);
int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
struct inode *);
void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
@@ -316,6 +316,49 @@ struct nfs4_layoutreturn {
int rpc_status;
};
#define PNFS_LAYOUTSTATS_MAXSIZE 256
struct nfs42_layoutstat_args;
struct nfs42_layoutstat_devinfo;
typedef void (*layoutstats_encode_t)(struct xdr_stream *,
struct nfs42_layoutstat_args *,
struct nfs42_layoutstat_devinfo *);
/* Per file per deviceid layoutstats */
struct nfs42_layoutstat_devinfo {
struct nfs4_deviceid dev_id;
__u64 offset;
__u64 length;
__u64 read_count;
__u64 read_bytes;
__u64 write_count;
__u64 write_bytes;
__u32 layout_type;
layoutstats_encode_t layoutstats_encode;
void *layout_private;
};
struct nfs42_layoutstat_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh *fh;
struct inode *inode;
nfs4_stateid stateid;
int num_dev;
struct nfs42_layoutstat_devinfo *devinfo;
};
struct nfs42_layoutstat_res {
struct nfs4_sequence_res seq_res;
int num_dev;
int rpc_status;
};
struct nfs42_layoutstat_data {
struct inode *inode;
struct nfs42_layoutstat_args args;
struct nfs42_layoutstat_res res;
};
struct stateowner_id {
__u64 create_time;
__u32 uniquifier;
@@ -984,17 +1027,14 @@ struct nfs4_readlink_res {
struct nfs4_sequence_res seq_res;
};
#define NFS4_SETCLIENTID_NAMELEN (127)
struct nfs4_setclientid {
const nfs4_verifier * sc_verifier;
unsigned int sc_name_len;
char sc_name[NFS4_SETCLIENTID_NAMELEN + 1];
u32 sc_prog;
unsigned int sc_netid_len;
char sc_netid[RPCBIND_MAXNETIDLEN + 1];
unsigned int sc_uaddr_len;
char sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
u32 sc_cb_ident;
struct nfs_client *sc_clnt;
struct rpc_cred *sc_cred;
};
@@ -1142,12 +1182,9 @@ struct nfs41_state_protection {
struct nfs4_op_map allow;
};
#define NFS4_EXCHANGE_ID_LEN (48)
struct nfs41_exchange_id_args {
struct nfs_client *client;
nfs4_verifier *verifier;
unsigned int id_len;
char id[NFS4_EXCHANGE_ID_LEN];
u32 flags;
struct nfs41_state_protection state_protect;
};
@@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
int bc_send(struct rpc_rqst *req);
/*
* Determine if a shared backchannel is in use
@@ -56,6 +56,7 @@ struct rpc_clnt {
struct rpc_rtt * cl_rtt; /* RTO estimator data */
const struct rpc_timeout *cl_timeout; /* Timeout strategy */
atomic_t cl_swapper; /* swapfile count */
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME+1];
struct rpc_pipe_dir_head cl_pipedir_objects;
@@ -205,8 +205,7 @@ struct rpc_wait_queue {
*/
struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
const struct rpc_call_ops *ops);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
@@ -269,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q,
}
#endif
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
int rpc_clnt_swap_activate(struct rpc_clnt *clnt);
void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt);
#else
static inline int
rpc_clnt_swap_activate(struct rpc_clnt *clnt)
{
return -EINVAL;
}
static inline void
rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
{
}
#endif /* CONFIG_SUNRPC_SWAP */
#endif /* _LINUX_SUNRPC_SCHED_H_ */
@@ -133,6 +133,9 @@ struct rpc_xprt_ops {
void (*close)(struct rpc_xprt *xprt);
void (*destroy)(struct rpc_xprt *xprt);
void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
int (*enable_swap)(struct rpc_xprt *xprt);
void (*disable_swap)(struct rpc_xprt *xprt);
void (*inject_disconnect)(struct rpc_xprt *xprt);
};
/*
@@ -180,7 +183,7 @@ struct rpc_xprt {
atomic_t num_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char resvport : 1; /* use a reserved port */
unsigned int swapper; /* we're swapping over this
atomic_t swapper; /* we're swapping over this
transport */
unsigned int bind_index; /* bind function index */
@@ -212,7 +215,8 @@ struct rpc_xprt {
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct svc_serv *bc_serv; /* The RPC service which will */
/* process the callback */
unsigned int bc_alloc_count; /* Total number of preallocs */
int bc_alloc_count; /* Total number of preallocs */
atomic_t bc_free_slots;
spinlock_t bc_pa_lock; /* Protects the preallocated
* items */
struct list_head bc_pa_list; /* List of preallocated
@@ -241,6 +245,7 @@ struct rpc_xprt {
const char *address_strings[RPC_DISPLAY_MAX];
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct dentry *debugfs; /* debugfs directory */
atomic_t inject_disconnect;
#endif
};
@@ -327,6 +332,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *
return p + xprt->tsh_size;
}
static inline int
xprt_enable_swap(struct rpc_xprt *xprt)
{
return xprt->ops->enable_swap(xprt);
}
static inline void
xprt_disable_swap(struct rpc_xprt *xprt)
{
xprt->ops->disable_swap(xprt);
}
/*
* Transport switch helper functions
*/
@@ -345,7 +362,6 @@ void xprt_release_rqst_cong(struct rpc_task *task);
void xprt_disconnect_done(struct rpc_xprt *xprt);
void xprt_force_disconnect(struct rpc_xprt *xprt);
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
int xs_swapper(struct rpc_xprt *xprt, int enable);
bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *);
void xprt_unlock_connect(struct rpc_xprt *, void *);
@@ -431,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
return test_and_set_bit(XPRT_BINDING, &xprt->state);
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
extern unsigned int rpc_inject_disconnect;
static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
if (!rpc_inject_disconnect)
return;
if (atomic_dec_return(&xprt->inject_disconnect))
return;
atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
xprt->ops->inject_disconnect(xprt);
}
#else
static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
}
#endif
#endif /* __KERNEL__*/
#endif /* _LINUX_SUNRPC_XPRT_H */
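Note: how the new fault-injection knob is driven end to end, assuming debugfs is mounted at the conventional /sys/kernel/debug and given the sunrpc debugfs directory created in sunrpc_debugfs_init() (see the debugfs.c hunks later in this series):

/*
 *   # echo 10 > /sys/kernel/debug/sunrpc/inject_fault/disconnect
 *
 * Every 10th call that reaches xprt_inject_disconnect() then invokes
 * the transport's ->inject_disconnect() method; writing 0 disables
 * injection again.
 */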
@@ -56,7 +56,8 @@
#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
/* memory registration strategies */
/* Memory registration strategies, by number.
* This is part of a kernel / user space API. Do not remove. */
enum rpcrdma_memreg {
RPCRDMA_BOUNCEBUFFERS = 0,
RPCRDMA_REGISTER,
@@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
return xprt->bc_alloc_count > 0;
return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
}
static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
atomic_add(n, &xprt->bc_free_slots);
xprt->bc_alloc_count += n;
}
static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
atomic_sub(n, &xprt->bc_free_slots);
return xprt->bc_alloc_count -= n;
}
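Reviewer note: the invariant these two counters maintain, as I read the hunks in this file: bc_free_slots counts backchannel slots the server may still legitimately use, while bc_alloc_count counts rpc_rqst structures parked on bc_pa_list. xprt_need_to_requeue() then reads as "fewer buffers are parked than there are usable slots", so a freed request is returned to the list while that holds and destroyed only once the session no longer needs it.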
@@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req)
dprintk("RPC: free allocations for req= %p\n", req);
WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
xbufp = &req->rq_private_buf;
xbufp = &req->rq_rcv_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
kfree(req);
}
static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
{
struct page *page;
/* Preallocate one XDR receive buffer */
page = alloc_page(gfp_flags);
if (page == NULL)
return -ENOMEM;
buf->head[0].iov_base = page_address(page);
buf->head[0].iov_len = PAGE_SIZE;
buf->tail[0].iov_base = NULL;
buf->tail[0].iov_len = 0;
buf->page_len = 0;
buf->len = 0;
buf->buflen = PAGE_SIZE;
return 0;
}
static
struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
{
struct rpc_rqst *req;
/* Pre-allocate one backchannel rpc_rqst */
req = kzalloc(sizeof(*req), gfp_flags);
if (req == NULL)
return NULL;
req->rq_xprt = xprt;
INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
printk(KERN_ERR "Failed to create bc receive xbuf\n");
goto out_free;
}
req->rq_rcv_buf.len = PAGE_SIZE;
/* Preallocate one XDR send buffer */
if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) {
printk(KERN_ERR "Failed to create bc snd xbuf\n");
goto out_free;
}
return req;
out_free:
xprt_free_allocation(req);
return NULL;
}
/*
* Preallocate up to min_reqs structures and related buffers for use
* by the backchannel. This function can be called multiple times
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
struct page *page_rcv = NULL, *page_snd = NULL;
struct xdr_buf *xbufp = NULL;
struct rpc_rqst *req, *tmp;
struct rpc_rqst *req;
struct list_head tmp_list;
int i;
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
/* Add the allocated buffer to the tmp list */
dprintk("RPC: adding req= %p\n", req);
list_add(&req->rq_bc_pa_list, &tmp_list);
req->rq_xprt = xprt;
INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
page_rcv = alloc_page(GFP_KERNEL);
if (page_rcv == NULL) {
printk(KERN_ERR "Failed to create bc receive xbuf\n");
goto out_free;
}
xbufp = &req->rq_rcv_buf;
xbufp->head[0].iov_base = page_address(page_rcv);
xbufp->head[0].iov_len = PAGE_SIZE;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = PAGE_SIZE;
xbufp->buflen = PAGE_SIZE;
/* Preallocate one XDR send buffer */
page_snd = alloc_page(GFP_KERNEL);
if (page_snd == NULL) {
printk(KERN_ERR "Failed to create bc snd xbuf\n");
goto out_free;
}
xbufp = &req->rq_snd_buf;
xbufp->head[0].iov_base = page_address(page_snd);
xbufp->head[0].iov_len = 0;
xbufp->tail[0].iov_base = NULL;
xbufp->tail[0].iov_len = 0;
xbufp->page_len = 0;
xbufp->len = 0;
xbufp->buflen = PAGE_SIZE;
}
/*
@@ -167,7 +181,10 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
/*
* Memory allocation failed, free the temporary list
*/
list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
while (!list_empty(&tmp_list)) {
req = list_first_entry(&tmp_list,
struct rpc_rqst,
rq_bc_pa_list);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
}
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
if (list_empty(&xprt->bc_pa_list))
if (atomic_read(&xprt->bc_free_slots) <= 0)
goto not_found;
if (list_empty(&xprt->bc_pa_list)) {
req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
if (!req)
goto not_found;
/* Note: this 'free' request adds it to xprt->bc_pa_list */
xprt_free_bc_request(req);
}
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req)
req->rq_connect_cookie = xprt->connect_cookie - 1;
smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
smp_mb__after_atomic();
if (!xprt_need_to_requeue(xprt)) {
/*
* Return it to the list of preallocations so that it
* may be reused by a new callback request.
*/
spin_lock_bh(&xprt->bc_pa_lock);
if (xprt_need_to_requeue(xprt)) {
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
req = NULL;
}
spin_unlock_bh(&xprt->bc_pa_lock);
if (req != NULL) {
/*
* The last remaining session was destroyed while this
* entry was in use. Free the entry and don't attempt
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req)
xprt_free_allocation(req);
return;
}
/*
* Return it to the list of preallocations so that it
* may be reused by a new callback request.
*/
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
}
/*
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
xprt->bc_alloc_count--;
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
/*
* The NFSv4.1 callback service helper routines.
* They implement the transport level processing required to send the
* reply over an existing open connection previously established by the client.
*/
#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/bc_xprt.h>
#define RPCDBG_FACILITY RPCDBG_SVCDSP
/* Empty callback ops */
static const struct rpc_call_ops nfs41_callback_ops = {
};
/*
* Send the callback reply
*/
int bc_send(struct rpc_rqst *req)
{
struct rpc_task *task;
int ret;
dprintk("RPC: bc_send req= %p\n", req);
task = rpc_run_bc_task(req, &nfs41_callback_ops);
if (IS_ERR(task))
ret = PTR_ERR(task);
else {
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
ret = task->tk_status;
rpc_put_task(task);
}
dprintk("RPC: bc_send ret= %d\n", ret);
return ret;
}
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
if (sk_memalloc_socks()) {
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->swapper)
task->tk_flags |= RPC_TASK_SWAPPER;
rcu_read_unlock();
}
if (atomic_read(&clnt->cl_swapper))
task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
* @tk_ops: RPC call ops
*/
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
const struct rpc_call_ops *tk_ops)
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
.callback_ops = tk_ops,
.callback_ops = &rpc_default_ops,
.flags = RPC_TASK_SOFTCONN,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task)
req->rq_callsize + req->rq_rcvsize);
if (req->rq_buffer != NULL)
return;
xprt_inject_disconnect(xprt);
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
if (!xprt_prepare_transmit(task)) {
/*
* Could not reserve the transport. Try again after the
* transport is released.
*/
task->tk_status = 0;
task->tk_action = call_bc_transmit;
return;
}
if (!xprt_prepare_transmit(task))
goto out_retry;
task->tk_action = rpc_exit_task;
if (task->tk_status < 0) {
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
return;
goto out_done;
}
if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
req->rq_bytes_sent = 0;
xprt_transmit(task);
if (task->tk_status == -EAGAIN)
goto out_nospace;
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
case 0:
/* Success */
break;
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ECONNRESET:
case -ECONNREFUSED:
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
break;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task)
break;
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
out_done:
task->tk_action = rpc_exit_task;
return;
out_nospace:
req->rq_connect_cookie = req->rq_xprt->connect_cookie;
out_retry:
task->tk_status = 0;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net)
spin_unlock(&sn->rpc_client_lock);
}
#endif
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
int
rpc_clnt_swap_activate(struct rpc_clnt *clnt)
{
int ret = 0;
struct rpc_xprt *xprt;
if (atomic_inc_return(&clnt->cl_swapper) == 1) {
retry:
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
if (!xprt) {
/*
* If we didn't get a reference, then we likely are
* racing with a migration event. Wait for a grace
* period and try again.
*/
synchronize_rcu();
goto retry;
}
ret = xprt_enable_swap(xprt);
xprt_put(xprt);
}
return ret;
}
EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
void
rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
retry:
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
if (!xprt) {
/*
* If we didn't get a reference, then we likely are
* racing with a migration event. Wait for a grace
* period and try again.
*/
synchronize_rcu();
goto retry;
}
xprt_disable_swap(xprt);
xprt_put(xprt);
}
}
EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
#endif /* CONFIG_SUNRPC_SWAP */
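Note: the shape of this pair is a refcounted enable/disable gate; distilled (illustrative, not the kernel code verbatim):

if (atomic_inc_return(&clnt->cl_swapper) == 1)		/* first activation */
	ret = xprt_enable_swap(xprt);
...
if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)	/* last one out */
	xprt_disable_swap(xprt);

atomic_dec_if_positive() returns the decremented value, so the comparison with 0 fires exactly when the final activation is released; the NFS swap hooks earlier in this series (nfs_swap_activate/nfs_swap_deactivate) are the intended callers.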
@@ -10,9 +10,12 @@
#include "netns.h"
static struct dentry *topdir;
static struct dentry *rpc_fault_dir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
unsigned int rpc_inject_disconnect;
struct rpc_clnt_iter {
struct rpc_clnt *clnt;
loff_t pos;
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
}
atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
static int
fault_open(struct inode *inode, struct file *filp)
{
filp->private_data = kmalloc(128, GFP_KERNEL);
if (!filp->private_data)
return -ENOMEM;
return 0;
}
static int
fault_release(struct inode *inode, struct file *filp)
{
kfree(filp->private_data);
return 0;
}
static ssize_t
fault_disconnect_read(struct file *filp, char __user *user_buf,
size_t len, loff_t *offset)
{
char *buffer = (char *)filp->private_data;
size_t size;
size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
return simple_read_from_buffer(user_buf, len, offset, buffer, size);
}
static ssize_t
fault_disconnect_write(struct file *filp, const char __user *user_buf,
size_t len, loff_t *offset)
{
char buffer[16];
if (len >= sizeof(buffer))
len = sizeof(buffer) - 1;
if (copy_from_user(buffer, user_buf, len))
return -EFAULT;
buffer[len] = '\0';
if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
return -EINVAL;
return len;
}
static const struct file_operations fault_disconnect_fops = {
.owner = THIS_MODULE,
.open = fault_open,
.read = fault_disconnect_read,
.write = fault_disconnect_write,
.release = fault_release,
};
static struct dentry *
inject_fault_dir(struct dentry *topdir)
{
struct dentry *faultdir;
faultdir = debugfs_create_dir("inject_fault", topdir);
if (!faultdir)
return NULL;
if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
NULL, &fault_disconnect_fops))
return NULL;
return faultdir;
}
void __exit
sunrpc_debugfs_exit(void)
{
debugfs_remove_recursive(topdir);
topdir = NULL;
rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
rpc_xprt_dir = NULL;
}
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void)
if (!topdir)
return;
rpc_fault_dir = inject_fault_dir(topdir);
if (!rpc_fault_dir)
goto out_remove;
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
if (!rpc_clnt_dir)
goto out_remove;
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void)
out_remove:
debugfs_remove_recursive(topdir);
topdir = NULL;
rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
}
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct rpc_task *task;
int proc_error;
int error;
dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xprt = serv->sv_bc_xprt;
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/*
* Skip the next two words because they've already been
* processed in the trasport
* processed in the transport
*/
svc_getu32(argv); /* XID */
svc_getnl(argv); /* CALLDIR */
/* Returns 1 for send, 0 for drop */
if (svc_process_common(rqstp, argv, resv)) {
memcpy(&req->rq_snd_buf, &rqstp->rq_res,
sizeof(req->rq_snd_buf));
return bc_send(req);
} else {
/* drop request */
/* Parse and execute the bc call */
proc_error = svc_process_common(rqstp, argv, resv);
atomic_inc(&req->rq_xprt->bc_free_slots);
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
return 0;
}
/* Finally, send the reply synchronously */
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
}
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
error = task->tk_status;
rpc_put_task(task);
out:
dprintk("svc: %s(), error=%d\n", __func__, error);
return error;
}
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
......
......@@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
......@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
}
xprt_clear_locked(xprt);
out_sleep:
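/* Undo the congestion slot taken via __xprt_get_cong() above; a task
 * that failed to lock the transport must not keep its share of the
 * congestion window while it sleeps. */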
if (req)
__xprt_put_cong(xprt, req);
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
......@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work)
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
xprt->ops->close(xprt);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
}
......@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task)
task->tk_status = status;
return;
}
xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
......@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
xprt_inject_disconnect(xprt);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
......
......@@ -11,6 +11,21 @@
* can take tens of usecs to complete.
*/
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using the
* ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
* finished, the Memory Region is unmapped using the ib_unmap_fmr
* verb (fmr_op_unmap).
*/
/* Transport recovery
*
* After a transport reconnect, fmr_op_map re-uses the MR already
* allocated for the RPC, but generates a fresh rkey then maps the
* MR again. This process is synchronous.
*/
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
......@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_mw *r;
int i, rc;
spin_lock_init(&buf->rb_mwlock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
i += 2; /* head + tail */
i *= buf->rb_max_requests; /* one set for each RPC slot */
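/* Worked example (constants assumed, not shown in this hunk): with
 * RPCRDMA_MAX_DATA_SEGS = 64 and RPCRDMA_MAX_FMR_SGES = 64, max_t()
 * yields 1; adding 2 for the head and tail iovecs gives 3 MRs per
 * RPC slot, so rb_max_requests = 32 preallocates 96 FMRs. */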
dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
rc = -ENOMEM;
while (i--) {
r = kzalloc(sizeof(*r), GFP_KERNEL);
if (!r)
return -ENOMEM;
goto out;
r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
if (IS_ERR(r->r.fmr))
r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
sizeof(u64), GFP_KERNEL);
if (!r->r.fmr.physaddrs)
goto out_free;
r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
if (IS_ERR(r->r.fmr.fmr))
goto out_fmr_err;
list_add(&r->mw_list, &buf->rb_mws);
......@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
return 0;
out_fmr_err:
rc = PTR_ERR(r->r.fmr);
rc = PTR_ERR(r->r.fmr.fmr);
dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
kfree(r->r.fmr.physaddrs);
out_free:
kfree(r);
out:
return rc;
}
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
LIST_HEAD(l);
list_add(&r->r.fmr.fmr->list, &l);
return ib_unmap_fmr(&l);
}
/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
......@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct ib_device *device = ia->ri_id->device;
struct ib_device *device = ia->ri_device;
enum dma_data_direction direction = rpcrdma_data_dir(writing);
struct rpcrdma_mr_seg *seg1 = seg;
struct rpcrdma_mw *mw = seg1->rl_mw;
u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
int len, pageoff, i, rc;
struct rpcrdma_mw *mw;
mw = seg1->rl_mw;
seg1->rl_mw = NULL;
if (!mw) {
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
return -ENOMEM;
} else {
/* this is a retransmit; generate a fresh rkey */
rc = __fmr_unmap(mw);
if (rc)
return rc;
}
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
......@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
rpcrdma_map_one(device, seg, direction);
physaddrs[i] = seg->mr_dma;
mw->r.fmr.physaddrs[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
......@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
break;
}
rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
i, seg1->mr_dma);
if (rc)
goto out_maperr;
seg1->mr_rkey = mw->r.fmr->rkey;
seg1->rl_mw = mw;
seg1->mr_rkey = mw->r.fmr.fmr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
......@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mr_seg *seg1 = seg;
struct ib_device *device;
struct rpcrdma_mw *mw = seg1->rl_mw;
int rc, nsegs = seg->mr_nsegs;
LIST_HEAD(l);
list_add(&seg1->rl_mw->r.fmr->list, &l);
rc = ib_unmap_fmr(&l);
read_lock(&ia->ri_qplock);
device = ia->ri_id->device;
dprintk("RPC: %s: FMR %p\n", __func__, mw);
seg1->rl_mw = NULL;
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(device, seg++);
read_unlock(&ia->ri_qplock);
rpcrdma_unmap_one(ia->ri_device, seg++);
rc = __fmr_unmap(mw);
if (rc)
goto out_err;
rpcrdma_put_mw(r_xprt, mw);
return nsegs;
out_err:
/* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
* will attempt to release it when the transport is destroyed.
*/
dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
return nsegs;
}
/* After a disconnect, unmap all FMRs.
*
* This is invoked only in the transport connect worker in order
* to serialize with rpcrdma_register_fmr_external().
*/
static void
fmr_op_reset(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_mw *r;
LIST_HEAD(list);
int rc;
list_for_each_entry(r, &buf->rb_all, mw_all)
list_add(&r->r.fmr->list, &list);
rc = ib_unmap_fmr(&list);
if (rc)
dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
__func__, rc);
}
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
......@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
while (!list_empty(&buf->rb_all)) {
r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
list_del(&r->mw_all);
rc = ib_dealloc_fmr(r->r.fmr);
kfree(r->r.fmr.physaddrs);
rc = ib_dealloc_fmr(r->r.fmr.fmr);
if (rc)
dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
__func__, rc);
kfree(r);
}
}
......@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
.ro_init = fmr_op_init,
.ro_reset = fmr_op_reset,
.ro_destroy = fmr_op_destroy,
.ro_displayname = "fmr",
};
......@@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
rpcrdma_map_one(ia->ri_id->device, seg,
rpcrdma_data_dir(writing));
rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
seg->mr_rkey = ia->ri_bind_mem->rkey;
seg->mr_base = seg->mr_dma;
seg->mr_nsegs = 1;
......@@ -65,18 +64,10 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
read_lock(&ia->ri_qplock);
rpcrdma_unmap_one(ia->ri_id->device, seg);
read_unlock(&ia->ri_qplock);
rpcrdma_unmap_one(ia->ri_device, seg);
return 1;
}
static void
physical_op_reset(struct rpcrdma_xprt *r_xprt)
{
}
static void
physical_op_destroy(struct rpcrdma_buffer *buf)
{
......@@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages,
.ro_init = physical_op_init,
.ro_reset = physical_op_reset,
.ro_destroy = physical_op_destroy,
.ro_displayname = "physical",
};
......@@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
return n;
for (pos = 0; nchunks--;)
pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
&req->rl_segments[pos]);
......@@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
__be32 *iptr;
int rdmalen, status;
unsigned long cwnd;
......@@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
rep->rr_func = rpcrdma_reply_handler;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
......
......@@ -240,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connecting(xprt);
}
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt);
pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
/*
* xprt_rdma_destroy
*
......@@ -612,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
if (req->rl_reply) {
req->rl_reply->rr_func = rpcrdma_reply_handler;
/* this need only be done once, but... */
req->rl_reply->rr_xprt = xprt;
}
/* Must suppress retransmit to maintain credits */
if (req->rl_connect_cookie == xprt->connect_cookie)
goto drop_connection;
......@@ -676,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.bad_reply_count);
}
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
return -EINVAL;
}
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
/*
* Plumbing for rpc transport switch and kernel module
*/
......@@ -694,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
.print_stats = xprt_rdma_print_stats
.print_stats = xprt_rdma_print_stats,
.enable_swap = xprt_rdma_enable_swap,
.disable_swap = xprt_rdma_disable_swap,
.inject_disconnect = xprt_rdma_inject_disconnect
};
static struct xprt_class xprt_rdma = {
......@@ -720,17 +738,24 @@ void xprt_rdma_cleanup(void)
if (rc)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
frwr_destroy_recovery_wq();
}
int xprt_rdma_init(void)
{
int rc;
rc = xprt_register_transport(&xprt_rdma);
rc = frwr_alloc_recovery_wq();
if (rc)
return rc;
rc = xprt_register_transport(&xprt_rdma);
if (rc) {
frwr_destroy_recovery_wq();
return rc;
}
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
......
......@@ -62,6 +62,7 @@
struct rpcrdma_ia {
const struct rpcrdma_memreg_ops *ri_ops;
rwlock_t ri_qplock;
struct ib_device *ri_device;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
......@@ -69,7 +70,6 @@ struct rpcrdma_ia {
int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth;
struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
......@@ -173,9 +173,8 @@ struct rpcrdma_buffer;
struct rpcrdma_rep {
unsigned int rr_len;
struct rpcrdma_buffer *rr_buffer;
struct rpc_xprt *rr_xprt;
void (*rr_func)(struct rpcrdma_rep *);
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct list_head rr_list;
struct rpcrdma_regbuf *rr_rdmabuf;
};
......@@ -203,11 +202,18 @@ struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work;
struct rpcrdma_xprt *fr_xprt;
};
struct rpcrdma_fmr {
struct ib_fmr *fmr;
u64 *physaddrs;
};
struct rpcrdma_mw {
union {
struct ib_fmr *fmr;
struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr;
} r;
void (*mw_sendcompletion)(struct ib_wc *);
......@@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
u32 rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
struct list_head rb_all;
int rb_send_index;
spinlock_t rb_mwlock; /* protect rb_mws list */
struct list_head rb_mws;
struct list_head rb_all;
char *rb_pool;
spinlock_t rb_lock; /* protect buf arrays */
u32 rb_max_requests;
int rb_send_index;
int rb_recv_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
struct rpcrdma_rep **rb_recv_bufs;
char *rb_pool;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
......@@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init)(struct rpcrdma_xprt *);
void (*ro_reset)(struct rpcrdma_xprt *);
void (*ro_destroy)(struct rpcrdma_buffer *);
const char *ro_displayname;
};
......@@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
......@@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
int frwr_alloc_recovery_wq(void);
void frwr_destroy_recovery_wq(void);
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
......
......@@ -622,24 +622,6 @@ static int xs_udp_send_request(struct rpc_task *task)
return status;
}
/**
* xs_tcp_shutdown - gracefully shut down a TCP socket
* @xprt: transport
*
* Initiates a graceful shutdown of the TCP socket by calling the
* equivalent of shutdown(SHUT_RDWR);
*/
static void xs_tcp_shutdown(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
if (sock != NULL) {
kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
}
}
/**
* xs_tcp_send_request - write an RPC request to a TCP socket
* @task: address of RPC task that manages the state of an RPC request
......@@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
xs_sock_reset_connection_flags(xprt);
/* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt);
xprt_force_disconnect(xprt);
}
/**
......@@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
......@@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt)
xprt_disconnect_done(xprt);
}
static void xs_inject_disconnect(struct rpc_xprt *xprt)
{
dprintk("RPC: injecting transport disconnect on xprt=%p\n",
xprt);
xprt_disconnect_done(xprt);
}
static void xs_xprt_free(struct rpc_xprt *xprt)
{
xs_free_peer_addresses(xprt);
......@@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
/**
* xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
* @sk: socket with data to read
* @len: how much data to read
*
* Currently this assumes we can read the whole reply in a single gulp.
*/
......@@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk)
/**
* xs_udp_data_ready - "data ready" callback for UDP sockets
* @sk: socket with data to read
* @len: how much data to read
*
*/
static void xs_udp_data_ready(struct sock *sk)
......@@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/**
* xs_tcp_data_ready - "data ready" callback for TCP sockets
* @sk: socket with data to read
* @bytes: how much data to read
*
*/
static void xs_tcp_data_ready(struct sock *sk)
......@@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
/**
* xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
* @xprt: RPC transport to connect
* @transport: socket transport to connect
* @create_sock: function to create a socket of the correct type
*/
static int xs_local_setup_socket(struct sock_xprt *transport)
{
......@@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
msleep_interruptible(15000);
}
#ifdef CONFIG_SUNRPC_SWAP
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
/*
* Note that this should be called with XPRT_LOCKED held (or when we otherwise
* know that we have exclusive access to the socket), to guard against
* races with xs_reset_transport.
*/
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
xprt);
if (xprt->swapper)
/*
* If there's no sock, then we have nothing to set. The
* reconnecting process will get it for us.
*/
if (!transport->inet)
return;
if (atomic_read(&xprt->swapper))
sk_set_memalloc(transport->inet);
}
/**
* xs_swapper - Tag this transport as being used for swap.
* xs_enable_swap - Tag this transport as being used for swap.
* @xprt: transport to tag
* @enable: enable/disable
*
* Take a reference to this transport on behalf of the rpc_clnt, and
* optionally mark it for swapping if it wasn't already.
*/
int xs_swapper(struct rpc_xprt *xprt, int enable)
static int
xs_enable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
xprt);
int err = 0;
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
if (enable) {
xprt->swapper++;
xs_set_memalloc(xprt);
} else if (xprt->swapper) {
xprt->swapper--;
sk_clear_memalloc(transport->inet);
}
if (atomic_inc_return(&xprt->swapper) != 1)
return 0;
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
return -ERESTARTSYS;
if (xs->inet)
sk_set_memalloc(xs->inet);
xprt_release_xprt(xprt, NULL);
return 0;
}
return err;
/**
* xs_disable_swap - Untag this transport as being used for swap.
* @xprt: transport to untag
*
* Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the
* swapper refcount goes to 0, untag the socket as a memalloc socket.
*/
static void
xs_disable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
if (!atomic_dec_and_test(&xprt->swapper))
return;
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
return;
if (xs->inet)
sk_clear_memalloc(xs->inet);
xprt_release_xprt(xprt, NULL);
}
EXPORT_SYMBOL_GPL(xs_swapper);
#else
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
}
static int
xs_enable_swap(struct rpc_xprt *xprt)
{
return -EINVAL;
}
static void
xs_disable_swap(struct rpc_xprt *xprt)
{
}
#endif
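How a caller is expected to drive the new ops pair: a sketch of a generic-layer wrapper, assuming one exists above the transport (the function name is illustrative; only the .enable_swap/.disable_swap ops wired up below are part of this diff):
static int example_xprt_enable_swap(struct rpc_xprt *xprt)
{
	/* Transports that cannot back a swap file (e.g. RDMA, whose
	 * enable_swap above returns -EINVAL) refuse the request. */
	if (xprt->ops->enable_swap)
		return xprt->ops->enable_swap(xprt);
	return -EINVAL;
}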
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
......@@ -2057,6 +2086,27 @@ static void xs_udp_setup_socket(struct work_struct *work)
xprt_wake_pending_tasks(xprt, status);
}
/**
* xs_tcp_shutdown - gracefully shut down a TCP socket
* @xprt: transport
*
* Initiates a graceful shutdown of the TCP socket by calling the
* equivalent of shutdown(SHUT_RDWR);
*/
static void xs_tcp_shutdown(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
if (sock == NULL)
return;
if (xprt_connected(xprt)) {
kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
} else
xs_reset_transport(transport);
}
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
......@@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
unsigned int keepidle = xprt->timeout->to_initval / HZ;
unsigned int keepcnt = xprt->timeout->to_retries + 1;
unsigned int opt_on = 1;
unsigned int timeo;
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
......@@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
/* TCP user timeout (see RFC5482) */
timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
(xprt->timeout->to_retries + 1);
kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
(char *)&timeo, sizeof(timeo));
write_lock_bh(&sk->sk_callback_lock);
xs_save_old_callbacks(transport, sk);
......@@ -2125,9 +2182,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/**
* xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
* @xprt: RPC transport to connect
* @transport: socket transport to connect
* @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
......@@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = {
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_local_print_stats,
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
};
static struct rpc_xprt_ops xs_udp_ops = {
......@@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = {
.close = xs_close,
.destroy = xs_destroy,
.print_stats = xs_udp_print_stats,
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
.inject_disconnect = xs_inject_disconnect,
};
static struct rpc_xprt_ops xs_tcp_ops = {
......@@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
.print_stats = xs_tcp_print_stats,
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
.inject_disconnect = xs_inject_disconnect,
};
/*
......@@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = {
.close = bc_close,
.destroy = bc_destroy,
.print_stats = xs_tcp_print_stats,
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
.inject_disconnect = xs_inject_disconnect,
};
static int xs_init_anyaddr(const int family, struct sockaddr *sap)
......