Commit ebc551f2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-5.1' of git://linux-nfs.org/~bfields/linux

Pull NFS server updates from Bruce Fields:
 "Miscellaneous NFS server fixes.

  Probably the most visible bug is one that could artificially limit
  NFSv4.1 performance by limiting the number of oustanding rpcs from a
  single client.

  Neil Brown also gets a special mention for fixing a 14.5-year-old
  memory-corruption bug in the encoding of NFSv3 readdir responses"

* tag 'nfsd-5.1' of git://linux-nfs.org/~bfields/linux:
  nfsd: allow nfsv3 readdir request to be larger.
  nfsd: fix wrong check in write_v4_end_grace()
  nfsd: fix memory corruption caused by readdir
  nfsd: fix performance-limiting session calculation
  svcrpc: fix UDP on servers with lots of threads
  svcrdma: Remove syslog warnings in work completion handlers
  svcrdma: Squelch compiler warning when SUNRPC_DEBUG is disabled
  svcrdma: Use struct_size() in kmalloc()
  svcrpc: fix unlikely races preventing queueing of sockets
  svcrpc: svc_xprt_has_something_to_do seems a little long
  SUNRPC: Don't allow compiler optimisation of svc_xprt_release_slot()
  nfsd: fix an IS_ERR() vs NULL check
parents a5adcfca f875a792
......@@ -463,8 +463,19 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
resp->count = resp->buffer - argp->buffer;
if (resp->offset)
xdr_encode_hyper(resp->offset, argp->cookie);
if (resp->offset) {
loff_t offset = argp->cookie;
if (unlikely(resp->offset1)) {
/* we ended up with offset on a page boundary */
*resp->offset = htonl(offset >> 32);
*resp->offset1 = htonl(offset & 0xffffffff);
resp->offset1 = NULL;
} else {
xdr_encode_hyper(resp->offset, offset);
}
resp->offset = NULL;
}
RETURN_STATUS(nfserr);
}
......@@ -533,6 +544,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
} else {
xdr_encode_hyper(resp->offset, offset);
}
resp->offset = NULL;
}
RETURN_STATUS(nfserr);
......@@ -576,7 +588,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
resp->f_wtmax = max_blocksize;
resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
resp->f_dtpref = PAGE_SIZE;
resp->f_dtpref = max_blocksize;
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
......
......@@ -573,6 +573,8 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
u32 max_blocksize = svc_max_payload(rqstp);
p = decode_fh(p, &args->fh);
if (!p)
return 0;
......@@ -580,7 +582,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
args->count = min_t(u32, args->count, PAGE_SIZE);
args->count = min_t(u32, args->count, max_blocksize);
args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p);
......@@ -921,6 +923,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
} else {
xdr_encode_hyper(cd->offset, offset64);
}
cd->offset = NULL;
}
/*
......
......@@ -900,9 +900,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
if (IS_ERR(cred)) {
if (!cred) {
rpc_shutdown_client(client);
return PTR_ERR(cred);
return -ENOMEM;
}
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
......
......@@ -1544,16 +1544,16 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
{
u32 slotsize = slot_bytes(ca);
u32 num = ca->maxreqs;
int avail;
unsigned long avail, total_avail;
spin_lock(&nfsd_drc_lock);
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
nfsd_drc_max_mem - nfsd_drc_mem_used);
total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/*
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
avail = clamp_t(int, avail, slotsize, avail/3);
avail = clamp_t(int, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
......
......@@ -1126,7 +1126,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
if (nn->nfsd_serv)
if (!nn->nfsd_serv)
return -EBUSY;
nfsd4_end_grace(nn);
break;
......
......@@ -357,15 +357,29 @@ static void svc_xprt_release_slot(struct svc_rqst *rqstp)
struct svc_xprt *xprt = rqstp->rq_xprt;
if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
atomic_dec(&xprt->xpt_nr_rqsts);
smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt);
}
}
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
static bool svc_xprt_ready(struct svc_xprt *xprt)
{
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
unsigned long xpt_flags;
/*
* If another cpu has recently updated xpt_flags,
* sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
* know about it; otherwise it's possible that both that cpu and
* this one could call svc_xprt_enqueue() without either
* svc_xprt_enqueue() recognizing that the conditions below
* are satisfied, and we could stall indefinitely:
*/
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true;
if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
svc_xprt_slots_in_range(xprt))
return true;
......@@ -381,7 +395,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_rqst *rqstp = NULL;
int cpu;
if (!svc_xprt_has_something_to_do(xprt))
if (!svc_xprt_ready(xprt))
return;
/* Mark transport as busy. It will remain in this state until
......@@ -475,7 +489,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
svc_xprt_enqueue(xprt);
}
}
......
......@@ -349,12 +349,16 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
/*
* Set socket snd and rcv buffer lengths
*/
static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
unsigned int rcv)
static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
{
unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
struct socket *sock = svsk->sk_sock;
nreqs = min(nreqs, INT_MAX / 2 / max_mesg);
lock_sock(sock->sk);
sock->sk->sk_sndbuf = snd * 2;
sock->sk->sk_rcvbuf = rcv * 2;
sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
}
......@@ -516,9 +520,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* provides an upper bound on the number of threads
* which will access the socket.
*/
svc_sock_setbufsize(svsk->sk_sock,
(serv->sv_nrthreads+3) * serv->sv_max_mesg,
(serv->sv_nrthreads+3) * serv->sv_max_mesg);
svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL;
......@@ -681,9 +683,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
* receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
svc_sock_setbufsize(svsk, 3);
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
......
......@@ -272,12 +272,9 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
return false;
ctxt->rc_temp = true;
ret = __svc_rdma_post_recv(rdma, ctxt);
if (ret) {
pr_err("svcrdma: failure posting recv buffers: %d\n",
ret);
if (ret)
return false;
}
}
return true;
}
......@@ -314,17 +311,14 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
spin_unlock(&rdma->sc_rq_dto_lock);
/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
goto out;
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
......
......@@ -64,8 +64,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
spin_unlock(&rdma->sc_rw_ctxt_lock);
} else {
spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(sizeof(*ctxt) +
SG_CHUNK_SIZE * sizeof(struct scatterlist),
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out;
......@@ -213,13 +212,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
if (unlikely(wc->status != IB_WC_SUCCESS))
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: write ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
}
svc_rdma_write_info_free(info);
}
......@@ -278,18 +272,15 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else {
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q);
/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
spin_unlock(&rdma->sc_rq_dto_lock);
set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
......
......@@ -272,10 +272,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
}
svc_xprt_put(&rdma->sc_xprt);
......
......@@ -390,8 +390,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
unsigned int ctxts, rq_depth;
struct ib_device *dev;
struct sockaddr *sap;
int ret = 0;
RPC_IFDEBUG(struct sockaddr *sap);
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
......@@ -525,6 +525,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (ret)
goto errout;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
......@@ -535,6 +536,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif
trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
......@@ -588,11 +590,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
/* We should only be called from kref_put */
if (kref_read(&xprt->xpt_ref) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
kref_read(&xprt->xpt_ref));
svc_rdma_flush_recv_queues(rdma);
/* Final put of backchannel client transport */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment