Commit 39f4cd9e authored by Chuck Lever, committed by Anna Schumaker

xprtrdma: Harden chunk list encoding against send buffer overflow

While marshaling chunk lists, which are variable-length XDR objects,
check for XDR buffer overflow at every step. Measurements show no
significant change in CPU utilization.
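To illustrate the pattern (a minimal sketch, not part of the patch itself): each encoder now reserves space in the xdr_stream before writing, and returns a negative errno instead of advancing a bare pointer. The helper below is hypothetical glue over the encode_read_segment() and encode_item_not_present() helpers introduced by this patch:

    /* Hypothetical caller sketch: encode one Read segment followed by
     * the list terminator, propagating -EMSGSIZE on send buffer
     * overflow instead of writing past the end of the buffer.
     */
    static int encode_one_read_chunk(struct xdr_stream *xdr,
                                     struct rpcrdma_mw *mw, u32 position)
    {
            int ret;

            ret = encode_read_segment(xdr, mw, position);
            if (ret)
                    return ret;     /* xdr_reserve_space() found no room */
            return encode_item_not_present(xdr);
    }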
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent 7a80f3f0
@@ -273,15 +273,70 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 	return -EIO;
 }
 
-static inline __be32 *
+static inline int
+encode_item_present(struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p = xdr_one;
+	return 0;
+}
+
+static inline int
+encode_item_not_present(struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p = xdr_zero;
+	return 0;
+}
+
+static void
 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
 {
 	*iptr++ = cpu_to_be32(mw->mw_handle);
 	*iptr++ = cpu_to_be32(mw->mw_length);
-	return xdr_encode_hyper(iptr, mw->mw_offset);
+	xdr_encode_hyper(iptr, mw->mw_offset);
+}
+
+static int
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	xdr_encode_rdma_segment(p, mw);
+	return 0;
+}
+
+static int
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+		    u32 position)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p++ = xdr_one;			/* Item present */
+	*p++ = cpu_to_be32(position);
+	xdr_encode_rdma_segment(p, mw);
+	return 0;
 }
 
-/* XDR-encode the Read list. Supports encoding a list of read
+/* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
@@ -290,24 +345,21 @@ xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
  * N elements, position P (same P for all chunks of same arg!):
  *  1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Read list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single @pos value is currently supported.
  */
-static __be32 *
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
-			 struct rpcrdma_req *req, struct rpc_rqst *rqst,
-			 __be32 *iptr, enum rpcrdma_chunktype rtype)
+static noinline int
+rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+			 struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
 	unsigned int pos;
 	int n, nsegs;
 
-	if (rtype == rpcrdma_noch) {
-		*iptr++ = xdr_zero;	/* item not present */
-		return iptr;
-	}
-
 	pos = rqst->rq_snd_buf.head[0].iov_len;
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
@@ -315,22 +367,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
 				     rtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
 	do {
 		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						 false, &mw);
 		if (n < 0)
-			return ERR_PTR(n);
+			return n;
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		*iptr++ = xdr_one;	/* item present */
-
-		/* All read segments in this chunk
-		 * have the same "position".
-		 */
-		*iptr++ = cpu_to_be32(pos);
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_read_segment(xdr, mw, pos) < 0)
+			return -EMSGSIZE;
 
 		dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__, pos,
@@ -342,13 +389,12 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 		nsegs -= n;
 	} while (nsegs);
 
-	/* Finish Read list */
-	*iptr++ = xdr_zero;	/* Next item not present */
-	return iptr;
+	return 0;
 }
 
-/* XDR-encode the Write list. Supports encoding a list containing
- * one array of plain segments that belong to a single write chunk.
+/* Register and XDR encode the Write list. Supports encoding a list
+ * containing one array of plain segments that belong to a single
+ * write chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
@@ -356,43 +402,45 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 * N elements:
 *  1 - N - HLOO - HLOO - ... - HLOO - 0
 *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Write list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single Write chunk is currently supported.
 */
-static __be32 *
+static noinline int
 rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-			  struct rpc_rqst *rqst, __be32 *iptr,
-			  enum rpcrdma_chunktype wtype)
+			  struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
 	int n, nsegs, nchunks;
 	__be32 *segcount;
 
-	if (wtype != rpcrdma_writech) {
-		*iptr++ = xdr_zero;	/* no Write list present */
-		return iptr;
-	}
-
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
 				     wtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
-	*iptr++ = xdr_one;	/* Write list present */
-	segcount = iptr++;	/* save location of segment count */
+	if (encode_item_present(xdr) < 0)
+		return -EMSGSIZE;
+	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+	if (unlikely(!segcount))
+		return -EMSGSIZE;
+	/* Actual value encoded below */
 
 	nchunks = 0;
 	do {
 		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						 true, &mw);
 		if (n < 0)
-			return ERR_PTR(n);
+			return n;
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_rdma_segment(xdr, mw) < 0)
+			return -EMSGSIZE;
 
 		dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__,
@@ -409,13 +457,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	/* Update count of segments in this Write chunk */
 	*segcount = cpu_to_be32(nchunks);
 
-	/* Finish Write list */
-	*iptr++ = xdr_zero;	/* Next item not present */
-	return iptr;
+	return 0;
 }
 
-/* XDR-encode the Reply chunk. Supports encoding an array of plain
- * segments that belong to a single write (reply) chunk.
+/* Register and XDR encode the Reply chunk. Supports encoding an array
+ * of plain segments that belong to a single write (reply) chunk.
  *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
@@ -423,41 +469,41 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 * N elements:
 *  1 - N - HLOO - HLOO - ... - HLOO
 *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Reply chunk, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
 */
-static __be32 *
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
-			   struct rpcrdma_req *req, struct rpc_rqst *rqst,
-			   __be32 *iptr, enum rpcrdma_chunktype wtype)
+static noinline int
+rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+			   struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
 	int n, nsegs, nchunks;
 	__be32 *segcount;
 
-	if (wtype != rpcrdma_replych) {
-		*iptr++ = xdr_zero;	/* no Reply chunk present */
-		return iptr;
-	}
-
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
-	*iptr++ = xdr_one;	/* Reply chunk present */
-	segcount = iptr++;	/* save location of segment count */
+	if (encode_item_present(xdr) < 0)
+		return -EMSGSIZE;
+	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+	if (unlikely(!segcount))
+		return -EMSGSIZE;
+	/* Actual value encoded below */
 
 	nchunks = 0;
 	do {
 		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						 true, &mw);
 		if (n < 0)
-			return ERR_PTR(n);
+			return n;
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_rdma_segment(xdr, mw) < 0)
+			return -EMSGSIZE;
 
 		dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__,
@@ -474,7 +520,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
 	/* Update count of segments in the Reply chunk */
 	*segcount = cpu_to_be32(nchunks);
 
-	return iptr;
+	return 0;
 }
 
 /* Prepare the RPC-over-RDMA header SGE.
@@ -676,24 +722,21 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct xdr_stream *xdr = &req->rl_stream;
 	enum rpcrdma_chunktype rtype, wtype;
-	struct rpcrdma_msg *headerp;
 	bool ddp_allowed;
-	ssize_t hdrlen;
-	__be32 *iptr;
 	__be32 *p;
+	int ret;
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
 		return rpcrdma_bc_marshal_reply(rqst);
 #endif
 
-	headerp = rdmab_to_msg(req->rl_rdmabuf);
 	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
 	xdr_init_encode(xdr, &req->rl_hdrbuf,
 			req->rl_rdmabuf->rg_base);
 
 	/* Fixed header fields */
-	iptr = ERR_PTR(-EMSGSIZE);
+	ret = -EMSGSIZE;
 	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
 	if (!p)
 		goto out_err;
@@ -775,37 +818,50 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 	 * send a Call message with a Position Zero Read chunk and a
 	 * regular Read chunk at the same time.
 	 */
-	iptr = headerp->rm_body.rm_chunks;
-	iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
-	if (IS_ERR(iptr))
+	if (rtype != rpcrdma_noch) {
+		ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
+		if (ret)
+			goto out_err;
+	}
+	ret = encode_item_not_present(xdr);
+	if (ret)
 		goto out_err;
-	iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
-	if (IS_ERR(iptr))
+
+	if (wtype == rpcrdma_writech) {
+		ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
+		if (ret)
+			goto out_err;
+	}
+	ret = encode_item_not_present(xdr);
+	if (ret)
 		goto out_err;
-	iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
-	if (IS_ERR(iptr))
+
+	if (wtype != rpcrdma_replych)
+		ret = encode_item_not_present(xdr);
+	else
+		ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+	if (ret)
 		goto out_err;
-	hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
 
-	dprintk("RPC: %5u %s: %s/%s: hdrlen %zd\n",
+	dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
 		rqst->rq_task->tk_pid, __func__,
 		transfertypes[rtype], transfertypes[wtype],
-		hdrlen);
+		xdr_stream_pos(xdr));
 
-	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
+	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req,
+				       xdr_stream_pos(xdr),
 				       &rqst->rq_snd_buf, rtype)) {
-		iptr = ERR_PTR(-EIO);
+		ret = -EIO;
 		goto out_err;
 	}
 	return 0;
 
 out_err:
-	if (PTR_ERR(iptr) != -ENOBUFS) {
-		pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
-		       PTR_ERR(iptr));
+	if (ret != -ENOBUFS) {
+		pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
 		r_xprt->rx_stats.failed_marshal_count++;
 	}
-	return PTR_ERR(iptr);
+	return ret;
 }
 
 /**
...