Commit e604aad2 authored by Chuck Lever

svcrdma: Use struct xdr_stream to decode ingress transport headers

The logic that checks incoming network headers has to be scrupulous.

De-duplicate: replace open-coded buffer overflow checks with the use
of xdr_stream helpers that are used most everywhere else XDR
decoding is done.

One minor change to the sanity checks: instead of checking the
length of individual segments, cap the length of the whole chunk
to be sure it can fit in the set of pages available in rq_pages.
This should be a better test of whether the server can handle the
chunks in each request.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 2426ddfd
......@@ -58,7 +58,8 @@ enum {
enum {
rpcrdma_fixed_maxsz = 4,
rpcrdma_segment_maxsz = 4,
rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz,
rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz,
};
/*
......
......@@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt {
struct ib_sge rc_recv_sge;
void *rc_recv_buf;
struct xdr_buf rc_arg;
struct xdr_stream rc_stream;
bool rc_temp;
u32 rc_byte_len;
unsigned int rc_page_count;
......
......@@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
);
#define DEFINE_SEGMENT_EVENT(name) \
DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
TP_PROTO( \
u32 handle, \
u32 length, \
......@@ -1477,8 +1477,9 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
), \
TP_ARGS(handle, length, offset))
DEFINE_SEGMENT_EVENT(rseg);
DEFINE_SEGMENT_EVENT(wseg);
DEFINE_SEGMENT_EVENT(decode_wseg);
DEFINE_SEGMENT_EVENT(encode_rseg);
DEFINE_SEGMENT_EVENT(encode_wseg);
DECLARE_EVENT_CLASS(svcrdma_chunk_event,
TP_PROTO(
......
......@@ -358,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
arg->len = ctxt->rc_byte_len;
}
/* This accommodates the largest possible Write chunk,
* in one segment.
/* This accommodates the largest possible Write chunk.
*/
#define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
/* This accommodates the largest possible Position-Zero
* Read chunk or Reply chunk, in one segment.
* Read chunk or Reply chunk.
*/
#define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
/* Sanity check the Read list.
*
......@@ -374,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* - This implementation supports only one Read chunk.
*
* Sanity checks:
* - Read list does not overflow buffer.
* - Read list does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload.
*
* The segment count is limited to how many segments can
......@@ -382,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* buffer. That's about 40 Read segments for a 1KB inline
* threshold.
*
* Returns pointer to the following Write list.
* Return values:
* %true: Read list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Read list.
* %false: Read list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/
static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
{
u32 position;
u32 position, len;
bool first;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
len = 0;
first = true;
while (*p++ != xdr_zero) {
while (*p != xdr_zero) {
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_readseg_maxsz * sizeof(*p));
if (!p)
return false;
if (first) {
position = be32_to_cpup(p++);
position = be32_to_cpup(p);
first = false;
} else if (be32_to_cpup(p++) != position) {
return NULL;
} else if (be32_to_cpup(p) != position) {
return false;
}
p++; /* handle */
if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
return NULL;
p += 2; /* offset */
p += 2;
len += be32_to_cpup(p);
if (p > end)
return NULL;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
}
return p;
return len <= MAX_BYTES_SPECIAL_CHUNK;
}
/* The segment count is limited to how many segments can
......@@ -413,67 +426,93 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
* buffer. That's about 60 Write segments for a 1KB inline
* threshold.
*/
static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
u32 maxlen)
static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
{
u32 i, segcount;
u32 i, segcount, total;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
segcount = be32_to_cpup(p);
segcount = be32_to_cpup(p++);
total = 0;
for (i = 0; i < segcount; i++) {
p++; /* handle */
if (be32_to_cpup(p++) > maxlen)
return NULL;
p += 2; /* offset */
u32 handle, length;
u64 offset;
if (p > end)
return NULL;
}
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_segment_maxsz * sizeof(*p));
if (!p)
return false;
handle = be32_to_cpup(p++);
length = be32_to_cpup(p++);
xdr_decode_hyper(p, &offset);
trace_svcrdma_decode_wseg(handle, length, offset);
return p;
total += length;
}
return total <= maxlen;
}
/* Sanity check the Write list.
*
* Implementation limits:
* - This implementation supports only one Write chunk.
* - This implementation currently supports only one Write chunk.
*
* Sanity checks:
* - Write list does not overflow buffer.
* - Segment size limited by largest NFS data payload.
*
* Returns pointer to the following Reply chunk.
* - Write list does not overflow Receive buffer.
* - Chunk size limited by largest NFS data payload.
*
* Return values:
* %true: Write list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Write list.
* %false: Write list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/
static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
{
u32 chcount;
u32 chcount = 0;
__be32 *p;
chcount = 0;
while (*p++ != xdr_zero) {
p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
while (*p != xdr_zero) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
return false;
++chcount;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return NULL;
if (chcount++ > 1)
return NULL;
return false;
}
return p;
return chcount < 2;
}
/* Sanity check the Reply chunk.
*
* Sanity checks:
* - Reply chunk does not overflow buffer.
* - Segment size limited by largest NFS data payload.
*
* Returns pointer to the following RPC header.
* - Reply chunk does not overflow Receive buffer.
* - Chunk size limited by largest NFS data payload.
*
* Return values:
* %true: Reply chunk is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Reply chunk.
* %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/
static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
{
if (*p++ != xdr_zero) {
p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
if (!p)
return NULL;
}
return p;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
if (*p != xdr_zero)
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
return false;
return true;
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
......@@ -538,60 +577,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
}
/* On entry, xdr->head[0].iov_base points to first byte in the
* RPC-over-RDMA header.
/**
* svc_rdma_xdr_decode_req - Decode the transport header
* @rq_arg: xdr_buf containing ingress RPC/RDMA message
* @rctxt: state of decoding
*
* On entry, xdr->head[0].iov_base points to first byte of the
* RPC-over-RDMA transport header.
*
* On successful exit, head[0] points to first byte past the
* RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
*
* The length of the RPC-over-RDMA header is returned.
*
* Assumptions:
* - The transport header is entirely contained in the head iovec.
*/
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
struct svc_rdma_recv_ctxt *rctxt)
{
__be32 *p, *end, *rdma_argp;
__be32 *p, *rdma_argp;
unsigned int hdr_len;
/* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
goto out_short;
rdma_argp = rq_arg->head[0].iov_base;
if (*(rdma_argp + 1) != rpcrdma_version)
goto out_version;
xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
switch (*(rdma_argp + 3)) {
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (unlikely(!p))
goto out_short;
p++;
if (*p != rpcrdma_version)
goto out_version;
p += 2;
switch (*p) {
case rdma_msg:
break;
case rdma_nomsg:
break;
case rdma_done:
goto out_drop;
case rdma_error:
goto out_drop;
default:
goto out_proc;
}
end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
p = xdr_check_read_list(rdma_argp + 4, end);
if (!p)
if (!xdr_check_read_list(rctxt))
goto out_inval;
p = xdr_check_write_list(p, end);
if (!p)
goto out_inval;
p = xdr_check_reply_chunk(p, end);
if (!p)
if (!xdr_check_write_list(rctxt))
goto out_inval;
if (p > end)
if (!xdr_check_reply_chunk(rctxt))
goto out_inval;
rq_arg->head[0].iov_base = p;
hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
rq_arg->head[0].iov_base = rctxt->rc_stream.p;
hdr_len = xdr_stream_pos(&rctxt->rc_stream);
rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len;
trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
......@@ -786,7 +826,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_next_page = rqstp->rq_respages;
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0)
goto out_err;
if (ret == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment