Commit 4edf0555 authored by Trond Myklebust

[PATCH] RPC over UDP congestion control updates [6/8]

Eliminate the arbitrary timeouts in xprt_adjust_cwnd(). Strictly
enforce the congestion avoidance algorithm as detailed in Van
Jacobson's 1988 paper "Congestion Avoidance and Control"
(http://www-nrg.ee.lbl.gov/nrg-papers.html).
parent 514349dc
...@@ -33,11 +33,11 @@ ...@@ -33,11 +33,11 @@
* MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment * MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment
* reassembly will frequently run out of memory. * reassembly will frequently run out of memory.
*/ */
#define RPC_MAXCONG 16 #define RPC_MAXCONG (16)
#define RPC_MAXREQS (RPC_MAXCONG + 1) #define RPC_MAXREQS RPC_MAXCONG
#define RPC_CWNDSCALE 256 #define RPC_CWNDSCALE (256)
#define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE) #define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE)
#define RPC_INITCWND RPC_CWNDSCALE #define RPC_INITCWND (RPC_MAXCWND >> 1)
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
/* Default timeout values */ /* Default timeout values */
...@@ -121,7 +121,6 @@ struct rpc_xprt { ...@@ -121,7 +121,6 @@ struct rpc_xprt {
unsigned long cong; /* current congestion */ unsigned long cong; /* current congestion */
unsigned long cwnd; /* congestion window */ unsigned long cwnd; /* congestion window */
unsigned long congtime; /* hold cwnd until then */
struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue sending; /* requests waiting to send */
struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue pending; /* requests in flight */
......
...@@ -304,30 +304,20 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) ...@@ -304,30 +304,20 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
*/ */
spin_lock(&xprt->xprt_lock); spin_lock(&xprt->xprt_lock);
cwnd = xprt->cwnd; cwnd = xprt->cwnd;
if (result >= 0) { if (result >= 0 && xprt->cong <= cwnd) {
if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime))
goto out;
/* The (cwnd >> 1) term makes sure /* The (cwnd >> 1) term makes sure
* the result gets rounded properly. */ * the result gets rounded properly. */
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
if (cwnd > RPC_MAXCWND) if (cwnd > RPC_MAXCWND)
cwnd = RPC_MAXCWND; cwnd = RPC_MAXCWND;
else __xprt_lock_write_next(xprt);
pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE;
dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx, "
"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
(xprt->congtime-jiffies)*1000/HZ);
} else if (result == -ETIMEDOUT) { } else if (result == -ETIMEDOUT) {
if ((cwnd >>= 1) < RPC_CWNDSCALE) cwnd >>= 1;
if (cwnd < RPC_CWNDSCALE)
cwnd = RPC_CWNDSCALE; cwnd = RPC_CWNDSCALE;
xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE;
dprintk("RPC: cong %ld, cwnd was %ld, now %ld, "
"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
(xprt->congtime-jiffies)*1000/HZ);
pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
} }
dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
xprt->cong, xprt->cwnd, cwnd);
xprt->cwnd = cwnd; xprt->cwnd = cwnd;
out: out:
spin_unlock(&xprt->xprt_lock); spin_unlock(&xprt->xprt_lock);
...@@ -1344,7 +1334,6 @@ xprt_setup(struct socket *sock, int proto, ...@@ -1344,7 +1334,6 @@ xprt_setup(struct socket *sock, int proto,
xprt->nocong = 1; xprt->nocong = 1;
} else } else
xprt->cwnd = RPC_INITCWND; xprt->cwnd = RPC_INITCWND;
xprt->congtime = jiffies;
spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->sock_lock);
spin_lock_init(&xprt->xprt_lock); spin_lock_init(&xprt->xprt_lock);
init_waitqueue_head(&xprt->cong_wait); init_waitqueue_head(&xprt->cong_wait);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment