/*
 * TCP Illinois congestion control.
 * Home page:
 *	http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
 *
 * The algorithm is described in:
 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
 *  for High-Speed Networks"
 * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
 *
 * Implemented from description in paper and ns-2 simulation.
 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet_diag.h>
#include <asm/div64.h>
#include <net/tcp.h>

/*
 * Fixed-point scaling for alpha (the additive increase factor):
 * a stored value of ALPHA_SCALE represents 1.0.
 */
#define ALPHA_SHIFT	7
#define ALPHA_SCALE	(1u<<ALPHA_SHIFT)
#define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
#define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
#define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */

#define U32_MAX		((u32)~0U)
/* Largest RTT sample whose product with ALPHA_MAX still fits in a u32 */
#define RTT_MAX		(U32_MAX / ALPHA_MAX)	/* 3.3 secs */

/*
 * Fixed-point scaling for beta (the multiplicative decrease factor):
 * a stored value of BETA_SCALE represents 1.0.
 */
#define BETA_SHIFT	6
#define BETA_SCALE	(1u<<BETA_SHIFT)
#define BETA_MIN	(BETA_SCALE/8)		/* 0.125 */
#define BETA_MAX	(BETA_SCALE/2)		/* 0.5 */
#define BETA_BASE	BETA_MAX

/*
 * Congestion window below which update_params() pins alpha and beta to
 * their base values instead of adapting them to the measured delay.
 */
static int win_thresh __read_mostly = 15;
module_param(win_thresh, int, 0);
MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");

/*
 * Number of low-delay RTT rounds alpha() counts, once delay has been
 * above its threshold, before it hands out ALPHA_MAX again.
 */
static int theta __read_mostly = 5;
module_param(theta, int, 0);
MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");

/* TCP Illinois Parameters */
44 45 46 47 48 49 50 51 52 53 54
struct illinois {
	u64	sum_rtt;	/* sum of rtt's measured within last rtt */
	u16	cnt_rtt;	/* # of rtts measured within last rtt */
	u32	base_rtt;	/* min of all rtt in usec */
	u32	max_rtt;	/* max of all rtt in usec */
	u32	end_seq;	/* right edge of current RTT */
	u32	alpha;		/* Additive increase */
	u32	beta;		/* Muliplicative decrease */
	u16	acked;		/* # packets acked by current ACK */
	u8	rtt_above;	/* average rtt has gone above threshold */
	u8	rtt_low;	/* # of rtts measurements below threshold */
55 56
};

57 58 59 60 61 62 63 64 65 66 67 68
static void rtt_reset(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct illinois *ca = inet_csk_ca(sk);

	ca->end_seq = tp->snd_nxt;
	ca->cnt_rtt = 0;
	ca->sum_rtt = 0;

	/* TODO: age max_rtt? */
}

69 70
static void tcp_illinois_init(struct sock *sk)
{
71 72 73 74 75 76 77 78 79 80
	struct illinois *ca = inet_csk_ca(sk);

	ca->alpha = ALPHA_MAX;
	ca->beta = BETA_BASE;
	ca->base_rtt = 0x7fffffff;
	ca->max_rtt = 0;

	ca->acked = 0;
	ca->rtt_low = 0;
	ca->rtt_above = 0;
81

82
	rtt_reset(sk);
83 84
}

85
/* Measure RTT for each ack. */
86
static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
87
{
88
	struct illinois *ca = inet_csk_ca(sk);
89 90 91

	ca->acked = pkts_acked;

92 93
	/* dup ack, no rtt sample */
	if (rtt < 0)
94 95
		return;

96 97 98
	/* ignore bogus values, this prevents wraparound in alpha math */
	if (rtt > RTT_MAX)
		rtt = RTT_MAX;
99

100 101 102 103 104 105
	/* keep track of minimum RTT seen so far */
	if (ca->base_rtt > rtt)
		ca->base_rtt = rtt;

	/* and max */
	if (ca->max_rtt < rtt)
106 107
		ca->max_rtt = rtt;

108 109
	++ca->cnt_rtt;
	ca->sum_rtt += rtt;
110 111
}

112 113
/* Maximum queuing delay */
static inline u32 max_delay(const struct illinois *ca)
114
{
115 116
	return ca->max_rtt - ca->base_rtt;
}
117

118 119 120 121
/* Average queuing delay */
static inline u32 avg_delay(const struct illinois *ca)
{
	u64 t = ca->sum_rtt;
122

123 124
	do_div(t, ca->cnt_rtt);
	return t - ca->base_rtt;
125 126 127 128 129 130 131 132 133 134
}

/*
 * Compute value of alpha used for additive increase.
 * If small window then use 1.0, equivalent to Reno.
 *
 * For larger windows, adjust based on average delay.
 * A. If average delay is at minimum (we are uncongested),
 *    then use large alpha (10.0) to increase faster.
 * B. If average delay is at maximum (getting congested)
135
 *    then use small alpha (0.3)
136 137 138
 *
 * The result is a convex window growth curve.
 */
139
static u32 alpha(struct illinois *ca, u32 da, u32 dm)
140
{
141
	u32 d1 = dm / 100;	/* Low threshold */
142 143

	if (da <= d1) {
144 145
		/* If never got out of low delay zone, then use max */
		if (!ca->rtt_above)
146
			return ALPHA_MAX;
147 148 149 150 151 152 153 154 155 156

		/* Wait for 5 good RTT's before allowing alpha to go alpha max.
		 * This prevents one good RTT from causing sudden window increase.
		 */
		if (++ca->rtt_low < theta)
			return ca->alpha;

		ca->rtt_low = 0;
		ca->rtt_above = 0;
		return ALPHA_MAX;
157 158
	}

159
	ca->rtt_above = 1;
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178

	/*
	 * Based on:
	 *
	 *      (dm - d1) amin amax
	 * k1 = -------------------
	 *         amax - amin
	 *
	 *       (dm - d1) amin
	 * k2 = ----------------  - d1
	 *        amax - amin
	 *
	 *             k1
	 * alpha = ----------
	 *          k2 + da
	 */

	dm -= d1;
	da -= d1;
179 180
	return (dm * ALPHA_MAX) /
		(dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
181 182 183 184 185 186 187 188 189 190
}

/*
 * Beta used for multiplicative decrease.
 * For small window sizes returns same value as Reno (0.5)
 *
 * If delay is small (10% of max) then beta = 1/8
 * If delay is up to 80% of max then beta = 1/2
 * In between is a linear function
 */
191
static u32 beta(u32 da, u32 dm)
192 193 194 195 196 197
{
	u32 d2, d3;

	d2 = dm / 10;
	if (da <= d2)
		return BETA_MIN;
198

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
	d3 = (8 * dm) / 10;
	if (da >= d3 || d3 <= d2)
		return BETA_MAX;

	/*
	 * Based on:
	 *
	 *       bmin d3 - bmax d2
	 * k3 = -------------------
	 *         d3 - d2
	 *
	 *       bmax - bmin
	 * k4 = -------------
	 *         d3 - d2
	 *
	 * b = k3 + k4 da
	 */
	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
		/ (d3 - d2);
}

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
/*
 * Once per RTT: recompute alpha and beta from the delay measured during
 * the round that just ended, then start a new measurement round.
 */
static void update_params(struct sock *sk)
{
	struct illinois *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->snd_cwnd >= win_thresh) {
		/* Adapt only if this round produced at least one RTT sample */
		if (ca->cnt_rtt > 0) {
			u32 max_d = max_delay(ca);
			u32 avg_d = avg_delay(ca);

			ca->alpha = alpha(ca, avg_d, max_d);
			ca->beta = beta(avg_d, max_d);
		}
	} else {
		/* Window below win_thresh: use the base values */
		ca->alpha = ALPHA_BASE;
		ca->beta = BETA_BASE;
	}

	rtt_reset(sk);
}

/*
 * Congestion state change hook.
 * Only the transition to TCP_CA_Loss matters: alpha and beta go back to
 * their base values, the low/high delay tracking is cleared and a fresh
 * RTT measurement round is started.
 */
static void tcp_illinois_state(struct sock *sk, u8 new_state)
{
	struct illinois *ca;

	if (new_state != TCP_CA_Loss)
		return;

	ca = inet_csk_ca(sk);
	ca->alpha = ALPHA_BASE;
	ca->beta = BETA_BASE;
	ca->rtt_low = 0;
	ca->rtt_above = 0;
	rtt_reset(sk);
}

/*
 * Increase window in response to successful acknowledgment.
 */
259
static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct illinois *ca = inet_csk_ca(sk);

	if (after(ack, ca->end_seq))
		update_params(sk);

	/* RFC2861 only increase cwnd if fully utilized */
	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	/* In slow start */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);

	else {
		u32 delta;

		/* snd_cwnd_cnt is # of packets since last cwnd increment */
		tp->snd_cwnd_cnt += ca->acked;
		ca->acked = 1;

		/* This is close approximation of:
		 * tp->snd_cwnd += alpha/tp->snd_cwnd
		*/
		delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
		if (delta >= tp->snd_cwnd) {
			tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
					   (u32) tp->snd_cwnd_clamp);
			tp->snd_cwnd_cnt = 0;
		}
	}
}

294 295 296
static u32 tcp_illinois_ssthresh(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
297
	struct illinois *ca = inet_csk_ca(sk);
298 299

	/* Multiplicative decrease */
300
	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
301 302
}

303 304 305 306

/* Extract info for Tcp socket info provided via netlink. */
static void tcp_illinois_info(struct sock *sk, u32 ext,
			      struct sk_buff *skb)
307
{
308
	const struct illinois *ca = inet_csk_ca(sk);
309 310 311 312

	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcpvegas_info info = {
			.tcpv_enabled = 1,
313 314
			.tcpv_rttcnt = ca->cnt_rtt,
			.tcpv_minrtt = ca->base_rtt,
315
		};
316 317 318 319
		u64 t = ca->sum_rtt;

		do_div(t, ca->cnt_rtt);
		info.tcpv_rtt = t;
320 321 322 323 324 325

		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
	}
}

static struct tcp_congestion_ops tcp_illinois = {
326
	.flags		= TCP_CONG_RTT_STAMP,
327 328 329 330
	.init		= tcp_illinois_init,
	.ssthresh	= tcp_illinois_ssthresh,
	.min_cwnd	= tcp_reno_min_cwnd,
	.cong_avoid	= tcp_illinois_cong_avoid,
331 332 333
	.set_state	= tcp_illinois_state,
	.get_info	= tcp_illinois_info,
	.pkts_acked	= tcp_illinois_acked,
334 335 336 337 338 339 340

	.owner		= THIS_MODULE,
	.name		= "illinois",
};

/*
 * Module init: register the "illinois" congestion control.
 * Returns whatever tcp_register_congestion_control() returns.
 */
static int __init tcp_illinois_register(void)
{
	/* Fail the build if struct illinois outgrows the private area */
	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_illinois);
}

/* Module exit: unregister the "illinois" congestion control. */
static void __exit tcp_illinois_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_illinois);
}

/* Module entry/exit points and metadata */
module_init(tcp_illinois_register);
module_exit(tcp_illinois_unregister);

MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Illinois");
MODULE_VERSION("1.0");