Commit ea8fc104 authored by shemminger's avatar shemminger

Netem support for reorder

Update include files and add support for TCP_CONG
parent 2b4fad69
2005-06-22 Stephen Hemminger <shemminger@dxpl.pdx.osdl.net>
* Update include files to 2.6.12
* Add ss support for TCP_CONG
2005-06-13 Steven Whitehouse <steve@chygwyn.com>
* Decnet doc's update
......
......@@ -427,6 +427,7 @@ enum
TCA_NETEM_UNSPEC,
TCA_NETEM_CORR,
TCA_NETEM_DELAY_DIST,
TCA_NETEM_REORDER,
__TCA_NETEM_MAX,
};
......@@ -437,7 +438,7 @@ struct tc_netem_qopt
__u32 latency; /* added delay (us) */
__u32 limit; /* fifo limit (packets) */
__u32 loss; /* random packet loss (0=none ~0=100%) */
__u32 gap; /* re-ordering gap (0 for delay all) */
__u32 gap; /* re-ordering gap (0 for none) */
__u32 duplicate; /* random packet dup (0=none ~0=100%) */
__u32 jitter; /* random jitter in latency (us) */
};
......@@ -449,6 +450,12 @@ struct tc_netem_corr
__u32 dup_corr; /* duplicate correlation */
};
/*
 * Reordering parameters for netem, carried as the payload of the
 * TCA_NETEM_REORDER attribute.
 * NOTE(review): both members appear to be scaled percentages in the same
 * 0=none .. ~0=100% encoding documented for tc_netem_qopt.loss -- confirm.
 */
struct tc_netem_reorder
{
__u32 probability; /* reorder probability */
__u32 correlation; /* reorder correlation */
};
#define NETEM_DIST_SCALE 8192
#endif
......@@ -89,10 +89,21 @@ enum {
RTM_GETANYCAST = 62,
#define RTM_GETANYCAST RTM_GETANYCAST
RTM_MAX,
#define RTM_MAX RTM_MAX
RTM_NEWNEIGHTBL = 64,
#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
RTM_GETNEIGHTBL = 66,
#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
RTM_SETNEIGHTBL,
#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
/* Number of message type values from RTM_BASE through RTM_MAX, inclusive. */
#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
/* Message types are counted in groups of four, hence the shift by 2. */
#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
/* Zero-based group index of message type cmd: its offset from RTM_BASE divided by 4. */
#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
/*
Generic structure for encapsulation of optional route information.
It is reminiscent of sockaddr, but with sa_family replaced
......@@ -398,19 +409,6 @@ enum
#define IFA_MAX (__IFA_MAX - 1)
/*
* Quirk for IPv4 address deletion to allow exact deletion of equal
* addresses varying only in prefix length. An explicit exact comparison
* of the prefix length will only be done if IFA_PREFIX_EXACT_DEL is
* ORed to ifa_prefixlen.
*
* Note: This special treatment is only understood while deleting
* addresses and will lead to unexpected behaviour if used
* otherwise.
*/
#define IFA_PREFIX_EXACT_DEL 0x40
#define IFA_REAL_DEL_PREFIX(l) ((l) & 0x3f)
/* ifa_flags */
#define IFA_F_SECONDARY 0x01
......@@ -502,6 +500,106 @@ struct nda_cacheinfo
__u32 ndm_refcnt;
};
/*****************************************************************
* Neighbour tables specific messages.
*
* To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
* NLM_F_DUMP flag set. Every neighbour table configuration is
* spread over multiple messages to avoid running into message
* size limits on systems with many interfaces. The first message
* in the sequence transports all not device specific data such as
* statistics, configuration, and the default parameter set.
* This message is followed by 0..n messages carrying device
* specific parameter sets.
* Although the ordering should be sufficient, NDTA_NAME can be
* used to identify sequences. The initial message can be identified
* by checking for NDTA_CONFIG. The device specific messages do
* not contain this TLV but have NDTPA_IFINDEX set to the
* corresponding interface index.
*
* To change neighbour table attributes, send RTM_SETNEIGHTBL
* with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
* NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
* otherwise. Device specific parameter sets can be changed by
* setting NDTPA_IFINDEX to the interface index of the corresponding
* device.
****/
/*
 * Neighbour table counters. This struct is the payload of the NDTA_STATS
 * attribute, which is read-only. Every member is a 64-bit counter.
 */
struct ndt_stats
{
__u64 ndts_allocs;
__u64 ndts_destroys;
__u64 ndts_hash_grows;
__u64 ndts_res_failed;
__u64 ndts_lookups;
__u64 ndts_hits;
__u64 ndts_rcv_probes_mcast;
__u64 ndts_rcv_probes_ucast;
__u64 ndts_periodic_gc_runs;
__u64 ndts_forced_gc_runs;
};
/*
 * Attribute types of a neighbour table parameter set; these are the TLVs
 * nested inside NDTA_PARMS. The comment on each entry gives the payload
 * type, whether it can be changed, and the time unit where one applies.
 * A parameter set with NDTPA_IFINDEX set refers to that interface.
 */
enum {
NDTPA_UNSPEC,
NDTPA_IFINDEX, /* u32, unchangeable */
NDTPA_REFCNT, /* u32, read-only */
NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
NDTPA_RETRANS_TIME, /* u64, msecs */
NDTPA_GC_STALETIME, /* u64, msecs */
NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
NDTPA_QUEUE_LEN, /* u32 */
NDTPA_APP_PROBES, /* u32 */
NDTPA_UCAST_PROBES, /* u32 */
NDTPA_MCAST_PROBES, /* u32 */
NDTPA_ANYCAST_DELAY, /* u64, msecs */
NDTPA_PROXY_DELAY, /* u64, msecs */
NDTPA_PROXY_QLEN, /* u32 */
NDTPA_LOCKTIME, /* u64, msecs */
__NDTPA_MAX /* terminator, not an attribute type */
};
/* Largest NDTPA_* attribute type: one less than the enum terminator. */
#define NDTPA_MAX (__NDTPA_MAX - 1)
/*
 * Fixed header of a neighbour table message. The NDTA_* attributes start
 * after this header (see NDTA_RTA). The two pad members bring the header
 * to four bytes.
 */
struct ndtmsg
{
__u8 ndtm_family;
__u8 ndtm_pad1;
__u16 ndtm_pad2;
};
/*
 * Neighbour table configuration snapshot. This struct is the payload of
 * the NDTA_CONFIG attribute, which is read-only.
 */
struct ndt_config
{
__u16 ndtc_key_len;
__u16 ndtc_entry_size;
__u32 ndtc_entries;
__u32 ndtc_last_flush; /* delta to now in msecs */
__u32 ndtc_last_rand; /* delta to now in msecs */
__u32 ndtc_hash_rnd;
__u32 ndtc_hash_mask;
__u32 ndtc_hash_chain_gc;
__u32 ndtc_proxy_qlen;
};
/*
 * Top-level attribute types of a neighbour table message. The comment on
 * each entry gives the payload type and whether it can be changed.
 */
enum {
NDTA_UNSPEC,
NDTA_NAME, /* char *, unchangeable */
NDTA_THRESH1, /* u32 */
NDTA_THRESH2, /* u32 */
NDTA_THRESH3, /* u32 */
NDTA_CONFIG, /* struct ndt_config, read-only */
NDTA_PARMS, /* nested TLV NDTPA_* */
NDTA_STATS, /* struct ndt_stats, read-only */
NDTA_GC_INTERVAL, /* u64, msecs */
__NDTA_MAX /* terminator, not an attribute type */
};
/* Largest NDTA_* attribute type: one less than the enum terminator. */
#define NDTA_MAX (__NDTA_MAX - 1)
/*
 * NDTA_RTA(r): given a pointer r to a struct ndtmsg, yield a pointer to the
 * first rtattr after it, i.e. r advanced by the NLMSG_ALIGN-ed header size.
 */
#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
NLMSG_ALIGN(sizeof(struct ndtmsg))))
/* NDTA_PAYLOAD(n): NLMSG_PAYLOAD of n, with a struct ndtmsg as the header length. */
#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
/****
* General form of address family dependent message.
****/
......
......@@ -99,9 +99,10 @@ enum
TCPDIAG_MEMINFO,
TCPDIAG_INFO,
TCPDIAG_VEGASINFO,
TCPDIAG_CONG,
};
#define TCPDIAG_MAX TCPDIAG_VEGASINFO
#define TCPDIAG_MAX TCPDIAG_CONG
/* TCPDIAG_MEM */
......@@ -123,5 +124,4 @@ struct tcpvegas_info {
__u32 tcpv_minrtt;
};
#endif /* _TCP_DIAG_H_ */
......@@ -140,8 +140,11 @@ enum {
XFRM_MSG_FLUSHPOLICY,
#define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY
XFRM_MSG_MAX
__XFRM_MSG_MAX
};
/* Largest XFRM message type: one less than the enum terminator __XFRM_MSG_MAX. */
#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
/* Number of message type values from XFRM_MSG_BASE through XFRM_MSG_MAX, inclusive. */
#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)
struct xfrm_user_tmpl {
struct xfrm_id id;
......@@ -171,6 +174,8 @@ enum xfrm_attr_type_t {
XFRMA_ALG_COMP, /* struct xfrm_algo */
XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */
XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */
XFRMA_SA,
XFRMA_POLICY,
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
......@@ -254,5 +259,7 @@ struct xfrm_usersa_flush {
#define XFRMGRP_ACQUIRE 1
#define XFRMGRP_EXPIRE 2
#define XFRMGRP_SA 4
#define XFRMGRP_POLICY 8
#endif /* _LINUX_XFRM_H */
......@@ -1338,6 +1338,9 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
if (info->tcpi_options & TCPI_OPT_ECN)
printf(" ecn");
}
if (tb[TCPDIAG_CONG])
printf("%s", (char *) RTA_DATA(tb[TCPDIAG_CONG]));
if (info->tcpi_options & TCPI_OPT_WSCALE)
printf(" wscale:%d,%d", info->tcpi_snd_wscale,
info->tcpi_rcv_wscale);
......@@ -1358,12 +1361,9 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
const struct tcpvegas_info *vinfo
= RTA_DATA(tb[TCPDIAG_VEGASINFO]);
if (vinfo->tcpv_enabled)
printf(" vegas");
if (vinfo->tcpv_rtt &&
vinfo->tcpv_rtt != 0x7fffffff)
rtt = vinfo->tcpv_rtt;
if (vinfo->tcpv_enabled &&
vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff)
rtt = vinfo->tcpv_rtt;
}
if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) {
......@@ -1442,7 +1442,6 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f)
printf("\n");
return 0;
}
int tcp_show_netlink(struct filter *f, FILE *dump_fp)
......@@ -1480,9 +1479,13 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp)
if (show_tcpinfo) {
req.r.tcpdiag_ext |= (1<<(TCPDIAG_INFO-1));
req.r.tcpdiag_ext |= (1<<(TCPDIAG_VEGASINFO-1));
req.r.tcpdiag_ext |= (1<<(TCPDIAG_CONG-1));
}
iov[0] = (struct iovec){ &req, sizeof(req) };
iov[0] = (struct iovec){
.iov_base = &req,
.iov_len = sizeof(req)
};
if (f->f) {
bclen = ssfilter_bytecompile(f->f, &bc);
rta.rta_type = TCPDIAG_REQ_BYTECODE;
......@@ -1493,17 +1496,19 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp)
}
msg = (struct msghdr) {
(void*)&nladdr, sizeof(nladdr),
iov, f->f ? 3 : 1,
NULL, 0,
0
.msg_name = (void*)&nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = iov,
.msg_iovlen = f->f ? 3 : 1,
};
if (sendmsg(fd, &msg, 0) < 0)
return -1;
iov[0] = (struct iovec){ buf, sizeof(buf) };
iov[0] = (struct iovec){
.iov_base = buf,
.iov_len = sizeof(buf)
};
while (1) {
int status;
......
......@@ -29,11 +29,11 @@ static void explain(void)
{
fprintf(stderr,
"Usage: ... netem [ limit PACKETS ] \n" \
" [ delay TIME [ JITTER [CORRELATION]]]\n" \
" [ delay TIME [ JITTER [CORRELATION]]]\n" \
" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \
" [ drop PERCENT [CORRELATION]] \n" \
" [ duplicate PERCENT [CORRELATION]]\n" \
" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \
" [ gap PACKETS ]\n");
" [ reorder PRECENT [CORRELATION] [ gap DISTANCE ]]\n");
}
static void explain1(const char *arg)
......@@ -127,11 +127,13 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
struct rtattr *tail;
struct tc_netem_qopt opt;
struct tc_netem_corr cor;
struct tc_netem_reorder reorder;
__s16 dist_data[MAXDIST];
memset(&opt, 0, sizeof(opt));
opt.limit = 1000;
memset(&cor, 0, sizeof(cor));
memset(&reorder, 0, sizeof(reorder));
while (argc > 0) {
if (matches(*argv, "limit") == 0) {
......@@ -178,6 +180,19 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
return -1;
}
}
} else if (matches(*argv, "reorder") == 0) {
NEXT_ARG();
if (get_percent(&reorder.probability, *argv)) {
explain1("reorder");
return -1;
}
if (NEXT_IS_NUMBER()) {
NEXT_ARG();
if (get_percent(&reorder.correlation, *argv)) {
explain1("reorder");
return -1;
}
}
} else if (matches(*argv, "gap") == 0) {
NEXT_ARG();
if (get_u32(&opt.gap, *argv, 0)) {
......@@ -215,8 +230,27 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
tail = NLMSG_TAIL(n);
if (reorder.probability) {
if (opt.latency == 0) {
fprintf(stderr, "reordering not possible without specifying some delay\n");
}
if (opt.gap == 0)
opt.gap = 1;
} else if (opt.gap > 0) {
fprintf(stderr, "gap specified without reorder probability\n");
explain();
return -1;
}
if (dist_size > 0 && (opt.latency == 0 || opt.jitter == 0)) {
fprintf(stderr, "distribution specified but no latency and jitter values\n");
explain();
return -1;
}
addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
addattr_l(n, 1024, TCA_NETEM_CORR, &cor, sizeof(cor));
addattr_l(n, 1024, TCA_NETEM_REORDER, &reorder, sizeof(reorder));
if (dist_size > 0) {
addattr_l(n, 32768, TCA_NETEM_DELAY_DIST,
......@@ -229,6 +263,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
{
const struct tc_netem_corr *cor = NULL;
const struct tc_netem_reorder *reorder = NULL;
struct tc_netem_qopt qopt;
int len = RTA_PAYLOAD(opt) - sizeof(qopt);
SPRINT_BUF(b1);
......@@ -252,6 +287,11 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
return -1;
cor = RTA_DATA(tb[TCA_NETEM_CORR]);
}
if (tb[TCA_NETEM_REORDER]) {
if (RTA_PAYLOAD(tb[TCA_NETEM_REORDER]) < sizeof(*reorder))
return -1;
reorder = RTA_DATA(tb[TCA_NETEM_REORDER]);
}
}
fprintf(f, "limit %d", qopt.limit);
......@@ -278,6 +318,14 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
if (cor && cor->dup_corr)
fprintf(f, " %s", sprint_percent(cor->dup_corr, b1));
}
if (reorder && reorder->probability) {
fprintf(f, " reorder %s",
sprint_percent(reorder->probability, b1));
if (reorder->correlation)
fprintf(f, " %s",
sprint_percent(reorder->correlation, b1));
}
if (qopt.gap)
fprintf(f, " gap %lu", (unsigned long)qopt.gap);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment