Commit 0f634eb3 authored by David S. Miller

Merge

parents 6a1b925d 04db0420
......@@ -1754,18 +1754,25 @@ settings contain additional options to set garbage collection parameters.
In the interface directories you'll find the following entries:
base_reachable_time
-------------------
base_reachable_time, base_reachable_time_ms
-------------------------------------------
A base value used for computing the random reachable time value as specified
in RFC2461.
retrans_time
------------
base_reachable_time, which is deprecated, is expressed in seconds.
base_reachable_time_ms is expressed in milliseconds.
retrans_time, retrans_time_ms
-----------------------------
The time between retransmitted Neighbor Solicitation messages.
Used for address resolution and to determine if a neighbor is
unreachable.
The time, expressed in jiffies (1/100 sec), between retransmitted Neighbor
Solicitation messages. Used for address resolution and to determine if a
neighbor is unreachable.
retrans_time, which is deprecated, is expressed in 1/100 seconds (for
IPv4) or in jiffies (for IPv6).
retrans_time_ms is expressed in milliseconds.
unres_qlen
----------
......
......@@ -152,6 +152,7 @@ struct inet_sock {
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
static inline struct inet_sock *inet_sk(const struct sock *sk)
{
......
......@@ -346,6 +346,7 @@ enum
#define RTAX_FEATURE_ECN 0x00000001
#define RTAX_FEATURE_SACK 0x00000002
#define RTAX_FEATURE_TIMESTAMP 0x00000004
#define RTAX_FEATURE_ALLFRAG 0x00000008
struct rta_session
{
......
......@@ -399,6 +399,7 @@ enum
NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
NET_IPV4_CONF_ARP_ANNOUNCE=18,
NET_IPV4_CONF_ARP_IGNORE=19,
__NET_IPV4_CONF_MAX
};
/* /proc/sys/net/ipv4/netfilter */
......@@ -456,7 +457,8 @@ enum {
NET_IPV6_ROUTE_GC_INTERVAL=6,
NET_IPV6_ROUTE_GC_ELASTICITY=7,
NET_IPV6_ROUTE_MTU_EXPIRES=8,
NET_IPV6_ROUTE_MIN_ADVMSS=9
NET_IPV6_ROUTE_MIN_ADVMSS=9,
NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10
};
enum {
......@@ -476,7 +478,8 @@ enum {
NET_IPV6_REGEN_MAX_RETRY=14,
NET_IPV6_MAX_DESYNC_FACTOR=15,
NET_IPV6_MAX_ADDRESSES=16,
NET_IPV6_FORCE_MLD_VERSION=17
NET_IPV6_FORCE_MLD_VERSION=17,
__NET_IPV6_MAX
};
/* /proc/sys/net/ipv6/icmp */
......@@ -501,7 +504,10 @@ enum {
NET_NEIGH_GC_INTERVAL=13,
NET_NEIGH_GC_THRESH1=14,
NET_NEIGH_GC_THRESH2=15,
NET_NEIGH_GC_THRESH3=16
NET_NEIGH_GC_THRESH3=16,
NET_NEIGH_RETRANS_TIME_MS=17,
NET_NEIGH_REACHABLE_TIME_MS=18,
__NET_NEIGH_MAX
};
/* /proc/sys/net/ipx */
......
......@@ -102,6 +102,8 @@ extern int ipv6_is_mld(struct sk_buff *skb, int nexthdr);
extern void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len);
extern int ipv6_get_hoplimit(struct net_device *dev);
/*
* anycast prototypes (anycast.c)
*/
......
......@@ -134,6 +134,15 @@ static inline u32 dst_mtu(const struct dst_entry *dst)
return mtu;
}
/*
 * dst_allfrag - report whether RTAX_FEATURE_ALLFRAG is set for @dst.
 *
 * Reads the RTAX_FEATURES path metric of @dst and masks it with
 * RTAX_FEATURE_ALLFRAG.  The result is non-zero when the bit is set
 * and zero otherwise.
 */
static inline u32
dst_allfrag(const struct dst_entry *dst)
{
	int allfrag;

	allfrag = dst_path_metric(dst, RTAX_FEATURES) & RTAX_FEATURE_ALLFRAG;
	/* Yes, _exactly_. This is paranoia. */
	barrier();
	return allfrag;
}
static inline int
dst_metric_locked(struct dst_entry *dst, int metric)
{
......
......@@ -305,6 +305,32 @@ static inline int ipv6_addr_equal(const struct in6_addr *a1,
a1->s6_addr32[3] == a2->s6_addr32[3]);
}
/*
 * __ipv6_prefix_equal - compare the leading @prefixlen bits of two
 * addresses given as arrays of 32-bit words.
 *
 * Returns 1 when the prefixes are identical and 0 when they differ.
 */
static inline int __ipv6_prefix_equal(const u32 *a1, const u32 *a2,
				      unsigned int prefixlen)
{
	unsigned words = prefixlen >> 5;	/* whole u32 units in the prefix */
	unsigned bits = prefixlen & 0x1f;	/* bits left in the next u32 */

	/* every fully covered word has to be identical */
	if (words && memcmp(a1, a2, words << 2))
		return 0;

	/* the prefix ends on a word boundary: nothing more to compare */
	if (!bits)
		return 1;

	/* compare only the masked part of the partially covered word */
	return !((a1[words] ^ a2[words]) &
		 htonl((0xffffffff) << (32 - bits)));
}
static inline int ipv6_prefix_equal(const struct in6_addr *a1,
const struct in6_addr *a2,
unsigned int prefixlen)
{
return __ipv6_prefix_equal(a1->s6_addr32, a2->s6_addr32,
prefixlen);
}
static inline int ipv6_addr_any(const struct in6_addr *a)
{
return ((a->s6_addr32[0] | a->s6_addr32[1] |
......
......@@ -15,11 +15,15 @@
* ndisc options
*/
#define ND_OPT_SOURCE_LL_ADDR 1
#define ND_OPT_TARGET_LL_ADDR 2
#define ND_OPT_PREFIX_INFO 3
#define ND_OPT_REDIRECT_HDR 4
#define ND_OPT_MTU 5
/*
 * Neighbour discovery option type values.  Entries tagged RFC2461 carry
 * explicit values 1..5.
 */
enum {
/* NOTE(review): value 0 carries no RFC tag; its purpose is not visible here */
__ND_OPT_PREFIX_INFO_END = 0,
ND_OPT_SOURCE_LL_ADDR = 1, /* RFC2461 */
ND_OPT_TARGET_LL_ADDR = 2, /* RFC2461 */
ND_OPT_PREFIX_INFO = 3, /* RFC2461 */
ND_OPT_REDIRECT_HDR = 4, /* RFC2461 */
ND_OPT_MTU = 5, /* RFC2461 */
/* implicitly one greater than ND_OPT_MTU, i.e. 6 */
__ND_OPT_MAX
};
#define MAX_RTR_SOLICITATION_DELAY HZ
......
......@@ -274,7 +274,8 @@ extern int neigh_sysctl_register(struct net_device *dev,
struct neigh_parms *p,
int p_id, int pdev_id,
char *p_name,
proc_handler *proc_handler);
proc_handler *proc_handler,
ctl_handler *strategy);
extern void neigh_sysctl_unregister(struct neigh_parms *p);
static inline void __neigh_parms_put(struct neigh_parms *parms)
......
......@@ -107,28 +107,23 @@ source "net/ipv4/Kconfig"
# IPv6 as module will cause a CRASH if you try to unload it
config IPV6
tristate "The IPv6 protocol (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
tristate "The IPv6 protocol"
depends on INET
select CRYPTO if IPV6_PRIVACY
select CRYPTO_MD5 if IPV6_PRIVACY
---help---
This is experimental support for the IP version 6 (formerly called
IPng "IP next generation"). You will still be able to do
regular IPv4 networking as well.
Features of this new protocol include: expanded address space,
authentication and privacy, and seamless interoperability with the
current version of IP (IP version 4). For general information about
IPv6, see <http://playground.sun.com/pub/ipng/html/ipng-main.html>;
for specific information about IPv6 under Linux read the HOWTO at
<http://www.bieringer.de/linux/IPv6/> and the file net/ipv6/README
in the kernel source.
This is complementary support for IP version 6.
You will still be able to do traditional IPv4 networking as well.
For general information about IPv6, see
<http://playground.sun.com/pub/ipng/html/ipng-main.html>.
For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
For specific information about IPv6 under Linux, read the HOWTO at
<http://www.bieringer.de/linux/IPv6/>.
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
It is safe to say N here for now.
source "net/ipv6/Kconfig"
menuconfig NETFILTER
......
......@@ -2052,7 +2052,7 @@ static void neigh_app_notify(struct neighbour *n)
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
ctl_table neigh_vars[17];
ctl_table neigh_vars[__NET_NEIGH_MAX];
ctl_table neigh_dev[2];
ctl_table neigh_neigh_dir[2];
ctl_table neigh_proto_dir[2];
......@@ -2175,6 +2175,22 @@ static struct neigh_sysctl_table {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_NEIGH_RETRANS_TIME_MS,
.procname = "retrans_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_ms_jiffies,
.strategy = &sysctl_ms_jiffies,
},
{
.ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
.procname = "base_reachable_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_ms_jiffies,
.strategy = &sysctl_ms_jiffies,
},
},
.neigh_dev = {
{
......@@ -2205,7 +2221,7 @@ static struct neigh_sysctl_table {
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
int p_id, int pdev_id, char *p_name,
proc_handler *handler)
proc_handler *handler, ctl_handler *strategy)
{
struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
const char *dev_name_source = NULL;
......@@ -2219,10 +2235,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[1].data = &p->ucast_probes;
t->neigh_vars[2].data = &p->app_probes;
t->neigh_vars[3].data = &p->retrans_time;
if (handler) {
t->neigh_vars[3].proc_handler = handler;
t->neigh_vars[3].extra1 = dev;
}
t->neigh_vars[4].data = &p->base_reachable_time;
t->neigh_vars[5].data = &p->delay_probe_time;
t->neigh_vars[6].data = &p->gc_staletime;
......@@ -2232,18 +2244,43 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[10].data = &p->proxy_delay;
t->neigh_vars[11].data = &p->locktime;
dev_name_source = t->neigh_dev[0].procname;
if (dev) {
dev_name_source = dev->name;
t->neigh_dev[0].ctl_name = dev->ifindex;
memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
t->neigh_vars[12].procname = NULL;
t->neigh_vars[13].procname = NULL;
t->neigh_vars[14].procname = NULL;
t->neigh_vars[15].procname = NULL;
} else {
dev_name_source = t->neigh_dev[0].procname;
t->neigh_vars[12].data = (int *)(p + 1);
t->neigh_vars[13].data = (int *)(p + 1) + 1;
t->neigh_vars[14].data = (int *)(p + 1) + 2;
t->neigh_vars[15].data = (int *)(p + 1) + 3;
}
t->neigh_vars[16].data = &p->retrans_time;
t->neigh_vars[17].data = &p->base_reachable_time;
if (handler || strategy) {
/* RetransTime */
t->neigh_vars[3].proc_handler = handler;
t->neigh_vars[3].strategy = strategy;
t->neigh_vars[3].extra1 = dev;
/* ReachableTime */
t->neigh_vars[4].proc_handler = handler;
t->neigh_vars[4].strategy = strategy;
t->neigh_vars[4].extra1 = dev;
/* RetransTime (in milliseconds)*/
t->neigh_vars[16].proc_handler = handler;
t->neigh_vars[16].strategy = strategy;
t->neigh_vars[16].extra1 = dev;
/* ReachableTime (in milliseconds) */
t->neigh_vars[17].proc_handler = handler;
t->neigh_vars[17].strategy = strategy;
t->neigh_vars[17].extra1 = dev;
}
dev_name = net_sysctl_strdup(dev_name_source);
if (!dev_name) {
err = -ENOBUFS;
......
......@@ -1243,7 +1243,7 @@ void __init arp_init(void)
arp_proc_init();
#ifdef CONFIG_SYSCTL
neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL);
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
register_netdevice_notifier(&arp_netdev_notifier);
}
......
......@@ -153,7 +153,7 @@ struct in_device *inetdev_init(struct net_device *dev)
dev_hold(dev);
#ifdef CONFIG_SYSCTL
neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL);
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
/* Account for reference dev->ip_ptr */
......@@ -1001,7 +1001,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
devinet_sysctl_unregister(&in_dev->cnf);
neigh_sysctl_unregister(in_dev->arp_parms);
neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
NET_IPV4_NEIGH, "ipv4", NULL);
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
devinet_sysctl_register(in_dev, &in_dev->cnf);
#endif
break;
......@@ -1221,7 +1221,7 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
ctl_table devinet_vars[20];
ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
ctl_table devinet_dev[2];
ctl_table devinet_conf_dir[2];
ctl_table devinet_proto_dir[2];
......
To join in the work on Linux IPv6 send mail to:
majordomo@oss.sgi.com
and in the body of the message include:
subscribe netdev
......@@ -391,7 +391,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
ndev->tstamp = jiffies;
#ifdef CONFIG_SYSCTL
neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change);
NET_IPV6_NEIGH, "ipv6",
&ndisc_ifinfo_sysctl_change,
NULL);
addrconf_sysctl_register(ndev, &ndev->cnf);
#endif
}
......@@ -589,6 +591,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
struct inet6_ifaddr *ifa, **ifap;
struct inet6_dev *idev = ifp->idev;
int hash;
int deleted = 0, onlink = 0;
unsigned long expires = jiffies;
hash = ipv6_addr_hash(&ifp->addr);
......@@ -631,7 +635,31 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
*ifap = ifa->if_next;
__in6_ifa_put(ifp);
ifa->if_next = NULL;
break;
if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
break;
deleted = 1;
} else if (ifp->flags & IFA_F_PERMANENT) {
if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
ifp->prefix_len)) {
if (ifa->flags & IFA_F_PERMANENT) {
onlink = 1;
if (deleted)
break;
} else {
unsigned long lifetime;
if (!onlink)
onlink = -1;
spin_lock(&ifa->lock);
lifetime = min_t(unsigned long,
ifa->valid_lft, 0x7fffffffUL/HZ);
if (time_before(expires,
ifa->tstamp + lifetime * HZ))
expires = ifa->tstamp + lifetime * HZ;
spin_unlock(&ifa->lock);
}
}
}
}
write_unlock_bh(&idev->lock);
......@@ -642,6 +670,40 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
addrconf_del_timer(ifp);
/*
* Purge or update corresponding prefix
*
* 1) we don't purge prefix here if address was not permanent.
* prefix is managed by its own lifetime.
* 2) if there're no addresses, delete prefix.
* 3) if there're still other permanent address(es),
* corresponding prefix is still permanent.
* 4) otherwise, update prefix lifetime to the
* longest valid lifetime among the corresponding
* addresses on the device.
* Note: subsequent RA will update lifetime.
*
* --yoshfuji
*/
if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
struct in6_addr prefix;
struct rt6_info *rt;
ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (onlink == 0) {
ip6_del_rt(rt, NULL, NULL);
rt = NULL;
} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
rt->rt6i_expires = expires;
rt->rt6i_flags |= RTF_EXPIRES;
}
}
dst_release(&rt->u.dst);
}
in6_ifa_put(ifp);
}
......@@ -1982,7 +2044,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
addrconf_sysctl_unregister(&idev->cnf);
neigh_sysctl_unregister(idev->nd_parms);
neigh_sysctl_register(dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change);
neigh_sysctl_register(dev, idev->nd_parms,
NET_IPV6, NET_IPV6_NEIGH, "ipv6",
&ndisc_ifinfo_sysctl_change,
NULL);
addrconf_sysctl_register(idev, &idev->cnf);
}
#endif
......@@ -3150,7 +3215,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
ctl_table addrconf_vars[18];
ctl_table addrconf_vars[__NET_IPV6_MAX];
ctl_table addrconf_dev[2];
ctl_table addrconf_conf_dir[2];
ctl_table addrconf_proto_dir[2];
......
......@@ -48,32 +48,6 @@ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
/* Big ac list lock for all the sockets */
static DEFINE_RWLOCK(ipv6_sk_ac_lock);
/* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */
/*
 * ip6_addr_match - compare the leading @prefix bits of two IPv6 addresses.
 *
 * Returns 1 when the first @prefix bits of @addr1 and @addr2 are equal
 * (always 1 for a zero-length prefix).  Returns 0 when they differ or
 * when @prefix lies outside the range 0..128.
 */
static int
ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix)
{
	__u32 mask;
	int i;

	if (prefix > 128 || prefix < 0)
		return 0;
	if (prefix == 0)
		return 1;
	for (i = 0; i < 4; ++i) {
		if (prefix >= 32)
			mask = ~0U;
		else
			/*
			 * Build the mask from an unsigned value: left-shifting
			 * the negative int ~0 is undefined behaviour in C.
			 */
			mask = htonl(~0U << (32 - prefix));
		if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask)
			return 0;
		/* move on to the next 32-bit word of the prefix */
		prefix -= 32;
		if (prefix <= 0)
			break;
	}
	return 1;
}
static int
ip6_onlink(struct in6_addr *addr, struct net_device *dev)
{
......@@ -87,8 +61,8 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
if (idev) {
read_lock_bh(&idev->lock);
for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
onlink = ip6_addr_match(addr, &ifa->addr,
ifa->prefix_len);
onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len);
if (onlink)
break;
}
......
......@@ -381,6 +381,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = dst_metric(dst, RTAX_HOPLIMIT);
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
msg.skb = skb;
msg.offset = skb->nh.raw - skb->data;
......@@ -467,6 +469,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = dst_metric(dst, RTAX_HOPLIMIT);
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
idev = in6_dev_get(skb->dev);
......
......@@ -116,36 +116,6 @@ static __inline__ u32 fib6_new_sernum(void)
* 64bit processors)
*/
/*
 *	Compare the first "prefixlen" bits of two addresses.
 *	Returns 1 when they are identical, 0 otherwise.
 */
static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
{
	const __u32 *lhs = token1;
	const __u32 *rhs = token2;
	int whole = prefixlen >> 5;	/* complete __u32 words in the prefix */
	int rest = prefixlen & 0x1f;	/* bits in the trailing partial word */

	/* all fully covered words must match byte for byte */
	if (whole && memcmp(lhs, rhs, whole << 2) != 0)
		return 0;

	/* prefix ends on a word boundary: nothing left to check */
	if (!rest)
		return 1;

	/* only the masked part of the partial word takes part */
	return ((lhs[whole] ^ rhs[whole]) &
		htonl((0xffffffff) << (32 - rest))) == 0;
}
/*
* test bit
*/
......@@ -261,7 +231,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
* Prefix match
*/
if (plen < fn->fn_bit ||
!addr_match(&key->addr, addr, fn->fn_bit))
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
goto insert_above;
/*
......@@ -667,7 +637,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
key = (struct rt6key *) ((u8 *) fn->leaf +
args->offset);
if (addr_match(&key->addr, args->addr, key->plen))
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
return fn;
}
......@@ -718,7 +688,7 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
* Prefix match
*/
if (plen < fn->fn_bit ||
!addr_match(&key->addr, addr, fn->fn_bit))
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
return NULL;
if (plen == fn->fn_bit)
......@@ -1211,7 +1181,7 @@ void fib6_run_gc(unsigned long dummy)
{
if (dummy != ~0UL) {
spin_lock_bh(&fib6_gc_lock);
gc_args.timeout = (int)dummy;
gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
} else {
local_bh_disable();
if (!spin_trylock(&fib6_gc_lock)) {
......
......@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb)
int ip6_output(struct sk_buff *skb)
{
if (skb->len > dst_pmtu(skb->dst))
if (skb->len > dst_pmtu(skb->dst) || dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
else
return ip6_output2(skb);
......@@ -253,6 +253,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = dst_metric(dst, RTAX_HOPLIMIT);
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
......@@ -848,6 +850,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
inet->cork.fl = *fl;
np->cork.hop_limit = hlimit;
inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
if (dst_allfrag(&rt->u.dst))
inet->cork.flags |= IPCORK_ALLFRAG;
inet->cork.length = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
......@@ -899,7 +903,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
while (length > 0) {
/* Check if the remaining data fits into current packet. */
copy = mtu - skb->len;
copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
if (copy < length)
copy = maxfraglen - skb->len;
......@@ -924,7 +928,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
* we know we need more fragment(s).
*/
datalen = length + fraggap;
if (datalen > mtu - fragheaderlen)
if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
......@@ -1158,6 +1162,7 @@ int ip6_push_pending_frames(struct sock *sk)
if (np->cork.rt) {
dst_release(&np->cork.rt->u.dst);
np->cork.rt = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG;
}
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
return err;
......@@ -1185,6 +1190,7 @@ void ip6_flush_pending_frames(struct sock *sk)
if (np->cork.rt) {
dst_release(&np->cork.rt->u.dst);
np->cork.rt = NULL;
inet->cork.flags &= ~IPCORK_ALLFRAG;
}
memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
This diff is collapsed.
......@@ -2,8 +2,8 @@
# IP netfilter configuration
#
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK
#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
......
......@@ -756,6 +756,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = dst_metric(dst, RTAX_HOPLIMIT);
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
}
if (msg->msg_flags&MSG_CONFIRM)
......
......@@ -628,8 +628,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU)
if (mtu < IPV6_MIN_MTU) {
mtu = IPV6_MIN_MTU;
dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
}
dst->metrics[RTAX_MTU-1] = mtu;
}
}
......@@ -769,7 +771,7 @@ static int ipv6_get_mtu(struct net_device *dev)
return mtu;
}
static int ipv6_get_hoplimit(struct net_device *dev)
int ipv6_get_hoplimit(struct net_device *dev)
{
int hoplimit = ipv6_devconf.hop_limit;
struct inet6_dev *idev;
......@@ -965,15 +967,8 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
}
}
if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
IPV6_DEFAULT_MCASTHOPS;
else
rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
ipv6_get_hoplimit(dev);
}
if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
if (!rt->u.dst.metrics[RTAX_MTU-1])
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
......@@ -1164,26 +1159,26 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
struct net_device *dev, u32 pmtu)
{
struct rt6_info *rt, *nrt;
if (pmtu < IPV6_MIN_MTU) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
pmtu);
/* According to RFC1981, the PMTU is set to the IPv6 minimum
link MTU if the node receives a Packet Too Big message
reporting next-hop MTU that is less than the IPv6 minimum MTU.
*/
pmtu = IPV6_MIN_MTU;
}
int allfrag = 0;
rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
if (rt == NULL)
return;
if (pmtu >= dst_pmtu(&rt->u.dst))
goto out;
if (pmtu < IPV6_MIN_MTU) {
/*
* According to RFC2460, when a node receives a Packet Too Big
* message reporting a PMTU below the IPv6 Minimum Link MTU (1280),
* the PMTU is set to that minimum and a fragment header should
* always be included.
*/
pmtu = IPV6_MIN_MTU;
allfrag = 1;
}
/* New mtu received -> path was valid.
They are sent only in response to data packets,
so that this nexthop apparently is reachable. --ANK
......@@ -1197,6 +1192,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
*/
if (rt->rt6i_flags & RTF_CACHE) {
rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
goto out;
......@@ -1211,6 +1208,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
nrt = rt6_cow(rt, daddr, saddr);
if (!nrt->u.dst.error) {
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
/* According to RFC 1981, detection of a PMTU increase shouldn't
happen within 5 mins; the recommended timer is 10 mins.
Here this route expiration time is set to ip6_rt_mtu_expires
......@@ -1232,6 +1231,8 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
ip6_ins_rt(nrt, NULL, NULL);
}
......@@ -1406,7 +1407,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_idev = idev;
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
rt->u.dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
......@@ -1993,9 +1994,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
{
if (write) {
proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
if (flush_delay < 0)
flush_delay = 0;
fib6_run_gc((unsigned long)flush_delay);
fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
return 0;
} else
return -EINVAL;
......@@ -2080,6 +2079,15 @@ ctl_table ipv6_route_table[] = {
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies,
},
{
.ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
.procname = "gc_min_interval_ms",
.data = &ip6_rt_gc_min_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_ms_jiffies,
.strategy = &sysctl_ms_jiffies,
},
{ .ctl_name = 0 }
};
......
......@@ -811,6 +811,8 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = dst_metric(dst, RTAX_HOPLIMIT);
if (hlimit < 0)
hlimit = ipv6_get_hoplimit(dst->dev);
}
if (msg->msg_flags&MSG_CONFIRM)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment