Commit 8b1309d9 authored by David S. Miller's avatar David S. Miller

[NET]: Fix hashing exploits in ipv4 routing, IP conntrack, and TCP synq.

Several hash table implementations in the networking were
remotely exploitable.  Remote attackers could launch attacks
whereby, using carefully chosen forged source addresses, they could
make every routing cache entry get hashed into the same hash chain.

Netfilter's IP conntrack module and the TCP syn-queue implementation
had identical vulnerabilities and have been fixed too.

The chosen solution to the problem involved using Bob Jenkins'
hash along with a randomly chosen input.  For the ipv4 routing
cache we take things one step further and periodically choose a
new random secret.  By default this happens every 10 minutes, but
this is configurable by the user via sysctl knobs.
parent 012c6bdf
#ifndef _LINUX_JHASH_H
#define _LINUX_JHASH_H
/* jhash.h: Jenkins hash support.
*
* Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
*
* http://burtleburtle.net/bob/hash/
*
* These are the credits from Bob's sources:
*
* lookup2.c, by Bob Jenkins, December 1996, Public Domain.
* hash(), hash2(), hash3, and mix() are externally useful functions.
* Routines to test the hash are included if SELF_TEST is defined.
* You can use this free for any purpose. It has no warranty.
*
* Copyright (C) 2003 David S. Miller (davem@redhat.com)
*
* I've modified Bob's hash to be useful in the Linux kernel, and
* any bugs present are surely my fault. -DaveM
*/
/* NOTE: Arguments are modified. */
/* Core mixing step of Bob Jenkins' lookup2 hash: reversibly mixes the
 * three 32-bit accumulators a, b and c so that every input bit can
 * affect every output bit.  Implemented as a macro (not a function)
 * so the caller's three locals are updated in place.
 */
#define __jhash_mix(a, b, c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
/* The golden ratio: an arbitrary value */
#define JHASH_GOLDEN_RATIO 0x9e3779b9
/* The most generic version, hashes an arbitrary sequence
 * of bytes. No alignment or length assumptions are made about
 * the input key.  Returns a 32-bit hash of the length bytes at
 * key, perturbed by initval (typically the caller's random secret).
 */
static __inline__ u32 jenkins_hash(void *key, u32 length, u32 initval)
{
u32 a, b, c, len;
u8 *k = key;
len = length;
a = b = JHASH_GOLDEN_RATIO;
c = initval;
/* Consume the key twelve bytes at a time, reading each group of
 * four bytes as a little-endian u32 regardless of host endianness. */
while (len >= 12) {
a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
__jhash_mix(a,b,c);
k += 12;
len -= 12;
}
/* Fold in the total length, then the trailing (len < 12) bytes.
 * Every case deliberately falls through to the ones below it. */
c += length;
switch (len) {
case 11: c += ((u32)k[10]<<24); /* fallthrough */
case 10: c += ((u32)k[9]<<16); /* fallthrough */
case 9 : c += ((u32)k[8]<<8); /* fallthrough */
/* The first byte of c is reserved for the length. */
case 8 : b += ((u32)k[7]<<24); /* fallthrough */
case 7 : b += ((u32)k[6]<<16); /* fallthrough */
case 6 : b += ((u32)k[5]<<8); /* fallthrough */
case 5 : b += k[4]; /* fallthrough */
case 4 : a += ((u32)k[3]<<24); /* fallthrough */
case 3 : a += ((u32)k[2]<<16); /* fallthrough */
case 2 : a += ((u32)k[1]<<8); /* fallthrough */
case 1 : a += k[0];
};
__jhash_mix(a,b,c);
return c;
}
/* A special optimized version that handles 1 or more of u32s.
 * The length parameter here is the number of u32s in the key.
 *
 * NOTE(review): named hash2 while the sibling helpers use the
 * jenkins_hash prefix — confirm whether callers rely on this name
 * before renaming for consistency.
 */
static __inline__ u32 hash2(u32 *k, u32 length, u32 initval)
{
u32 a, b, c, len;
a = b = JHASH_GOLDEN_RATIO;
c = initval;
len = length;
/* Consume the key three words at a time. */
while (len >= 3) {
a += k[0];
b += k[1];
c += k[2];
__jhash_mix(a, b, c);
k += 3; len -= 3;
}
/* Fold in the key length in bytes, then the 0-2 trailing words.
 * Both cases deliberately fall through. */
c += length * 4;
switch (len) {
case 2 : b += k[1]; /* fallthrough */
case 1 : a += k[0];
};
__jhash_mix(a,b,c);
return c;
}
/* Special ultra-optimized versions that know they are hashing exactly
 * 3, 2 or 1 word(s).
 *
 * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
 * done at the end is not done here.
 */
static __inline__ u32 jenkins_hash_3words(u32 a, u32 b, u32 c,
					  u32 initval)
{
	/* Fold the caller's secret into the third accumulator, seed
	 * the other two with the golden ratio, then run one mix round. */
	c += initval;
	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	__jhash_mix(a, b, c);
	return c;
}
static __inline__ u32 jenkins_hash_2words(u32 a, u32 b, u32 initval)
{
	/* With only two key words, the third accumulator starts out as
	 * the caller's secret alone. */
	u32 c = initval;

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	__jhash_mix(a, b, c);
	return c;
}
static __inline__ u32 jenkins_hash_1word(u32 a, u32 initval)
{
	/* Single key word: b is seeded purely with the golden ratio and
	 * c purely with the caller's secret before the mix round. */
	u32 b = JHASH_GOLDEN_RATIO;
	u32 c = initval;

	a += JHASH_GOLDEN_RATIO;
	__jhash_mix(a, b, c);
	return c;
}
#endif /* _LINUX_JHASH_H */
...@@ -327,7 +327,8 @@ enum { ...@@ -327,7 +327,8 @@ enum {
NET_IPV4_ROUTE_GC_ELASTICITY=14, NET_IPV4_ROUTE_GC_ELASTICITY=14,
NET_IPV4_ROUTE_MTU_EXPIRES=15, NET_IPV4_ROUTE_MTU_EXPIRES=15,
NET_IPV4_ROUTE_MIN_PMTU=16, NET_IPV4_ROUTE_MIN_PMTU=16,
NET_IPV4_ROUTE_MIN_ADVMSS=17 NET_IPV4_ROUTE_MIN_ADVMSS=17,
NET_IPV4_ROUTE_SECRET_INTERVAL=18,
}; };
enum enum
......
...@@ -1627,6 +1627,7 @@ struct tcp_listen_opt ...@@ -1627,6 +1627,7 @@ struct tcp_listen_opt
int qlen; int qlen;
int qlen_young; int qlen_young;
int clock_hand; int clock_hand;
u32 hash_rnd;
struct open_request *syn_table[TCP_SYNQ_HSIZE]; struct open_request *syn_table[TCP_SYNQ_HSIZE];
}; };
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
#include <linux/stddef.h> #include <linux/stddef.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
/* For ERR_PTR(). Yeah, I know... --RR */ /* For ERR_PTR(). Yeah, I know... --RR */
#include <linux/fs.h> #include <linux/fs.h>
...@@ -104,20 +106,21 @@ ip_conntrack_put(struct ip_conntrack *ct) ...@@ -104,20 +106,21 @@ ip_conntrack_put(struct ip_conntrack *ct)
nf_conntrack_put(&ct->infos[0]); nf_conntrack_put(&ct->infos[0]);
} }
static inline u_int32_t static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple) hash_conntrack(const struct ip_conntrack_tuple *tuple)
{ {
#if 0 #if 0
dump_tuple(tuple); dump_tuple(tuple);
#endif #endif
/* ntohl because more differences in low bits. */ return (jenkins_hash_3words(tuple->src.ip,
/* To ensure that halves of the same connection don't hash (tuple->dst.ip ^ tuple->dst.protonum),
clash, we add the source per-proto again. */ (tuple->src.u.all |
return (ntohl(tuple->src.ip + tuple->dst.ip (tuple->dst.u.all << 16)),
+ tuple->src.u.all + tuple->dst.u.all ip_conntrack_hash_rnd)
+ tuple->dst.protonum) % ip_conntrack_htable_size);
+ ntohs(tuple->src.u.all))
% ip_conntrack_htable_size;
} }
int int
...@@ -613,11 +616,16 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, ...@@ -613,11 +616,16 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
{ {
struct ip_conntrack *conntrack; struct ip_conntrack *conntrack;
struct ip_conntrack_tuple repl_tuple; struct ip_conntrack_tuple repl_tuple;
size_t hash, repl_hash; size_t hash;
struct ip_conntrack_expect *expected; struct ip_conntrack_expect *expected;
int i; int i;
static unsigned int drop_next = 0; static unsigned int drop_next = 0;
if (!ip_conntrack_hash_rnd_initted) {
get_random_bytes(&ip_conntrack_hash_rnd, 4);
ip_conntrack_hash_rnd_initted = 1;
}
hash = hash_conntrack(tuple); hash = hash_conntrack(tuple);
if (ip_conntrack_max && if (ip_conntrack_max &&
...@@ -641,7 +649,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, ...@@ -641,7 +649,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
DEBUGP("Can't invert tuple.\n"); DEBUGP("Can't invert tuple.\n");
return NULL; return NULL;
} }
repl_hash = hash_conntrack(&repl_tuple);
conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) { if (!conntrack) {
......
...@@ -86,6 +86,7 @@ ...@@ -86,6 +86,7 @@
#include <linux/mroute.h> #include <linux/mroute.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/jhash.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <net/protocol.h> #include <net/protocol.h>
#include <net/ip.h> #include <net/ip.h>
...@@ -120,13 +121,14 @@ int ip_rt_gc_elasticity = 8; ...@@ -120,13 +121,14 @@ int ip_rt_gc_elasticity = 8;
int ip_rt_mtu_expires = 10 * 60 * HZ; int ip_rt_mtu_expires = 10 * 60 * HZ;
int ip_rt_min_pmtu = 512 + 20 + 20; int ip_rt_min_pmtu = 512 + 20 + 20;
int ip_rt_min_advmss = 256; int ip_rt_min_advmss = 256;
int ip_rt_secret_interval = 10 * 60 * HZ;
static unsigned long rt_deadline; static unsigned long rt_deadline;
#define RTprint(a...) printk(KERN_DEBUG a) #define RTprint(a...) printk(KERN_DEBUG a)
static struct timer_list rt_flush_timer; static struct timer_list rt_flush_timer;
static struct timer_list rt_periodic_timer; static struct timer_list rt_periodic_timer;
static struct timer_list rt_secret_timer;
/* /*
* Interface to generic destination cache. * Interface to generic destination cache.
...@@ -196,19 +198,17 @@ struct rt_hash_bucket { ...@@ -196,19 +198,17 @@ struct rt_hash_bucket {
static struct rt_hash_bucket *rt_hash_table; static struct rt_hash_bucket *rt_hash_table;
static unsigned rt_hash_mask; static unsigned rt_hash_mask;
static int rt_hash_log; static int rt_hash_log;
static unsigned int rt_hash_rnd;
struct rt_cache_stat *rt_cache_stat; struct rt_cache_stat *rt_cache_stat;
static int rt_intern_hash(unsigned hash, struct rtable *rth, static int rt_intern_hash(unsigned hash, struct rtable *rth,
struct rtable **res); struct rtable **res);
static unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos) static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
{ {
unsigned hash = ((daddr & 0xF0F0F0F0) >> 4) | return (jenkins_hash_3words(daddr, saddr, (u32) tos, rt_hash_rnd)
((daddr & 0x0F0F0F0F) << 4); & rt_hash_mask);
hash ^= saddr ^ tos;
hash ^= (hash >> 16);
return (hash ^ (hash >> 8)) & rt_hash_mask;
} }
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
...@@ -566,6 +566,15 @@ void rt_cache_flush(int delay) ...@@ -566,6 +566,15 @@ void rt_cache_flush(int delay)
spin_unlock_bh(&rt_flush_lock); spin_unlock_bh(&rt_flush_lock);
} }
static void rt_secret_rebuild(unsigned long dummy)
{
unsigned long now = jiffies;
get_random_bytes(&rt_hash_rnd, 4);
rt_cache_flush(0);
mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
}
/* /*
Short description of GC goals. Short description of GC goals.
...@@ -2553,10 +2562,22 @@ ctl_table ipv4_route_table[] = { ...@@ -2553,10 +2562,22 @@ ctl_table ipv4_route_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL,
.procname = "secret_interval",
.data = &ip_rt_secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies,
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };
#endif #endif
rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
(jiffies ^ (jiffies >> 7)));
#ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct; struct ip_rt_acct *ip_rt_acct;
...@@ -2674,6 +2695,7 @@ int __init ip_rt_init(void) ...@@ -2674,6 +2695,7 @@ int __init ip_rt_init(void)
rt_flush_timer.function = rt_run_flush; rt_flush_timer.function = rt_run_flush;
init_timer(&rt_periodic_timer); init_timer(&rt_periodic_timer);
rt_periodic_timer.function = rt_check_expire; rt_periodic_timer.function = rt_check_expire;
rt_secret_timer.function = rt_secret_rebuild;
/* All the timers, started at system startup tend /* All the timers, started at system startup tend
to synchronize. Perturb it a bit. to synchronize. Perturb it a bit.
...@@ -2682,6 +2704,10 @@ int __init ip_rt_init(void) ...@@ -2682,6 +2704,10 @@ int __init ip_rt_init(void)
ip_rt_gc_interval; ip_rt_gc_interval;
add_timer(&rt_periodic_timer); add_timer(&rt_periodic_timer);
rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
ip_rt_secret_interval;
add_timer(&rt_secret_timer);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
if (rt_cache_proc_init()) if (rt_cache_proc_init())
goto out_enomem; goto out_enomem;
......
...@@ -254,6 +254,7 @@ ...@@ -254,6 +254,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/random.h>
#include <net/icmp.h> #include <net/icmp.h>
#include <net/tcp.h> #include <net/tcp.h>
...@@ -551,6 +552,7 @@ int tcp_listen_start(struct sock *sk) ...@@ -551,6 +552,7 @@ int tcp_listen_start(struct sock *sk)
for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog) if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
break; break;
get_random_bytes(&lopt->hash_rnd, 4);
write_lock_bh(&tp->syn_wait_lock); write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = lopt; tp->listen_opt = lopt;
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include <linux/fcntl.h> #include <linux/fcntl.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h> #include <linux/init.h>
#include <net/icmp.h> #include <net/icmp.h>
...@@ -883,12 +884,10 @@ static __inline__ int tcp_v4_iif(struct sk_buff *skb) ...@@ -883,12 +884,10 @@ static __inline__ int tcp_v4_iif(struct sk_buff *skb)
return ((struct rtable *)skb->dst)->rt_iif; return ((struct rtable *)skb->dst)->rt_iif;
} }
static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
{ {
unsigned h = raddr ^ rport; return (jenkins_hash_2words(raddr, (u32) rport, rnd)
h ^= h >> 16; & (TCP_SYNQ_HSIZE - 1));
h ^= h >> 8;
return h & (TCP_SYNQ_HSIZE - 1);
} }
static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
...@@ -899,7 +898,7 @@ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, ...@@ -899,7 +898,7 @@ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
struct tcp_listen_opt *lopt = tp->listen_opt; struct tcp_listen_opt *lopt = tp->listen_opt;
struct open_request *req, **prev; struct open_request *req, **prev;
for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
(req = *prev) != NULL; (req = *prev) != NULL;
prev = &req->dl_next) { prev = &req->dl_next) {
if (req->rmt_port == rport && if (req->rmt_port == rport &&
...@@ -919,7 +918,7 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) ...@@ -919,7 +918,7 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
struct tcp_listen_opt *lopt = tp->listen_opt; struct tcp_listen_opt *lopt = tp->listen_opt;
unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port); u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
req->expires = jiffies + TCP_TIMEOUT_INIT; req->expires = jiffies + TCP_TIMEOUT_INIT;
req->retrans = 0; req->retrans = 0;
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/in6.h> #include <linux/in6.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h> #include <linux/ipsec.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
...@@ -387,12 +388,12 @@ __inline__ struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, ...@@ -387,12 +388,12 @@ __inline__ struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
* Open request hash tables. * Open request hash tables.
*/ */
static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport) static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{ {
unsigned h = raddr->s6_addr32[3] ^ rport; return (jenkins_hash_3words(raddr->s6_addr32[0] ^ raddr->s6_addr32[1],
h ^= h>>16; raddr->s6_addr32[2] ^ raddr->s6_addr32[3],
h ^= h>>8; (u32) rport, rnd)
return h&(TCP_SYNQ_HSIZE-1); & (TCP_SYNQ_HSIZE - 1));
} }
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
...@@ -405,7 +406,7 @@ static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, ...@@ -405,7 +406,7 @@ static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
struct tcp_listen_opt *lopt = tp->listen_opt; struct tcp_listen_opt *lopt = tp->listen_opt;
struct open_request *req, **prev; struct open_request *req, **prev;
for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport)]; for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
(req = *prev) != NULL; (req = *prev) != NULL;
prev = &req->dl_next) { prev = &req->dl_next) {
if (req->rmt_port == rport && if (req->rmt_port == rport &&
...@@ -1162,7 +1163,7 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req) ...@@ -1162,7 +1163,7 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{ {
struct tcp_opt *tp = tcp_sk(sk); struct tcp_opt *tp = tcp_sk(sk);
struct tcp_listen_opt *lopt = tp->listen_opt; struct tcp_listen_opt *lopt = tp->listen_opt;
unsigned h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port); u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
req->sk = NULL; req->sk = NULL;
req->expires = jiffies + TCP_TIMEOUT_INIT; req->expires = jiffies + TCP_TIMEOUT_INIT;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment