Commit 4ee2a8ca authored by Petr Machata's avatar Petr Machata Committed by Jakub Kicinski

net: ipv4: Add a sysctl to set multipath hash seed

When calculating hashes for the purpose of multipath forwarding, both IPv4
and IPv6 code currently fall back on flow_hash_from_keys(). That uses a
randomly-generated seed. That's a fine choice by default, but unfortunately
some deployments may need a tighter control over the seed used.

In this patch, make the seed configurable by adding a new sysctl key,
net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used
specifically for multipath forwarding and not for the other concerns that
flow_hash_from_keys() is used for, such as queue selection. Expose the knob
as sysctl because other such settings, such as headers to hash, are also
handled that way. Like those, the multipath hash seed is a per-netns
variable.

Despite being placed in the net.ipv4 namespace, the multipath seed sysctl
is used for both IPv4 and IPv6, similarly to e.g. a number of TCP
variables.

The seed used by flow_hash_from_keys() is a 128-bit quantity. However it
seems that usually the seed is a much more modest value. 32 bits seem
typical (Cisco, Cumulus), some systems go even lower. For that reason, and
to decouple the user interface from implementation details, go with a
32-bit quantity, which is then quadruplicated to form the siphash key.
Signed-off-by: default avatarPetr Machata <petrm@nvidia.com>
Reviewed-by: default avatarIdo Schimmel <idosch@nvidia.com>
Reviewed-by: default avatarNikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: default avatarDavid Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 3e453ca1
...@@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER ...@@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER
Default: 0x0007 (source IP, destination IP and IP protocol) Default: 0x0007 (source IP, destination IP and IP protocol)
fib_multipath_hash_seed - UNSIGNED INTEGER
The seed value used when calculating hash for multipath routes. Applies
to both IPv4 and IPv6 datapath. Only present for kernels built with
CONFIG_IP_ROUTE_MULTIPATH enabled.
When set to 0, the seed value used for multipath routing defaults to an
internal random-generated one.
The actual hashing algorithm is not specified -- there is no guarantee
that a next hop distribution effected by a given seed will keep stable
across kernel versions.
Default: 0 (random)
fib_sync_mem - UNSIGNED INTEGER fib_sync_mem - UNSIGNED INTEGER
Amount of dirty memory from fib entries that can be backlogged before Amount of dirty memory from fib entries that can be backlogged before
synchronize_rcu is forced. synchronize_rcu is forced.
......
...@@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) ...@@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
} }
u32 flow_hash_from_keys(struct flow_keys *keys); u32 flow_hash_from_keys(struct flow_keys *keys);
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
const siphash_key_t *keyval);
void skb_flow_get_icmp_tci(const struct sk_buff *skb, void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp, struct flow_dissector_key_icmp *key_icmp,
const void *data, int thoff, int hlen); const void *data, int thoff, int hlen);
......
...@@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); ...@@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys); const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
static void
fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed)
{
u64 mp_seed_64 = mp_seed;
key->key[0] = (mp_seed_64 << 32) | mp_seed_64;
key->key[1] = key->key[0];
}
static inline u32 fib_multipath_hash_from_keys(const struct net *net,
struct flow_keys *keys)
{
siphash_aligned_key_t hash_key;
u32 mp_seed;
mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
fib_multipath_hash_construct_key(&hash_key, mp_seed);
return flow_hash_from_keys_seed(keys, &hash_key);
}
#else
static inline u32 fib_multipath_hash_from_keys(const struct net *net, static inline u32 fib_multipath_hash_from_keys(const struct net *net,
struct flow_keys *keys) struct flow_keys *keys)
{ {
return flow_hash_from_keys(keys); return flow_hash_from_keys(keys);
} }
#endif
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
......
...@@ -40,6 +40,13 @@ struct inet_timewait_death_row { ...@@ -40,6 +40,13 @@ struct inet_timewait_death_row {
struct tcp_fastopen_context; struct tcp_fastopen_context;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct sysctl_fib_multipath_hash_seed {
u32 user_seed;
u32 mp_seed;
};
#endif
struct netns_ipv4 { struct netns_ipv4 {
/* Cacheline organization can be found documented in /* Cacheline organization can be found documented in
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
...@@ -246,6 +253,7 @@ struct netns_ipv4 { ...@@ -246,6 +253,7 @@ struct netns_ipv4 {
#endif #endif
#endif #endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH #ifdef CONFIG_IP_ROUTE_MULTIPATH
struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
u32 sysctl_fib_multipath_hash_fields; u32 sysctl_fib_multipath_hash_fields;
u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_use_neigh;
u8 sysctl_fib_multipath_hash_policy; u8 sysctl_fib_multipath_hash_policy;
......
...@@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys) ...@@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys)
} }
EXPORT_SYMBOL(flow_hash_from_keys); EXPORT_SYMBOL(flow_hash_from_keys);
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
const siphash_key_t *keyval)
{
return __flow_hash_from_keys(keys, keyval);
}
EXPORT_SYMBOL(flow_hash_from_keys_seed);
static inline u32 ___skb_get_hash(const struct sk_buff *skb, static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys, struct flow_keys *keys,
const siphash_key_t *keyval) const siphash_key_t *keyval)
......
...@@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write, ...@@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
return ret; return ret;
} }
static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
static void proc_fib_multipath_hash_init_rand_seed(void)
{
get_random_bytes(&proc_fib_multipath_hash_rand_seed,
sizeof(proc_fib_multipath_hash_rand_seed));
}
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
struct sysctl_fib_multipath_hash_seed new = {
.user_seed = user_seed,
.mp_seed = (user_seed ? user_seed :
proc_fib_multipath_hash_rand_seed),
};
WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
}
static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
struct sysctl_fib_multipath_hash_seed *mphs;
struct net *net = table->data;
struct ctl_table tmp;
u32 user_seed;
int ret;
mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
user_seed = mphs->user_seed;
tmp = *table;
tmp.data = &user_seed;
ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
proc_fib_multipath_hash_set_seed(net, user_seed);
call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
}
return ret;
}
#else
static void proc_fib_multipath_hash_init_rand_seed(void)
{
}
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
}
#endif #endif
static struct ctl_table ipv4_table[] = { static struct ctl_table ipv4_table[] = {
...@@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE, .extra1 = SYSCTL_ONE,
.extra2 = &fib_multipath_hash_fields_all_mask, .extra2 = &fib_multipath_hash_fields_all_mask,
}, },
{
.procname = "fib_multipath_hash_seed",
.data = &init_net,
.maxlen = sizeof(u32),
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_seed,
},
#endif #endif
{ {
.procname = "ip_unprivileged_port_start", .procname = "ip_unprivileged_port_start",
...@@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) ...@@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports) if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports; goto err_ports;
proc_fib_multipath_hash_set_seed(net, 0);
return 0; return 0;
err_ports: err_ports:
...@@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void) ...@@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void)
if (!hdr) if (!hdr)
return -ENOMEM; return -ENOMEM;
proc_fib_multipath_hash_init_rand_seed();
if (register_pernet_subsys(&ipv4_sysctl_ops)) { if (register_pernet_subsys(&ipv4_sysctl_ops)) {
unregister_net_sysctl_table(hdr); unregister_net_sysctl_table(hdr);
return -ENOMEM; return -ENOMEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment