Commit 59c820b2, authored by Willem de Bruijn, committed by David S. Miller

ipv6: elide flowlabel check if no exclusive leases exist

Processes can request ipv6 flowlabels with cmsg IPV6_FLOWINFO.
If not set, by default an autogenerated flowlabel is selected.

Explicit flowlabels require a control operation per label plus a
datapath check on every connection (every datagram if unconnected).
This is particularly expensive on unconnected sockets multiplexing
many flows, such as QUIC.

In the common case, where no lease is exclusive, the check can be
safely elided, as both lease request and check trivially succeed.
Indeed, autoflowlabel does the same even with exclusive leases.

Elide the check if no process has requested an exclusive lease.

fl6_sock_lookup previously returned either a reference to a lease or
NULL to denote failure. Modify it to return a real error and update
all callers. On a NULL return, they can use the label and will elide
the atomic_dec in fl6_sock_release.

This is an optimization. Robust applications still have to revert to
requesting leases if the fast path fails due to an exclusive lease.

Changes RFC->v1:
  - use static_key_false_deferred to rate limit jump label operations
    - call static_key_deferred_flush to stop timers on exit
  - move decrement out of RCU context
  - defer optimization also if opt data is associated with a lease
  - updated all fl6_sock_lookup callers, not just udp
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ee4f56f4
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/jhash.h> #include <linux/jhash.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/jump_label_ratelimit.h>
#include <net/if_inet6.h> #include <net/if_inet6.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include <net/flow.h> #include <net/flow.h>
...@@ -389,7 +390,18 @@ static inline void txopt_put(struct ipv6_txoptions *opt) ...@@ -389,7 +390,18 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
kfree_rcu(opt, rcu); kfree_rcu(opt, rcu);
} }
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label);
extern struct static_key_false_deferred ipv6_flowlabel_exclusive;
/*
 * fl6_sock_lookup - look up the flowlabel lease @label for socket @sk,
 * skipping the lookup entirely while the ipv6_flowlabel_exclusive static
 * key is disabled.
 *
 * Return:
 *   NULL             - the static key is off, so no lookup was performed;
 *                      per the commit description, callers may use the
 *                      label as-is and have no lease to release.
 *   ERR_PTR(-ENOENT) - the key is on and __fl6_sock_lookup() returned NULL.
 *   otherwise        - the pointer returned by __fl6_sock_lookup().
 */
static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk,
						    __be32 label)
{
	if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) {
		struct ip6_flowlabel *lease = __fl6_sock_lookup(sk, label);

		/* Slow path: a failed lookup is now a real error, not NULL. */
		if (!lease)
			return ERR_PTR(-ENOENT);
		return lease;
	}

	/* Fast path: the check is elided. */
	return NULL;
}
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl, struct ip6_flowlabel *fl,
struct ipv6_txoptions *fopt); struct ipv6_txoptions *fopt);
......
...@@ -830,7 +830,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -830,7 +830,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel; struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
} }
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/sock.h> #include <net/sock.h>
...@@ -53,6 +54,9 @@ static DEFINE_SPINLOCK(ip6_fl_lock); ...@@ -53,6 +54,9 @@ static DEFINE_SPINLOCK(ip6_fl_lock);
static DEFINE_SPINLOCK(ip6_sk_fl_lock); static DEFINE_SPINLOCK(ip6_sk_fl_lock);
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
#define for_each_fl_rcu(hash, fl) \ #define for_each_fl_rcu(hash, fl) \
for (fl = rcu_dereference_bh(fl_ht[(hash)]); \ for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
fl != NULL; \ fl != NULL; \
...@@ -90,6 +94,13 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) ...@@ -90,6 +94,13 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl; return fl;
} }
/*
 * fl_shared_exclusive - report whether @fl uses one of the sharing modes
 * that this file counts towards the ipv6_flowlabel_exclusive static key.
 *
 * Return: true when fl->share is IPV6_FL_S_EXCL, IPV6_FL_S_PROCESS or
 * IPV6_FL_S_USER; false for any other value.
 */
static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_PROCESS:
	case IPV6_FL_S_USER:
		return true;
	default:
		return false;
	}
}
static void fl_free_rcu(struct rcu_head *head) static void fl_free_rcu(struct rcu_head *head)
{ {
struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
...@@ -103,8 +114,13 @@ static void fl_free_rcu(struct rcu_head *head) ...@@ -103,8 +114,13 @@ static void fl_free_rcu(struct rcu_head *head)
static void fl_free(struct ip6_flowlabel *fl) static void fl_free(struct ip6_flowlabel *fl)
{ {
if (fl) if (!fl)
call_rcu(&fl->rcu, fl_free_rcu); return;
if (fl_shared_exclusive(fl) || fl->opt)
static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
call_rcu(&fl->rcu, fl_free_rcu);
} }
static void fl_release(struct ip6_flowlabel *fl) static void fl_release(struct ip6_flowlabel *fl)
...@@ -240,7 +256,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, ...@@ -240,7 +256,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
/* Socket flowlabel lists */ /* Socket flowlabel lists */
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{ {
struct ipv6_fl_socklist *sfl; struct ipv6_fl_socklist *sfl;
struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk);
...@@ -260,7 +276,7 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) ...@@ -260,7 +276,7 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
rcu_read_unlock_bh(); rcu_read_unlock_bh();
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(fl6_sock_lookup); EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
void fl6_free_socklist(struct sock *sk) void fl6_free_socklist(struct sock *sk)
{ {
...@@ -419,6 +435,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, ...@@ -419,6 +435,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
} }
fl->dst = freq->flr_dst; fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1); atomic_set(&fl->users, 1);
if (fl_shared_exclusive(fl) || fl->opt)
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
switch (fl->share) { switch (fl->share) {
case IPV6_FL_S_EXCL: case IPV6_FL_S_EXCL:
case IPV6_FL_S_ANY: case IPV6_FL_S_ANY:
...@@ -854,6 +872,7 @@ int ip6_flowlabel_init(void) ...@@ -854,6 +872,7 @@ int ip6_flowlabel_init(void)
void ip6_flowlabel_cleanup(void) void ip6_flowlabel_cleanup(void)
{ {
static_key_deferred_flush(&ipv6_flowlabel_exclusive);
del_timer(&ip6_fl_gc_timer); del_timer(&ip6_fl_gc_timer);
unregister_pernet_subsys(&ip6_flowlabel_net_ops); unregister_pernet_subsys(&ip6_flowlabel_net_ops);
} }
...@@ -834,7 +834,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -834,7 +834,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
} }
...@@ -876,7 +876,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -876,7 +876,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} }
if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
if (!(opt->opt_nflen|opt->opt_flen)) if (!(opt->opt_nflen|opt->opt_flen))
......
...@@ -171,7 +171,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ...@@ -171,7 +171,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel; struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
} }
......
...@@ -1319,7 +1319,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -1319,7 +1319,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
} }
...@@ -1371,7 +1371,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -1371,7 +1371,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} }
if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
if (!(opt->opt_nflen|opt->opt_flen)) if (!(opt->opt_nflen|opt->opt_flen))
......
...@@ -536,7 +536,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -536,7 +536,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
} }
...@@ -577,7 +577,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ...@@ -577,7 +577,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} }
if ((fl6.flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) { if ((fl6.flowlabel & IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL) if (IS_ERR(flowlabel))
return -EINVAL; return -EINVAL;
} }
if (!(opt->opt_nflen|opt->opt_flen)) if (!(opt->opt_nflen|opt->opt_flen))
......
...@@ -253,7 +253,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, ...@@ -253,7 +253,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct ip6_flowlabel *flowlabel; struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (!flowlabel) if (IS_ERR(flowlabel))
goto out; goto out;
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment