Commit fb799dd4 authored by David S. Miller

Merge branch 'ipv6-RT_ONLINK-remove-prep'

Guillaume Nault says:

====================
ipv4: First steps toward removing RTO_ONLINK

RTO_ONLINK is a flag that allows reducing the scope of route lookups.
It's stored in a normally unused bit of the ->flowi4_tos field, in
struct flowi4. However it has several problems:

 * This bit is also used by ECN. Although ECN bits are supposed to be
   cleared before doing a route lookup, it happened that some code
   paths didn't properly sanitise their ->flowi4_tos. So this mechanism
   is fragile and we had bugs in the past where ECN bits slipped in and
   could end up being erroneously interpreted as RTO_ONLINK.

 * A dscp_t type was recently introduced to ensure ECN bits are cleared
   during route lookups. ->flowi4_tos is the most important structure
   field to convert, but RTO_ONLINK prevents such conversion, as dscp_t
   mandates that ECN bits (where RTO_ONLINK is stored) be zero.

Therefore we need to stop using RTO_ONLINK altogether. Fortunately
RTO_ONLINK isn't a necessity. Instead of passing a flag in ->flowi4_tos
to tell the route lookup function to restrict the scope, we can simply
initialise the scope correctly.

Patch 1 does some preparatory work: it stops resetting ->flowi4_scope
automatically before a route lookup, thus allowing callers to set their
desired scope without having to rely on the RTO_ONLINK flag.

Patches 2-3 convert a few code paths to avoid relying on RTO_ONLINK.

More conversions will have to take place before we can eventually
remove this flag.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents cb1e6bf4 b1ad4138
...@@ -43,6 +43,19 @@ ...@@ -43,6 +43,19 @@
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
/*
 * Pick the route lookup scope for a socket.
 *
 * Returns RT_SCOPE_LINK when SOCK_LOCALROUTE is set on @sk and
 * RT_SCOPE_UNIVERSE otherwise, so callers can pass the scope directly
 * instead of encoding it as a flag in the TOS value.
 */
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
{
	return sock_flag(sk, SOCK_LOCALROUTE) ? RT_SCOPE_LINK
					      : RT_SCOPE_UNIVERSE;
}
/*
 * TOS value to use for a route lookup on behalf of @sk.
 *
 * Applies RT_TOS() to the socket's tos field. Unlike RT_CONN_FLAGS(),
 * the SOCK_LOCALROUTE flag is not OR-ed into the result.
 */
static inline __u8 ip_sock_rt_tos(const struct sock *sk)
{
	__u8 sk_tos = inet_sk(sk)->tos;

	return RT_TOS(sk_tos);
}
struct ip_tunnel_info; struct ip_tunnel_info;
struct fib_nh; struct fib_nh;
struct fib_info; struct fib_info;
...@@ -289,39 +302,38 @@ static inline char rt_tos2priority(u8 tos) ...@@ -289,39 +302,38 @@ static inline char rt_tos2priority(u8 tos)
* ip_route_newports() calls. * ip_route_newports() calls.
*/ */
static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
u32 tos, int oif, u8 protocol, __be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport, __be16 sport, __be16 dport,
struct sock *sk) const struct sock *sk)
{ {
__u8 flow_flags = 0; __u8 flow_flags = 0;
if (inet_sk(sk)->transparent) if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC; flow_flags |= FLOWI_FLAG_ANYSRC;
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
protocol, flow_flags, dst, src, dport, sport, ip_sock_rt_scope(sk), protocol, flow_flags, dst,
sk->sk_uid); src, dport, sport, sk->sk_uid);
} }
static inline struct rtable *ip_route_connect(struct flowi4 *fl4, static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
__be32 dst, __be32 src, u32 tos, __be32 src, int oif, u8 protocol,
int oif, u8 protocol,
__be16 sport, __be16 dport, __be16 sport, __be16 dport,
struct sock *sk) struct sock *sk)
{ {
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
struct rtable *rt; struct rtable *rt;
ip_route_connect_init(fl4, dst, src, tos, oif, protocol, ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk);
sport, dport, sk);
if (!dst || !src) { if (!dst || !src) {
rt = __ip_route_output_key(net, fl4); rt = __ip_route_output_key(net, fl4);
if (IS_ERR(rt)) if (IS_ERR(rt))
return rt; return rt;
ip_rt_put(rt); ip_rt_put(rt);
flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr,
fl4->saddr);
} }
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
return ip_route_output_flow(net, fl4, sk); return ip_route_output_flow(net, fl4, sk);
......
...@@ -76,9 +76,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -76,9 +76,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port; orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4; fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport,
IPPROTO_DCCP, orig_dport, sk);
orig_sport, orig_dport, sk);
if (IS_ERR(rt)) if (IS_ERR(rt))
return PTR_ERR(rt); return PTR_ERR(rt);
......
...@@ -1233,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) ...@@ -1233,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
/* Query new route. */ /* Query new route. */
fl4 = &inet->cork.fl.u.ip4; fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
sk->sk_bound_dev_if, sk->sk_protocol, sk->sk_protocol, inet->inet_sport,
inet->inet_sport, inet->inet_dport, sk); inet->inet_dport, sk);
if (IS_ERR(rt)) if (IS_ERR(rt))
return PTR_ERR(rt); return PTR_ERR(rt);
......
...@@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len ...@@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
saddr = inet->mc_addr; saddr = inet->mc_addr;
} }
fl4 = &inet->cork.fl.u.ip4; fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
RT_CONN_FLAGS(sk), oif, sk->sk_protocol, inet->inet_sport,
sk->sk_protocol, usin->sin_port, sk);
inet->inet_sport, usin->sin_port, sk);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
err = PTR_ERR(rt); err = PTR_ERR(rt);
if (err == -ENETUNREACH) if (err == -ENETUNREACH)
......
...@@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4) ...@@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4)
__u8 tos = RT_FL_TOS(fl4); __u8 tos = RT_FL_TOS(fl4);
fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = tos & RTO_ONLINK ? if (tos & RTO_ONLINK)
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; fl4->flowi4_scope = RT_SCOPE_LINK;
} }
static void __build_flow_key(const struct net *net, struct flowi4 *fl4, static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk, const struct sock *sk, const struct iphdr *iph,
const struct iphdr *iph, int oif, __u8 tos, u8 prot, u32 mark,
int oif, u8 tos, int flow_flags)
u8 prot, u32 mark, int flow_flags)
{ {
__u8 scope = RT_SCOPE_UNIVERSE;
if (sk) { if (sk) {
const struct inet_sock *inet = inet_sk(sk); const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if; oif = sk->sk_bound_dev_if;
mark = sk->sk_mark; mark = sk->sk_mark;
tos = RT_CONN_FLAGS(sk); tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
} }
flowi4_init_output(fl4, oif, mark, tos,
RT_SCOPE_UNIVERSE, prot, flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
flow_flags, prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk)); sock_net_uid(net, sk));
} }
...@@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, ...@@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct net *net = dev_net(skb->dev); const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb); const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex; int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol; u8 prot = iph->protocol;
u32 mark = skb->mark; u32 mark = skb->mark;
__u8 tos = iph->tos;
__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
} }
...@@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) ...@@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr) if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr; daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk), inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid); daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
...@@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf ...@@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
const struct iphdr *iph = (const struct iphdr *) skb->data; const struct iphdr *iph = (const struct iphdr *) skb->data;
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex; int oif = skb->dev->ifindex;
u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol; u8 prot = iph->protocol;
u32 mark = skb->mark; u32 mark = skb->mark;
__u8 tos = iph->tos;
rt = (struct rtable *) dst; rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true); __ip_do_redirect(rt, skb, &fl4, true);
} }
...@@ -1061,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, ...@@ -1061,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4; struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb); ip_rt_build_flow_key(&fl4, sk, skb);
ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */ /* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev)) if (skb && netif_is_any_bridge_port(skb->dev))
...@@ -1078,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, ...@@ -1078,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct rtable *rt; struct rtable *rt;
u32 mark = IP4_REPLY_MARK(net, skb->mark); u32 mark = IP4_REPLY_MARK(net, skb->mark);
__build_flow_key(net, &fl4, NULL, iph, oif, __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark,
RT_TOS(iph->tos), protocol, mark, 0); 0);
rt = __ip_route_output_key(net, &fl4); rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) { if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu); __ip_rt_update_pmtu(rt, &fl4, mtu);
...@@ -1136,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) ...@@ -1136,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out; goto out;
new = true; new = true;
} else {
ip_rt_fix_tos(&fl4);
} }
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
...@@ -1169,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, ...@@ -1169,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4; struct flowi4 fl4;
struct rtable *rt; struct rtable *rt;
__build_flow_key(net, &fl4, NULL, iph, oif, __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0);
RT_TOS(iph->tos), protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4); rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) { if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false); __ip_do_redirect(rt, skb, &fl4, false);
......
...@@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ...@@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port; orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4; fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport,
IPPROTO_TCP, orig_dport, sk);
orig_sport, orig_dport, sk);
if (IS_ERR(rt)) { if (IS_ERR(rt)) {
err = PTR_ERR(rt); err = PTR_ERR(rt);
if (err == -ENETUNREACH) if (err == -ENETUNREACH)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment