Commit 6b2efdc4 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'preparations-for-fib-rule-dscp-selector'

Ido Schimmel says:

====================
Preparations for FIB rule DSCP selector

This patchset moves the masking of the upper DSCP bits in 'flowi4_tos'
to the core instead of relying on callers of the FIB lookup API to do
it.

This will allow us to start changing users of the API to initialize the
'flowi4_tos' field with all six bits of the DSCP field. In turn, this
will allow us to extend FIB rules with a new DSCP selector.

By masking the upper DSCP bits in the core we are able to maintain the
behavior of the TOS selector in FIB rules and routes to only match on
the lower DSCP bits.

While working on this I found two users of the API that do not mask the
upper DSCP bits before performing the lookup. The first is an ancient
netlink family that is unlikely to be used. It is adjusted in patch #1
to mask both the upper DSCP bits and the ECN bits before calling the
API.

The second user is a nftables module that differs in this regard from
its equivalent iptables module. It is adjusted in patch #2 to invoke the
API with the upper DSCP bits masked, like all other callers. The
relevant selftest passed, but in the unlikely case that regressions are
reported because of this change, we can restore the existing behavior
using a new flow information flag as discussed here [1].

The last patch moves the masking of the upper DSCP bits to the core,
making the first two patches redundant, but I wanted to post them
separately to call attention to the behavior change for these two users
of the FIB lookup API.

Future patchsets (around 3) will start unmasking the upper DSCP bits
throughout the networking stack before adding support for the new FIB
rule DSCP selector.

Changes from v1 [2]:

Patch #3: Include <linux/ip.h> in <linux/in_route.h> instead of
including it in net/ip_fib.h

[1] https://lore.kernel.org/netdev/ZpqpB8vJU%2FQ6LSqa@debian/
[2] https://lore.kernel.org/netdev/20240725131729.1729103-1-idosch@nvidia.com/
====================

Link: https://patch.msgid.link/20240814125224.972815-1-idosch@nvidia.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents ccb445ae 1fa3314c
......@@ -22,6 +22,7 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/refcount.h>
#include <linux/in_route.h>
struct fib_config {
u8 fc_dst_len;
......@@ -434,6 +435,11 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4)
{
return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos));
}
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
......
......@@ -2,6 +2,8 @@
#ifndef _LINUX_IN_ROUTE_H
#define _LINUX_IN_ROUTE_H
#include <linux/ip.h>
/* IPv4 routing cache flags */
#define RTCF_DEAD RTNH_F_DEAD
......
......@@ -1343,7 +1343,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
.flowi4_tos = frn->fl_tos,
.flowi4_tos = frn->fl_tos & IPTOS_RT_MASK,
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
......
......@@ -186,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
......
......@@ -2066,8 +2066,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
if (fa->fa_dscp &&
fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
......
......@@ -1580,8 +1580,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
if (index >= (1ul << fa->fa_slen))
continue;
}
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fi->fib_dead))
......
......@@ -22,8 +22,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
#define DSCP_BITS 0xfc
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
......@@ -110,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
fl4.flowi4_tos = iph->tos & DSCP_BITS;
fl4.flowi4_tos = iph->tos & IPTOS_RT_MASK;
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment