Commit 7dc02d7f authored by Jakub Kicinski

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Reduce number of hardware offload retries from flowtable datapath
   which might hog system with retries, from Felix Fietkau.

2) Skip neighbour lookup for PPPoE device, fill_forward_path() already
   provides this, and set the destination address from fill_forward_path
   for PPPoE device, also from Felix.

4) When combining PPPoE on top of a VLAN device, set info->outdev to the
   PPPoE device so software offload works, from Felix.

5) Fix TCP teardown flowtable state, races with conntrack gc might result
   in resetting the state to ESTABLISHED and the time to one day. Joint
   work with Oz Shlomo and Sven Auhagen.

6) Call dst_check() from flowtable datapath to check if dst is stale
   instead of doing it from garbage collector path.

7) Disable register tracking infrastructure, either user-space or
   kernel needs to pre-fetch keys unconditionally, otherwise register
   tracking assumes data is already available in a register that might
   well not be there, leading to incorrect reductions.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: disable expression reduction infra
  netfilter: flowtable: move dst_check to packet path
  netfilter: flowtable: fix TCP flow teardown
  netfilter: nft_flow_offload: fix offload with pppoe + vlan
  net: fix dev_fill_forward_path with pppoe + bridge
  netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices
  netfilter: flowtable: fix excessive hw offload attempts after failure
====================

Link: https://lore.kernel.org/r/20220518213841.359653-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 6fd45e79 9e539c5b
...@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, ...@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
path->encap.proto = htons(ETH_P_PPP_SES); path->encap.proto = htons(ETH_P_PPP_SES);
path->encap.id = be16_to_cpu(po->num); path->encap.id = be16_to_cpu(po->num);
memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
path->dev = ctx->dev; path->dev = ctx->dev;
ctx->dev = dev; ctx->dev = dev;
......
...@@ -900,7 +900,7 @@ struct net_device_path_stack { ...@@ -900,7 +900,7 @@ struct net_device_path_stack {
struct net_device_path_ctx { struct net_device_path_ctx {
const struct net_device *dev; const struct net_device *dev;
const u8 *daddr; u8 daddr[ETH_ALEN];
int num_vlans; int num_vlans;
struct { struct {
......
...@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, ...@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev; const struct net_device *last_dev;
struct net_device_path_ctx ctx = { struct net_device_path_ctx ctx = {
.dev = dev, .dev = dev,
.daddr = daddr,
}; };
struct net_device_path *path; struct net_device_path *path;
int ret = 0; int ret = 0;
memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0; stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev; last_dev = ctx.dev;
......
...@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init); ...@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{ {
tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0; tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0; tcp->seen[1].td_maxwin = 0;
} }
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) static void flow_offload_fixup_ct(struct nf_conn *ct)
{ {
struct net *net = nf_ct_net(ct); struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct); int l4num = nf_ct_protonum(ct);
...@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ...@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) { if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_tcp_net *tn = nf_tcp_pernet(net);
timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; flow_offload_fixup_tcp(&ct->proto.tcp);
timeout = tn->timeouts[ct->proto.tcp.state];
timeout -= tn->offload_timeout; timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) { } else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net); struct nf_udp_net *tn = nf_udp_pernet(net);
...@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ...@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
} }
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
if (nf_ct_protonum(ct) == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
}
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
flow_offload_fixup_ct_state(ct);
flow_offload_fixup_ct_timeout(ct);
}
static void flow_offload_route_release(struct flow_offload *flow) static void flow_offload_route_release(struct flow_offload *flow)
{ {
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
...@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, ...@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
u32 timeout; u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
if (READ_ONCE(flow->timeout) != timeout) if (timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout); WRITE_ONCE(flow->timeout, timeout);
else
return;
if (likely(!nf_flowtable_hw_offload(flow_table))) if (likely(!nf_flowtable_hw_offload(flow_table)))
return; return;
...@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table, ...@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
rhashtable_remove_fast(&flow_table->rhashtable, rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params); nf_flow_offload_rhash_params);
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
if (nf_flow_has_expired(flow))
flow_offload_fixup_ct(flow->ct);
else
flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow); flow_offload_free(flow);
} }
void flow_offload_teardown(struct flow_offload *flow) void flow_offload_teardown(struct flow_offload *flow)
{ {
clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags); set_bit(NF_FLOW_TEARDOWN, &flow->flags);
flow_offload_fixup_ct(flow->ct);
flow_offload_fixup_ct_state(flow->ct);
} }
EXPORT_SYMBOL_GPL(flow_offload_teardown); EXPORT_SYMBOL_GPL(flow_offload_teardown);
...@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, ...@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err; return err;
} }
static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
{
struct dst_entry *dst;
if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
dst = tuple->dst_cache;
if (!dst_check(dst, tuple->dst_cookie))
return true;
}
return false;
}
static bool nf_flow_has_stale_dst(struct flow_offload *flow)
{
return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
}
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data) struct flow_offload *flow, void *data)
{ {
if (nf_flow_has_expired(flow) || if (nf_flow_has_expired(flow) ||
nf_ct_is_dying(flow->ct) || nf_ct_is_dying(flow->ct))
nf_flow_has_stale_dst(flow)) flow_offload_teardown(flow);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) {
......
...@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ...@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true; return true;
} }
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
return true;
return dst_check(tuple->dst_cache, tuple->dst_cookie);
}
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state, const struct nf_hook_state *state,
struct dst_entry *dst) struct dst_entry *dst)
...@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ...@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT; return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (skb_try_make_writable(skb, thoff + hdrsize)) if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP; return NF_DROP;
...@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ...@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff)) if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT; return NF_ACCEPT;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
return NF_ACCEPT;
}
if (skb_try_make_writable(skb, thoff + hdrsize)) if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP; return NF_DROP;
......
...@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); ...@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static bool nft_expr_reduce(struct nft_regs_track *track, static bool nft_expr_reduce(struct nft_regs_track *track,
const struct nft_expr *expr) const struct nft_expr *expr)
{ {
if (!expr->ops->reduce) { return false;
pr_warn_once("missing reduce for expression %s ",
expr->ops->type->name);
return false;
}
if (nft_reduce_is_readonly(expr))
return false;
return expr->ops->reduce(track, expr);
} }
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
......
...@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route, ...@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
} }
static bool nft_is_valid_ether_device(const struct net_device *dev)
{
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
return false;
return true;
}
static int nft_dev_fill_forward_path(const struct nf_flow_route *route, static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache, const struct dst_entry *dst_cache,
const struct nf_conn *ct, const struct nf_conn *ct,
...@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, ...@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n; struct neighbour *n;
u8 nud_state; u8 nud_state;
if (!nft_is_valid_ether_device(dev))
goto out;
n = dst_neigh_lookup(dst_cache, daddr); n = dst_neigh_lookup(dst_cache, daddr);
if (!n) if (!n)
return -1; return -1;
...@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route, ...@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
if (!(nud_state & NUD_VALID)) if (!(nud_state & NUD_VALID))
return -1; return -1;
out:
return dev_fill_forward_path(dev, ha, stack); return dev_fill_forward_path(dev, ha, stack);
} }
...@@ -78,15 +91,6 @@ struct nft_forward_info { ...@@ -78,15 +91,6 @@ struct nft_forward_info {
enum flow_offload_xmit_type xmit_type; enum flow_offload_xmit_type xmit_type;
}; };
static bool nft_is_valid_ether_device(const struct net_device *dev)
{
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
return false;
return true;
}
static void nft_dev_path_info(const struct net_device_path_stack *stack, static void nft_dev_path_info(const struct net_device_path_stack *stack,
struct nft_forward_info *info, struct nft_forward_info *info,
unsigned char *ha, struct nf_flowtable *flowtable) unsigned char *ha, struct nf_flowtable *flowtable)
...@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, ...@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
info->indev = NULL; info->indev = NULL;
break; break;
} }
info->outdev = path->dev; if (!info->outdev)
info->outdev = path->dev;
info->encap[info->num_encaps].id = path->encap.id; info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto; info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++; info->num_encaps++;
...@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, ...@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
case IPPROTO_TCP: case IPPROTO_TCP:
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph); sizeof(_tcph), &_tcph);
if (unlikely(!tcph || tcph->fin || tcph->rst)) if (unlikely(!tcph || tcph->fin || tcph->rst ||
!nf_conntrack_tcp_established(ct)))
goto out; goto out;
break; break;
case IPPROTO_UDP: case IPPROTO_UDP:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment