Commit 37fb1c81 authored by David S. Miller's avatar David S. Miller

Merge tag 'nf-next-23-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter next pull request 2023-10-18

This series contains initial netfilter skb drop_reason support, from
myself.

First few patches fix up a few spots to make sure we won't trip
when followup patches embed error numbers in the upper bits
(we already do this in some places).

Then, nftables and bridge netfilter get converted to call kfree_skb_reason
directly to let tooling pinpoint exact location of packet drops,
rather than the existing NF_DROP catchall in nf_hook_slow().

I would like to eventually convert all netfilter modules, but as some
callers cannot deal with NF_STOLEN (notably act_ct), more preparation
work is needed for this.

Last patch gets rid of an ugly 'de-const' cast in nftables.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 810799a0 25600167
......@@ -22,6 +22,16 @@ static inline int NF_DROP_GETERR(int verdict)
return -(verdict >> NF_VERDICT_QBITS);
}
static __always_inline int
NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err)
{
BUILD_BUG_ON(err > 0xffff);
kfree_skb_reason(skb, reason);
return ((err << 16) | NF_STOLEN);
}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
{
......
......@@ -462,7 +462,7 @@ struct nft_set_ops {
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
void (*commit)(const struct nft_set *set);
void (*commit)(struct nft_set *set);
void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
......
......@@ -486,11 +486,11 @@ static unsigned int br_nf_pre_routing(void *priv,
struct brnf_net *brnet;
if (unlikely(!pskb_may_pull(skb, len)))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
p = br_port_get_rcu(state->in);
if (p == NULL)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
br = p->br;
brnet = net_generic(state->net, brnf_net_id);
......@@ -501,7 +501,7 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_ACCEPT;
if (!ipv6_mod_enabled()) {
pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0);
}
nf_bridge_pull_encap_header_rcsum(skb);
......@@ -518,12 +518,12 @@ static unsigned int br_nf_pre_routing(void *priv,
nf_bridge_pull_encap_header_rcsum(skb);
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
if (!nf_bridge_alloc(skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
......@@ -590,15 +590,15 @@ static unsigned int br_nf_forward_ip(void *priv,
/* Need exclusive nf_bridge_info since we might have multiple
* different physoutdevs. */
if (!nf_bridge_unshare(skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
nf_bridge = nf_bridge_info_get(skb);
if (!nf_bridge)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
......@@ -618,13 +618,13 @@ static unsigned int br_nf_forward_ip(void *priv,
if (pf == NFPROTO_IPV4) {
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
......@@ -666,7 +666,7 @@ static unsigned int br_nf_forward_arp(void *priv,
}
if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
......@@ -831,7 +831,7 @@ static unsigned int br_nf_post_routing(void *priv,
return NF_ACCEPT;
if (!realoutdev)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
is_pppoe_ip(skb, state->net))
......
......@@ -161,13 +161,13 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
struct nf_bridge_info *nf_bridge;
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
nf_bridge = nf_bridge_alloc(skb);
if (!nf_bridge)
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
if (!setup_pre_routing(skb, state->net))
return NF_DROP;
return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
nf_bridge = nf_bridge_info_get(skb);
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
......
......@@ -36,12 +36,12 @@ static const struct xt_table packet_mangler = {
static unsigned int
ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
unsigned int ret, verdict;
const struct iphdr *iph;
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
u32 mark;
int err;
u8 tos;
/* Save things which could affect route */
mark = skb->mark;
......@@ -51,8 +51,9 @@ ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
tos = iph->tos;
ret = ipt_do_table(priv, skb, state);
verdict = ret & NF_VERDICT_MASK;
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
if (verdict != NF_DROP && verdict != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||
......
......@@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = {
static unsigned int
ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
unsigned int ret, verdict;
u32 flowlabel, mark;
u8 hop_limit;
int err;
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
......@@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
ret = ip6t_do_table(priv, skb, state);
verdict = ret & NF_VERDICT_MASK;
if (ret != NF_DROP && ret != NF_STOLEN &&
if (verdict != NF_DROP && verdict != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
......
......@@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
if (ret == 1)
continue;
return ret;
case NF_STOLEN:
return NF_DROP_GETERR(verdict);
default:
/* Implicit handling for NF_STOLEN, as well as any other
* non conventional verdicts.
*/
WARN_ON_ONCE(1);
return 0;
}
}
......
......@@ -2169,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
if (dataoff <= 0)
return -1;
return NF_DROP;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple))
return -1;
return NF_DROP;
if (ct->status & IPS_SRC_NAT) {
memcpy(tuple.src.u3.all,
......@@ -2193,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
if (!h)
return 0;
return NF_ACCEPT;
/* Store status bits of the conntrack that is clashing to re-do NAT
* mangling according to what it has been done already to this packet.
......@@ -2206,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
nat_hook = rcu_dereference(nf_nat_hook);
if (!nat_hook)
return 0;
return NF_ACCEPT;
if (status & IPS_SRC_NAT &&
nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
IP_CT_DIR_ORIGINAL) == NF_DROP)
return -1;
if (status & IPS_SRC_NAT) {
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
NF_NAT_MANIP_SRC,
IP_CT_DIR_ORIGINAL);
if (verdict != NF_ACCEPT)
return verdict;
}
if (status & IPS_DST_NAT &&
nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
IP_CT_DIR_ORIGINAL) == NF_DROP)
return -1;
if (status & IPS_DST_NAT) {
unsigned int verdict = nat_hook->manip_pkt(skb, ct,
NF_NAT_MANIP_DST,
IP_CT_DIR_ORIGINAL);
if (verdict != NF_ACCEPT)
return verdict;
}
return 0;
return NF_ACCEPT;
}
/* This packet is coming from userspace via nf_queue, complete the packet
......@@ -2233,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
help = nfct_help(ct);
if (!help)
return 0;
return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
return 0;
return NF_ACCEPT;
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
return 0;
return NF_ACCEPT;
switch (nf_ct_l3num(ct)) {
case NFPROTO_IPV4:
......@@ -2255,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return 0;
return NF_ACCEPT;
break;
}
#endif
default:
return 0;
return NF_ACCEPT;
}
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return -1;
return NF_DROP;
}
}
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0;
return nf_conntrack_confirm(skb);
}
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
int err;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return 0;
return NF_ACCEPT;
if (!nf_ct_is_confirmed(ct)) {
err = __nf_conntrack_update(net, skb, ct, ctinfo);
if (err < 0)
return err;
int ret = __nf_conntrack_update(net, skb, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return NF_ACCEPT;
}
return nf_confirm_cthelper(skb, ct, ctinfo);
......
......@@ -999,11 +999,12 @@ static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
unsigned int ret;
unsigned int ret, verdict;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_DROP && ret != NF_STOLEN &&
verdict = ret & NF_VERDICT_MASK;
if (verdict != NF_DROP && verdict != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
......
......@@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt,
{
enum nft_trace_types type;
switch (regs->verdict.code) {
switch (regs->verdict.code & NF_VERDICT_MASK) {
case NFT_CONTINUE:
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
......@@ -308,10 +308,11 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
return regs.verdict.code;
case NF_DROP:
return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
}
switch (regs.verdict.code) {
......@@ -342,6 +343,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);
if (nft_base_chain(basechain)->policy == NF_DROP)
return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM);
return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);
......
......@@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
case __NFT_TRACETYPE_MAX:
break;
case NFT_TRACETYPE_RETURN:
case NFT_TRACETYPE_RULE:
case NFT_TRACETYPE_RULE: {
unsigned int v;
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
goto nla_put_failure;
/* pkt->skb undefined iff NF_STOLEN, disable dump */
if (verdict->code == NF_STOLEN)
v = verdict->code & NF_VERDICT_MASK;
if (v == NF_STOLEN)
info->packet_dumped = true;
else
mark = pkt->skb->mark;
break;
}
case NFT_TRACETYPE_POLICY:
mark = pkt->skb->mark;
......
......@@ -228,19 +228,22 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_ct_hook *ct_hook;
int err;
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook) {
err = ct_hook->update(entry->state.net, entry->skb);
if (err < 0)
verdict = NF_DROP;
}
if (ct_hook)
verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
switch (verdict & NF_VERDICT_MASK) {
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
}
}
nf_reinject(entry, verdict);
}
......
......@@ -1549,12 +1549,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @_set: nftables API set representation
* @set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
......@@ -1672,7 +1671,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
static void nft_pipapo_commit(const struct nft_set *set)
static void nft_pipapo_commit(struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment