Commit a9e01ed9 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

This is second pull request includes the conflict resolution patch that
resulted from the updates that we got for the conntrack template through
kmalloc. No changes with regards to the previously sent 15 patches.

The following patchset contains Netfilter updates for your net-next tree, they
are:

1) Rework the existing nf_tables counter expression to make it per-cpu.

2) Prepare and factor out common packet duplication code from the TEE target so
   it can be reused from the new dup expression.

3) Add the new dup expression for the nf_tables IPv4 and IPv6 families.

4) Convert the nf_tables limit expression to use a token-based approach with
   64-bits precision.

5) Enhance the nf_tables limit expression to support limiting at packet byte.
   This comes after several preparation patches.

6) Add a burst parameter to indicate the amount of packets or bytes that can
   exceed the limiting.

7) Add netns support to nfacct, from Andreas Schultz.

8) Pass the nf_conn_zone structure instead of the zone ID in nf_tables to allow
   accessing more zone specific information, from Daniel Borkmann.

9) Allow to define zone per-direction to support netns containers with
   overlapping network addressing, also from Daniel.

10) Extend the CT target to allow setting the zone based on the skb->mark as a
   way to support simple mappings from iptables, also from Daniel.

11) Make the nf_tables payload expression aware of the fact that VLAN offload
    may have removed a vlan header, from Florian Westphal.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 08617f47 81bf1c64
......@@ -2,6 +2,7 @@
#define _NFNL_ACCT_H_
#include <uapi/linux/netfilter/nfnetlink_acct.h>
#include <net/net_namespace.h>
enum {
NFACCT_NO_QUOTA = -1,
......@@ -11,7 +12,7 @@ enum {
struct nf_acct;
struct nf_acct *nfnl_acct_find_get(const char *filter_name);
struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
void nfnl_acct_put(struct nf_acct *acct);
void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
extern int nfnl_acct_overquota(const struct sk_buff *skb,
......
......@@ -118,6 +118,9 @@ struct net {
#endif
struct sock *nfnl;
struct sock *nfnl_stash;
#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
struct list_head nfnl_acct_list;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
......
#ifndef _NF_DUP_IPV4_H_
#define _NF_DUP_IPV4_H_
void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct in_addr *gw, int oif);
#endif /* _NF_DUP_IPV4_H_ */
#ifndef _NF_DUP_IPV6_H_
#define _NF_DUP_IPV6_H_
void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif);
#endif /* _NF_DUP_IPV6_H_ */
......@@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits);
void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report);
struct nf_conntrack_zone;
void nf_conntrack_free(struct nf_conn *ct);
struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
struct nf_conn *nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp);
......@@ -291,7 +295,9 @@ extern unsigned int nf_conntrack_max;
extern unsigned int nf_conntrack_hash_rnd;
void init_nf_conntrack_hash_rnd(void);
struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
......
......@@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, u16 zone,
nf_conntrack_find_get(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple);
int __nf_conntrack_confirm(struct sk_buff *skb);
......
......@@ -4,7 +4,9 @@
#ifndef _NF_CONNTRACK_EXPECT_H
#define _NF_CONNTRACK_EXPECT_H
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_zones.h>
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
......@@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void);
void nf_conntrack_expect_fini(void);
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
nf_ct_expect_find_get(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
......
#ifndef _NF_CONNTRACK_ZONES_H
#define _NF_CONNTRACK_ZONES_H
#define NF_CT_DEFAULT_ZONE 0
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack_extend.h>
#define NF_CT_DEFAULT_ZONE_ID 0
#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL)
#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY)
#define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
#define NF_CT_FLAG_MARK 1
struct nf_conntrack_zone {
u16 id;
u8 flags;
u8 dir;
};
static inline u16 nf_ct_zone(const struct nf_conn *ct)
extern const struct nf_conntrack_zone nf_ct_zone_dflt;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_extend.h>
static inline const struct nf_conntrack_zone *
nf_ct_zone(const struct nf_conn *ct)
{
const struct nf_conntrack_zone *nf_ct_zone = NULL;
#ifdef CONFIG_NF_CONNTRACK_ZONES
struct nf_conntrack_zone *nf_ct_zone;
nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
if (nf_ct_zone)
return nf_ct_zone->id;
#endif
return NF_CT_DEFAULT_ZONE;
return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt;
}
static inline const struct nf_conntrack_zone *
nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
{
zone->id = id;
zone->flags = flags;
zone->dir = dir;
return zone;
}
static inline const struct nf_conntrack_zone *
nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
struct nf_conntrack_zone *tmp)
{
const struct nf_conntrack_zone *zone;
if (!tmpl)
return &nf_ct_zone_dflt;
zone = nf_ct_zone(tmpl);
if (zone->flags & NF_CT_FLAG_MARK)
zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0);
return zone;
}
static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags,
const struct nf_conntrack_zone *info)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
struct nf_conntrack_zone *nf_ct_zone;
nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags);
if (!nf_ct_zone)
return -ENOMEM;
nf_ct_zone_init(nf_ct_zone, info->id, info->dir,
info->flags);
#endif
return 0;
}
#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */
static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
enum ip_conntrack_dir dir)
{
return zone->dir & (1 << dir);
}
static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
enum ip_conntrack_dir dir)
{
return nf_ct_zone_matches_dir(zone, dir) ?
zone->id : NF_CT_DEFAULT_ZONE_ID;
}
static inline bool nf_ct_zone_equal(const struct nf_conn *a,
const struct nf_conntrack_zone *b,
enum ip_conntrack_dir dir)
{
return nf_ct_zone_id(nf_ct_zone(a), dir) ==
nf_ct_zone_id(b, dir);
}
static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
const struct nf_conntrack_zone *b)
{
return nf_ct_zone(a)->id == b->id;
}
#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
#endif /* _NF_CONNTRACK_ZONES_H */
#ifndef _NFT_DUP_H_
#define _NFT_DUP_H_
struct nft_dup_inet {
enum nft_registers sreg_addr:8;
enum nft_registers sreg_dev:8;
};
#endif /* _NFT_DUP_H_ */
......@@ -756,16 +756,25 @@ enum nft_ct_attributes {
};
#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
enum nft_limit_type {
NFT_LIMIT_PKTS,
NFT_LIMIT_PKT_BYTES
};
/**
* enum nft_limit_attributes - nf_tables limit expression netlink attributes
*
* @NFTA_LIMIT_RATE: refill rate (NLA_U64)
* @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
* @NFTA_LIMIT_BURST: burst (NLA_U32)
* @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type)
*/
enum nft_limit_attributes {
NFTA_LIMIT_UNSPEC,
NFTA_LIMIT_RATE,
NFTA_LIMIT_UNIT,
NFTA_LIMIT_BURST,
NFTA_LIMIT_TYPE,
__NFTA_LIMIT_MAX
};
#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
......@@ -935,6 +944,20 @@ enum nft_redir_attributes {
};
#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1)
/**
* enum nft_dup_attributes - nf_tables dup expression netlink attributes
*
* @NFTA_DUP_SREG_ADDR: source register of address (NLA_U32: nft_registers)
* @NFTA_DUP_SREG_DEV: source register of output interface (NLA_U32: nft_register)
*/
enum nft_dup_attributes {
NFTA_DUP_UNSPEC,
NFTA_DUP_SREG_ADDR,
NFTA_DUP_SREG_DEV,
__NFTA_DUP_MAX
};
#define NFTA_DUP_MAX (__NFTA_DUP_MAX - 1)
/**
* enum nft_gen_attributes - nf_tables ruleset generation attributes
*
......
......@@ -61,6 +61,7 @@ enum ctattr_tuple {
CTA_TUPLE_UNSPEC,
CTA_TUPLE_IP,
CTA_TUPLE_PROTO,
CTA_TUPLE_ZONE,
__CTA_TUPLE_MAX
};
#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
......
......@@ -6,7 +6,13 @@
enum {
XT_CT_NOTRACK = 1 << 0,
XT_CT_NOTRACK_ALIAS = 1 << 1,
XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS,
XT_CT_ZONE_DIR_ORIG = 1 << 2,
XT_CT_ZONE_DIR_REPL = 1 << 3,
XT_CT_ZONE_MARK = 1 << 4,
XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS |
XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL |
XT_CT_ZONE_MARK,
};
struct xt_ct_target_info {
......
......@@ -58,6 +58,12 @@ config NFT_REJECT_IPV4
default NFT_REJECT
tristate
config NFT_DUP_IPV4
tristate "IPv4 nf_tables packet duplication support"
select NF_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
......@@ -67,6 +73,12 @@ config NF_TABLES_ARP
endif # NF_TABLES
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
help
This option enables the nf_dup_ipv4 core, which duplicates an IPv4
packet to be rerouted to another destination.
config NF_LOG_ARP
tristate "ARP packet logging"
default m if NETFILTER_ADVANCED=n
......
......@@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
......@@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o
......@@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
}
h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
......
......@@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
const struct nf_conntrack_zone *zone;
struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuplepr(skb,
......
......@@ -43,19 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
u16 zone = NF_CT_DEFAULT_ZONE;
u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
if (skb->nfct) {
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
}
#endif
if (nf_bridge_in_prerouting(skb))
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN + zone;
return IP_DEFRAG_CONNTRACK_IN + zone_id;
else
return IP_DEFRAG_CONNTRACK_OUT + zone;
return IP_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
......
/*
* (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
* (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
*
* Extracted from xt_TEE.c
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 or later, as
* published by the Free Software Foundation.
*/
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/route.h>
#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
static struct net *pick_net(struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
const struct dst_entry *dst;
if (skb->dev != NULL)
return dev_net(skb->dev);
dst = skb_dst(skb);
if (dst != NULL && dst->dev != NULL)
return dev_net(dst->dev);
#endif
return &init_net;
}
static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
int oif)
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = pick_net(skb);
struct rtable *rt;
struct flowi4 fl4;
memset(&fl4, 0, sizeof(fl4));
if (oif != -1)
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return false;
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
skb->dev = rt->dst.dev;
skb->protocol = htons(ETH_P_IP);
return true;
}
void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct in_addr *gw, int oif)
{
struct iphdr *iph;
if (this_cpu_read(nf_skb_duplicated))
return;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
* the original skb, which should continue on its way as if nothing has
* happened. The copy should be independently delivered to the gateway.
*/
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
return;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Avoid counting cloned packets towards the original connection. */
nf_conntrack_put(skb->nfct);
skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
#endif
/*
* If we are in PREROUTING/INPUT, the checksum must be recalculated
* since the length could have changed as a result of defragmentation.
*
* We also decrease the TTL to mitigate potential loops between two
* hosts.
*
* Set %IP_DF so that the original source is notified of a potentially
* decreased MTU on the clone route. IPv6 does this too.
*/
iph = ip_hdr(skb);
iph->frag_off |= htons(IP_DF);
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN)
--iph->ttl;
ip_send_check(iph);
if (nf_dup_ipv4_route(skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
ip_local_out(skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
}
EXPORT_SYMBOL_GPL(nf_dup_ipv4);
MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet");
MODULE_LICENSE("GPL");
/*
* Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
struct nft_dup_ipv4 {
enum nft_registers sreg_addr:8;
enum nft_registers sreg_dev:8;
};
static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
struct in_addr gw = {
.s_addr = regs->data[priv->sreg_addr],
};
int oif = regs->data[priv->sreg_dev];
nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif);
}
static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
int err;
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV] != NULL) {
priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
}
return 0;
}
static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static struct nft_expr_type nft_dup_ipv4_type;
static const struct nft_expr_ops nft_dup_ipv4_ops = {
.type = &nft_dup_ipv4_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)),
.eval = nft_dup_ipv4_eval,
.init = nft_dup_ipv4_init,
.dump = nft_dup_ipv4_dump,
};
static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = {
[NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
[NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
};
static struct nft_expr_type nft_dup_ipv4_type __read_mostly = {
.family = NFPROTO_IPV4,
.name = "dup",
.ops = &nft_dup_ipv4_ops,
.policy = nft_dup_ipv4_policy,
.maxattr = NFTA_DUP_MAX,
.owner = THIS_MODULE,
};
static int __init nft_dup_ipv4_module_init(void)
{
return nft_register_expr(&nft_dup_ipv4_type);
}
static void __exit nft_dup_ipv4_module_exit(void)
{
nft_unregister_expr(&nft_dup_ipv4_type);
}
module_init(nft_dup_ipv4_module_init);
module_exit(nft_dup_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
......@@ -47,9 +47,21 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
config NFT_DUP_IPV6
tristate "IPv6 nf_tables packet duplication support"
select NF_DUP_IPV6
help
This module enables IPv6 packet duplication support for nf_tables.
endif # NF_TABLES_IPV6
endif # NF_TABLES
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
help
This option enables the nf_dup_ipv6 core, which duplicates an IPv6
packet to be rerouted to another destination.
config NF_REJECT_IPV6
tristate "IPv6 packet rejection"
default m if NETFILTER_ADVANCED=n
......
......@@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
# reject
obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
......@@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
......
......@@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
return -EINVAL;
h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (!h) {
pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
&tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
......
......@@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_l4proto *inproto;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
......@@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
*ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(net, zone, &intuple);
h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
&intuple);
if (!h) {
pr_debug("icmpv6_error: no match\n");
return -NF_ACCEPT;
......
......@@ -33,20 +33,22 @@
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
u16 zone = NF_CT_DEFAULT_ZONE;
u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
if (skb->nfct) {
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
}
#endif
if (nf_bridge_in_prerouting(skb))
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
if (hooknum == NF_INET_PRE_ROUTING)
return IP6_DEFRAG_CONNTRACK_IN + zone;
return IP6_DEFRAG_CONNTRACK_IN + zone_id;
else
return IP6_DEFRAG_CONNTRACK_OUT + zone;
return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
......
/*
* (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
* (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
*
* Extracted from xt_TEE.c
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 or later, as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
static struct net *pick_net(struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
const struct dst_entry *dst;
if (skb->dev != NULL)
return dev_net(skb->dev);
dst = skb_dst(skb);
if (dst != NULL && dst->dev != NULL)
return dev_net(dst->dev);
#endif
return &init_net;
}
static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
int oif)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = pick_net(skb);
struct dst_entry *dst;
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
if (oif != -1)
fl6.flowi6_oif = oif;
fl6.daddr = *gw;
fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
return false;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
return true;
}
void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
if (this_cpu_read(nf_skb_duplicated))
return;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
return;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
#endif
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN) {
struct ipv6hdr *iph = ipv6_hdr(skb);
--iph->hop_limit;
}
if (nf_dup_ipv6_route(skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
ip6_local_out(skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
}
EXPORT_SYMBOL_GPL(nf_dup_ipv6);
MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication");
MODULE_LICENSE("GPL");
/*
* Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
struct nft_dup_ipv6 {
enum nft_registers sreg_addr:8;
enum nft_registers sreg_dev:8;
};
static void nft_dup_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
int oif = regs->data[priv->sreg_dev];
nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif);
}
static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
int err;
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV] != NULL) {
priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
}
return 0;
}
static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static struct nft_expr_type nft_dup_ipv6_type;
static const struct nft_expr_ops nft_dup_ipv6_ops = {
.type = &nft_dup_ipv6_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)),
.eval = nft_dup_ipv6_eval,
.init = nft_dup_ipv6_init,
.dump = nft_dup_ipv6_dump,
};
static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = {
[NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
[NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
};
static struct nft_expr_type nft_dup_ipv6_type __read_mostly = {
.family = NFPROTO_IPV6,
.name = "dup",
.ops = &nft_dup_ipv6_ops,
.policy = nft_dup_ipv6_policy,
.maxattr = NFTA_DUP_MAX,
.owner = THIS_MODULE,
};
static int __init nft_dup_ipv6_module_init(void)
{
return nft_register_expr(&nft_dup_ipv6_type);
}
static void __exit nft_dup_ipv6_module_exit(void)
{
nft_unregister_expr(&nft_dup_ipv6_type);
}
module_init(nft_dup_ipv6_module_init);
module_exit(nft_dup_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
......@@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
......
......@@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
&tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
......
This diff is collapsed.
......@@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
......@@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone,
h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
nf_ct_zone(i->master) == zone)
nf_ct_zone_equal_any(i->master, zone))
return i;
}
return NULL;
......@@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
nf_ct_expect_find_get(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
......@@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
......@@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone,
hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
nf_ct_zone(i->master) == zone) {
nf_ct_zone_equal_any(i->master, zone)) {
exp = i;
break;
}
......@@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
nf_ct_zone(a->master) == nf_ct_zone(b->master);
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
return a->master == b->master && a->class == b->class &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
nf_ct_zone(a->master) == nf_ct_zone(b->master);
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
/* Generally a bad idea to call this: could have matched already. */
......
This diff is collapsed.
......@@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_zone *zone;
struct nf_conntrack_expect *exp;
struct nf_conn *sibling;
u16 zone = nf_ct_zone(ct);
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
zone = nf_ct_zone(ct);
h = nf_conntrack_find_get(net, zone, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
......
......@@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
}
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
int dir)
{
const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
if (zone->dir != dir)
return;
switch (zone->dir) {
case NF_CT_DEFAULT_ZONE_DIR:
seq_printf(s, "zone=%u ", zone->id);
break;
case NF_CT_ZONE_DIR_ORIG:
seq_printf(s, "zone-orig=%u ", zone->id);
break;
case NF_CT_ZONE_DIR_REPL:
seq_printf(s, "zone-reply=%u ", zone->id);
break;
default:
break;
}
}
#else
static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
int dir)
{
}
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
......@@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
if (seq_has_overflowed(s))
goto release;
......@@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
......@@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
#endif
ct_show_secctx(s, ct);
#ifdef CONFIG_NF_CONNTRACK_ZONES
seq_printf(s, "zone=%u ", nf_ct_zone(ct));
#endif
ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
......
......@@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
tuple->dst.protonum ^ nf_conntrack_hash_rnd);
return reciprocal_scale(hash, net->ct.nat_htable_size);
}
......@@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct,
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net, u16 zone,
find_appropriate_src(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
unsigned int h = hash_by_src(net, zone, tuple);
unsigned int h = hash_by_src(net, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
if (same_src(ct, tuple) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
......@@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone,
* the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
find_best_ips_proto(const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
......@@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
*/
j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
range->flags & NF_NAT_RANGE_PERSISTENT ?
0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
full_range = false;
for (i = 0; i <= max; i++) {
......@@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
u16 zone = nf_ct_zone(ct);
zone = nf_ct_zone(ct);
rcu_read_lock();
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
......@@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct,
if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
srchash = hash_by_src(net, nf_ct_zone(ct),
srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension aera */
......
......@@ -17,10 +17,12 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_tcpudp.h>
#include <linux/netfilter/xt_SYNPROXY.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_conntrack_zones.h>
int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
......@@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net)
struct nf_conn *ct;
int err = -ENOMEM;
ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
if (!ct)
goto err1;
......
......@@ -27,8 +27,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
static LIST_HEAD(nfnl_acct_list);
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
......@@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
struct net *net = sock_net(nfnl);
char *acct_name;
unsigned int size = 0;
u32 flags = 0;
......@@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
if (strlen(acct_name) == 0)
return -EINVAL;
list_for_each_entry(nfacct, &nfnl_acct_list, head) {
list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
......@@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
atomic_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
......@@ -185,6 +184,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nf_acct *cur, *last;
const struct nfacct_filter *filter = cb->data;
......@@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (last) {
if (cur != last)
continue;
......@@ -257,6 +257,7 @@ static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct net *net = sock_net(nfnl);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
......@@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
list_for_each_entry(cur, &nfnl_acct_list, head) {
list_for_each_entry(cur, &net->nfnl_acct_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
......@@ -336,19 +337,20 @@ static int
nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct net *net = sock_net(nfnl);
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
if (!tb[NFACCT_NAME]) {
list_for_each_entry(cur, &nfnl_acct_list, head)
list_for_each_entry(cur, &net->nfnl_acct_list, head)
nfnl_acct_try_del(cur);
return 0;
}
acct_name = nla_data(tb[NFACCT_NAME]);
list_for_each_entry(cur, &nfnl_acct_list, head) {
list_for_each_entry(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
......@@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
struct nf_acct *nfnl_acct_find_get(const char *acct_name)
struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
{
struct nf_acct *cur, *acct = NULL;
rcu_read_lock();
list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
continue;
......@@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
atomic_dec(&acct->refcnt);
if (atomic_dec_and_test(&acct->refcnt))
kfree_rcu(acct, rcu_head);
module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(nfnl_acct_put);
......@@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
static int __net_init nfnl_acct_net_init(struct net *net)
{
INIT_LIST_HEAD(&net->nfnl_acct_list);
return 0;
}
static void __net_exit nfnl_acct_net_exit(struct net *net)
{
struct nf_acct *cur, *tmp;
list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
list_del_rcu(&cur->head);
if (atomic_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
static struct pernet_operations nfnl_acct_ops = {
.init = nfnl_acct_net_init,
.exit = nfnl_acct_net_exit,
};
static int __init nfnl_acct_init(void)
{
int ret;
ret = register_pernet_subsys(&nfnl_acct_ops);
if (ret < 0) {
pr_err("nfnl_acct_init: failed to register pernet ops\n");
goto err_out;
}
pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
goto err_out;
goto cleanup_pernet;
}
return 0;
cleanup_pernet:
unregister_pernet_subsys(&nfnl_acct_ops);
err_out:
return ret;
}
static void __exit nfnl_acct_exit(void)
{
struct nf_acct *cur, *tmp;
pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
list_del_rcu(&cur->head);
/* We are sure that our objects have no clients at this point,
* it's safe to release them all without checking refcnt. */
kfree_rcu(cur, rcu_head);
}
unregister_pernet_subsys(&nfnl_acct_ops);
}
module_init(nfnl_acct_init);
......
......@@ -18,39 +18,59 @@
#include <net/netfilter/nf_tables.h>
struct nft_counter {
seqlock_t lock;
u64 bytes;
u64 packets;
};
struct nft_counter_percpu {
struct nft_counter counter;
struct u64_stats_sync syncp;
};
struct nft_counter_percpu_priv {
struct nft_counter_percpu __percpu *counter;
};
static void nft_counter_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_counter *priv = nft_expr_priv(expr);
write_seqlock_bh(&priv->lock);
priv->bytes += pkt->skb->len;
priv->packets++;
write_sequnlock_bh(&priv->lock);
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter_percpu *this_cpu;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
u64_stats_update_begin(&this_cpu->syncp);
this_cpu->counter.bytes += pkt->skb->len;
this_cpu->counter.packets++;
u64_stats_update_end(&this_cpu->syncp);
local_bh_enable();
}
static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_counter *priv = nft_expr_priv(expr);
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter_percpu *cpu_stats;
struct nft_counter total;
u64 bytes, packets;
unsigned int seq;
u64 bytes;
u64 packets;
do {
seq = read_seqbegin(&priv->lock);
bytes = priv->bytes;
packets = priv->packets;
} while (read_seqretry(&priv->lock, seq));
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
int cpu;
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(priv->counter, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
bytes = cpu_stats->counter.bytes;
packets = cpu_stats->counter.packets;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
total.packets += packets;
total.bytes += bytes;
}
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
goto nla_put_failure;
return 0;
......@@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_counter *priv = nft_expr_priv(expr);
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter_percpu __percpu *cpu_stats;
struct nft_counter_percpu *this_cpu;
cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
if (cpu_stats == NULL)
return ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
if (tb[NFTA_COUNTER_PACKETS]) {
this_cpu->counter.packets =
be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
}
if (tb[NFTA_COUNTER_BYTES]) {
this_cpu->counter.bytes =
be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
}
preempt_enable();
priv->counter = cpu_stats;
return 0;
}
if (tb[NFTA_COUNTER_PACKETS])
priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
if (tb[NFTA_COUNTER_BYTES])
priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
static void nft_counter_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
seqlock_init(&priv->lock);
return 0;
free_percpu(priv->counter);
}
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
.size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
.eval = nft_counter_eval,
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.dump = nft_counter_dump,
};
......
......@@ -20,63 +20,79 @@
static DEFINE_SPINLOCK(limit_lock);
struct nft_limit {
u64 last;
u64 tokens;
u64 tokens_max;
u64 rate;
u64 unit;
unsigned long stamp;
u64 nsecs;
u32 burst;
};
static void nft_limit_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 now, tokens;
s64 delta;
spin_lock_bh(&limit_lock);
if (time_after_eq(jiffies, priv->stamp)) {
priv->tokens = priv->rate;
priv->stamp = jiffies + priv->unit * HZ;
}
if (priv->tokens >= 1) {
priv->tokens--;
now = ktime_get_ns();
tokens = limit->tokens + now - limit->last;
if (tokens > limit->tokens_max)
tokens = limit->tokens_max;
limit->last = now;
delta = tokens - cost;
if (delta >= 0) {
limit->tokens = delta;
spin_unlock_bh(&limit_lock);
return;
return false;
}
limit->tokens = tokens;
spin_unlock_bh(&limit_lock);
regs->verdict.code = NFT_BREAK;
return true;
}
static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
[NFTA_LIMIT_RATE] = { .type = NLA_U64 },
[NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
};
static int nft_limit_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
static int nft_limit_init(struct nft_limit *limit,
const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 unit;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
priv->stamp = jiffies + priv->unit * HZ;
priv->tokens = priv->rate;
limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
limit->nsecs = unit * NSEC_PER_SEC;
if (limit->rate == 0 || limit->nsecs < unit)
return -EOVERFLOW;
limit->tokens = limit->tokens_max = limit->nsecs;
if (tb[NFTA_LIMIT_BURST]) {
u64 rate;
limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
rate = limit->rate + limit->burst;
if (rate < limit->rate)
return -EOVERFLOW;
limit->rate = rate;
}
limit->last = ktime_get_ns();
return 0;
}
static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
enum nft_limit_type type)
{
const struct nft_limit *priv = nft_expr_priv(expr);
u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
u64 rate = limit->rate - limit->burst;
if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
goto nla_put_failure;
return 0;
......@@ -84,18 +100,114 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
struct nft_limit_pkts {
struct nft_limit limit;
u64 cost;
};
static void nft_limit_pkts_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_limit_pkts *priv = nft_expr_priv(expr);
if (nft_limit_eval(&priv->limit, priv->cost))
regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
[NFTA_LIMIT_RATE] = { .type = NLA_U64 },
[NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
[NFTA_LIMIT_BURST] = { .type = NLA_U32 },
[NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
};
static int nft_limit_pkts_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_limit_pkts *priv = nft_expr_priv(expr);
int err;
err = nft_limit_init(&priv->limit, tb);
if (err < 0)
return err;
priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate);
return 0;
}
static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_limit_pkts *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
static struct nft_expr_type nft_limit_type;
static const struct nft_expr_ops nft_limit_ops = {
static const struct nft_expr_ops nft_limit_pkts_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
.eval = nft_limit_pkts_eval,
.init = nft_limit_pkts_init,
.dump = nft_limit_pkts_dump,
};
static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate);
if (nft_limit_eval(priv, cost))
regs->verdict.code = NFT_BREAK;
}
static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb);
}
static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
.eval = nft_limit_eval,
.init = nft_limit_init,
.dump = nft_limit_dump,
.eval = nft_limit_pkt_bytes_eval,
.init = nft_limit_pkt_bytes_init,
.dump = nft_limit_pkt_bytes_dump,
};
static const struct nft_expr_ops *
nft_limit_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
if (tb[NFTA_LIMIT_TYPE] == NULL)
return &nft_limit_pkts_ops;
switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
case NFT_LIMIT_PKTS:
return &nft_limit_pkts_ops;
case NFT_LIMIT_PKT_BYTES:
return &nft_limit_pkt_bytes_ops;
}
return ERR_PTR(-EOPNOTSUPP);
}
static struct nft_expr_type nft_limit_type __read_mostly = {
.name = "limit",
.ops = &nft_limit_ops,
.select_ops = nft_limit_select_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
.flags = NFT_EXPR_STATEFUL,
......
......@@ -9,6 +9,7 @@
*/
#include <linux/kernel.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
......@@ -17,6 +18,53 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
/* add vlan header into the user buffer for if tag was removed by offloads */
static bool
nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
{
int mac_off = skb_mac_header(skb) - skb->data;
u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
vlanh = (u8 *) &veth;
if (offset < ETH_HLEN) {
u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
return false;
veth.h_vlan_proto = skb->vlan_proto;
memcpy(dst_u8, vlanh + offset, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
offset = ETH_HLEN;
} else if (offset >= VLAN_ETH_HLEN) {
offset -= VLAN_HLEN;
goto skip;
}
veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
veth.h_vlan_encapsulated_proto = skb->protocol;
vlanh += offset;
vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
memcpy(dst_u8, vlanh, vlan_len);
len -= vlan_len;
if (!len)
return true;
dst_u8 += vlan_len;
skip:
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
}
static void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
......@@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr,
u32 *dest = &regs->data[priv->dreg];
int offset;
dest[priv->len / NFT_REG32_SIZE] = 0;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
if (skb_vlan_tag_present(skb)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
return;
}
offset = skb_mac_header(skb) - skb->data;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
......@@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
dest[priv->len / NFT_REG32_SIZE] = 0;
if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
......
......@@ -181,9 +181,23 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
#endif
}
static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
{
switch (info->flags & (XT_CT_ZONE_DIR_ORIG |
XT_CT_ZONE_DIR_REPL)) {
case XT_CT_ZONE_DIR_ORIG:
return NF_CT_ZONE_DIR_ORIG;
case XT_CT_ZONE_DIR_REPL:
return NF_CT_ZONE_DIR_REPL;
default:
return NF_CT_DEFAULT_ZONE_DIR;
}
}
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_zone zone;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
......@@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
}
#ifndef CONFIG_NF_CONNTRACK_ZONES
if (info->zone)
if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
XT_CT_ZONE_DIR_REPL |
XT_CT_ZONE_MARK))
goto err1;
#endif
......@@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err1;
ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
memset(&zone, 0, sizeof(zone));
zone.id = info->zone;
zone.dir = xt_ct_flags_to_dir(info);
if (info->flags & XT_CT_ZONE_MARK)
zone.flags |= NF_CT_FLAG_MARK;
ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
if (!ct) {
ret = -ENOMEM;
goto err2;
......
......@@ -10,26 +10,15 @@
* modify it under the terms of the GNU General Public License
* version 2 or later, as published by the Free Software Foundation.
*/
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/route.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/route.h>
#include <linux/route.h>
#include <linux/netfilter/x_tables.h>
#include <net/route.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
#include <linux/netfilter/xt_TEE.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
# define WITH_CONNTRACK 1
# include <net/netfilter/nf_conntrack.h>
#endif
struct xt_tee_priv {
struct notifier_block notifier;
struct xt_tee_tginfo *tginfo;
......@@ -38,161 +27,24 @@ struct xt_tee_priv {
static const union nf_inet_addr tee_zero_address;
static struct net *pick_net(struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
const struct dst_entry *dst;
if (skb->dev != NULL)
return dev_net(skb->dev);
dst = skb_dst(skb);
if (dst != NULL && dst->dev != NULL)
return dev_net(dst->dev);
#endif
return &init_net;
}
static bool
tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = pick_net(skb);
struct rtable *rt;
struct flowi4 fl4;
memset(&fl4, 0, sizeof(fl4));
if (info->priv) {
if (info->priv->oif == -1)
return false;
fl4.flowi4_oif = info->priv->oif;
}
fl4.daddr = info->gw.ip;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return false;
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
skb->dev = rt->dst.dev;
skb->protocol = htons(ETH_P_IP);
return true;
}
static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
struct iphdr *iph;
if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
* the original skb, which should continue on its way as if nothing has
* happened. The copy should be independently delivered to the TEE
* --gateway.
*/
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
return XT_CONTINUE;
#ifdef WITH_CONNTRACK
/* Avoid counting cloned packets towards the original connection. */
nf_conntrack_put(skb->nfct);
skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
#endif
/*
* If we are in PREROUTING/INPUT, the checksum must be recalculated
* since the length could have changed as a result of defragmentation.
*
* We also decrease the TTL to mitigate potential TEE loops
* between two hosts.
*
* Set %IP_DF so that the original source is notified of a potentially
* decreased MTU on the clone route. IPv6 does this too.
*/
iph = ip_hdr(skb);
iph->frag_off |= htons(IP_DF);
if (par->hooknum == NF_INET_PRE_ROUTING ||
par->hooknum == NF_INET_LOCAL_IN)
--iph->ttl;
ip_send_check(iph);
nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif);
if (tee_tg_route4(skb, info)) {
__this_cpu_write(nf_skb_duplicated, true);
ip_local_out(skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
return XT_CONTINUE;
}
#if IS_ENABLED(CONFIG_IPV6)
static bool
tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = pick_net(skb);
struct dst_entry *dst;
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
if (info->priv) {
if (info->priv->oif == -1)
return false;
fl6.flowi6_oif = info->priv->oif;
}
fl6.daddr = info->gw.in6;
fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
return false;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
skb->dev = dst->dev;
skb->protocol = htons(ETH_P_IPV6);
return true;
}
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
return XT_CONTINUE;
nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif);
#ifdef WITH_CONNTRACK
nf_conntrack_put(skb->nfct);
skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
#endif
if (par->hooknum == NF_INET_PRE_ROUTING ||
par->hooknum == NF_INET_LOCAL_IN) {
struct ipv6hdr *iph = ipv6_hdr(skb);
--iph->hop_limit;
}
if (tee_tg_route6(skb, info)) {
__this_cpu_write(nf_skb_duplicated, true);
ip6_local_out(skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
return XT_CONTINUE;
}
#endif
......
......@@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head,
static unsigned int check_hlist(struct net *net,
struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
u16 zone,
const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
......@@ -201,7 +201,7 @@ static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr, const union nf_inet_addr *mask,
u8 family, u16 zone)
u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
......@@ -290,7 +290,8 @@ static int count_them(struct net *net,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
u_int8_t family, u16 zone)
u_int8_t family,
const struct nf_conntrack_zone *zone)
{
struct rb_root *root;
int count;
......@@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
u16 zone = NF_CT_DEFAULT_ZONE;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
......
......@@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
struct xt_nfacct_match_info *info = par->matchinfo;
struct nf_acct *nfacct;
nfacct = nfnl_acct_find_get(info->name);
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
pr_info("xt_nfacct: accounting object with name `%s' "
"does not exists\n", info->name);
......
......@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = a->priv;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
......@@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
proto, &tuple))
goto out;
thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
zone.id = ca->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
if (!thash)
goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment