Commit 59ce9670 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains the first batch of Netfilter updates for
the upcoming 4.5 kernel. This batch contains userspace netfilter header
compilation fixes, support for packet mangling in nf_tables, the new
tracing infrastructure for nf_tables and cgroup2 support for iptables.
More specifically, they are:

1) Two patches to include dependencies in our netfilter userspace
   headers to resolve compilation problems, from Mikko Rapeli.

2) Four cosmetic cleanup patches for the ebtables codebase, from Ian Morris.

3) Remove duplicate include in the netfilter reject infrastructure,
   from Stephen Hemminger.

4) Two patches to simplify the netfilter defragmentation code for IPv6,
   from Florian Westphal.

5) Fix root ownership of the netfilter /proc/net entries for unprivileged
   net namespaces, from Philip Whineray.

6) Get rid of unused fields in struct nft_pktinfo, from Florian Westphal.

7) Add mangling support to our nf_tables payload expression, from
   Patrick McHardy.

8) Introduce a new netlink-based tracing infrastructure for nf_tables,
   from Florian Westphal (a userspace subscription sketch follows this list).

9) Change setter functions in nfnetlink_log to be void, from
    Rami Rosen.

10) Add netns support to the cttimeout infrastructure.

11) Add cgroup2 support to iptables, from Tejun Heo.

12) Introduce nfnl_dereference_protected() in nfnetlink, from Florian.

13) Add support for mangling pkttype in the nf_tables meta expression,
    also from Florian.
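
As a companion to item 8, here is a minimal userspace sketch (hypothetical
example code, not part of this patchset) that subscribes to the new
NFNLGRP_NFTRACE multicast group over a raw netlink socket. Only the group
constant comes from this series; the rest is plain socket code, and a real
consumer would parse the nlmsghdr/nfgenmsg payload instead of printing sizes.

/* trace_listen.c: hypothetical NFNLGRP_NFTRACE listener */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

int main(void)
{
	struct sockaddr_nl snl = { .nl_family = AF_NETLINK };
	unsigned int grp = NFNLGRP_NFTRACE;
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);

	if (fd < 0 || bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0)
		return 1;
	/* join the trace multicast group */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
		       &grp, sizeof(grp)) < 0)
		return 1;
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);
		if (n <= 0)
			break;
		printf("received %zd bytes of NFT_MSG_TRACE data\n", n);
	}
	close(fd);
	return 0;
}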

BTW, I need you to pull net into net-next, as I have another batch that
requires changes that I don't yet see in net.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4b402d71 b4aae759
#ifndef _NF_CONNTRACK_SCTP_H
#define _NF_CONNTRACK_SCTP_H
/* SCTP tracking. */
#include <uapi/linux/netfilter/nf_conntrack_sctp.h>
struct ip_ct_sctp {
enum sctp_conntrack state;
__be32 vtag[IP_CT_DIR_MAX];
};
#endif /* _NF_CONNTRACK_SCTP_H */
......@@ -121,6 +121,9 @@ struct net {
#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
struct list_head nfnl_acct_list;
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct list_head nfct_timeout_list;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
......
......@@ -5,8 +5,7 @@ void nf_defrag_ipv6_enable(void);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
void nf_ct_frag6_consume_orig(struct sk_buff *skb);
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
struct inet_frags_ctl;
......
......@@ -104,7 +104,7 @@ static inline void nf_conntrack_timeout_fini(void)
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(const char *name);
extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout);
#endif
......
......@@ -19,8 +19,6 @@ struct nft_pktinfo {
const struct net_device *out;
u8 pf;
u8 hook;
u8 nhoff;
u8 thoff;
u8 tprot;
/* for x_tables compatibility */
struct xt_action_param xt;
......@@ -890,6 +888,38 @@ void nft_unregister_chain_type(const struct nf_chain_type *);
int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *);
int nft_verdict_dump(struct sk_buff *skb, int type,
const struct nft_verdict *v);
/**
* struct nft_traceinfo - nft tracing information and state
*
* @pkt: pktinfo currently processed
* @basechain: base chain currently processed
* @chain: chain currently processed
* @rule: rule that was evaluated
* @verdict: verdict given by rule
* @type: event type (enum nft_trace_types)
* @packet_dumped: packet headers sent in a previous traceinfo message
* @trace: other struct members are initialised
*/
struct nft_traceinfo {
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
const struct nft_rule *rule;
const struct nft_verdict *verdict;
enum nft_trace_types type;
bool packet_dumped;
bool trace;
};
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *basechain);
void nft_trace_notify(struct nft_traceinfo *info);
#define nft_dereference(p) \
nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
......
......@@ -47,7 +47,17 @@ struct nft_payload {
enum nft_registers dreg:8;
};
struct nft_payload_set {
enum nft_payload_bases base:8;
u8 offset;
u8 len;
enum nft_registers sreg:8;
u8 csum_type;
u8 csum_offset;
};
extern const struct nft_expr_ops nft_payload_fast_ops;
extern struct static_key_false nft_trace_enabled;
int nft_payload_module_init(void);
void nft_payload_module_exit(void);
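
The new csum_type and csum_offset fields above let a payload write fix up an
inet checksum in place. Below is a condensed, hypothetical sketch of the
incremental update using the standard kernel csum_*() and skb_*() helpers;
the real nft_payload_set_eval() additionally ensures the skb is writable and
handles overlap and pseudo-header corner cases.

#include <linux/skbuff.h>
#include <net/checksum.h>

/* illustrative only: write 'len' bytes at 'offset' and patch the
 * 16-bit inet checksum stored at 'csum_offset' incrementally.
 */
static int payload_write_with_csum(struct sk_buff *skb, unsigned int offset,
				   const u32 *src, unsigned int len,
				   unsigned int csum_offset)
{
	__wsum fsum, tsum;
	__sum16 sum;

	if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
		return -1;

	fsum = skb_checksum(skb, offset, len, 0);	/* old data */
	tsum = csum_partial(src, len, 0);		/* new data */

	/* fold the old bytes out of the checksum, fold the new ones in */
	sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), tsum));
	if (sum == 0)
		sum = CSUM_MANGLED_0;

	if (skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0 ||
	    skb_store_bits(skb, offset, src, len) < 0)
		return -1;
	return 0;
}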
......
......@@ -33,4 +33,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr);
#endif
#ifndef _UAPI__IP_SET_BITMAP_H
#define _UAPI__IP_SET_BITMAP_H
#include <linux/netfilter/ipset/ip_set.h>
/* Bitmap type specific error codes */
enum {
/* The element is out of the range of the set */
......
#ifndef _UAPI__IP_SET_HASH_H
#define _UAPI__IP_SET_HASH_H
#include <linux/netfilter/ipset/ip_set.h>
/* Hash type specific error codes */
enum {
/* Hash is full */
......
#ifndef _UAPI__IP_SET_LIST_H
#define _UAPI__IP_SET_LIST_H
#include <linux/netfilter/ipset/ip_set.h>
/* List type specific error codes */
enum {
/* Set name to be added/deleted/tested does not exist. */
......
#ifndef _NF_CONNTRACK_SCTP_H
#define _NF_CONNTRACK_SCTP_H
#ifndef _UAPI_NF_CONNTRACK_SCTP_H
#define _UAPI_NF_CONNTRACK_SCTP_H
/* SCTP tracking. */
#include <linux/netfilter/nf_conntrack_tuple_common.h>
......@@ -18,10 +18,4 @@ enum sctp_conntrack {
SCTP_CONNTRACK_MAX
};
struct ip_ct_sctp {
enum sctp_conntrack state;
__be32 vtag[IP_CT_DIR_MAX];
};
#endif /* _NF_CONNTRACK_SCTP_H */
#endif /* _UAPI_NF_CONNTRACK_SCTP_H */
#ifndef _NF_CONNTRACK_TUPLE_COMMON_H
#define _NF_CONNTRACK_TUPLE_COMMON_H
#include <linux/types.h>
#include <linux/netfilter.h>
enum ip_conntrack_dir {
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
......
......@@ -83,6 +83,7 @@ enum nft_verdicts {
* @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
* @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
* @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
* @NFT_MSG_TRACE: trace event (enum nft_trace_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
......@@ -102,6 +103,7 @@ enum nf_tables_msg_types {
NFT_MSG_DELSETELEM,
NFT_MSG_NEWGEN,
NFT_MSG_GETGEN,
NFT_MSG_TRACE,
NFT_MSG_MAX,
};
......@@ -597,6 +599,17 @@ enum nft_payload_bases {
NFT_PAYLOAD_TRANSPORT_HEADER,
};
/**
* enum nft_payload_csum_types - nf_tables payload expression checksum types
*
* @NFT_PAYLOAD_CSUM_NONE: no checksumming
* @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791)
*/
enum nft_payload_csum_types {
NFT_PAYLOAD_CSUM_NONE,
NFT_PAYLOAD_CSUM_INET,
};
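
NFT_PAYLOAD_CSUM_INET denotes the classic 16-bit one's-complement internet
checksum used by IPv4, TCP and UDP. For illustration only, a self-contained
userspace version of that sum (hypothetical helper, not part of this header):

#include <stddef.h>
#include <stdint.h>

/* one's-complement sum over a buffer, folded to 16 bits */
static uint16_t inet_csum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)		/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}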
/**
* enum nft_payload_attributes - nf_tables payload expression netlink attributes
*
......@@ -604,6 +617,9 @@ enum nft_payload_bases {
* @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
* @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
* @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
* @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers)
* @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32)
* @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32)
*/
enum nft_payload_attributes {
NFTA_PAYLOAD_UNSPEC,
......@@ -611,6 +627,9 @@ enum nft_payload_attributes {
NFTA_PAYLOAD_BASE,
NFTA_PAYLOAD_OFFSET,
NFTA_PAYLOAD_LEN,
NFTA_PAYLOAD_SREG,
NFTA_PAYLOAD_CSUM_TYPE,
NFTA_PAYLOAD_CSUM_OFFSET,
__NFTA_PAYLOAD_MAX
};
#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
......@@ -970,4 +989,54 @@ enum nft_gen_attributes {
};
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
/**
* enum nft_trace_attributes - nf_tables trace netlink attributes
*
* @NFTA_TRACE_TABLE: name of the table (NLA_STRING)
* @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING)
* @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64)
* @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types)
* @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts)
* @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32)
* @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY)
* @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY)
* @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY)
* @NFTA_TRACE_IIF: indev ifindex (NLA_U32)
* @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16)
* @NFTA_TRACE_OIF: outdev ifindex (NLA_U32)
* @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16)
* @NFTA_TRACE_MARK: nfmark (NLA_U32)
* @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
* @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
*/
enum nft_trace_attibutes {
NFTA_TRACE_UNSPEC,
NFTA_TRACE_TABLE,
NFTA_TRACE_CHAIN,
NFTA_TRACE_RULE_HANDLE,
NFTA_TRACE_TYPE,
NFTA_TRACE_VERDICT,
NFTA_TRACE_ID,
NFTA_TRACE_LL_HEADER,
NFTA_TRACE_NETWORK_HEADER,
NFTA_TRACE_TRANSPORT_HEADER,
NFTA_TRACE_IIF,
NFTA_TRACE_IIFTYPE,
NFTA_TRACE_OIF,
NFTA_TRACE_OIFTYPE,
NFTA_TRACE_MARK,
NFTA_TRACE_NFPROTO,
NFTA_TRACE_POLICY,
__NFTA_TRACE_MAX
};
#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1)
enum nft_trace_types {
NFT_TRACETYPE_UNSPEC,
NFT_TRACETYPE_POLICY,
NFT_TRACETYPE_RETURN,
NFT_TRACETYPE_RULE,
__NFT_TRACETYPE_MAX
};
#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
#endif /* _LINUX_NF_TABLES_H */
......@@ -22,6 +22,8 @@ enum nfnetlink_groups {
#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES
NFNLGRP_ACCT_QUOTA,
#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA
NFNLGRP_NFTRACE,
#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE
__NFNLGRP_MAX,
};
#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
......
......@@ -2,6 +2,7 @@
#define XT_HMARK_H_
#include <linux/types.h>
#include <linux/netfilter.h>
enum {
XT_HMARK_SADDR_MASK,
......
......@@ -2,6 +2,7 @@
#define _XT_RATEEST_TARGET_H
#include <linux/types.h>
#include <linux/if.h>
struct xt_rateest_target_info {
char name[IFNAMSIZ];
......
#ifndef _XT_TEE_TARGET_H
#define _XT_TEE_TARGET_H
#include <linux/netfilter.h>
struct xt_tee_tginfo {
union nf_inet_addr gw;
char oif[16];
......
......@@ -2,6 +2,7 @@
#define _XT_TPROXY_H
#include <linux/types.h>
#include <linux/netfilter.h>
/* TPROXY target is capable of marking the packet to perform
* redirection. We can get rid of that whenever we get support for
......
......@@ -2,10 +2,23 @@
#define _UAPI_XT_CGROUP_H
#include <linux/types.h>
#include <linux/limits.h>
struct xt_cgroup_info {
struct xt_cgroup_info_v0 {
__u32 id;
__u32 invert;
};
struct xt_cgroup_info_v1 {
__u8 has_path;
__u8 has_classid;
__u8 invert_path;
__u8 invert_classid;
char path[PATH_MAX];
__u32 classid;
/* kernel internal data */
void *priv __attribute__((aligned(8)));
};
#endif /* _UAPI_XT_CGROUP_H */
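
With revision 1 (item 11), the match can pin a cgroup2 position by path at
checkentry time and test socket ancestry per packet. A condensed sketch,
under the assumption that cgroup_get_from_path(), cgroup_is_descendant() and
sock_cgroup_ptr() behave as their names suggest; the real
net/netfilter/xt_cgroup.c carries more validation:

/* condensed, hypothetical sketch; not the actual xt_cgroup.c */
static int cgroup_mt_check_v1(struct xt_cgroup_info_v1 *info)
{
	struct cgroup *cgrp;

	if (!info->has_path)
		return 0;
	cgrp = cgroup_get_from_path(info->path);	/* pin once */
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);
	info->priv = cgrp;
	return 0;
}

static bool cgroup_mt_v1(const struct sk_buff *skb,
			 const struct xt_cgroup_info_v1 *info)
{
	struct sock *sk = skb->sk;

	if (!sk || !info->has_path)
		return false;
	/* true if the socket's cgroup sits at or below the pinned one */
	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
				    info->priv) ^ info->invert_path;
}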
......@@ -2,6 +2,7 @@
#define _UAPI_XT_HASHLIMIT_H
#include <linux/types.h>
#include <linux/if.h>
/* timings are in milliseconds. */
#define XT_HASHLIMIT_SCALE 10000
......
......@@ -2,6 +2,7 @@
#define _XT_IPVS_H
#include <linux/types.h>
#include <linux/netfilter.h>
enum {
XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */
......
#ifndef _XT_MAC_H
#define _XT_MAC_H
#include <linux/if_ether.h>
struct xt_mac_info {
unsigned char srcaddr[ETH_ALEN];
int invert;
......
......@@ -20,6 +20,8 @@
#define _XT_OSF_H
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#define MAXGENRELEN 32
......
......@@ -2,7 +2,7 @@
#define _UAPI_XT_PHYSDEV_H
#include <linux/types.h>
#include <linux/if.h>
#define XT_PHYSDEV_OP_IN 0x01
#define XT_PHYSDEV_OP_OUT 0x02
......
......@@ -2,6 +2,8 @@
#define _XT_POLICY_H
#include <linux/types.h>
#include <linux/in.h>
#include <linux/in6.h>
#define XT_POLICY_MAX_ELEM 4
......
......@@ -2,6 +2,7 @@
#define _XT_RATEEST_MATCH_H
#include <linux/types.h>
#include <linux/if.h>
enum xt_rateest_match_flags {
XT_RATEEST_MATCH_INVERT = 1<<0,
......
......@@ -2,6 +2,7 @@
#define _LINUX_NETFILTER_XT_RECENT_H 1
#include <linux/types.h>
#include <linux/netfilter.h>
enum {
XT_RECENT_CHECK = 1 << 0,
......
......@@ -66,26 +66,26 @@ struct xt_sctp_info {
#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
__sctp_chunkmap_is_clear((chunkmap), ARRAY_SIZE(chunkmap))
static inline bool
static inline _Bool
__sctp_chunkmap_is_clear(const __u32 *chunkmap, unsigned int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
if (chunkmap[i])
return false;
return true;
return 0;
return 1;
}
#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
__sctp_chunkmap_is_all_set((chunkmap), ARRAY_SIZE(chunkmap))
static inline bool
static inline _Bool
__sctp_chunkmap_is_all_set(const __u32 *chunkmap, unsigned int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
if (chunkmap[i] != ~0U)
return false;
return true;
return 0;
return 1;
}
#endif /* _XT_SCTP_H_ */
......
......@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/if.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
......
......@@ -4,6 +4,7 @@
/* bridge-specific defines for netfilter.
*/
#include <linux/in.h>
#include <linux/netfilter.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
......
......@@ -2,6 +2,7 @@
#define __LINUX_BRIDGE_EBT_ARP_H
#include <linux/types.h>
#include <linux/if_ether.h>
#define EBT_ARP_OPCODE 0x01
#define EBT_ARP_HTYPE 0x02
......
#ifndef __LINUX_BRIDGE_EBT_ARPREPLY_H
#define __LINUX_BRIDGE_EBT_ARPREPLY_H
#include <linux/if_ether.h>
struct ebt_arpreply_info {
unsigned char mac[ETH_ALEN];
int target;
......
......@@ -13,6 +13,7 @@
#define __LINUX_BRIDGE_EBT_IP6_H
#include <linux/types.h>
#include <linux/in6.h>
#define EBT_IP6_SOURCE 0x01
#define EBT_IP6_DEST 0x02
......
#ifndef __LINUX_BRIDGE_EBT_NAT_H
#define __LINUX_BRIDGE_EBT_NAT_H
#include <linux/if_ether.h>
#define NAT_ARP_BIT (0x00000010)
struct ebt_nat_info {
unsigned char mac[ETH_ALEN];
......
......@@ -12,6 +12,8 @@
#ifndef _UAPI__LINUX_BRIDGE_EFF_H
#define _UAPI__LINUX_BRIDGE_EFF_H
#include <linux/types.h>
#include <linux/if.h>
#include <linux/netfilter_bridge.h>
#define EBT_TABLE_MAXNAMELEN 32
......@@ -33,8 +35,8 @@ struct xt_match;
struct xt_target;
struct ebt_counter {
uint64_t pcnt;
uint64_t bcnt;
__u64 pcnt;
__u64 bcnt;
};
struct ebt_replace {
......
......@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/if.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
......
......@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/if.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
......
......@@ -2,7 +2,7 @@
#define _IP6T_RT_H
#include <linux/types.h>
/*#include <linux/in6.h>*/
#include <linux/in6.h>
#define IP6T_RT_HOPS 16
......
......@@ -65,7 +65,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
return false;
if (!(info->bitmask & ( EBT_IP6_DPORT |
if (!(info->bitmask & (EBT_IP6_DPORT |
EBT_IP6_SPORT | EBT_IP6_ICMP6)))
return true;
......
......@@ -36,14 +36,12 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par)
return 0;
}
struct tcpudphdr
{
struct tcpudphdr {
__be16 src;
__be16 dst;
};
struct arppayload
{
struct arppayload {
unsigned char mac_src[ETH_ALEN];
unsigned char ip_src[4];
unsigned char mac_dst[ETH_ALEN];
......@@ -152,7 +150,8 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload */
* then log the ARP payload
*/
if (ah->ar_hrd == htons(1) &&
ah->ar_hln == ETH_ALEN &&
ah->ar_pln == sizeof(__be32)) {
......
......@@ -66,7 +66,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
* - Canonical Format Indicator (CFI). The Canonical Format Indicator
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
* an unsigned binary number. */
* an unsigned binary number.
*/
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
......@@ -98,7 +99,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
}
/* Check for bitmask range
* True if even one bit is out of mask */
* True if even one bit is out of mask
*/
if (info->bitmask & ~EBT_VLAN_MASK) {
pr_debug("bitmask %2X is out of mask (%2X)\n",
info->bitmask, EBT_VLAN_MASK);
......@@ -117,7 +119,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
* 0 - The null VLAN ID.
* 1 - The default Port VID (PVID)
* 0x0FFF - Reserved for implementation use.
* if_vlan.h: VLAN_N_VID 4096. */
* if_vlan.h: VLAN_N_VID 4096.
*/
if (GET_BITMASK(EBT_VLAN_ID)) {
if (!!info->id) { /* if id!=0 => check vid range */
if (info->id > VLAN_N_VID) {
......@@ -128,7 +131,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
/* Note: This is valid VLAN-tagged frame point.
* Any value of user_priority are acceptable,
* but should be ignored according to 802.1Q Std.
* So we just drop the prio flag. */
* So we just drop the prio flag.
*/
info->bitmask &= ~EBT_VLAN_PRIO;
}
/* Else, id=0 (null VLAN ID) => user_priority range (any?) */
......@@ -143,7 +147,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
}
/* Check for encapsulated proto range - it is possible to be
* any value for u_short range.
* if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */
* if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS
*/
if (GET_BITMASK(EBT_VLAN_ENCAP)) {
if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
pr_debug("encap frame length %d is less than "
......
......@@ -35,8 +35,7 @@
"report to author: "format, ## args)
/* #define BUGPRINT(format, args...) */
/*
* Each cpu has its own set of counters, so there is no need for write_lock in
/* Each cpu has its own set of counters, so there is no need for write_lock in
* the softirq
* For reading or updating the counters, the user context needs to
* get a write_lock
......@@ -162,7 +161,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
for (i = 0; i < 6; i++)
verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
e->sourcemsk[i];
if (FWINV2(verdict != 0, EBT_ISOURCE) )
if (FWINV2(verdict != 0, EBT_ISOURCE))
return 1;
}
if (e->bitmask & EBT_DESTMAC) {
......@@ -170,7 +169,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
for (i = 0; i < 6; i++)
verdict |= (h->h_dest[i] ^ e->destmac[i]) &
e->destmsk[i];
if (FWINV2(verdict != 0, EBT_IDEST) )
if (FWINV2(verdict != 0, EBT_IDEST))
return 1;
}
return 0;
......@@ -237,7 +236,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
(*(counter_base + i)).bcnt += skb->len;
/* these should only watch: not modify, nor tell us
what to do with the packet */
* what to do with the packet
*/
EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
t = (struct ebt_entry_target *)
......@@ -451,7 +451,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
if (e->bitmask != 0) {
/* we make userspace set this right,
so there is no misunderstanding */
* so there is no misunderstanding
*/
BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
"in distinguisher\n");
return -EINVAL;
......@@ -487,8 +488,7 @@ static int ebt_verify_pointers(const struct ebt_replace *repl,
return 0;
}
/*
* this one is very careful, as it is the first function
/* this one is very careful, as it is the first function
* to parse the userspace data
*/
static inline int
......@@ -504,10 +504,12 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
break;
}
/* beginning of a new chain
if i == NF_BR_NUMHOOKS it must be a user defined chain */
* if i == NF_BR_NUMHOOKS it must be a user defined chain
*/
if (i != NF_BR_NUMHOOKS || !e->bitmask) {
/* this checks if the previous chain has as many entries
as it said it has */
* as it said it has
*/
if (*n != *cnt) {
BUGPRINT("nentries does not equal the nr of entries "
"in the chain\n");
......@@ -549,15 +551,13 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
return 0;
}
struct ebt_cl_stack
{
struct ebt_cl_stack {
struct ebt_chainstack cs;
int from;
unsigned int hookmask;
};
/*
* we need these positions to check that the jumps to a different part of the
/* We need these positions to check that the jumps to a different part of the
* entries is a jump to the beginning of a new chain.
*/
static inline int
......@@ -673,7 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
BUGPRINT("Unknown flag for inv bitmask\n");
return -EINVAL;
}
if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) {
if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) {
BUGPRINT("NOPROTO & 802_3 not allowed\n");
return -EINVAL;
}
......@@ -687,7 +687,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
break;
}
/* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on
a base chain */
* a base chain
*/
if (i < NF_BR_NUMHOOKS)
hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS);
else {
......@@ -758,8 +759,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
return ret;
}
/*
* checks for loops and sets the hook mask for udc
/* checks for loops and sets the hook mask for udc
* the hook mask for udc tells us from which base chains the udc can be
* accessed. This mask is a parameter to the check() functions of the extensions
*/
......@@ -853,7 +853,8 @@ static int translate_table(struct net *net, const char *name,
return -EINVAL;
}
/* make sure chains are ordered after each other in same order
as their corresponding hooks */
* as their corresponding hooks
*/
for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
if (!newinfo->hook_entry[j])
continue;
......@@ -868,7 +869,8 @@ static int translate_table(struct net *net, const char *name,
i = 0; /* holds the expected nr. of entries for the chain */
j = 0; /* holds the up to now counted entries for the chain */
k = 0; /* holds the total nr. of entries, should equal
newinfo->nentries afterwards */
* newinfo->nentries afterwards
*/
udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */
ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
ebt_check_entry_size_and_hooks, newinfo,
......@@ -888,10 +890,12 @@ static int translate_table(struct net *net, const char *name,
}
/* get the location of the udc, put them in an array
while we're at it, allocate the chainstack */
* while we're at it, allocate the chainstack
*/
if (udc_cnt) {
/* this will get free'd in do_replace()/ebt_register_table()
if an error occurs */
* if an error occurs
*/
newinfo->chainstack =
vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack)));
if (!newinfo->chainstack)
......@@ -932,14 +936,15 @@ static int translate_table(struct net *net, const char *name,
}
/* we now know the following (along with E=mc²):
- the nr of entries in each chain is right
- the size of the allocated space is right
- all valid hooks have a corresponding chain
- there are no loops
- wrong data can still be on the level of a single entry
- could be there are jumps to places that are not the
beginning of a chain. This can only occur in chains that
are not accessible from any base chains, so we don't care. */
* - the nr of entries in each chain is right
* - the size of the allocated space is right
* - all valid hooks have a corresponding chain
* - there are no loops
* - wrong data can still be on the level of a single entry
* - could be there are jumps to places that are not the
* beginning of a chain. This can only occur in chains that
* are not accessible from any base chains, so we don't care.
*/
/* used to know what we need to clean up if something goes wrong */
i = 0;
......@@ -986,7 +991,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
struct ebt_table *t;
/* the user wants counters back
the check on the size is done later, when we have the lock */
* the check on the size is done later, when we have the lock
*/
if (repl->num_counters) {
unsigned long size = repl->num_counters * sizeof(*counterstmp);
counterstmp = vmalloc(size);
......@@ -1038,9 +1044,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
write_unlock_bh(&t->lock);
mutex_unlock(&ebt_mutex);
/* so, a user can change the chains while having messed up her counter
allocation. Only reason why this is done is because this way the lock
is held only once, while this doesn't bring the kernel into a
dangerous state. */
* allocation. Only reason why this is done is because this way the lock
* is held only once, while this doesn't bring the kernel into a
* dangerous state.
*/
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
repl->num_counters * sizeof(struct ebt_counter))) {
......@@ -1348,7 +1355,8 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
char name[EBT_FUNCTION_MAXNAMELEN] = {};
/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
long. Copy 29 bytes and fill remaining bytes with zeroes. */
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
strlcpy(name, m->u.match->name, sizeof(name));
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
......@@ -1362,13 +1370,13 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
char name[EBT_FUNCTION_MAXNAMELEN] = {};
strlcpy(name, w->u.watcher->name, sizeof(name));
if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
}
static inline int
ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
static inline int ebt_make_names(struct ebt_entry *e, const char *base,
char __user *ubase)
{
int ret;
char __user *hlp;
......@@ -1595,8 +1603,7 @@ static int ebt_compat_entry_padsize(void)
static int ebt_compat_match_offset(const struct xt_match *match,
unsigned int userlen)
{
/*
* ebt_among needs special handling. The kernel .matchsize is
/* ebt_among needs special handling. The kernel .matchsize is
* set to -1 at registration time; at runtime an EBT_ALIGN()ed
* value is expected.
* Example: userspace sends 4500, ebt_among.c wants 4504.
......@@ -1966,8 +1973,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
return off + match_size;
}
/*
* return size of all matches, watchers or target, including necessary
/* return size of all matches, watchers or target, including necessary
* alignment and padding.
*/
static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
......@@ -2070,8 +2076,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
if (ret < 0)
return ret;
buf_start = (char *) entry;
/*
* 0: matches offset, always follows ebt_entry.
/* 0: matches offset, always follows ebt_entry.
* 1: watchers offset, from ebt_entry structure
* 2: target offset, from ebt_entry structure
* 3: next ebt_entry offset, from ebt_entry structure
......@@ -2115,8 +2120,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
return 0;
}
/*
* repl->entries_size is the size of the ebt_entry blob in userspace.
/* repl->entries_size is the size of the ebt_entry blob in userspace.
* It might need more memory when copied to a 64 bit kernel in case
* userspace is 32-bit. So, first task: find out how much memory is needed.
*
......@@ -2360,8 +2364,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
break;
case EBT_SO_GET_ENTRIES:
case EBT_SO_GET_INIT_ENTRIES:
/*
* try real handler first in case of userland-side padding.
/* try real handler first in case of userland-side padding.
* in case we are dealing with an 'ordinary' 32 bit binary
* without 64bit compatibility padding, this will fail right
* after copy_from_user when the *len argument is validated.
......
......@@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
......
......@@ -14,7 +14,6 @@
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *_oth, int hook)
......
......@@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
int offset;
struct sk_buff *orig;
};
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
......@@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
static void nf_skb_free(struct sk_buff *skb)
{
if (NFCT_FRAG6_CB(skb)->orig)
kfree_skb(NFCT_FRAG6_CB(skb)->orig);
}
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
......@@ -369,17 +362,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
/*
* Check if this packet is complete.
* Returns NULL on failure by any reason, and pointer
* to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*
* returns true if *prev skb has been transformed into the reassembled
* skb, false otherwise.
*/
static struct sk_buff *
nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
static bool
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
u8 ecn;
......@@ -390,22 +384,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
goto out_fail;
return false;
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
pr_debug("payload len is too large.\n");
goto out_oversize;
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
return false;
}
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC)) {
pr_debug("skb is cloned but can't expand head");
goto out_oom;
}
if (skb_unclone(head, GFP_ATOMIC))
return false;
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
......@@ -416,7 +409,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone = alloc_skb(0, GFP_ATOMIC);
if (clone == NULL)
goto out_oom;
return false;
clone->next = head->next;
head->next = clone;
......@@ -430,10 +423,41 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->csum = 0;
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* morph head into last received skb: prev.
*
* This allows callers of ipv6 conntrack defrag to continue
* to use the last skb(frag) passed into the reasm engine.
* The last skb frag 'silently' turns into the full reassembled skb.
*
* Since prev is also part of q->fragments we have to clone it first.
*/
if (head != prev) {
struct sk_buff *iter;
fp = skb_clone(prev, GFP_ATOMIC);
if (!fp)
return false;
fp->next = prev->next;
iter = head;
while (iter) {
if (iter->next == prev) {
iter->next = fp;
break;
}
iter = iter->next;
}
skb_morph(prev, head);
prev->next = head->next;
consume_skb(head);
head = prev;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
......@@ -474,31 +498,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
fp = skb_shinfo(head)->frag_list;
if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
/* at above code, head skb is divided into two skbs. */
fp = fp->next;
op = NFCT_FRAG6_CB(head)->orig;
for (; fp; fp = fp->next) {
struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
op->next = orig;
op = orig;
NFCT_FRAG6_CB(fp)->orig = NULL;
}
return head;
out_oversize:
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
goto out_fail;
out_oom:
net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
return NULL;
return true;
}
/*
......@@ -564,89 +564,61 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
return 0;
}
struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
int fhoff, nhoff;
u8 prevhdr;
struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
pr_debug("payload len = 0\n");
return skb;
return -EINVAL;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
return skb;
clone = skb_clone(skb, GFP_ATOMIC);
if (clone == NULL) {
pr_debug("Can't clone skb\n");
return skb;
}
return -EINVAL;
NFCT_FRAG6_CB(clone)->orig = skb;
if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
pr_debug("message is too short.\n");
goto ret_orig;
}
if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
return -ENOMEM;
skb_set_transport_header(clone, fhoff);
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
skb_set_transport_header(skb, fhoff);
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
return -ENOMEM;
}
spin_lock_bh(&fq->q.lock);
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
spin_unlock_bh(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
inet_frag_put(&fq->q, &nf_frags);
goto ret_orig;
if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
ret = -EINVAL;
goto out_unlock;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
pr_debug("Can't reassemble fragmented packets\n");
}
spin_unlock_bh(&fq->q.lock);
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q, &nf_frags);
return ret_skb;
ret_orig:
kfree_skb(clone);
return skb;
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
s2 = s->next;
s->next = NULL;
consume_skb(s);
s = s2;
}
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
static int nf_ct_net_init(struct net *net)
{
int res;
......@@ -681,7 +653,6 @@ int nf_ct_frag6_init(void)
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.skb_free = nf_skb_free;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
......
......@@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct sk_buff *reasm;
int err;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
......@@ -63,23 +63,13 @@ static unsigned int ipv6_defrag(void *priv,
return NF_ACCEPT;
#endif
reasm = nf_ct_frag6_gather(state->net, skb,
err = nf_ct_frag6_gather(state->net, skb,
nf_ct6_defrag_user(state->hook, skb));
/* queued */
if (reasm == NULL)
if (err == -EINPROGRESS)
return NF_STOLEN;
/* error occurred or not fragmented */
if (reasm == skb)
return NF_ACCEPT;
nf_ct_frag6_consume_orig(reasm);
NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
state->in, state->out,
state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
return NF_STOLEN;
}
static struct nf_hook_ops ipv6_defrag_ops[] = {
......
......@@ -14,7 +14,6 @@
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *otcph,
......
......@@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
......
......@@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct proc_dir_entry *proc;
kuid_t root_uid;
kgid_t root_gid;
proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
&exp_file_ops);
if (!proc)
return -ENOMEM;
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
return 0;
}
......
......@@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = {
static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
kuid_t root_uid;
kgid_t root_gid;
pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!pde)
goto out_nf_conntrack;
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(pde, root_uid, root_gid);
pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
......
......@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
struct ctnl_timeout *
(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly;
(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
......
......@@ -4441,22 +4441,22 @@ static void nft_verdict_uninit(const struct nft_data *data)
}
}
static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
{
struct nlattr *nest;
nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
nest = nla_nest_start(skb, type);
if (!nest)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
goto nla_put_failure;
switch (data->verdict.code) {
switch (v->code) {
case NFT_JUMP:
case NFT_GOTO:
if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
data->verdict.chain->name))
v->chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
......@@ -4567,7 +4567,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
err = nft_value_dump(skb, data, len);
break;
case NFT_DATA_VERDICT:
err = nft_verdict_dump(skb, data);
err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
break;
default:
err = -EINVAL;
......
......@@ -16,22 +16,17 @@
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/static_key.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
enum nft_trace {
NFT_TRACE_RULE,
NFT_TRACE_RETURN,
NFT_TRACE_POLICY,
};
static const char *const comments[] = {
[NFT_TRACE_RULE] = "rule",
[NFT_TRACE_RETURN] = "return",
[NFT_TRACE_POLICY] = "policy",
static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_POLICY] = "policy",
[NFT_TRACETYPE_RETURN] = "return",
[NFT_TRACETYPE_RULE] = "rule",
};
static struct nf_loginfo trace_loginfo = {
......@@ -44,22 +39,36 @@ static struct nf_loginfo trace_loginfo = {
},
};
static void __nft_trace_packet(const struct nft_pktinfo *pkt,
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
int rulenum, enum nft_trace type)
int rulenum, enum nft_trace_types type)
{
const struct nft_pktinfo *pkt = info->pkt;
if (!info->trace || !pkt->skb->nf_trace)
return;
info->chain = chain;
info->type = type;
nft_trace_notify(info);
nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
}
static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
int rulenum, enum nft_trace type)
const struct nft_rule *rule,
int rulenum,
enum nft_trace_types type)
{
if (unlikely(pkt->skb->nf_trace))
__nft_trace_packet(pkt, chain, rulenum, type);
if (static_branch_unlikely(&nft_trace_enabled)) {
info->rule = rule;
__nft_trace_packet(info, chain, rulenum, type);
}
}
static void nft_cmp_fast_eval(const struct nft_expr *expr,
......@@ -121,7 +130,11 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
info.trace = false;
if (static_branch_unlikely(&nft_trace_enabled))
nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
......@@ -151,7 +164,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RULE);
continue;
}
break;
......@@ -161,7 +175,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
......@@ -174,7 +189,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
stackptr++;
/* fall through */
case NFT_GOTO:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RULE);
chain = regs.verdict.chain;
goto do_chain;
......@@ -182,7 +198,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
rulenum++;
/* fall through */
case NFT_RETURN:
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
nft_trace_packet(&info, chain, rule,
rulenum, NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
......@@ -196,7 +213,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
goto next_rule;
}
nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
......
/*
* (C) 2015 Red Hat GmbH
* Author: Florian Westphal <fw@strlen.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#define NFT_TRACETYPE_LL_HSIZE 20
#define NFT_TRACETYPE_NETWORK_HSIZE 40
#define NFT_TRACETYPE_TRANSPORT_HSIZE 20
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
{
__be32 id;
/* using skb address as ID results in a limited number of
* values (and quick reuse).
*
* So we attempt to use as many skb members that will not
* change while skb is with netfilter.
*/
id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
skb->skb_iif);
return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
}
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
const struct sk_buff *skb,
int off, unsigned int len)
{
struct nlattr *nla;
if (len == 0)
return 0;
nla = nla_reserve(nlskb, type, len);
if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
return -1;
return 0;
}
static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
const struct sk_buff *skb)
{
struct vlan_ethhdr veth;
int off;
BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
off = skb_mac_header(skb) - skb->data;
if (off != -ETH_HLEN)
return -1;
if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
return -1;
veth.h_vlan_proto = skb->vlan_proto;
veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
veth.h_vlan_encapsulated_proto = skb->protocol;
return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
}
static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
const struct net_device *indev,
const struct net_device *outdev)
{
if (indev) {
if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
htonl(indev->ifindex)))
return -1;
if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
htons(indev->type)))
return -1;
}
if (outdev) {
if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
htonl(outdev->ifindex)))
return -1;
if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
htons(outdev->type)))
return -1;
}
return 0;
}
static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
const struct nft_pktinfo *pkt)
{
const struct sk_buff *skb = pkt->skb;
unsigned int len = min_t(unsigned int,
pkt->xt.thoff - skb_network_offset(skb),
NFT_TRACETYPE_NETWORK_HSIZE);
int off = skb_network_offset(skb);
if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
return -1;
len = min_t(unsigned int, skb->len - pkt->xt.thoff,
NFT_TRACETYPE_TRANSPORT_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
pkt->xt.thoff, len))
return -1;
if (!skb_mac_header_was_set(skb))
return 0;
if (skb_vlan_tag_get(skb))
return nf_trace_fill_ll_header(nlskb, skb);
off = skb_mac_header(skb) - skb->data;
len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
skb, off, len);
}
static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
const struct nft_traceinfo *info)
{
if (!info->rule)
return 0;
/* a continue verdict with ->type == RETURN means that this is
* an implicit return (end of chain reached).
*
* Since no rule matched, the ->rule pointer is invalid.
*/
if (info->type == NFT_TRACETYPE_RETURN &&
info->verdict->code == NFT_CONTINUE)
return 0;
return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
cpu_to_be64(info->rule->handle));
}
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
nla_total_size(NFT_TABLE_MAXNAMELEN) +
nla_total_size(NFT_CHAIN_MAXNAMELEN) +
nla_total_size(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
nla_total_size(sizeof(u32)) + /* iif */
nla_total_size(sizeof(__be16)) + /* iiftype */
nla_total_size(sizeof(u32)) + /* oif */
nla_total_size(sizeof(__be16)) + /* oiftype */
nla_total_size(sizeof(u32)) + /* mark */
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
if (!nlh)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = info->basechain->type->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
goto nla_put_failure;
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
if (info->chain) {
if (nla_put_string(skb, NFTA_TRACE_CHAIN,
info->chain->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_TABLE,
info->chain->table->name))
goto nla_put_failure;
}
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
switch (info->type) {
case NFT_TRACETYPE_UNSPEC:
case __NFT_TRACETYPE_MAX:
break;
case NFT_TRACETYPE_RETURN:
case NFT_TRACETYPE_RULE:
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
goto nla_put_failure;
break;
case NFT_TRACETYPE_POLICY:
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
info->basechain->policy))
goto nla_put_failure;
break;
}
if (pkt->skb->mark &&
nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
goto nla_put_failure;
if (!info->packet_dumped) {
if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
goto nla_put_failure;
if (nf_trace_fill_pkt_info(skb, pkt))
goto nla_put_failure;
info->packet_dumped = true;
}
nlmsg_end(skb, nlh);
nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
return;
nla_put_failure:
WARN_ON_ONCE(1);
kfree_skb(skb);
}
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *chain)
{
info->basechain = nft_base_chain(chain);
info->trace = true;
info->packet_dumped = false;
info->pkt = pkt;
info->verdict = verdict;
}
......@@ -33,6 +33,10 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
#define nfnl_dereference_protected(id) \
rcu_dereference_protected(table[(id)].subsys, \
lockdep_nfnl_is_held((id)))
static char __initdata nfversion[] = "0.30";
static struct {
......@@ -49,6 +53,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
[NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
};
void nfnl_lock(__u8 subsys_id)
......@@ -207,8 +212,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
} else {
rcu_read_unlock();
nfnl_lock(subsys_id);
if (rcu_dereference_protected(table[subsys_id].subsys,
lockdep_is_held(&table[subsys_id].mutex)) != ss ||
if (nfnl_dereference_protected(subsys_id) != ss ||
nfnetlink_find_client(type, ss) != nc)
err = -EAGAIN;
else if (nc->call)
......@@ -296,15 +300,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
return netlink_ack(oskb, nlh, -ENOMEM);
nfnl_lock(subsys_id);
ss = rcu_dereference_protected(table[subsys_id].subsys,
lockdep_is_held(&table[subsys_id].mutex));
ss = nfnl_dereference_protected(subsys_id);
if (!ss) {
#ifdef CONFIG_MODULES
nfnl_unlock(subsys_id);
request_module("nfnetlink-subsys-%d", subsys_id);
nfnl_lock(subsys_id);
ss = rcu_dereference_protected(table[subsys_id].subsys,
lockdep_is_held(&table[subsys_id].mutex));
ss = nfnl_dereference_protected(subsys_id);
if (!ss)
#endif
{
......
......@@ -38,8 +38,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
static LIST_HEAD(cttimeout_list);
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
[CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
.len = CTNL_TIMEOUT_NAME_MAX - 1},
......@@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
list_for_each_entry(timeout, &cttimeout_list, head) {
list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
......@@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
timeout->l3num = l3num;
timeout->l4proto = l4proto;
atomic_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &cttimeout_list);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
err:
......@@ -209,6 +207,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
static int
ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct ctnl_timeout *cur, *last;
if (cb->args[2])
......@@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
list_for_each_entry_rcu(cur, &cttimeout_list, head) {
list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
if (last) {
if (cur != last)
continue;
......@@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
struct net *net = sock_net(skb->sk);
int ret = -ENOENT;
char *name;
struct ctnl_timeout *cur;
......@@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
return -EINVAL;
name = nla_data(cda[CTA_TIMEOUT_NAME]);
list_for_each_entry(cur, &cttimeout_list, head) {
list_for_each_entry(cur, &net->nfct_timeout_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
......@@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i,
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
static void ctnl_untimeout(struct ctnl_timeout *timeout)
static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
int i;
local_bh_disable();
for (i = 0; i < init_net.ct.htable_size; i++) {
for (i = 0; i < net->ct.htable_size; i++) {
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < init_net.ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
......@@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout)
}
/* try to delete object, fail if it is still in use. */
static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
{
int ret = 0;
......@@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
ctnl_untimeout(timeout);
ctnl_untimeout(net, timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
......@@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
struct net *net = sock_net(skb->sk);
char *name;
struct ctnl_timeout *cur;
int ret = -ENOENT;
if (!cda[CTA_TIMEOUT_NAME]) {
list_for_each_entry(cur, &cttimeout_list, head)
ctnl_timeout_try_del(cur);
list_for_each_entry(cur, &net->nfct_timeout_list, head)
ctnl_timeout_try_del(net, cur);
return 0;
}
name = nla_data(cda[CTA_TIMEOUT_NAME]);
list_for_each_entry(cur, &cttimeout_list, head) {
list_for_each_entry(cur, &net->nfct_timeout_list, head) {
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
ret = ctnl_timeout_try_del(cur);
ret = ctnl_timeout_try_del(net, cur);
if (ret < 0)
return ret;
......@@ -511,12 +512,13 @@ static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
static struct ctnl_timeout *
ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
rcu_read_lock();
list_for_each_entry_rcu(timeout, &cttimeout_list, head) {
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
......@@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
static int __net_init cttimeout_net_init(struct net *net)
{
INIT_LIST_HEAD(&net->nfct_timeout_list);
return 0;
}
static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
kfree_rcu(cur, rcu_head);
}
}
static struct pernet_operations cttimeout_ops = {
.init = cttimeout_net_init,
.exit = cttimeout_net_exit,
};
static int __init cttimeout_init(void)
{
int ret;
ret = register_pernet_subsys(&cttimeout_ops);
if (ret < 0)
return ret;
ret = nfnetlink_subsys_register(&cttimeout_subsys);
if (ret < 0) {
pr_err("cttimeout_init: cannot register cttimeout with "
@@ -586,28 +617,17 @@ static int __init cttimeout_init(void)
return 0;
err_out:
unregister_pernet_subsys(&cttimeout_ops);
return ret;
}
static void __exit cttimeout_exit(void)
{
- struct ctnl_timeout *cur, *tmp;
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
- /* Make sure no conntrack objects refer to custom timeouts anymore. */
- ctnl_untimeout(NULL);
- list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
-  * it's safe to release them all without checking refcnt.
-  */
- nf_ct_l4proto_put(cur->l4proto);
- kfree_rcu(cur, rcu_head);
- }
unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
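The conversion above is the standard pernet boilerplate: per-namespace state is set up in a .init callback, torn down in .exit, and the pair is registered once at module init. A minimal sketch of the pattern, with hypothetical example_* names (not part of this patch):

#include <net/net_namespace.h>

/* Hypothetical pernet subsystem mirroring what cttimeout now does. */
static int __net_init example_net_init(struct net *net)
{
	/* initialize per-netns state here, e.g. a list head */
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* release every per-netns object before the namespace goes away */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

/* module init: register_pernet_subsys(&example_net_ops);
 * module exit: unregister_pernet_subsys(&example_net_ops); */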
......
@@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
return status;
}
- static int
+ static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
spin_lock_bh(&inst->lock);
inst->flushtimeout = timeout;
spin_unlock_bh(&inst->lock);
- return 0;
}
- static int
+ static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
spin_lock_bh(&inst->lock);
inst->qthreshold = qthresh;
spin_unlock_bh(&inst->lock);
- return 0;
}
static int
@@ -1064,15 +1060,26 @@ static int __net_init nfnl_log_net_init(struct net *net)
{
unsigned int i;
struct nfnl_log_net *log = nfnl_log_pernet(net);
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc;
kuid_t root_uid;
kgid_t root_gid;
#endif
for (i = 0; i < INSTANCE_BUCKETS; i++)
INIT_HLIST_HEAD(&log->instance_table[i]);
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
if (!proc_create("nfnetlink_log", 0440,
net->nf.proc_netfilter, &nful_file_ops))
proc = proc_create("nfnetlink_log", 0440,
net->nf.proc_netfilter, &nful_file_ops);
if (!proc)
return -ENOMEM;
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
}
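Besides making the setters void, this hunk chowns the per-netns /proc file to the namespace's own root, which is what makes it readable inside unprivileged containers; xt_proto_init below applies the same treatment to the x_tables proc entries. A condensed sketch of the pattern, with a hypothetical entry name and file_operations:

#include <linux/proc_fs.h>
#include <linux/uidgid.h>

/* Hypothetical: create a proc entry owned by the userns-mapped root.
 * "example" and example_fops stand in for the real names. */
static int example_proc_init(struct net *net,
			     const struct file_operations *example_fops)
{
	struct proc_dir_entry *proc;
	kuid_t root_uid;
	kgid_t root_gid;

	proc = proc_create("example", 0440, net->nf.proc_netfilter,
			   example_fops);
	if (!proc)
		return -ENOMEM;

	/* uid/gid 0 may be unmapped in this user namespace, so check */
	root_uid = make_kuid(net->user_ns, 0);
	root_gid = make_kgid(net->user_ns, 0);
	if (uid_valid(root_uid) && gid_valid(root_gid))
		proc_set_user(proc, root_uid, root_gid);
	return 0;
}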
......
@@ -18,12 +18,16 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -188,6 +192,13 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
/* don't change or set _LOOPBACK, _USER, etc. */
static bool pkt_type_ok(u32 p)
{
return p == PACKET_HOST || p == PACKET_BROADCAST ||
p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
}
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -203,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr,
case NFT_META_PRIORITY:
skb->priority = value;
break;
case NFT_META_PKTTYPE:
if (skb->pkt_type != value &&
pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
skb->pkt_type = value;
break;
case NFT_META_NFTRACE:
skb->nf_trace = 1;
break;
@@ -271,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
{
unsigned int hooks;
switch (ctx->afi->family) {
case NFPROTO_BRIDGE:
hooks = 1 << NF_BR_PRE_ROUTING;
break;
case NFPROTO_NETDEV:
hooks = 1 << NF_NETDEV_INGRESS;
break;
default:
return -EOPNOTSUPP;
}
return nft_chain_validate_hooks(ctx->chain, hooks);
}
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -288,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
case NFT_META_NFTRACE:
len = sizeof(u8);
break;
case NFT_META_PKTTYPE:
err = nft_meta_set_init_pkttype(ctx);
if (err)
return err;
len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
@@ -297,6 +337,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
if (priv->key == NFT_META_NFTRACE)
static_branch_inc(&nft_trace_enabled);
return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);
@@ -334,6 +377,16 @@ int nft_meta_set_dump(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);
void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
if (priv->key == NFT_META_NFTRACE)
static_branch_dec(&nft_trace_enabled);
}
EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -348,6 +401,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
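Two things happen in nft_meta: NFT_META_PKTTYPE becomes settable (restricted to bridge prerouting and netdev ingress, and only between the four "real" packet types), and the NFT_META_NFTRACE setter now arms the nft_trace_enabled static branch so rule tracing costs nothing while unused. The inc on init must be balanced by a dec on destroy, hence the new .destroy hook. A self-contained sketch of that static-key pairing (example_* names are hypothetical):

#include <linux/static_key.h>

DEFINE_STATIC_KEY_FALSE(example_trace_enabled);

static void example_tracer_init(void)		/* cf. nft_meta_set_init() */
{
	static_branch_inc(&example_trace_enabled);
}

static void example_tracer_destroy(void)	/* cf. nft_meta_set_destroy() */
{
	static_branch_dec(&example_trace_enabled);
}

static void example_hot_path(void)
{
	/* compiles to a patched-out branch while the key count is zero */
	if (static_branch_unlikely(&example_trace_enabled))
		/* emit trace event */;
}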
......
@@ -107,10 +107,13 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
[NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
[NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
@@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.dump = nft_payload_dump,
};
static void nft_payload_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
const u32 *src = &regs->data[priv->sreg];
int offset, csum_offset;
__wsum fsum, tsum;
__sum16 sum;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
offset = skb_mac_header(skb) - skb->data;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
offset = pkt->xt.thoff;
break;
default:
BUG();
}
csum_offset = offset + priv->csum_offset;
offset += priv->offset;
if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
(priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
skb->ip_summed != CHECKSUM_PARTIAL)) {
if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
goto err;
fsum = skb_checksum(skb, offset, priv->len, 0);
tsum = csum_partial(src, priv->len, 0);
sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum),
tsum));
if (sum == 0)
sum = CSUM_MANGLED_0;
if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
goto err;
}
if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
skb_store_bits(skb, offset, src, priv->len) < 0)
goto err;
return;
err:
regs->verdict.code = NFT_BREAK;
}
static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
priv->csum_type =
ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
priv->csum_offset =
ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
switch (priv->csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
default:
return -EOPNOTSUPP;
}
return nft_validate_register_load(priv->sreg, priv->len);
}
static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_payload_set *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) ||
nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
htonl(priv->csum_offset)))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
static const struct nft_expr_ops nft_payload_set_ops = {
.type = &nft_payload_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
.eval = nft_payload_set_eval,
.init = nft_payload_set_init,
.dump = nft_payload_set_dump,
};
static const struct nft_expr_ops *
nft_payload_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
enum nft_payload_bases base;
unsigned int offset, len;
- if (tb[NFTA_PAYLOAD_DREG] == NULL ||
- tb[NFTA_PAYLOAD_BASE] == NULL ||
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
tb[NFTA_PAYLOAD_LEN] == NULL)
return ERR_PTR(-EINVAL);
@@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EOPNOTSUPP);
}
if (tb[NFTA_PAYLOAD_SREG] != NULL) {
if (tb[NFTA_PAYLOAD_DREG] != NULL)
return ERR_PTR(-EINVAL);
return &nft_payload_set_ops;
}
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
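The interesting part of the payload-mangling support is the inet checksum fixup: nft_payload_set_eval() folds the old bytes out of the checksum and the new bytes in, rather than recomputing it from scratch, i.e. the usual RFC 1624 incremental update. A standalone sketch of that arithmetic (the helper name is hypothetical):

#include <net/checksum.h>

/* sum' = ~(~sum - csum(old bytes) + csum(new bytes)) */
static __sum16 example_csum_update(__sum16 sum, __wsum old_csum,
				   __wsum new_csum)
{
	__wsum tmp = csum_sub(~csum_unfold(sum), old_csum);

	sum = csum_fold(csum_add(tmp, new_csum));
	if (sum == 0)
		sum = CSUM_MANGLED_0;	/* 0 means "no checksum" for UDP */
	return sum;
}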
......
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
@@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
struct proc_dir_entry *proc;
kuid_t root_uid;
kgid_t root_gid;
#endif
if (af >= ARRAY_SIZE(xt_prefix))
@@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
(void *)(unsigned long)af);
if (!proc)
goto out;
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
@@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
@@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
......
@@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto out;
}
- timeout = timeout_find_get(timeout_name);
+ timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
pr_info("No such timeout policy \"%s\"\n", timeout_name);
......
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching");
MODULE_ALIAS("ipt_cgroup");
MODULE_ALIAS("ip6t_cgroup");
- static int cgroup_mt_check(const struct xt_mtchk_param *par)
+ static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
{
- struct xt_cgroup_info *info = par->matchinfo;
+ struct xt_cgroup_info_v0 *info = par->matchinfo;
if (info->invert & ~1)
return -EINVAL;
@@ -34,10 +34,41 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
return 0;
}
static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
{
struct xt_cgroup_info_v1 *info = par->matchinfo;
struct cgroup *cgrp;
if ((info->invert_path & ~1) || (info->invert_classid & ~1))
return -EINVAL;
if (!info->has_path && !info->has_classid) {
pr_info("xt_cgroup: no path or classid specified\n");
return -EINVAL;
}
if (info->has_path && info->has_classid) {
pr_info("xt_cgroup: both path and classid specified\n");
return -EINVAL;
}
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
pr_info("xt_cgroup: invalid path, errno=%ld\n",
PTR_ERR(cgrp));
return -EINVAL;
}
info->priv = cgrp;
}
return 0;
}
static bool
- cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+ cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- const struct xt_cgroup_info *info = par->matchinfo;
+ const struct xt_cgroup_info_v0 *info = par->matchinfo;
if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
@@ -46,27 +77,67 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
info->invert;
}
- static struct xt_match cgroup_mt_reg __read_mostly = {
static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info_v1 *info = par->matchinfo;
struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
struct cgroup *ancestor = info->priv;
if (!skb->sk || !sk_fullsock(skb->sk))
return false;
if (ancestor)
return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
info->invert_path;
else
return (info->classid == sock_cgroup_classid(skcd)) ^
info->invert_classid;
}
static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
{
struct xt_cgroup_info_v1 *info = par->matchinfo;
if (info->priv)
cgroup_put(info->priv);
}
+ static struct xt_match cgroup_mt_reg[] __read_mostly = {
{
.name = "cgroup",
.revision = 0,
.family = NFPROTO_UNSPEC,
- .checkentry = cgroup_mt_check,
- .match = cgroup_mt,
- .matchsize = sizeof(struct xt_cgroup_info),
+ .checkentry = cgroup_mt_check_v0,
+ .match = cgroup_mt_v0,
+ .matchsize = sizeof(struct xt_cgroup_info_v0),
.me = THIS_MODULE,
.hooks = (1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
{
.name = "cgroup",
.revision = 1,
.family = NFPROTO_UNSPEC,
.checkentry = cgroup_mt_check_v1,
.match = cgroup_mt_v1,
.matchsize = sizeof(struct xt_cgroup_info_v1),
.destroy = cgroup_mt_destroy_v1,
.me = THIS_MODULE,
.hooks = (1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
};
static int __init cgroup_mt_init(void)
{
- return xt_register_match(&cgroup_mt_reg);
+ return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
static void __exit cgroup_mt_exit(void)
{
- xt_unregister_match(&cgroup_mt_reg);
+ xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
module_init(cgroup_mt_init);
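Revision 1 of the cgroup match resolves the configured cgroup2 path once at checkentry time (taking a reference that .destroy drops) and then does a cheap ancestry test per packet. The core of that pattern, condensed into a hypothetical standalone helper pair:

#include <linux/cgroup.h>
#include <net/sock.h>

/* setup: pin the cgroup behind a path; caller must cgroup_put() later */
static struct cgroup *example_pin_cgroup(const char *path)
{
	struct cgroup *cgrp = cgroup_get_from_path(path);

	return IS_ERR(cgrp) ? NULL : cgrp;
}

/* hot path: does the socket's cgroup sit below the pinned ancestor? */
static bool example_sk_under(struct sock *sk, struct cgroup *ancestor)
{
	struct sock_cgroup_data *skcd = &sk->sk_cgrp_data;

	return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor);
}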
......
@@ -305,10 +305,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
if (key->eth.type == htons(ETH_P_IP)) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
- int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
err = ip_defrag(net, skb, user);
@@ -319,28 +319,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
- struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(net, skb, user);
- if (!reasm)
- return -EINPROGRESS;
- if (skb == reasm) {
- kfree_skb(skb);
- return -EINVAL;
- }
- /* Don't free 'skb' even though it is one of the original
-  * fragments, as we're going to morph it into the head.
-  */
- skb_get(skb);
- nf_ct_frag6_consume_orig(reasm);
+ err = nf_ct_frag6_gather(net, skb, user);
+ if (err)
+ return err;
- key->ip.proto = ipv6_hdr(reasm)->nexthdr;
- skb_morph(skb, reasm);
- skb->next = reasm->next;
- consume_skb(reasm);
+ key->ip.proto = ipv6_hdr(skb)->nexthdr;
ovs_cb.mru = IP6CB(skb)->frag_max_size;
#endif
} else {
......
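With nf_ct_frag6_gather() now reassembling in place, callers lose all the reasm-skb bookkeeping above and just branch on the return code. A sketch of the new calling convention as this hunk uses it (hypothetical wrapper; semantics hedged against the patch):

/* 0: skb now holds the reassembled datagram
 * -EINPROGRESS: fragment queued, skb consumed by the frag queue
 * other negative value: error */
static int example_defrag_ipv6(struct net *net, struct sk_buff *skb, u32 user)
{
	int err = nf_ct_frag6_gather(net, skb, user);

	if (err)
		return err;	/* includes -EINPROGRESS: stop processing */
	/* continue with the full packet, e.g. read ipv6_hdr(skb)->nexthdr */
	return 0;
}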