Commit 4696ad36 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Replace unnecessary list_for_each_entry_continue() in nf_tables,
   from Jakob Koschel.

2) Add struct nf_conntrack_net_ecache to conntrack event cache and
   use it, from Florian Westphal.

3) Refactor ctnetlink_dump_list(), also from Florian.

4) Bump module reference counter on cttimeout object addition/removal,
   from Florian.

5) Consolidate nf_log MAC printer, from Phil Sutter.

6) Add basic logging support for unknown ethertype, from Phil Sutter.

7) Consolidate check for sysctl nf_log_all_netns toggle, also from Phil.

8) Replace hardcode value in nft_bitwise, from Jeremy Sowden.

9) Rename BASIC-like goto tags in nft_bitwise to more meaningful names,
   also from Jeremy.

10) nft_fib support for reverse path filtering with policy-based routing
    on iif. Extend selftests to cover for this new usecase, from Florian.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a21437d2 0c7b2761
......@@ -43,6 +43,11 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
struct nf_conntrack_net_ecache {
struct delayed_work dwork;
struct netns_ct *ct_net;
};
struct nf_conntrack_net {
/* only used when new connection is allocated: */
atomic_t count;
......@@ -58,8 +63,7 @@ struct nf_conntrack_net {
struct ctl_table_header *sysctl_header;
#endif
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct delayed_work ecache_dwork;
struct netns_ct *ct_net;
struct nf_conntrack_net_ecache ecache;
#endif
};
......
......@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
if (nft_hook(pkt) == NF_INET_FORWARD &&
priv->flags & NFTA_FIB_F_IIF)
fl4.flowi4_iif = nft_out(pkt)->ifindex;
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
......
......@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->daddr = iph->daddr;
fl6->saddr = iph->saddr;
} else {
if (nft_hook(pkt) == NF_INET_FORWARD &&
priv->flags & NFTA_FIB_F_IIF)
fl6->flowi6_iif = nft_out(pkt)->ifindex;
fl6->daddr = iph->saddr;
fl6->saddr = iph->daddr;
}
......
......@@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
static void ecache_work(struct work_struct *work)
{
struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
struct netns_ct *ctnet = cnet->ct_net;
struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
struct netns_ct *ctnet = cnet->ecache.ct_net;
int cpu, delay = -1;
struct ct_pcpu *pcpu;
......@@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work)
ctnet->ecache_dwork_pending = delay > 0;
if (delay >= 0)
schedule_delayed_work(&cnet->ecache_dwork, delay);
schedule_delayed_work(&cnet->ecache.dwork, delay);
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
......@@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
!delayed_work_pending(&cnet->ecache_dwork)) {
schedule_delayed_work(&cnet->ecache_dwork, HZ);
!delayed_work_pending(&cnet->ecache.dwork)) {
schedule_delayed_work(&cnet->ecache.dwork, HZ);
net->ct.ecache_dwork_pending = true;
} else if (state == NFCT_ECACHE_DESTROY_SENT) {
net->ct.ecache_dwork_pending = false;
mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
}
}
......@@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
net->ct.sysctl_events = nf_ct_events;
cnet->ct_net = &net->ct;
INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
cnet->ecache.ct_net = &net->ct;
INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
......@@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
cancel_delayed_work_sync(&cnet->ecache_dwork);
cancel_delayed_work_sync(&cnet->ecache.dwork);
}
......@@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
return 0;
}
static int ctnetlink_dump_one_entry(struct sk_buff *skb,
struct netlink_callback *cb,
struct nf_conn *ct,
bool dying)
{
struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u8 l3proto = nfmsg->nfgen_family;
int res;
if (l3proto && nf_ct_l3num(ct) != l3proto)
return 0;
if (ctx->last) {
if (ct != ctx->last)
return 0;
ctx->last = NULL;
}
/* We can't dump extension info for the unconfirmed
* list because unconfirmed conntracks can have
* ct->ext reallocated (and thus freed).
*
* In the dying list case ct->ext can't be free'd
* until after we drop pcpu->lock.
*/
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, dying, 0);
if (res < 0) {
if (!refcount_inc_not_zero(&ct->ct_general.use))
return 0;
ctx->last = ct;
}
return res;
}
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
......@@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
int cpu;
struct hlist_nulls_head *list;
struct net *net = sock_net(skb->sk);
int res, cpu;
if (ctx->done)
return 0;
......@@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
if (ctx->last) {
if (ct != last)
continue;
ctx->last = NULL;
}
/* We can't dump extension info for the unconfirmed
* list because unconfirmed conntracks can have
* ct->ext reallocated (and thus freed).
*
* In the dying list case ct->ext can't be free'd
* until after we drop pcpu->lock.
*/
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, dying, 0);
res = ctnetlink_dump_one_entry(skb, cb, ct, dying);
if (res < 0) {
if (!refcount_inc_not_zero(&ct->ct_general.use))
continue;
ctx->cpu = cpu;
ctx->last = ct;
spin_unlock_bh(&pcpu->lock);
goto out;
}
......
......@@ -40,6 +40,12 @@ struct arppayload {
unsigned char ip_dst[4];
};
/* Guard against containers flooding syslog. */
static bool nf_log_allowed(const struct net *net)
{
return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
}
static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
......@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
......@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
}
static void dump_ipv4_mac_header(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb)
static void dump_mac_header(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
......@@ -798,9 +803,26 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
nf_log_buf_add(m, "%02x", *p++);
for (i = 1; i < dev->hard_header_len; i++, p++)
nf_log_buf_add(m, ":%02x", *p);
if (dev->type == ARPHRD_SIT) {
p -= ETH_HLEN;
if (p < skb->head)
p = NULL;
}
if (p) {
nf_log_buf_add(m, "%02x", *p++);
for (i = 1; i < dev->hard_header_len; i++)
nf_log_buf_add(m, ":%02x", *p++);
}
if (dev->type == ARPHRD_SIT) {
const struct iphdr *iph =
(struct iphdr *)skb_mac_header(skb);
nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
&iph->daddr);
}
}
nf_log_buf_add(m, " ");
}
......@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
......@@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
out, loginfo, prefix);
if (in)
dump_ipv4_mac_header(m, loginfo, skb);
dump_mac_header(m, loginfo, skb);
dump_ipv4_packet(net, m, loginfo, skb, 0);
......@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
.me = THIS_MODULE,
};
static void dump_ipv6_mac_header(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
if (!(logflags & NF_LOG_MACDECODE))
goto fallback;
switch (dev->type) {
case ARPHRD_ETHER:
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
break;
}
fallback:
nf_log_buf_add(m, "MAC=");
if (dev->hard_header_len &&
skb->mac_header != skb->network_header) {
const unsigned char *p = skb_mac_header(skb);
unsigned int len = dev->hard_header_len;
unsigned int i;
if (dev->type == ARPHRD_SIT) {
p -= ETH_HLEN;
if (p < skb->head)
p = NULL;
}
if (p) {
nf_log_buf_add(m, "%02x", *p++);
for (i = 1; i < len; i++)
nf_log_buf_add(m, ":%02x", *p++);
}
nf_log_buf_add(m, " ");
if (dev->type == ARPHRD_SIT) {
const struct iphdr *iph =
(struct iphdr *)skb_mac_header(skb);
nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
&iph->daddr);
}
} else {
nf_log_buf_add(m, " ");
}
}
static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
......@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
......@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo, prefix);
if (in)
dump_ipv6_mac_header(m, loginfo, skb);
dump_mac_header(m, loginfo, skb);
dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
......@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
.me = THIS_MODULE,
};
static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *loginfo,
const char *prefix)
{
struct nf_log_buf *m;
if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
if (!loginfo)
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
prefix);
dump_mac_header(m, loginfo, skb);
nf_log_buf_close(m);
}
static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
......@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
case htons(ETH_P_RARP):
nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
break;
default:
nf_log_unknown_packet(net, pf, hooknum, skb,
in, out, loginfo, prefix);
break;
}
}
......
......@@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
data_size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
list_for_each_entry(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
......@@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
data_boundary = data + data_size;
size = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
......
......@@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
timeout->timeout.l3num = l3num;
timeout->timeout.l4proto = l4proto;
refcount_set(&timeout->refcnt, 1);
__module_get(THIS_MODULE);
list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
return 0;
......@@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
if (!try_module_get(THIS_MODULE))
if (!refcount_inc_not_zero(&timeout->refcnt))
goto err;
if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
matching = timeout;
break;
}
......@@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
struct ctnl_timeout *timeout =
container_of(t, struct ctnl_timeout, timeout);
if (refcount_dec_and_test(&timeout->refcnt))
if (refcount_dec_and_test(&timeout->refcnt)) {
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
module_put(THIS_MODULE);
}
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
......
......@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
{
unsigned int i;
for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
......@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
return err;
if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
err = -EINVAL;
goto err1;
goto err_mask_release;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
goto err1;
goto err_mask_release;
if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
err = -EINVAL;
goto err2;
goto err_xor_release;
}
return 0;
err2:
err_xor_release:
nft_data_release(&priv->xor, xor.type);
err1:
err_mask_release:
nft_data_release(&priv->mask, mask.type);
return err;
}
......
......@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
if (priv->flags & NFTA_FIB_F_IIF) {
hooks |= (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD);
}
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
......
......@@ -66,6 +66,20 @@ table inet filter {
EOF
}
load_pbr_ruleset() {
local netns=$1
ip netns exec ${netns} nft -f /dev/stdin <<EOF
table inet filter {
chain forward {
type filter hook forward priority raw;
fib saddr . iif oif gt 0 accept
log drop
}
}
EOF
}
load_ruleset_count() {
local netns=$1
......@@ -219,4 +233,40 @@ sleep 2
ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
# delete all rules
ip netns exec ${ns1} nft flush ruleset
ip netns exec ${ns2} nft flush ruleset
ip netns exec ${nsrouter} nft flush ruleset
ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
ip -net ${ns1} addr add dead:1::99/64 dev eth0
ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
ip -net ${ns1} addr del dead:2::99/64 dev eth0
ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
load_pbr_ruleset ${nsrouter}
if [ $? -ne 0 ] ; then
echo "SKIP: Could not load fib forward ruleset"
exit $ksft_skip
fi
ip -net ${nsrouter} rule add from all table 128
ip -net ${nsrouter} rule add from all iif veth0 table 129
ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net ${nsrouter} -4 rule delete table main
test_ping 10.0.2.99 dead:2::99
if [ $? -ne 0 ] ; then
ip -net ${nsrouter} nft list ruleset
echo "FAIL: fib mismatch in pbr setup"
exit 1
fi
echo "PASS: fib expression forward check with policy based routing"
exit 0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment