Commit 93561eef authored by Jakub Kicinski's avatar Jakub Kicinski

Merge tag 'nf-24-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) TCP conntrack now only evaluates window negotiation for packets in
   the REPLY direction, from Ryan Schaefer. Otherwise SYN retransmissions
   trigger incorrect window scale negotiation. From Ryan Schaefer.

2) Restrict tunnel objects to NFPROTO_NETDEV which is where it makes sense
   to use this object type.

3) Fix conntrack pick up from the middle of SCTP_CID_SHUTDOWN_ACK packets.
   From Xin Long.

4) Another attempt from Jozsef Kadlecsik to address the slow down of the
   swap command in ipset.

5) Replace a BUG_ON by WARN_ON_ONCE in nf_log, and consolidate check for
   the case that the logger is NULL from the read side lock section.

6) Address lack of sanitization for custom expectations. Restrict layer 3
   and 4 families to what it is supported by userspace.

* tag 'nf-24-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations
  netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger
  netfilter: ipset: fix performance regression in swap operation
  netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new
  netfilter: nf_tables: restrict tunnel object to NFPROTO_NETDEV
  netfilter: conntrack: correct window scaling with retransmitted SYN
====================

Link: https://lore.kernel.org/r/20240131225943.7536-1-pablo@netfilter.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents f0588b15 8059918a
......@@ -186,6 +186,8 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
/* Cancel ongoing garbage collectors before destroying the set*/
void (*cancel_gc)(struct ip_set *set);
/* Region-locking is used */
bool region_lock;
};
......@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
/* A generic IP set */
struct ip_set {
/* For call_cru in destroy */
struct rcu_head rcu;
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
......
......@@ -1351,6 +1351,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
* @type: stateful object numeric type
* @owner: module owner
* @maxattr: maximum netlink attribute
* @family: address family for AF-specific object types
* @policy: netlink attribute policy
*/
struct nft_object_type {
......@@ -1360,6 +1361,7 @@ struct nft_object_type {
struct list_head list;
u32 type;
unsigned int maxattr;
u8 family;
struct module *owner;
const struct nla_policy *policy;
};
......
......@@ -30,6 +30,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
......@@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
......@@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
static void
mtype_cancel_gc(struct ip_set *set)
{
struct mtype *map = set->data;
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
}
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
......@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
.cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
......@@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
static void
ip_set_destroy_set_rcu(struct rcu_head *head)
{
struct ip_set *set = container_of(head, struct ip_set, rcu);
ip_set_destroy_set(set);
}
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
......@@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
/* Must wait for flush to be really finished in list:set */
rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
......@@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
/* Must wait for flush to be really finished in list:set */
rcu_barrier();
read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
......@@ -1221,6 +1229,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
/* Must cancel garbage collectors */
s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
......@@ -1228,6 +1238,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
u16 features = 0;
read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
......@@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
ip_set_destroy_set(s);
if (features & IPSET_TYPE_NAME) {
/* Must wait for flush to be really finished */
rcu_barrier();
}
/* Must cancel garbage collectors */
s->variant->cancel_gc(s);
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
......@@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
/* Make sure all readers of the old set pointers are completed. */
synchronize_rcu();
return 0;
}
......@@ -2409,8 +2425,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
unregister_pernet_subsys(&ip_set_net_ops);
/* Wait for call_rcu() in destroy */
rcu_barrier();
pr_debug("these are the famous last words\n");
}
......
......@@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
......@@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
......@@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
if (SET_WITH_TIMEOUT(set))
cancel_delayed_work_sync(&h->gc.dwork);
mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
......@@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
static void
mtype_cancel_gc(struct ip_set *set)
{
struct htype *h = set->data;
if (SET_WITH_TIMEOUT(set))
cancel_delayed_work_sync(&h->gc.dwork);
}
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
......@@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
.cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
......
......@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
if (SET_WITH_TIMEOUT(set))
timer_shutdown_sync(&map->gc);
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
......@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
static void
list_set_cancel_gc(struct ip_set *set)
{
struct list_set *map = set->data;
if (SET_WITH_TIMEOUT(set))
timer_shutdown_sync(&map->gc);
}
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
......@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
.cancel_gc = list_set_cancel_gc,
};
static void
......
......@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
} else {
} else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
......
......@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
u32 end, u32 win)
u32 end, u32 win,
enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
......@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
if (dir == IP_CT_DIR_REPLY &&
!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
......@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
end, win);
end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
......@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
end, win);
end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
......
......@@ -193,10 +193,11 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
BUG_ON(loggers[pf][type] == NULL);
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
if (!logger)
WARN_ON_ONCE(1);
else
module_put(logger->me);
rcu_read_unlock();
}
......
......@@ -7551,11 +7551,15 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
return -1;
}
static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
if (type->family != NFPROTO_UNSPEC &&
type->family != family)
continue;
if (objtype == type->type)
return type;
}
......@@ -7563,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
nft_obj_type_get(struct net *net, u32 objtype)
nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
type = __nft_obj_type_get(objtype);
type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
......@@ -7660,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
type = __nft_obj_type_get(objtype);
type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
......@@ -7674,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
type = nft_obj_type_get(net, objtype);
type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
......
......@@ -1250,7 +1250,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
switch (priv->l3num) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
if (priv->l3num != ctx->family)
return -EINVAL;
fallthrough;
case NFPROTO_INET:
break;
default:
return -EOPNOTSUPP;
}
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
switch (priv->l4proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_DCCP:
case IPPROTO_SCTP:
break;
default:
return -EOPNOTSUPP;
}
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
......
......@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
.family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment