Commit b57b2d1f authored by Jozsef Kadlecsik

netfilter: ipset: Prepare the ipset core to use RCU at set level

Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking
accordingly. Convert the comment extension into an RCU-aware object. Also,
simplify the timeout routines.
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
parent bd55389c
...@@ -108,8 +108,13 @@ struct ip_set_counter { ...@@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets; atomic64_t packets;
}; };
struct ip_set_comment_rcu {
struct rcu_head rcu;
char str[0];
};
struct ip_set_comment { struct ip_set_comment {
char *str; struct ip_set_comment_rcu __rcu *c;
}; };
struct ip_set_skbinfo { struct ip_set_skbinfo {
...@@ -226,7 +231,7 @@ struct ip_set { ...@@ -226,7 +231,7 @@ struct ip_set {
/* The name of the set */ /* The name of the set */
char name[IPSET_MAXNAMELEN]; char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */ /* Lock protecting the set data */
rwlock_t lock; spinlock_t lock;
/* References to the set */ /* References to the set */
u32 ref; u32 ref;
/* The core set type */ /* The core set type */
......
...@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb) ...@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb); return nla_data(tb);
} }
/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
static inline void static inline void
ip_set_init_comment(struct ip_set_comment *comment, ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext) const struct ip_set_ext *ext)
{ {
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0; size_t len = ext->comment ? strlen(ext->comment) : 0;
if (unlikely(comment->str)) { if (unlikely(c)) {
kfree(comment->str); kfree_rcu(c, rcu);
comment->str = NULL; rcu_assign_pointer(comment->c, NULL);
} }
if (!len) if (!len)
return; return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE; len = IPSET_MAX_COMMENT_SIZE;
comment->str = kzalloc(len + 1, GFP_ATOMIC); c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!comment->str)) if (unlikely(!c))
return; return;
strlcpy(comment->str, ext->comment, len + 1); strlcpy(c->str, ext->comment, len + 1);
rcu_assign_pointer(comment->c, c);
} }
/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{ {
if (!comment->str) struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
if (!c)
return 0; return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
} }
/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static inline void static inline void
ip_set_comment_free(struct ip_set_comment *comment) ip_set_comment_free(struct ip_set_comment *comment)
{ {
if (unlikely(!comment->str)) struct ip_set_comment_rcu *c;
c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return; return;
kfree(comment->str); kfree_rcu(c, rcu);
comment->str = NULL; rcu_assign_pointer(comment->c, NULL);
} }
#endif #endif
......
...@@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb) ...@@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
} }
static inline bool static inline bool
ip_set_timeout_test(unsigned long timeout) ip_set_timeout_expired(unsigned long *t)
{ {
return timeout == IPSET_ELEM_PERMANENT || return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
time_is_after_jiffies(timeout);
}
static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
return *timeout != IPSET_ELEM_PERMANENT &&
time_is_before_jiffies(*timeout);
} }
static inline void static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t) ip_set_timeout_set(unsigned long *timeout, u32 value)
{ {
if (!t) { unsigned long t;
if (!value) {
*timeout = IPSET_ELEM_PERMANENT; *timeout = IPSET_ELEM_PERMANENT;
return; return;
} }
*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
if (*timeout == IPSET_ELEM_PERMANENT) if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */ /* Bingo! :-) */
(*timeout)--; t--;
*timeout = t;
} }
static inline u32 static inline u32
......
...@@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type) ...@@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
type->name, family_name(type->family), type->name, family_name(type->family),
type->revision_min); type->revision_min);
ret = -EINVAL; ip_set_type_unlock();
goto unlock; return -EINVAL;
} }
list_add_rcu(&type->list, &ip_set_type_list); list_add_rcu(&type->list, &ip_set_type_list);
pr_debug("type %s, family %s, revision %u:%u registered.\n", pr_debug("type %s, family %s, revision %u:%u registered.\n",
type->name, family_name(type->family), type->name, family_name(type->family),
type->revision_min, type->revision_max); type->revision_min, type->revision_max);
unlock:
ip_set_type_unlock(); ip_set_type_unlock();
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(ip_set_type_register); EXPORT_SYMBOL_GPL(ip_set_type_register);
...@@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type) ...@@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u not registered\n", pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
type->name, family_name(type->family), type->name, family_name(type->family),
type->revision_min); type->revision_min);
goto unlock; ip_set_type_unlock();
return;
} }
list_del_rcu(&type->list); list_del_rcu(&type->list);
pr_debug("type %s, family %s with revision min %u unregistered.\n", pr_debug("type %s, family %s with revision min %u unregistered.\n",
type->name, family_name(type->family), type->revision_min); type->name, family_name(type->family), type->revision_min);
unlock:
ip_set_type_unlock(); ip_set_type_unlock();
synchronize_rcu(); synchronize_rcu();
...@@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, ...@@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC)) !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0; return 0;
read_lock_bh(&set->lock); rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
read_unlock_bh(&set->lock); rcu_read_unlock_bh();
if (ret == -EAGAIN) { if (ret == -EAGAIN) {
/* Type requests element to be completed */ /* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n"); pr_debug("element must be completed, ADD is triggered\n");
write_lock_bh(&set->lock); spin_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt); set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock); spin_unlock_bh(&set->lock);
ret = 1; ret = 1;
} else { } else {
/* --return-nomatch: invert matched element */ /* --return-nomatch: invert matched element */
...@@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, ...@@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC)) !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH; return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock); spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock); spin_unlock_bh(&set->lock);
return ret; return ret;
} }
...@@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, ...@@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC)) !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH; return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock); spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
write_unlock_bh(&set->lock); spin_unlock_bh(&set->lock);
return ret; return ret;
} }
...@@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, ...@@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
if (!set) if (!set)
return -ENOMEM; return -ENOMEM;
rwlock_init(&set->lock); spin_lock_init(&set->lock);
strlcpy(set->name, name, IPSET_MAXNAMELEN); strlcpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family; set->family = family;
set->revision = revision; set->revision = revision;
...@@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set) ...@@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set)
{ {
pr_debug("set: %s\n", set->name); pr_debug("set: %s\n", set->name);
write_lock_bh(&set->lock); spin_lock_bh(&set->lock);
set->variant->flush(set); set->variant->flush(set);
write_unlock_bh(&set->lock); spin_unlock_bh(&set->lock);
} }
static int static int
...@@ -1377,9 +1377,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1377,9 +1377,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
set->variant->uref(set, cb, true); set->variant->uref(set, cb, true);
/* Fall through and add elements */ /* Fall through and add elements */
default: default:
read_lock_bh(&set->lock); rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb); ret = set->variant->list(set, skb, cb);
read_unlock_bh(&set->lock); rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0]) if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */ /* Set is done, proceed with next one */
goto next_set; goto next_set;
...@@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, ...@@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false; bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do { do {
write_lock_bh(&set->lock); spin_lock_bh(&set->lock);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
write_unlock_bh(&set->lock); spin_unlock_bh(&set->lock);
retried = true; retried = true;
} while (ret == -EAGAIN && } while (ret == -EAGAIN &&
set->variant->resize && set->variant->resize &&
...@@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, ...@@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
set->type->adt_policy)) set->type->adt_policy))
return -IPSET_ERR_PROTOCOL; return -IPSET_ERR_PROTOCOL;
read_lock_bh(&set->lock); rcu_read_lock_bh();
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
read_unlock_bh(&set->lock); rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */ /* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN) if (ret == -EAGAIN)
ret = 1; ret = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment