Commit d1665820 authored by David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says:

====================
The following patchset contains Netfilter and IPVS updates for
your net-next tree. Most relevantly, they are:

* Add net namespace support to NFLOG, ULOG, ebt_ulog and NFQUEUE.
  The LOG and ebt_log targets have also been adapted, but they still
  depend on the syslog net namespace, which is not yet available,
  from Gao Feng.

* Don't lose indications of congestion in IPv6 fragmentation handling,
  from Hannes Frederic Sowa.

* IPVS conversion to use RCU, including some code consolidation patches
  and optimizations, from Julian Anastasov.

* CPU fanout support for NFQUEUE, from Holger Eitzenberger.

* Better error reporting to userspace when dropping packets from
  all our _*_[xfrm|route]_me_harder functions, from Patrick McHardy.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 19952cc4 b8dd6a22
@@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #endif
 }

-#ifdef CONFIG_PROC_FS
-#include <linux/proc_fs.h>
-extern struct proc_dir_entry *proc_net_netfilter;
-#endif
-
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
 #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
...
@@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 	skb->_skb_refdst = (unsigned long)dst;
 }

-extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
+extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
+				bool force);
+
+/**
+ * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst.
+ * If dst entry is cached, we do not take reference and dst_release
+ * will be avoided by refdst_drop. If dst entry is not cached, we take
+ * reference, so that last dst_release can destroy the dst immediately.
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	__skb_dst_set_noref(skb, dst, false);
+}
+
+/**
+ * skb_dst_set_noref_force - sets skb dst, without taking reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst.
+ * No reference is taken and no dst_release will be called. While for
+ * cached dsts deferred reclaim is a basic feature, for entries that are
+ * not cached it is caller's job to guarantee that last dst_release for
+ * provided dst happens when nobody uses it, eg. after a RCU grace period.
+ */
+static inline void skb_dst_set_noref_force(struct sk_buff *skb,
+					   struct dst_entry *dst)
+{
+	__skb_dst_set_noref(skb, dst, true);
+}
+
 /**
  * skb_dst_is_noref - Test if skb dst isn't refcounted
...
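The noref pair above is subtle enough to deserve a caller-side illustration. A minimal sketch, assuming an RCU read-side caller; the helper name and the caller_owns_lifetime flag are illustrative, not from the patch:

/* Hypothetical caller sketch: attach a dst under rcu_read_lock() without
 * taking a refcount where possible.  __skb_dst_set_noref() WARNs if no
 * RCU read-side lock is held.
 */
static void example_attach_dst(struct sk_buff *skb, struct dst_entry *dst,
			       bool caller_owns_lifetime)
{
	if (caller_owns_lifetime)
		/* Even for DST_NOCACHE entries: the caller guarantees the
		 * final dst_release() happens only after an RCU grace
		 * period, which is what lets IPVS cache dsts per real
		 * server without per-packet refcounting. */
		skb_dst_set_noref_force(skb, dst);
	else
		/* Takes a reference only for uncached (DST_NOCACHE)
		 * entries; cached ones stay refcount-free. */
		skb_dst_set_noref(skb, dst);
}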
This diff is collapsed.
@@ -17,6 +17,7 @@
 #include <net/netns/ipv6.h>
 #include <net/netns/sctp.h>
 #include <net/netns/dccp.h>
+#include <net/netns/netfilter.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
@@ -94,6 +95,7 @@ struct net {
 	struct netns_dccp	dccp;
 #endif
 #ifdef CONFIG_NETFILTER
+	struct netns_nf		nf;
 	struct netns_xt		xt;
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct netns_ct		ct;
...
@@ -49,12 +49,18 @@ struct nf_logger {
 int nf_log_register(u_int8_t pf, struct nf_logger *logger);
 void nf_log_unregister(struct nf_logger *logger);

-int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger);
-void nf_log_unbind_pf(u_int8_t pf);
+void nf_log_set(struct net *net, u_int8_t pf,
+		const struct nf_logger *logger);
+void nf_log_unset(struct net *net, const struct nf_logger *logger);
+
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+		   const struct nf_logger *logger);
+void nf_log_unbind_pf(struct net *net, u_int8_t pf);

 /* Calls the registered backend logging function */
-__printf(7, 8)
-void nf_log_packet(u_int8_t pf,
+__printf(8, 9)
+void nf_log_packet(struct net *net,
+		   u_int8_t pf,
 		   unsigned int hooknum,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
...
#ifndef __NETNS_NETFILTER_H
#define __NETNS_NETFILTER_H

#include <linux/proc_fs.h>
#include <linux/netfilter.h>

struct nf_logger;

struct netns_nf {
#if defined CONFIG_PROC_FS
	struct proc_dir_entry *proc_netfilter;
#endif
	const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *nf_log_dir_header;
#endif
};
#endif
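The nf_loggers array above replaces the old global logger table with one slot per protocol family per namespace. The matching nf_log.c changes are not shown here, so the following is only a sketch of what nf_log_set() plausibly does with this field, consistent with the declarations above; locking is omitted, and the first-logger-wins behavior is an assumption, not the patch:

/* Sketch: install a default logger for one family in one namespace. */
void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
{
	if (rcu_access_pointer(net->nf.nf_loggers[pf]) == NULL)
		rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
}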
@@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 {
 	__u16 bypass;
 };

+struct xt_NFQ_info_v3 {
+	__u16 queuenum;
+	__u16 queues_total;
+	__u16 flags;
+#define NFQ_FLAG_BYPASS		0x01 /* for compatibility with v2 */
+#define NFQ_FLAG_CPU_FANOUT	0x02 /* use current CPU (no hashing) */
+#define NFQ_FLAG_MASK		0x03
+};
+
 #endif /* _XT_NFQ_TARGET_H */
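The target code that consumes these flags is not part of the visible diff; the following is a sketch of the queue selection the flag comments imply (NFQ_FLAG_CPU_FANOUT picks a queue from the current CPU instead of hashing the flow), so treat the helper name and exact arithmetic as assumptions:

/* Sketch of v3 queue selection: with CPU fanout,
 * queue = queuenum + cpu % queues_total, so packets processed on
 * different CPUs spread across the configured queue range without
 * any per-flow hashing.
 */
static u32 nfq_v3_pick_queue(const struct xt_NFQ_info_v3 *info)
{
	u32 queue = info->queuenum;

	if (info->queues_total > 1 && (info->flags & NFQ_FLAG_CPU_FANOUT))
		queue = info->queuenum +
			smp_processor_id() % info->queues_total;

	return queue;	/* non-fanout multi-queue setups hash instead */
}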
@@ -4,9 +4,9 @@
 #include <linux/types.h>

 struct ip6t_frag {
-	__u32 ids[2];			/* Security Parameter Index */
+	__u32 ids[2];			/* Identification range */
 	__u32 hdrlen;			/* Header Length */
-	__u8  flags;			/*  */
+	__u8  flags;			/* Flags */
 	__u8  invflags;			/* Inverse flags */
 };
...
@@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 	       const char *prefix)
 {
 	unsigned int bitmask;
+	struct net *net = dev_net(in ? in : out);
+
+	/* FIXME: Disabled from containers until syslog ns is supported */
+	if (!net_eq(net, &init_net))
+		return;

 	spin_lock_bh(&ebt_log_lock);
 	printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
@@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
+	struct net *net = dev_net(par->in ? par->in : par->out);

 	li.type = NF_LOG_TYPE_LOG;
 	li.u.log.level = info->loglevel;
 	li.u.log.logflags = info->bitmask;

 	if (info->bitmask & EBT_LOG_NFLOG)
-		nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
-			      par->out, &li, "%s", info->prefix);
+		nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
+			      par->in, par->out, &li, "%s", info->prefix);
 	else
 		ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
 			       par->out, &li, info->prefix);

 	return EBT_CONTINUE;
 }
@@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };

+static int __net_init ebt_log_net_init(struct net *net)
+{
+	nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
+	return 0;
+}
+
+static void __net_exit ebt_log_net_fini(struct net *net)
+{
+	nf_log_unset(net, &ebt_log_logger);
+}
+
+static struct pernet_operations ebt_log_net_ops = {
+	.init = ebt_log_net_init,
+	.exit = ebt_log_net_fini,
+};
+
 static int __init ebt_log_init(void)
 {
 	int ret;

+	ret = register_pernet_subsys(&ebt_log_net_ops);
+	if (ret < 0)
+		goto err_pernet;
+
 	ret = xt_register_target(&ebt_log_tg_reg);
 	if (ret < 0)
-		return ret;
+		goto err_target;
 	nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
+	return 0;

-	return ret;
+err_target:
+	unregister_pernet_subsys(&ebt_log_net_ops);
+err_pernet:
+	return ret;
 }

 static void __exit ebt_log_fini(void)
 {
+	unregister_pernet_subsys(&ebt_log_net_ops);
 	nf_log_unregister(&ebt_log_logger);
 	xt_unregister_target(&ebt_log_tg_reg);
 }
...
@@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
+	struct net *net = dev_net(par->in ? par->in : par->out);

 	li.type = NF_LOG_TYPE_ULOG;
 	li.u.ulog.copy_len = info->len;
 	li.u.ulog.group = info->group;
 	li.u.ulog.qthreshold = info->threshold;

-	nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out,
-		      &li, "%s", info->prefix);
+	nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
+		      par->out, &li, "%s", info->prefix);
 	return EBT_CONTINUE;
 }
...
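Both bridge targets above use the same idiom to recover the namespace from the packet's devices. A small helper form of it, purely illustrative (xt_net_of is a made-up name, not a kernel API):

/* Illustrative helper: derive the netns from whichever device is set.
 * In these hook points at least one of par->in / par->out is expected
 * to be non-NULL.
 */
static inline struct net *xt_net_of(const struct xt_action_param *par)
{
	return dev_net(par->in ? par->in : par->out);
}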
@@ -41,6 +41,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ulog.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include "../br_private.h"
@@ -62,13 +63,22 @@ typedef struct {
 	spinlock_t lock;		/* the per-queue lock */
 } ebt_ulog_buff_t;

-static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
-static struct sock *ebtulognl;
+static int ebt_ulog_net_id __read_mostly;
+struct ebt_ulog_net {
+	unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
+	ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
+	struct sock *ebtulognl;
+};
+
+static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
+{
+	return net_generic(net, ebt_ulog_net_id);
+}

 /* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroup)
+static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
 {
-	ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
+	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];

 	del_timer(&ub->timer);
@@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
 	ub->lastnlh->nlmsg_type = NLMSG_DONE;
 	NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
-	netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
+	netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);

 	ub->qlen = 0;
 	ub->skb = NULL;
@@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
-	spin_lock_bh(&ulog_buffers[data].lock);
-	if (ulog_buffers[data].skb)
-		ulog_send(data);
-	spin_unlock_bh(&ulog_buffers[data].lock);
+	struct ebt_ulog_net *ebt = container_of((void *)data,
+						struct ebt_ulog_net,
+						nlgroup[*(unsigned int *)data]);
+
+	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
+	spin_lock_bh(&ub->lock);
+	if (ub->skb)
+		ulog_send(ebt, *(unsigned int *)data);
+	spin_unlock_bh(&ub->lock);
 }

 static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	ebt_ulog_packet_msg_t *pm;
 	size_t size, copy_len;
 	struct nlmsghdr *nlh;
+	struct net *net = dev_net(in ? in : out);
+	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
 	unsigned int group = uloginfo->nlgroup;
-	ebt_ulog_buff_t *ub = &ulog_buffers[group];
+	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
 	spinlock_t *lock = &ub->lock;
 	ktime_t kt;
@@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 		if (!(ub->skb = ulog_alloc_skb(size)))
 			goto unlock;
 	} else if (size > skb_tailroom(ub->skb)) {
-		ulog_send(group);
+		ulog_send(ebt, group);

 		if (!(ub->skb = ulog_alloc_skb(size)))
 			goto unlock;
@@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	ub->lastnlh = nlh;

 	if (ub->qlen >= uloginfo->qthreshold)
-		ulog_send(group);
+		ulog_send(ebt, group);
 	else if (!timer_pending(&ub->timer)) {
 		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
 		add_timer(&ub->timer);
@@ -277,47 +294,39 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };

-static int __init ebt_ulog_init(void)
+static int __net_init ebt_ulog_net_init(struct net *net)
 {
-	int ret;
 	int i;
+	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
 	struct netlink_kernel_cfg cfg = {
 		.groups	= EBT_ULOG_MAXNLGROUPS,
 	};

-	if (nlbufsiz >= 128*1024) {
-		pr_warning("Netlink buffer has to be <= 128kB,"
-			   " please try a smaller nlbufsiz parameter.\n");
-		return -EINVAL;
-	}
-
 	/* initialize ulog_buffers */
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
-		spin_lock_init(&ulog_buffers[i].lock);
+		ebt->nlgroup[i] = i;
+		setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
+			    (unsigned long)&ebt->nlgroup[i]);
+		spin_lock_init(&ebt->ulog_buffers[i].lock);
 	}

-	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
-	if (!ebtulognl)
-		ret = -ENOMEM;
-	else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
-		netlink_kernel_release(ebtulognl);
+	ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+	if (!ebt->ebtulognl)
+		return -ENOMEM;

-	if (ret == 0)
-		nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
-
-	return ret;
+	nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
+	return 0;
 }

-static void __exit ebt_ulog_fini(void)
+static void __net_exit ebt_ulog_net_fini(struct net *net)
 {
-	ebt_ulog_buff_t *ub;
 	int i;
+	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);

-	nf_log_unregister(&ebt_ulog_logger);
-	xt_unregister_target(&ebt_ulog_tg_reg);
+	nf_log_unset(net, &ebt_ulog_logger);
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		ub = &ulog_buffers[i];
+		ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
 		del_timer(&ub->timer);

 		if (ub->skb) {
@@ -325,7 +334,49 @@ static void __exit ebt_ulog_fini(void)
 			ub->skb = NULL;
 		}
 	}
-	netlink_kernel_release(ebtulognl);
+	netlink_kernel_release(ebt->ebtulognl);
+}
+
+static struct pernet_operations ebt_ulog_net_ops = {
+	.init = ebt_ulog_net_init,
+	.exit = ebt_ulog_net_fini,
+	.id   = &ebt_ulog_net_id,
+	.size = sizeof(struct ebt_ulog_net),
+};
+
+static int __init ebt_ulog_init(void)
+{
+	int ret;
+
+	if (nlbufsiz >= 128*1024) {
+		pr_warn("Netlink buffer has to be <= 128kB,"
+			"please try a smaller nlbufsiz parameter.\n");
+		return -EINVAL;
+	}
+
+	ret = register_pernet_subsys(&ebt_ulog_net_ops);
+	if (ret)
+		goto out_pernet;
+
+	ret = xt_register_target(&ebt_ulog_tg_reg);
+	if (ret)
+		goto out_target;
+
+	nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
+
+	return 0;
+
+out_target:
+	unregister_pernet_subsys(&ebt_ulog_net_ops);
+out_pernet:
+	return ret;
+}
+
+static void __exit ebt_ulog_fini(void)
+{
+	nf_log_unregister(&ebt_ulog_logger);
+	xt_unregister_target(&ebt_ulog_tg_reg);
+	unregister_pernet_subsys(&ebt_ulog_net_ops);
 }

 module_init(ebt_ulog_init);
...
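The converted ulog_timer() relies on a trick worth spelling out: ebt_ulog_net_init() stores nlgroup[i] = i and hands each timer the address of that slot, so the callback can recover both the group index (by dereferencing) and the owning per-net structure (by container_of() on the slot address), with no global state. The same recovery, written out step by step and with locking elided; names mirror the patch but this is an explanatory sketch, not patch code:

static void ulog_timer_annotated(unsigned long data)
{
	unsigned int group = *(unsigned int *)data;	/* nlgroup[i] holds i */
	struct ebt_ulog_net *ebt = container_of((void *)data,
						struct ebt_ulog_net,
						nlgroup[group]);
	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];

	/* ub and ebt->ebtulognl are now both reachable without globals */
	if (ub->skb)
		ulog_send(ebt, group);
}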
@@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
 EXPORT_SYMBOL(__dst_destroy_metrics_generic);

 /**
- * skb_dst_set_noref - sets skb dst, without a reference
+ * __skb_dst_set_noref - sets skb dst, without a reference
  * @skb: buffer
  * @dst: dst entry
+ * @force: if force is set, use noref version even for DST_NOCACHE entries
  *
  * Sets skb dst, assuming a reference was not taken on dst
  * skb_dst_drop() should not dst_release() this dst
  */
-void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
 {
 	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 	/* If dst not in cache, we must take a reference, because
 	 * dst_release() will destroy dst as soon as its refcount becomes zero
 	 */
-	if (unlikely(dst->flags & DST_NOCACHE)) {
+	if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
 		dst_hold(dst);
 		skb_dst_set(skb, dst);
 	} else {
 		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
 	}
 }
-EXPORT_SYMBOL(skb_dst_set_noref);
+EXPORT_SYMBOL(__skb_dst_set_noref);

 /* Dirty hack. We did it in 2.2 (in __dst_free),
  * we have _very_ good reasons not to repeat
...
@@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 	to->nf_trace = from->nf_trace;
 #endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
...
@@ -182,8 +182,7 @@ ipt_get_target_c(const struct ipt_entry *e)
 	return ipt_get_target((struct ipt_entry *)e);
 }

-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 static const char *const hooknames[] = {
 	[NF_INET_PRE_ROUTING]		= "PREROUTING",
 	[NF_INET_LOCAL_IN]		= "INPUT",
@@ -259,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb,
 	const char *hookname, *chainname, *comment;
 	const struct ipt_entry *iter;
 	unsigned int rulenum = 0;
+	struct net *net = dev_net(in ? in : out);

 	table_base = private->entries[smp_processor_id()];
 	root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb,
 		    &chainname, &comment, &rulenum) != 0)
 			break;

-	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+	nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
 		      "TRACE: %s:%s:%s:%u ",
 		      tablename, chainname, comment, rulenum);
 }
@@ -361,8 +361,7 @@ ipt_do_table(struct sk_buff *skb,
 		t = ipt_get_target(e);
 		IP_NF_ASSERT(t->u.kernel.target);

-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
 		if (unlikely(skb->nf_trace))
 			trace_packet(skb, hook, in, out,
...
@@ -45,6 +45,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_ULOG.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include <linux/bitops.h>
 #include <asm/unaligned.h>
@@ -78,15 +79,23 @@ typedef struct {
 	struct timer_list timer;	/* the timer function */
 } ulog_buff_t;

-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];	/* array of buffers */
+static int ulog_net_id __read_mostly;
+struct ulog_net {
+	unsigned int nlgroup[ULOG_MAXNLGROUPS];
+	ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+	struct sock *nflognl;
+	spinlock_t lock;
+};

-static struct sock *nflognl;		/* our socket */
-static DEFINE_SPINLOCK(ulog_lock);	/* spinlock */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+	return net_generic(net, ulog_net_id);
+}

 /* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
 {
-	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+	ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];

 	pr_debug("ulog_send: timer is deleting\n");
 	del_timer(&ub->timer);
@@ -103,7 +112,8 @@ static void ulog_send(unsigned int nlgroupnum)
 	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
 	pr_debug("throwing %d packets to netlink group %u\n",
 		 ub->qlen, nlgroupnum + 1);
-	netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+	netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+			  GFP_ATOMIC);

 	ub->qlen = 0;
 	ub->skb = NULL;
@@ -114,13 +124,16 @@ static void ulog_send(unsigned int nlgroupnum)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
+	struct ulog_net *ulog = container_of((void *)data,
+					     struct ulog_net,
+					     nlgroup[*(unsigned int *)data]);
 	pr_debug("timer function called, calling ulog_send\n");

 	/* lock to protect against somebody modifying our structure
 	 * from ipt_ulog_target at the same time */
-	spin_lock_bh(&ulog_lock);
-	ulog_send(data);
-	spin_unlock_bh(&ulog_lock);
+	spin_lock_bh(&ulog->lock);
+	ulog_send(ulog, data);
+	spin_unlock_bh(&ulog->lock);
 }

 static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -160,6 +173,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	size_t size, copy_len;
 	struct nlmsghdr *nlh;
 	struct timeval tv;
+	struct net *net = dev_net(in ? in : out);
+	struct ulog_net *ulog = ulog_pernet(net);

 	/* ffs == find first bit set, necessary because userspace
 	 * is already shifting groupnumber, but we need unshifted.
@@ -174,9 +189,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	size = nlmsg_total_size(sizeof(*pm) + copy_len);

-	ub = &ulog_buffers[groupnum];
+	ub = &ulog->ulog_buffers[groupnum];

-	spin_lock_bh(&ulog_lock);
+	spin_lock_bh(&ulog->lock);

 	if (!ub->skb) {
 		if (!(ub->skb = ulog_alloc_skb(size)))
@@ -186,7 +201,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		/* either the queue len is too high or we don't have
 		 * enough room in nlskb left. send it to userspace. */
-		ulog_send(groupnum);
+		ulog_send(ulog, groupnum);

 		if (!(ub->skb = ulog_alloc_skb(size)))
 			goto alloc_failure;
@@ -260,16 +275,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	if (ub->qlen >= loginfo->qthreshold) {
 		if (loginfo->qthreshold > 1)
 			nlh->nlmsg_type = NLMSG_DONE;
-		ulog_send(groupnum);
+		ulog_send(ulog, groupnum);
 	}
 out_unlock:
-	spin_unlock_bh(&ulog_lock);
+	spin_unlock_bh(&ulog->lock);

 	return;

 alloc_failure:
 	pr_debug("Error building netlink message\n");
-	spin_unlock_bh(&ulog_lock);
+	spin_unlock_bh(&ulog->lock);
 }

 static unsigned int
@@ -376,54 +391,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };

-static int __init ulog_tg_init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
 {
-	int ret, i;
+	int i;
+	struct ulog_net *ulog = ulog_pernet(net);
 	struct netlink_kernel_cfg cfg = {
 		.groups	= ULOG_MAXNLGROUPS,
 	};

-	pr_debug("init module\n");
-
-	if (nlbufsiz > 128*1024) {
-		pr_warning("Netlink buffer has to be <= 128kB\n");
-		return -EINVAL;
-	}
-
+	spin_lock_init(&ulog->lock);
 	/* initialize ulog_buffers */
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
-		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);

-	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
-	if (!nflognl)
+	ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+	if (!ulog->nflognl)
 		return -ENOMEM;

-	ret = xt_register_target(&ulog_tg_reg);
-	if (ret < 0) {
-		netlink_kernel_release(nflognl);
-		return ret;
-	}
 	if (nflog)
-		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+		nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);

 	return 0;
 }

-static void __exit ulog_tg_exit(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
 {
 	ulog_buff_t *ub;
 	int i;
+	struct ulog_net *ulog = ulog_pernet(net);

-	pr_debug("cleanup_module\n");
-
 	if (nflog)
-		nf_log_unregister(&ipt_ulog_logger);
-	xt_unregister_target(&ulog_tg_reg);
-	netlink_kernel_release(nflognl);
+		nf_log_unset(net, &ipt_ulog_logger);
+
+	netlink_kernel_release(ulog->nflognl);

 	/* remove pending timers and free allocated skb's */
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-		ub = &ulog_buffers[i];
+		ub = &ulog->ulog_buffers[i];
 		pr_debug("timer is deleting\n");
 		del_timer(&ub->timer);
@@ -434,5 +438,50 @@ static void __exit ulog_tg_exit(void)
 	}
 }

+static struct pernet_operations ulog_tg_net_ops = {
+	.init = ulog_tg_net_init,
+	.exit = ulog_tg_net_exit,
+	.id   = &ulog_net_id,
+	.size = sizeof(struct ulog_net),
+};
+
+static int __init ulog_tg_init(void)
+{
+	int ret;
+
+	pr_debug("init module\n");
+
+	if (nlbufsiz > 128*1024) {
+		pr_warn("Netlink buffer has to be <= 128kB\n");
+		return -EINVAL;
+	}
+
+	ret = register_pernet_subsys(&ulog_tg_net_ops);
+	if (ret)
+		goto out_pernet;
+
+	ret = xt_register_target(&ulog_tg_reg);
+	if (ret < 0)
+		goto out_target;
+
+	if (nflog)
+		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+	return 0;
+
+out_target:
+	unregister_pernet_subsys(&ulog_tg_net_ops);
+out_pernet:
+	return ret;
+}
+
+static void __exit ulog_tg_exit(void)
+{
+	pr_debug("cleanup_module\n");
+	if (nflog)
+		nf_log_unregister(&ipt_ulog_logger);
+
+	xt_unregister_target(&ulog_tg_reg);
+	unregister_pernet_subsys(&ulog_tg_net_ops);
+}
+
 module_init(ulog_tg_init);
 module_exit(ulog_tg_exit);
@@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmp: short packet ");
+			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+				      NULL, "nf_ct_icmp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
 		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmp: bad HW ICMP checksum ");
 		return -NF_ACCEPT;
 	}
@@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
 		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmp: invalid ICMP type ");
 		return -NF_ACCEPT;
 	}
...
@@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb,
 	const char *hookname, *chainname, *comment;
 	const struct ip6t_entry *iter;
 	unsigned int rulenum = 0;
+	struct net *net = dev_net(in ? in : out);

 	table_base = private->entries[smp_processor_id()];
 	root = get_entry(table_base, private->hook_entry[hook]);
@@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb,
 		    &chainname, &comment, &rulenum) != 0)
 			break;

-	nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+	nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
 		      "TRACE: %s:%s:%s:%u ",
 		      tablename, chainname, comment, rulenum);
 }
...
@@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 			  type + 128);
 		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
 		if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+				      NULL, NULL,
 				      "nf_ct_icmpv6: invalid new with type %d ",
 				      type + 128);
 		return false;
@@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
 		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmpv6: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
 		return -NF_ACCEPT;
 	}
...
@@ -41,6 +41,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
+#include <net/inet_ecn.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #include <linux/sysctl.h>
 #include <linux/netfilter.h>
@@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 }
 #endif

+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
+
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
 	const struct frag_queue *nq;
@@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
 /* Creation primitives. */
 static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 					 u32 user, struct in6_addr *src,
-					 struct in6_addr *dst)
+					 struct in6_addr *dst, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 	arg.user = user;
 	arg.src = src;
 	arg.dst = dst;
+	arg.ecn = ecn;

 	read_lock_bh(&nf_frags.lock);
 	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
@@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	struct sk_buff *prev, *next;
 	unsigned int payload_len;
 	int offset, end;
+	u8 ecn;

 	if (fq->q.last_in & INET_FRAG_COMPLETE) {
 		pr_debug("Already completed\n");
@@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 		return -1;
 	}

+	ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
@@ -317,6 +327,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
+	fq->ecn |= ecn;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
 	add_frag_mem_limit(&fq->q, skb->truesize);
@@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 {
 	struct sk_buff *fp, *op, *head = fq->q.fragments;
 	int    payload_len;
+	u8 ecn;

 	inet_frag_kill(&fq->q, &nf_frags);

 	WARN_ON(head == NULL);
 	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);

+	ecn = ip_frag_ecn_table[fq->ecn];
+	if (unlikely(ecn == 0xff))
+		goto out_fail;
+
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
 		       sizeof(struct ipv6hdr) + fq->q.len -
@@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
 	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;

 	/* Yes, and fold redundant checksum back. 8) */
@@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
 	local_bh_enable();

-	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+		     ip6_frag_ecn(hdr));
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
...
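The ECN handling above works in three steps: ip6_frag_ecn() turns each fragment's 2-bit codepoint into a one-hot bit, nf_ct_frag6_queue() ORs those bits into fq->ecn, and reassembly folds the accumulated set through ip_frag_ecn_table (shared with the IPv4 reassembly code), where 0xff marks an invalid mix and drops the packet. A sketch of the rule that table encodes, written as logic rather than a verbatim copy of the table:

/* Assumed folding rule (illustration, not the actual table): mixing
 * Not-ECT with any ECT/CE fragment is invalid; otherwise any CE
 * fragment marks the reassembled packet as CE.
 */
static u8 fold_frag_ecn(u8 ecn_bits)	/* OR of 1 << codepoint over frags */
{
	u8 not_ect = 1 << INET_ECN_NOT_ECT;

	if ((ecn_bits & not_ect) && ecn_bits != not_ect)
		return 0xff;		/* invalid mix: drop on reassembly */
	if (ecn_bits & (1 << INET_ECN_CE))
		return INET_ECN_CE;	/* congestion survives reassembly */
	return 0;			/* head fragment's marking is kept */
}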
@@ -276,10 +276,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(nf_nat_decode_session_hook);
 #endif

+static int __net_init netfilter_net_init(struct net *net)
+{
 #ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_net_netfilter;
-EXPORT_SYMBOL(proc_net_netfilter);
+	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+						net->proc_net);
+	if (!net->nf.proc_netfilter) {
+		if (!net_eq(net, &init_net))
+			pr_err("cannot create netfilter proc entry");
+
+		return -ENOMEM;
+	}
 #endif
+	return 0;
+}
+
+static void __net_exit netfilter_net_exit(struct net *net)
+{
+	remove_proc_entry("netfilter", net->proc_net);
+}
+
+static struct pernet_operations netfilter_net_ops = {
+	.init = netfilter_net_init,
+	.exit = netfilter_net_exit,
+};

 void __init netfilter_init(void)
 {
@@ -289,11 +309,8 @@ void __init netfilter_init(void)
 			INIT_LIST_HEAD(&nf_hooks[i][h]);
 	}

-#ifdef CONFIG_PROC_FS
-	proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
-	if (!proc_net_netfilter)
+	if (register_pernet_subsys(&netfilter_net_ops) < 0)
 		panic("cannot create netfilter proc entry");
-#endif

 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
...
@@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
 	module_put(app->module);
 }

+static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
+{
+	kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
+
+	ip_vs_app_inc_destroy(inc);
+}

 /*
  *	Allocate/initialize app incarnation and register it in proto apps.
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
 	return 0;

   out:
-	kfree(inc->timeout_table);
-	kfree(inc);
+	ip_vs_app_inc_destroy(inc);
 	return ret;
 }
@@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)

 	list_del(&inc->a_list);

-	kfree(inc->timeout_table);
-	kfree(inc);
+	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
 }
@@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
 {
 	int result;

-	atomic_inc(&inc->usecnt);
-	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
-		atomic_dec(&inc->usecnt);
+	result = ip_vs_app_get(inc->app);
+	if (result)
+		atomic_inc(&inc->usecnt);
 	return result;
 }
@@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
  */
 void ip_vs_app_inc_put(struct ip_vs_app *inc)
 {
-	ip_vs_app_put(inc->app);
 	atomic_dec(&inc->usecnt);
+	ip_vs_app_put(inc->app);
 }
@@ -218,6 +228,7 @@ struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 /*
  *	ip_vs_app unregistration routine
  *	We are sure there are no app incarnations attached to services
+ *	Caller should use synchronize_rcu() or rcu_barrier()
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
@@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 				 unsigned int flag, __u32 seq, int diff)
 {
 	/* spinlock is to keep updating cp->flags atomic */
-	spin_lock(&cp->lock);
+	spin_lock_bh(&cp->lock);
 	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
 		vseq->previous_delta = vseq->delta;
 		vseq->delta += diff;
 		vseq->init_seq = seq;
 		cp->flags |= flag;
 	}
-	spin_unlock(&cp->lock);
+	spin_unlock_bh(&cp->lock);
 }

 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
...
This diff is collapsed.
@@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 {
 	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
 			      vport, p);
-	p->pe = svc->pe;
+	p->pe = rcu_dereference(svc->pe);
 	if (p->pe && p->pe->fill_param)
 		return p->pe->fill_param(p, skb);
@@ -296,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/* Check if a template already exists */
 	ct = ip_vs_ct_in_get(&param);
 	if (!ct || !ip_vs_check_template(ct)) {
+		struct ip_vs_scheduler *sched;
+
 		/*
 		 * No template found or the dest of the connection
 		 * template is not available.
 		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
-		dest = svc->scheduler->schedule(svc, skb);
+		sched = rcu_dereference(svc->scheduler);
+		dest = sched->schedule(svc, skb);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
@@ -391,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 {
 	struct ip_vs_protocol *pp = pd->pp;
 	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_scheduler *sched;
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr;
 	unsigned int flags;
@@ -446,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NULL;
 	}

-	dest = svc->scheduler->schedule(svc, skb);
+	sched = rcu_dereference(svc->scheduler);
+	dest = sched->schedule(svc, skb);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
 		return NULL;
@@ -504,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
 	if (pptr == NULL) {
-		ip_vs_service_put(svc);
 		return NF_DROP;
 	}
@@ -530,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 			      IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };

-		ip_vs_service_put(svc);
-
 		/* create a new connection entry */
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
@@ -568,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 * listed in the ipvs table), pass the packets, because it is
 	 * not ipvs job to decide to drop the packets.
 	 */
-	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
-		ip_vs_service_put(svc);
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
 		return NF_ACCEPT;
-	}
-
-	ip_vs_service_put(svc);

 	/*
 	 * Notify the client that the destination is unreachable, and
@@ -640,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)

 static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	int err = ip_defrag(skb, user);
+	int err;

+	local_bh_disable();
+	err = ip_defrag(skb, user);
+	local_bh_enable();
 	if (!err)
 		ip_send_check(ip_hdr(skb));
@@ -1161,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 					 sizeof(_ports), _ports, &iph);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_lookup_real_service(net, af, iph.protocol,
-					      &iph.saddr,
-					      pptr[0])) {
+		if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+					   pptr[0])) {
 			/*
 			 * Notify the real server: there is no
 			 * existing entry if it is not RST
@@ -1220,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_out(hooknum, skb, AF_INET);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_out(hooknum, skb, AF_INET);
 }

 #ifdef CONFIG_IP_VS_IPV6
@@ -1253,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_out(hooknum, skb, AF_INET6);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_out(hooknum, skb, AF_INET6);
 }
 #endif
@@ -1395,10 +1383,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 			goto ignore_ipip;
 		/* Prefer the resulting PMTU */
 		if (dest) {
-			spin_lock(&dest->dst_lock);
-			if (dest->dst_cache)
-				mtu = dst_mtu(dest->dst_cache);
-			spin_unlock(&dest->dst_lock);
+			struct ip_vs_dest_dst *dest_dst;
+
+			rcu_read_lock();
+			dest_dst = rcu_dereference(dest->dest_dst);
+			if (dest_dst)
+				mtu = dst_mtu(dest_dst->dst_cache);
+			rcu_read_unlock();
 		}
 		if (mtu > 68 + sizeof(struct iphdr))
 			mtu -= sizeof(struct iphdr);
@@ -1714,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_in(hooknum, skb, AF_INET);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_in(hooknum, skb, AF_INET);
 }

 #ifdef CONFIG_IP_VS_IPV6
@@ -1779,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_in(hooknum, skb, AF_INET6);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_in(hooknum, skb, AF_INET6);
 }
 #endif
...
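The rcu_dereference() readers above pair with writer-side updates in ip_vs_ctl.c, which is collapsed here; the publication presumably looks something like the following sketch. Treat it as an assumption for illustration, not the patch itself:

/* Hypothetical writer side: publish the new scheduler, then wait out
 * current readers before tearing down the old one.
 */
static void example_swap_scheduler(struct ip_vs_service *svc,
				   struct ip_vs_scheduler *new_sched)
{
	struct ip_vs_scheduler *old;

	old = rcu_dereference_protected(svc->scheduler, 1);
	rcu_assign_pointer(svc->scheduler, new_sched);
	synchronize_rcu();		/* readers now only see new_sched */
	if (old && old->done_service)
		old->done_service(svc);	/* safe: no readers hold "old" */
}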
This diff is collapsed.
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
* IPVS DH bucket * IPVS DH bucket
*/ */
struct ip_vs_dh_bucket { struct ip_vs_dh_bucket {
struct ip_vs_dest *dest; /* real server (cache) */ struct ip_vs_dest __rcu *dest; /* real server (cache) */
}; };
/* /*
...@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket { ...@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
struct ip_vs_dh_state {
struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
struct rcu_head rcu_head;
};
/* /*
* Returns hash value for IPVS DH entry * Returns hash value for IPVS DH entry
...@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad ...@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters. * Get ip_vs_dest associated with supplied parameters.
*/ */
static inline struct ip_vs_dest * static inline struct ip_vs_dest *
ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
const union nf_inet_addr *addr)
{ {
return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
} }
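With the buckets now wrapped in struct ip_vs_dh_state, the lookup side dereferences each bucket's destination with rcu_dereference(), so the packet path never takes a service lock. A condensed sketch of the accessor pattern, assuming the caller already holds rcu_read_lock(); names here are hypothetical, not the IPVS ones.

#include <linux/rcupdate.h>
#include <linux/types.h>

struct server;	/* opaque destination type for this sketch */

#define TAB_SIZE 256
#define TAB_MASK (TAB_SIZE - 1)

struct bucket {
	struct server __rcu *dest;
};

struct hash_state {
	struct bucket buckets[TAB_SIZE];
	struct rcu_head rcu_head;	/* lets teardown use kfree_rcu() */
};

/* Caller must hold rcu_read_lock(); the returned pointer is only
 * guaranteed to stay valid until rcu_read_unlock().
 */
static struct server *state_lookup(struct hash_state *s, u32 hash)
{
	return rcu_dereference(s->buckets[hash & TAB_MASK].dest);
}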
...@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, ...@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
* Assign all the hash buckets of the specified table with the service. * Assign all the hash buckets of the specified table with the service.
*/ */
static int static int
ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
{ {
int i; int i;
struct ip_vs_dh_bucket *b; struct ip_vs_dh_bucket *b;
struct list_head *p; struct list_head *p;
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
bool empty;
b = tbl; b = &s->buckets[0];
p = &svc->destinations; p = &svc->destinations;
empty = list_empty(p);
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
if (list_empty(p)) { dest = rcu_dereference_protected(b->dest, 1);
b->dest = NULL; if (dest)
} else { ip_vs_dest_put(dest);
if (empty)
RCU_INIT_POINTER(b->dest, NULL);
else {
if (p == &svc->destinations) if (p == &svc->destinations)
p = p->next; p = p->next;
dest = list_entry(p, struct ip_vs_dest, n_list); dest = list_entry(p, struct ip_vs_dest, n_list);
atomic_inc(&dest->refcnt); ip_vs_dest_hold(dest);
b->dest = dest; RCU_INIT_POINTER(b->dest, dest);
p = p->next; p = p->next;
} }
...@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) ...@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
/* /*
* Flush all the hash buckets of the specified table. * Flush all the hash buckets of the specified table.
*/ */
static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
{ {
int i; int i;
struct ip_vs_dh_bucket *b; struct ip_vs_dh_bucket *b;
struct ip_vs_dest *dest;
b = tbl; b = &s->buckets[0];
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
if (b->dest) { dest = rcu_dereference_protected(b->dest, 1);
atomic_dec(&b->dest->refcnt); if (dest) {
b->dest = NULL; ip_vs_dest_put(dest);
RCU_INIT_POINTER(b->dest, NULL);
} }
b++; b++;
} }
...@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) ...@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
static int ip_vs_dh_init_svc(struct ip_vs_service *svc) static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
{ {
struct ip_vs_dh_bucket *tbl; struct ip_vs_dh_state *s;
/* allocate the DH table for this service */ /* allocate the DH table for this service */
tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
GFP_KERNEL); if (s == NULL)
if (tbl == NULL)
return -ENOMEM; return -ENOMEM;
svc->sched_data = tbl; svc->sched_data = s;
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
"current service\n", "current service\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
/* assign the hash buckets with the updated service */ /* assign the hash buckets with current dests */
ip_vs_dh_assign(tbl, svc); ip_vs_dh_reassign(s, svc);
return 0; return 0;
} }
static int ip_vs_dh_done_svc(struct ip_vs_service *svc) static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
{ {
struct ip_vs_dh_bucket *tbl = svc->sched_data; struct ip_vs_dh_state *s = svc->sched_data;
/* got to clean up hash buckets here */ /* got to clean up hash buckets here */
ip_vs_dh_flush(tbl); ip_vs_dh_flush(s);
/* release the table itself */ /* release the table itself */
kfree(svc->sched_data); kfree_rcu(s, rcu_head);
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
return 0;
} }
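ip_vs_dh_done_svc now releases the scheduler state with kfree_rcu() rather than kfree(): a scheduler instance on another CPU may still be walking the buckets inside an RCU read-side section, so the memory has to survive one grace period. This works only because struct ip_vs_dh_state embeds an rcu_head. The idiom in isolation:

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct state {
	int data;
	struct rcu_head rcu_head;	/* storage for the deferred free */
};

static void release_state(struct state *s)
{
	/* Equivalent to call_rcu() with a kfree() callback: concurrent
	 * readers inside rcu_read_lock() keep a valid pointer until the
	 * grace period ends, then the object is freed.
	 */
	kfree_rcu(s, rcu_head);
}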
static int ip_vs_dh_update_svc(struct ip_vs_service *svc) static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
struct ip_vs_dest *dest)
{ {
struct ip_vs_dh_bucket *tbl = svc->sched_data; struct ip_vs_dh_state *s = svc->sched_data;
/* got to clean up hash buckets here */
ip_vs_dh_flush(tbl);
/* assign the hash buckets with the updated service */ /* assign the hash buckets with the updated service */
ip_vs_dh_assign(tbl, svc); ip_vs_dh_reassign(s, svc);
return 0; return 0;
} }
...@@ -212,19 +217,20 @@ static struct ip_vs_dest * ...@@ -212,19 +217,20 @@ static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
struct ip_vs_dh_bucket *tbl; struct ip_vs_dh_state *s;
struct ip_vs_iphdr iph; struct ip_vs_iphdr iph;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph); ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
tbl = (struct ip_vs_dh_bucket *)svc->sched_data; s = (struct ip_vs_dh_state *) svc->sched_data;
dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
if (!dest if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE) || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0 || atomic_read(&dest->weight) <= 0
|| is_overloaded(dest)) { || is_overloaded(dest)) {
ip_vs_scheduler_err(svc, "no destination available");
return NULL; return NULL;
} }
...@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = ...@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
.init_service = ip_vs_dh_init_svc, .init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc, .done_service = ip_vs_dh_done_svc,
.update_service = ip_vs_dh_update_svc, .add_dest = ip_vs_dh_dest_changed,
.del_dest = ip_vs_dh_dest_changed,
.schedule = ip_vs_dh_schedule, .schedule = ip_vs_dh_schedule,
}; };
...@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void) ...@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
static void __exit ip_vs_dh_cleanup(void) static void __exit ip_vs_dh_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
synchronize_rcu();
} }
......
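The synchronize_rcu() added to ip_vs_dh_cleanup() (and mirrored in the other scheduler modules below) is what makes module unload safe: after unregistering, some CPU may still be executing the scheduler's callbacks from inside an RCU read-side critical section, and the module text must not go away until those readers drain. A sketch of the exit-path ordering, with a hypothetical unregister function:

#include <linux/module.h>
#include <linux/rcupdate.h>

void unregister_example_hooks(void);	/* hypothetical unpublish step */

static void __exit example_cleanup(void)
{
	/* 1. Unpublish: new readers can no longer find our callbacks. */
	unregister_example_hooks();

	/* 2. Wait for every pre-existing rcu_read_lock() section to end
	 *    before the module text is unmapped.
	 */
	synchronize_rcu();
}
module_exit(example_cleanup);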
...@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, ...@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted * hopefully it will succeed on the retransmitted
* packet. * packet.
*/ */
rcu_read_lock();
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4, iph->ihl * 4,
start-data, end-start, start-data, end-start,
buf, buf_len); buf, buf_len);
rcu_read_unlock();
if (ret) { if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp, ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0); IPPROTO_TCP, 0, 0);
...@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void) ...@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
int rv; int rv;
rv = register_pernet_subsys(&ip_vs_ftp_ops); rv = register_pernet_subsys(&ip_vs_ftp_ops);
/* rcu_barrier() is called by netns on error */
return rv; return rv;
} }
...@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void) ...@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
static void __exit ip_vs_ftp_exit(void) static void __exit ip_vs_ftp_exit(void)
{ {
unregister_pernet_subsys(&ip_vs_ftp_ops); unregister_pernet_subsys(&ip_vs_ftp_ops);
/* rcu_barrier() is called by netns */
} }
......
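In ip_vs_ftp the payload-mangling call gains an rcu_read_lock()/rcu_read_unlock() bracket: nf_nat_mangle_tcp_packet() is reached through NAT hooks that are themselves RCU-protected, so the caller must sit inside a read-side section for the duration of the call. The generic shape of calling through an RCU-published hook, with a hypothetical hook type:

#include <linux/rcupdate.h>

struct hook_ops {
	int (*mangle)(void *pkt);
};

static struct hook_ops __rcu *active_hook;	/* set with rcu_assign_pointer() */

static int call_hook(void *pkt)
{
	struct hook_ops *ops;
	int ret = 0;

	rcu_read_lock();
	ops = rcu_dereference(active_hook);
	if (ops)
		ret = ops->mangle(pkt);	/* ops is valid only inside the section */
	rcu_read_unlock();
	return ret;
}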
...@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ...@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* served, but no new connection is assigned to the server. * served, but no new connection is assigned to the server.
*/ */
list_for_each_entry(dest, &svc->destinations, n_list) { list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
atomic_read(&dest->weight) == 0) atomic_read(&dest->weight) == 0)
continue; continue;
...@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void) ...@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
static void __exit ip_vs_lc_cleanup(void) static void __exit ip_vs_lc_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_lc_init); module_init(ip_vs_lc_init);
......
...@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ...@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections. * new connections.
*/ */
list_for_each_entry(dest, &svc->destinations, n_list) { list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD || if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
!atomic_read(&dest->weight)) !atomic_read(&dest->weight))
...@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void) ...@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
static void __exit ip_vs_nq_cleanup(void) static void __exit ip_vs_nq_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_nq_init); module_init(ip_vs_nq_init);
......
...@@ -13,20 +13,8 @@ ...@@ -13,20 +13,8 @@
/* IPVS pe list */ /* IPVS pe list */
static LIST_HEAD(ip_vs_pe); static LIST_HEAD(ip_vs_pe);
/* lock for service table */ /* semaphore for IPVS PEs. */
static DEFINE_SPINLOCK(ip_vs_pe_lock); static DEFINE_MUTEX(ip_vs_pe_mutex);
/* Bind a service with a pe */
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
{
svc->pe = pe;
}
/* Unbind a service from its pe */
void ip_vs_unbind_pe(struct ip_vs_service *svc)
{
svc->pe = NULL;
}
/* Get pe in the pe list by name */ /* Get pe in the pe list by name */
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
...@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) ...@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name); pe_name);
spin_lock_bh(&ip_vs_pe_lock); rcu_read_lock();
list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
list_for_each_entry(pe, &ip_vs_pe, n_list) {
/* Test and get the modules atomically */ /* Test and get the modules atomically */
if (pe->module && if (pe->module &&
!try_module_get(pe->module)) { !try_module_get(pe->module)) {
...@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) ...@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
} }
if (strcmp(pe_name, pe->name)==0) { if (strcmp(pe_name, pe->name)==0) {
/* HIT */ /* HIT */
spin_unlock_bh(&ip_vs_pe_lock); rcu_read_unlock();
return pe; return pe;
} }
if (pe->module) if (pe->module)
module_put(pe->module); module_put(pe->module);
} }
rcu_read_unlock();
spin_unlock_bh(&ip_vs_pe_lock);
return NULL; return NULL;
} }
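__ip_vs_pe_getbyname() drops the BH-disabling spinlock in favour of plain RCU list traversal; try_module_get() still pins the owning module before the pointer is allowed to escape the read-side section. A sketch of this lookup-and-pin pattern over a simplified registry (type and list names are illustrative):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>

struct engine {
	struct list_head n_list;
	const char *name;
	struct module *module;
};

static LIST_HEAD(engine_list);

static struct engine *engine_get_byname(const char *name)
{
	struct engine *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &engine_list, n_list) {
		/* Pin the module before comparing: a hit that escapes the
		 * RCU section must not outlive its owner.
		 */
		if (e->module && !try_module_get(e->module))
			continue;
		if (strcmp(name, e->name) == 0) {
			rcu_read_unlock();
			return e;	/* caller drops it with module_put() */
		}
		if (e->module)
			module_put(e->module);
	}
	rcu_read_unlock();
	return NULL;
}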
...@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) ...@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
/* increase the module use count */ /* increase the module use count */
ip_vs_use_count_inc(); ip_vs_use_count_inc();
spin_lock_bh(&ip_vs_pe_lock); mutex_lock(&ip_vs_pe_mutex);
if (!list_empty(&pe->n_list)) {
spin_unlock_bh(&ip_vs_pe_lock);
ip_vs_use_count_dec();
pr_err("%s(): [%s] pe already linked\n",
__func__, pe->name);
return -EINVAL;
}
/* Make sure that the pe with this name doesn't exist /* Make sure that the pe with this name doesn't exist
* in the pe list. * in the pe list.
*/ */
list_for_each_entry(tmp, &ip_vs_pe, n_list) { list_for_each_entry(tmp, &ip_vs_pe, n_list) {
if (strcmp(tmp->name, pe->name) == 0) { if (strcmp(tmp->name, pe->name) == 0) {
spin_unlock_bh(&ip_vs_pe_lock); mutex_unlock(&ip_vs_pe_mutex);
ip_vs_use_count_dec(); ip_vs_use_count_dec();
pr_err("%s(): [%s] pe already existed " pr_err("%s(): [%s] pe already existed "
"in the system\n", __func__, pe->name); "in the system\n", __func__, pe->name);
...@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) ...@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
} }
} }
/* Add it into the d-linked pe list */ /* Add it into the d-linked pe list */
list_add(&pe->n_list, &ip_vs_pe); list_add_rcu(&pe->n_list, &ip_vs_pe);
spin_unlock_bh(&ip_vs_pe_lock); mutex_unlock(&ip_vs_pe_mutex);
pr_info("[%s] pe registered.\n", pe->name); pr_info("[%s] pe registered.\n", pe->name);
...@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe); ...@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
/* Unregister a pe from the pe list */ /* Unregister a pe from the pe list */
int unregister_ip_vs_pe(struct ip_vs_pe *pe) int unregister_ip_vs_pe(struct ip_vs_pe *pe)
{ {
spin_lock_bh(&ip_vs_pe_lock); mutex_lock(&ip_vs_pe_mutex);
if (list_empty(&pe->n_list)) {
spin_unlock_bh(&ip_vs_pe_lock);
pr_err("%s(): [%s] pe is not in the list. failed\n",
__func__, pe->name);
return -EINVAL;
}
/* Remove it from the d-linked pe list */ /* Remove it from the d-linked pe list */
list_del(&pe->n_list); list_del_rcu(&pe->n_list);
spin_unlock_bh(&ip_vs_pe_lock); mutex_unlock(&ip_vs_pe_mutex);
/* decrease the module use count */ /* decrease the module use count */
ip_vs_use_count_dec(); ip_vs_use_count_dec();
......
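Registration now serializes writers on ip_vs_pe_mutex while readers stay on RCU: list_add_rcu()/list_del_rcu() keep the list walkable at every instant, the mutex merely excludes concurrent updaters, and the old double-add/double-remove sanity checks go away with the error paths they guarded. The writer side in isolation, under the same assumptions:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mutex.h>
#include <linux/string.h>

struct engine {
	struct list_head n_list;
	const char *name;
};

static LIST_HEAD(engine_list);
static DEFINE_MUTEX(engine_mutex);

static int engine_register(struct engine *e)
{
	struct engine *tmp;

	mutex_lock(&engine_mutex);
	list_for_each_entry(tmp, &engine_list, n_list) {
		if (strcmp(tmp->name, e->name) == 0) {
			mutex_unlock(&engine_mutex);
			return -EEXIST;	/* name already registered */
		}
	}
	/* Publish: RCU readers see either the old or the new list,
	 * never a half-linked entry.
	 */
	list_add_rcu(&e->n_list, &engine_list);
	mutex_unlock(&engine_mutex);
	return 0;
}

static void engine_unregister(struct engine *e)
{
	mutex_lock(&engine_mutex);
	list_del_rcu(&e->n_list);	/* ongoing readers may still see e */
	mutex_unlock(&engine_mutex);
	/* The caller must wait one grace period before freeing e. */
}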
...@@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void) ...@@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void)
static void __exit ip_vs_sip_cleanup(void) static void __exit ip_vs_sip_cleanup(void)
{ {
unregister_ip_vs_pe(&ip_vs_sip_pe); unregister_ip_vs_pe(&ip_vs_sip_pe);
synchronize_rcu();
} }
module_init(ip_vs_sip_init); module_init(ip_vs_sip_init);
......
...@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL) if (sch == NULL)
return 0; return 0;
net = skb_net(skb); net = skb_net(skb);
rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) && if ((sch->type == SCTP_CID_INIT) &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) { &iph->daddr, sh->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(net_ipvs(net))) {
...@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
...@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }
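The protocol schedulers stop taking a reference on the service: ip_vs_service_get()/ip_vs_service_put() become a lockless ip_vs_service_find() bracketed by rcu_read_lock()/rcu_read_unlock(), which is why each early-exit path above now unlocks instead of dropping a refcount. The shape of the conversion as a hedged sketch; all names below are placeholders:

#include <linux/rcupdate.h>

struct packet;
struct service;
struct service *service_find(struct packet *pkt);	/* lockless lookup */
int do_schedule(struct service *svc, struct packet *pkt);

static int schedule_packet(struct packet *pkt)
{
	struct service *svc;
	int verdict = 1;	/* "accept" in this sketch */

	rcu_read_lock();
	svc = service_find(pkt);	/* no reference taken */
	if (svc)
		verdict = do_schedule(svc, pkt);	/* must complete here */
	rcu_read_unlock();	/* svc must not be touched past this point */
	return verdict;
}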
...@@ -994,9 +994,9 @@ static void ...@@ -994,9 +994,9 @@ static void
sctp_state_transition(struct ip_vs_conn *cp, int direction, sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_proto_data *pd) const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{ {
spin_lock(&cp->lock); spin_lock_bh(&cp->lock);
set_sctp_state(pd, cp, direction, skb); set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock); spin_unlock_bh(&cp->lock);
} }
static inline __u16 sctp_app_hashkey(__be16 port) static inline __u16 sctp_app_hashkey(__be16 port)
...@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc) ...@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
hash = sctp_app_hashkey(port); hash = sctp_app_hashkey(port);
spin_lock_bh(&ipvs->sctp_app_lock);
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) { if (i->port == port) {
ret = -EEXIST; ret = -EEXIST;
goto out; goto out;
} }
} }
list_add(&inc->p_list, &ipvs->sctp_apps[hash]); list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
atomic_inc(&pd->appcnt); atomic_inc(&pd->appcnt);
out: out:
spin_unlock_bh(&ipvs->sctp_app_lock);
return ret; return ret;
} }
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt); atomic_dec(&pd->appcnt);
list_del(&inc->p_list); list_del_rcu(&inc->p_list);
spin_unlock_bh(&ipvs->sctp_app_lock);
} }
static int sctp_app_conn_bind(struct ip_vs_conn *cp) static int sctp_app_conn_bind(struct ip_vs_conn *cp)
...@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */ /* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport); hash = sctp_app_hashkey(cp->vport);
spin_lock(&ipvs->sctp_app_lock); rcu_read_lock();
list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) { if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc))) if (unlikely(!ip_vs_app_inc_get(inc)))
break; break;
spin_unlock(&ipvs->sctp_app_lock); rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n", "%s:%u to app %s on port %u\n",
...@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
goto out; goto out;
} }
} }
spin_unlock(&ipvs->sctp_app_lock); rcu_read_unlock();
out: out:
return result; return result;
} }
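sctp_app_conn_bind() (and its TCP/UDP twins below) shows the complementary pattern: when a match has to outlive the read-side section, a real reference is taken inside it (ip_vs_app_inc_get()) before rcu_read_unlock(). A generic sketch of promoting an RCU hit into a stable reference, using kref as a stand-in for the IPVS use count:

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct app {
	struct list_head p_list;
	unsigned short port;
	struct kref ref;
};

static struct app *app_find_and_hold(struct list_head *bucket,
				     unsigned short port)
{
	struct app *a;

	rcu_read_lock();
	list_for_each_entry_rcu(a, bucket, p_list) {
		if (a->port == port) {
			/* Fails if the object is already being torn
			 * down; treat such a hit as stale.
			 */
			if (!kref_get_unless_zero(&a->ref))
				break;
			rcu_read_unlock();
			return a;	/* stable; caller puts the ref */
		}
	}
	rcu_read_unlock();
	return NULL;
}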
...@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) ...@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
spin_lock_init(&ipvs->sctp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts)); sizeof(sctp_timeouts));
if (!pd->timeout_table) if (!pd->timeout_table)
......
...@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
} }
net = skb_net(skb); net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
if (th->syn && if (th->syn &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, th->dest))) { &iph->daddr, th->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(net_ipvs(net))) {
...@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
...@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }
...@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, ...@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
if (th == NULL) if (th == NULL)
return; return;
spin_lock(&cp->lock); spin_lock_bh(&cp->lock);
set_tcp_state(pd, cp, direction, th); set_tcp_state(pd, cp, direction, th);
spin_unlock(&cp->lock); spin_unlock_bh(&cp->lock);
} }
static inline __u16 tcp_app_hashkey(__be16 port) static inline __u16 tcp_app_hashkey(__be16 port)
...@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) ...@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
hash = tcp_app_hashkey(port); hash = tcp_app_hashkey(port);
spin_lock_bh(&ipvs->tcp_app_lock);
list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) { if (i->port == port) {
ret = -EEXIST; ret = -EEXIST;
goto out; goto out;
} }
} }
list_add(&inc->p_list, &ipvs->tcp_apps[hash]); list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
atomic_inc(&pd->appcnt); atomic_inc(&pd->appcnt);
out: out:
spin_unlock_bh(&ipvs->tcp_app_lock);
return ret; return ret;
} }
...@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) ...@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void static void
tcp_unregister_app(struct net *net, struct ip_vs_app *inc) tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
spin_lock_bh(&ipvs->tcp_app_lock);
atomic_dec(&pd->appcnt); atomic_dec(&pd->appcnt);
list_del(&inc->p_list); list_del_rcu(&inc->p_list);
spin_unlock_bh(&ipvs->tcp_app_lock);
} }
...@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */ /* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport); hash = tcp_app_hashkey(cp->vport);
spin_lock(&ipvs->tcp_app_lock); rcu_read_lock();
list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) { if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc))) if (unlikely(!ip_vs_app_inc_get(inc)))
break; break;
spin_unlock(&ipvs->tcp_app_lock); rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n", "%s:%u to app %s on port %u\n",
...@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out; goto out;
} }
} }
spin_unlock(&ipvs->tcp_app_lock); rcu_read_unlock();
out: out:
return result; return result;
...@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) ...@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{ {
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
spin_lock(&cp->lock); spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN; cp->state = IP_VS_TCP_S_LISTEN;
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
: tcp_timeouts[IP_VS_TCP_S_LISTEN]); : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
spin_unlock(&cp->lock); spin_unlock_bh(&cp->lock);
} }
/* --------------------------------------------- /* ---------------------------------------------
...@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) ...@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts)); sizeof(tcp_timeouts));
if (!pd->timeout_table) if (!pd->timeout_table)
......
...@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0; return 0;
} }
net = skb_net(skb); net = skb_net(skb);
svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, rcu_read_lock();
&iph->daddr, uh->dest); svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, uh->dest);
if (svc) { if (svc) {
int ignored; int ignored;
...@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
...@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }
...@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) ...@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
hash = udp_app_hashkey(port); hash = udp_app_hashkey(port);
spin_lock_bh(&ipvs->udp_app_lock);
list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) { if (i->port == port) {
ret = -EEXIST; ret = -EEXIST;
goto out; goto out;
} }
} }
list_add(&inc->p_list, &ipvs->udp_apps[hash]); list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
atomic_inc(&pd->appcnt); atomic_inc(&pd->appcnt);
out: out:
spin_unlock_bh(&ipvs->udp_app_lock);
return ret; return ret;
} }
...@@ -380,12 +377,9 @@ static void ...@@ -380,12 +377,9 @@ static void
udp_unregister_app(struct net *net, struct ip_vs_app *inc) udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{ {
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
struct netns_ipvs *ipvs = net_ipvs(net);
spin_lock_bh(&ipvs->udp_app_lock);
atomic_dec(&pd->appcnt); atomic_dec(&pd->appcnt);
list_del(&inc->p_list); list_del_rcu(&inc->p_list);
spin_unlock_bh(&ipvs->udp_app_lock);
} }
...@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */ /* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport); hash = udp_app_hashkey(cp->vport);
spin_lock(&ipvs->udp_app_lock); rcu_read_lock();
list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) { if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc))) if (unlikely(!ip_vs_app_inc_get(inc)))
break; break;
spin_unlock(&ipvs->udp_app_lock); rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n", "%s:%u to app %s on port %u\n",
...@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) ...@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
goto out; goto out;
} }
} }
spin_unlock(&ipvs->udp_app_lock); rcu_read_unlock();
out: out:
return result; return result;
...@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) ...@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
spin_lock_init(&ipvs->udp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts)); sizeof(udp_timeouts));
if (!pd->timeout_table) if (!pd->timeout_table)
......
...@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) ...@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
} }
static int ip_vs_rr_update_svc(struct ip_vs_service *svc) static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
{ {
svc->sched_data = &svc->destinations; struct list_head *p;
spin_lock_bh(&svc->sched_lock);
p = (struct list_head *) svc->sched_data;
/* dest is already unlinked, so p->prev is not valid but
* p->next is valid, use it to reach previous entry.
*/
if (p == &dest->n_list)
svc->sched_data = p->next->prev;
spin_unlock_bh(&svc->sched_lock);
return 0; return 0;
} }
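ip_vs_rr_del_dest() replaces the old update_service callback: instead of resetting the round-robin cursor to the list head on every change, it adjusts the cursor only when the destination being removed is the one the cursor points at. Because the dest was already unlinked with the RCU list primitives, its ->prev is stale but its ->next still points into the live list, which is what the p->next->prev expression exploits. The adjustment in isolation, assuming the same unlink ordering:

#include <linux/list.h>
#include <linux/spinlock.h>

/* Called after list_del_rcu(dead): dead->next still points at the
 * live successor (RCU keeps forward links intact for readers), while
 * dead->prev must not be trusted.
 */
static void cursor_fixup(struct list_head **cursor, struct list_head *dead,
			 spinlock_t *lock)
{
	spin_lock_bh(lock);
	if (*cursor == dead) {
		/* The successor's ->prev is the live predecessor of the
		 * removed entry, i.e. where the cursor logically was.
		 */
		*cursor = dead->next->prev;
	}
	spin_unlock_bh(lock);
}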
...@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc) ...@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
static struct ip_vs_dest * static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{ {
struct list_head *p, *q; struct list_head *p;
struct ip_vs_dest *dest; struct ip_vs_dest *dest, *last;
int pass = 0;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
write_lock(&svc->sched_lock); spin_lock_bh(&svc->sched_lock);
p = (struct list_head *)svc->sched_data; p = (struct list_head *) svc->sched_data;
p = p->next; last = dest = list_entry(p, struct ip_vs_dest, n_list);
q = p;
do { do {
/* skip list head */ list_for_each_entry_continue_rcu(dest,
if (q == &svc->destinations) { &svc->destinations,
q = q->next; n_list) {
continue; if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0)
/* HIT */
goto out;
if (dest == last)
goto stop;
} }
pass++;
dest = list_entry(q, struct ip_vs_dest, n_list); /* Previous dest could be unlinked, do not loop forever.
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && * If we stay at head there is no need for 2nd pass.
atomic_read(&dest->weight) > 0) */
/* HIT */ } while (pass < 2 && p != &svc->destinations);
goto out;
q = q->next; stop:
} while (q != p); spin_unlock_bh(&svc->sched_lock);
write_unlock(&svc->sched_lock);
ip_vs_scheduler_err(svc, "no destination available"); ip_vs_scheduler_err(svc, "no destination available");
return NULL; return NULL;
out: out:
svc->sched_data = q; svc->sched_data = &dest->n_list;
write_unlock(&svc->sched_lock); spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u " IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n", "activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
...@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { ...@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.module = THIS_MODULE, .module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc, .init_service = ip_vs_rr_init_svc,
.update_service = ip_vs_rr_update_svc, .add_dest = NULL,
.del_dest = ip_vs_rr_del_dest,
.schedule = ip_vs_rr_schedule, .schedule = ip_vs_rr_schedule,
}; };
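The rewritten ip_vs_rr_schedule() walks the destination list with list_for_each_entry_continue_rcu() starting from the saved cursor and bounds itself to two passes, so a concurrently unlinked starting entry cannot turn the walk into an endless loop. A condensed sketch of the two-pass continue-from-cursor idiom (field names simplified; caller holds rcu_read_lock() and the cursor lock):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct server {
	struct list_head n_list;
	int usable;	/* stands in for the weight/overload checks */
};

static struct server *rr_pick(struct list_head *head,
			      struct list_head **cursor)
{
	struct server *srv, *last;
	int pass = 0;

	last = srv = list_entry(*cursor, struct server, n_list);
	do {
		list_for_each_entry_continue_rcu(srv, head, n_list) {
			if (srv->usable) {
				*cursor = &srv->n_list;	/* remember position */
				return srv;
			}
			if (srv == last)
				return NULL;	/* full circle, nothing usable */
		}
		pass++;
		/* Wrapped past the head; allow at most one more pass in
		 * case the starting entry was unlinked underneath us.
		 */
	} while (pass < 2 && *cursor != head);
	return NULL;
}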
...@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void) ...@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
static void __exit ip_vs_rr_cleanup(void) static void __exit ip_vs_rr_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_rr_init); module_init(ip_vs_rr_init);
......