Commit ceec4c38, authored by Julian Anastasov, committed by Pablo Neira Ayuso

ipvs: convert services to rcu

This is the final step in RCU conversion.

Things that are removed:

- svc->usecnt: now svc is accessed under RCU read lock
- svc->inc: and some unused code
- ip_vs_bind_pe and ip_vs_unbind_pe: no ability to replace PE
- __ip_vs_svc_lock: replaced with RCU
- IP_VS_WAIT_WHILE: readers now look up svcs and dests under
	RCU and work in parallel with configuration

Other changes:

- previously, an RCU read-side critical section covered the call
to the schedule method; now it is extended to also cover the
service lookup
- ip_vs_svc_table and ip_vs_svc_fwm_table are now using hlist
- svc->pe and svc->scheduler remain set until the end of the grace
	period. The schedulers are prepared for such RCU readers even
	after done_service is called, but they need to call
	synchronize_rcu because the last ip_vs_scheduler_put
	can happen while RCU read-side critical sections still
	use an outdated svc->scheduler pointer
- as planned, update_service is removed
- empty services can be freed immediately after a grace period.
	If dests were present, the services are freed from
	the dest trash code
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
parent 413c2d04
......@@ -359,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level) do {} while (0)
#endif
#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); }
/*
* The port number of FTP service (in network order).
......@@ -712,10 +710,9 @@ struct ip_vs_dest_user_kern {
* and the forwarding entries
*/
struct ip_vs_service {
struct list_head s_list; /* for normal service table */
struct list_head f_list; /* for fwmark-based service table */
struct hlist_node s_list; /* for normal service table */
struct hlist_node f_list; /* for fwmark-based service table */
atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */
u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
......@@ -730,15 +727,16 @@ struct ip_vs_service {
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
struct ip_vs_stats stats; /* statistics for the service */
struct ip_vs_app *inc; /* bind conns to this app inc */
/* for scheduling */
struct ip_vs_scheduler *scheduler; /* bound scheduler object */
struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
spinlock_t sched_lock; /* lock sched_data */
void *sched_data; /* scheduler application data */
/* alternate persistence engine */
struct ip_vs_pe *pe;
struct ip_vs_pe __rcu *pe;
struct rcu_head rcu_head;
};
/* Information for cached dst */
......@@ -807,8 +805,6 @@ struct ip_vs_scheduler {
int (*init_service)(struct ip_vs_service *svc);
/* scheduling service finish */
void (*done_service)(struct ip_vs_service *svc);
/* scheduler updating service */
int (*update_service)(struct ip_vs_service *svc);
/* dest is linked */
int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
/* dest is unlinked */
......@@ -1344,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
......@@ -1392,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
......@@ -1412,14 +1407,9 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
atomic_dec(&svc->usecnt);
}
extern bool
ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
......
......@@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
{
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = svc->pe;
p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb);
......@@ -296,15 +296,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* Check if a template already exists */
ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) {
struct ip_vs_scheduler *sched;
/*
* No template found or the dest of the connection
* template is not available.
* return *ignored=0 i.e. ICMP and NF_DROP
*/
rcu_read_lock();
dest = svc->scheduler->schedule(svc, skb);
sched = rcu_dereference(svc->scheduler);
dest = sched->schedule(svc, skb);
if (!dest) {
rcu_read_unlock();
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
*ignored = 0;
......@@ -320,7 +321,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* when the template expires */
ct = ip_vs_conn_new(&param, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
rcu_read_unlock();
if (ct == NULL) {
kfree(param.pe_data);
*ignored = -1;
......@@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
unsigned int flags;
......@@ -449,10 +450,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
rcu_read_lock();
dest = svc->scheduler->schedule(svc, skb);
sched = rcu_dereference(svc->scheduler);
dest = sched->schedule(svc, skb);
if (dest == NULL) {
rcu_read_unlock();
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
}
......@@ -473,7 +473,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
rcu_read_unlock();
if (!cp) {
*ignored = -1;
return NULL;
......@@ -510,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
}
......@@ -536,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_CONN_F_ONE_PACKET : 0;
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
ip_vs_service_put(svc);
/* create a new connection entry */
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
......@@ -574,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
ip_vs_service_put(svc);
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
return NF_ACCEPT;
}
ip_vs_service_put(svc);
/*
* Notify the client that the destination is unreachable, and
......
This diff is collapsed.
......@@ -269,6 +269,7 @@ static int __init ip_vs_dh_init(void)
/*
 * Exit handler for the dh scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_dh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
......
......@@ -633,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
synchronize_rcu();
}
......
......@@ -821,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
synchronize_rcu();
}
......
......@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
/*
 * Exit handler for the lc scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_lc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_lc_init);
......
......@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
/*
 * Exit handler for the nq scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_nq_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_nq_init);
......
......@@ -16,18 +16,6 @@ static LIST_HEAD(ip_vs_pe);
/* semaphore for IPVS PEs. */
static DEFINE_MUTEX(ip_vs_pe_mutex);
/* Bind a service with a pe */
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
{
svc->pe = pe;
}
/* Unbind a service from its pe */
void ip_vs_unbind_pe(struct ip_vs_service *svc)
{
svc->pe = NULL;
}
/* Get pe in the pe list by name */
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{
......
......@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL)
return 0;
net = skb_net(skb);
rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) {
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
......@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
ip_vs_service_put(svc);
rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
......@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
else
*verdict = NF_DROP;
}
rcu_read_unlock();
return 0;
}
ip_vs_service_put(svc);
}
rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
......
......@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
}
net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
if (th->syn &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
&iph->daddr, th->dest))) {
(svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, th->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
......@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
ip_vs_service_put(svc);
rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
......@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
else
*verdict = NF_DROP;
}
rcu_read_unlock();
return 0;
}
ip_vs_service_put(svc);
}
rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
......
......@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0;
}
net = skb_net(skb);
svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
&iph->daddr, uh->dest);
rcu_read_lock();
svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, uh->dest);
if (svc) {
int ignored;
......@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
ip_vs_service_put(svc);
rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
......@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
else {
ip_vs_service_put(svc);
else
*verdict = NF_DROP;
}
rcu_read_unlock();
return 0;
}
ip_vs_service_put(svc);
}
rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
......
......@@ -121,6 +121,7 @@ static int __init ip_vs_rr_init(void)
/*
 * Exit handler for the rr scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_rr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_rr_init);
......
......@@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
{
int ret;
svc->scheduler = scheduler;
if (scheduler->init_service) {
ret = scheduler->init_service(svc);
if (ret) {
......@@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
return ret;
}
}
rcu_assign_pointer(svc->scheduler, scheduler);
return 0;
}
......@@ -64,17 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/*
* Unbind a service with its scheduler
*/
void ip_vs_unbind_scheduler(struct ip_vs_service *svc)
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched)
{
struct ip_vs_scheduler *sched = svc->scheduler;
struct ip_vs_scheduler *cur_sched;
if (!sched)
cur_sched = rcu_dereference_protected(svc->scheduler, 1);
/* This check proves that old 'sched' was installed */
if (!cur_sched)
return;
if (sched->done_service)
sched->done_service(svc);
svc->scheduler = NULL;
/* svc->scheduler can not be set to NULL */
}
......@@ -148,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
struct ip_vs_scheduler *sched;
sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
svc->scheduler->name, svc->fwmark,
svc->fwmark, msg);
sched->name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
svc->scheduler->name,
ip_vs_proto_name(svc->protocol),
sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
svc->scheduler->name,
ip_vs_proto_name(svc->protocol),
sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
......
......@@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
/*
 * Exit handler for the sed scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_sed_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_sed_init);
......
......@@ -283,6 +283,7 @@ static int __init ip_vs_sh_init(void)
/*
 * Exit handler for the sh scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_sh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
......
......@@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
/*
 * Exit handler for the wlc scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_wlc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_wlc_init);
......
......@@ -261,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
/*
 * Exit handler for the wrr scheduler: unregister the scheduler, then
 * wait for an RCU grace period before returning.
 */
static void __exit ip_vs_wrr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
/*
 * RCU read-side critical sections may still be using an outdated
 * svc->scheduler pointer when the last ip_vs_scheduler_put happens,
 * so wait for those readers to finish.
 */
synchronize_rcu();
}
module_init(ip_vs_wrr_init);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment