Commit 026ace06, authored by Julian Anastasov and committed by Pablo Neira Ayuso

ipvs: optimize dst usage for real server

Currently, when forwarding requests to real servers,
we use dst_lock and atomic operations when cloning the
dst_cache value. As the dst_cache value does not change
most of the time, it is better to use RCU and to take
dst_lock only when we need to replace an obsolete dst.
For this to work we keep dst_cache in a new structure protected
by RCU. For packets to remote real servers we will use the noref
version of dst_cache; it remains valid while we are in an RCU
read-side critical section because dst_release for replaced
dsts is now invoked only after the grace period. Packets to
local real servers that are passed to the local stack with
NF_ACCEPT need a dst clone.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
parent 4115ded1
......@@ -724,6 +724,13 @@ struct ip_vs_service {
struct ip_vs_pe *pe;
};
/*
 * Information for cached dst.
 *
 * Kept as a separate object so that it can be reached through the __rcu
 * pointer ip_vs_dest.dest_dst and retired with call_rcu() via rcu_head.
 */
struct ip_vs_dest_dst {
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_cookie; /* NOTE(review): presumably the validity cookie for dst_cache - confirm */
union nf_inet_addr dst_saddr; /* NOTE(review): presumably the source address cached with dst_cache - confirm */
struct rcu_head rcu_head; /* handed to call_rcu(); ip_vs_dest_dst_rcu_free() maps it back to this struct */
};
/*
* The real server destination forwarding entry
......@@ -752,9 +759,7 @@ struct ip_vs_dest {
/* for destination cache */
spinlock_t dst_lock; /* lock of dst_cache */
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_cookie;
union nf_inet_addr dst_saddr;
struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
......@@ -1427,6 +1432,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset,
unsigned int hooknum, struct ip_vs_iphdr *iph);
extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
......
......@@ -1395,10 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
spin_lock(&dest->dst_lock);
if (dest->dst_cache)
mtu = dst_mtu(dest->dst_cache);
spin_unlock(&dest->dst_lock);
struct ip_vs_dest_dst *dest_dst;
rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
......
......@@ -641,15 +641,26 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
return dest;
}
/*
 * RCU callback for a retired cached-dst entry.
 *
 * @head: the rcu_head embedded in the struct ip_vs_dest_dst being retired.
 *
 * Releases the dst held in dst_cache and then frees the entry itself.
 * It is queued with call_rcu(), so it is invoked after the grace period
 * rather than directly by the code that unpublished the entry.
 */
void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest_dst *entry;

	/* Recover the enclosing entry from its embedded rcu_head. */
	entry = container_of(head, struct ip_vs_dest_dst, rcu_head);

	dst_release(entry->dst_cache);
	kfree(entry);
}
/*
 * Release dest_dst and dst_cache for dest in user context.
 *
 * NOTE(review): this listing appears to have lost its diff +/- markers, so
 * the body below seems to interleave two variants of the function: one that
 * clears dest->dst_cache directly and one that retires dest->dest_dst
 * through RCU. Confirm against the real tree before relying on this text.
 */
static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
{
struct dst_entry *old_dst;
struct ip_vs_dest_dst *old;
/* Direct variant: detach the dst stored in dest, release it, clear the saved source address. */
old_dst = dest->dst_cache;
dest->dst_cache = NULL;
dst_release(old_dst);
dest->dst_saddr.ip = 0;
/*
 * RCU variant: fetch the current entry on the update side. The constant
 * condition "1" means no lock is asserted here; presumably the caller
 * serializes updates - TODO confirm.
 */
old = rcu_dereference_protected(dest->dest_dst, 1);
if (old) {
/* Unpublish the entry first, then queue it for ip_vs_dest_dst_rcu_free(). */
RCU_INIT_POINTER(dest->dest_dst, NULL);
call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}
}
/*
......@@ -1513,7 +1524,7 @@ static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
spin_lock_bh(&dest->dst_lock);
if (dest->dst_cache && dest->dst_cache->dev == dev) {
if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment